diff mbox series

[1/1] KVM: selftests: pmu_counters_test: increase robustness of LLC cache misses

Message ID 20240621204305.1730677-2-mlevitsk@redhat.com
State New
Headers show
Series KVM: selftests: pmu_counters_test: increase robustness of LLC cache misses | expand

Commit Message

Maxim Levitsky June 21, 2024, 8:43 p.m. UTC
Currently this test does a single CLFLUSH on its memory location
but due to speculative execution this might not cause LLC misses.

Instead, do a cache flush on each loop iteration to confuse the prediction
and make sure that cache misses always occur.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 .../selftests/kvm/x86_64/pmu_counters_test.c  | 20 +++++++++----------
 1 file changed, 9 insertions(+), 11 deletions(-)

Comments

Sean Christopherson June 27, 2024, 5:42 p.m. UTC | #1
On Fri, Jun 21, 2024, Maxim Levitsky wrote:
> Currently this test does a single CLFLUSH on its memory location
> but due to speculative execution this might not cause LLC misses.
> 
> Instead, do a cache flush on each loop iteration to confuse the prediction
> and make sure that cache misses always occur.
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  .../selftests/kvm/x86_64/pmu_counters_test.c  | 20 +++++++++----------
>  1 file changed, 9 insertions(+), 11 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> index 96446134c00b7..ddc0b7e4a888e 100644
> --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> @@ -14,8 +14,8 @@
>   * instructions that are needed to set up the loop and then disabled the
>   * counter.  1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
>   */
> -#define NUM_EXTRA_INSNS		7
> -#define NUM_INSNS_RETIRED	(NUM_BRANCHES + NUM_EXTRA_INSNS)
> +#define NUM_EXTRA_INSNS		5
> +#define NUM_INSNS_RETIRED	(NUM_BRANCHES * 2 + NUM_EXTRA_INSNS)

The comment above is stale.  I also think it's worth adding a macro to capture
that the '2' comes from having two instructions in the loop body (three, if we
keep the MFENCE).

>  static uint8_t kvm_pmu_version;
>  static bool kvm_has_perf_caps;
> @@ -133,9 +133,8 @@ static void guest_assert_event_count(uint8_t idx,
>   * doesn't need to be clobbered as the input value, @pmc_msr, is restored
>   * before the end of the sequence.
>   *
> - * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the
> - * start of the loop to force LLC references and misses, i.e. to allow testing
> - * that those events actually count.
> + * If CLFUSH{,OPT} is supported, flush the cacheline containing the CLFUSH{,OPT}
> + * instruction on each loop iteration to ensure that LLC cache misses happen.
>   *
>   * If forced emulation is enabled (and specified), force emulation on a subset
>   * of the measured code to verify that KVM correctly emulates instructions and
> @@ -145,10 +144,9 @@ static void guest_assert_event_count(uint8_t idx,
>  #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
>  do {										\
>  	__asm__ __volatile__("wrmsr\n\t"					\
> -			     clflush "\n\t"					\
> -			     "mfence\n\t"					\

Based on your testing, it's probably ok to drop the mfence, but I don't see any
reason to do so.  It's not like that mfence meaningfully affects the runtime, and
anything easy/free we can do to avoid flaky tests is worth doing.

I'll post and apply a v2, with a prep patch to add a NUM_INSNS_PER_LOOP macro and
keep the MFENCE (I'll be offline all of next week, and don't want to push anything
to -next tomorrow, even though the risk of breaking anything is minimal).

> -			     "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"	\
> -			     FEP "loop .\n\t"					\
> +			     " mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"	\
> +			     "1: " clflush "\n\t"				\
> +			     FEP "loop 1b\n\t"					\
>  			     FEP "mov %%edi, %%ecx\n\t"				\
>  			     FEP "xor %%eax, %%eax\n\t"				\
>  			     FEP "xor %%edx, %%edx\n\t"				\
> @@ -163,9 +161,9 @@ do {										\
>  	wrmsr(pmc_msr, 0);							\
>  										\
>  	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
> -		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP);	\
> +		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);	\
>  	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
> -		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP);	\
> +		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);	\
>  	else									\
>  		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
>  										\
> -- 
> 2.26.3
>
Maxim Levitsky July 5, 2024, 2:48 a.m. UTC | #2
On Thu, 2024-06-27 at 10:42 -0700, Sean Christopherson wrote:
> On Fri, Jun 21, 2024, Maxim Levitsky wrote:
> > Currently this test does a single CLFLUSH on its memory location
> > but due to speculative execution this might not cause LLC misses.
> > 
> > Instead, do a cache flush on each loop iteration to confuse the prediction
> > and make sure that cache misses always occur.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  .../selftests/kvm/x86_64/pmu_counters_test.c  | 20 +++++++++----------
> >  1 file changed, 9 insertions(+), 11 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> > index 96446134c00b7..ddc0b7e4a888e 100644
> > --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> > +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> > @@ -14,8 +14,8 @@
> >   * instructions that are needed to set up the loop and then disabled the
> >   * counter.  1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
> >   */
> > -#define NUM_EXTRA_INSNS		7
> > -#define NUM_INSNS_RETIRED	(NUM_BRANCHES + NUM_EXTRA_INSNS)
> > +#define NUM_EXTRA_INSNS		5
> > +#define NUM_INSNS_RETIRED	(NUM_BRANCHES * 2 + NUM_EXTRA_INSNS)
> 
> The comment above is stale.  I also think it's worth adding a macro to capture
> that the '2' comes from having two instructions in the loop body (three, if we
> keep the MFENCE).

True, my mistake.

> 
> >  static uint8_t kvm_pmu_version;
> >  static bool kvm_has_perf_caps;
> > @@ -133,9 +133,8 @@ static void guest_assert_event_count(uint8_t idx,
> >   * doesn't need to be clobbered as the input value, @pmc_msr, is restored
> >   * before the end of the sequence.
> >   *
> > - * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the
> > - * start of the loop to force LLC references and misses, i.e. to allow testing
> > - * that those events actually count.
> > + * If CLFUSH{,OPT} is supported, flush the cacheline containing the CLFUSH{,OPT}
> > + * instruction on each loop iteration to ensure that LLC cache misses happen.
> >   *
> >   * If forced emulation is enabled (and specified), force emulation on a subset
> >   * of the measured code to verify that KVM correctly emulates instructions and
> > @@ -145,10 +144,9 @@ static void guest_assert_event_count(uint8_t idx,
> >  #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
> >  do {										\
> >  	__asm__ __volatile__("wrmsr\n\t"					\
> > -			     clflush "\n\t"					\
> > -			     "mfence\n\t"					\
> 
> Based on your testing, it's probably ok to drop the mfence, but I don't see any
> reason to do so.  It's not like that mfence meaningfully affects the runtime, and
> anything easy/free we can do to avoid flaky tests is worth doing.

Hi,

I just didn't want to add another instruction to the loop, since in theory
that will slow the test down.
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
index 96446134c00b7..ddc0b7e4a888e 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -14,8 +14,8 @@ 
  * instructions that are needed to set up the loop and then disabled the
  * counter.  1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
  */
-#define NUM_EXTRA_INSNS		7
-#define NUM_INSNS_RETIRED	(NUM_BRANCHES + NUM_EXTRA_INSNS)
+#define NUM_EXTRA_INSNS		5
+#define NUM_INSNS_RETIRED	(NUM_BRANCHES * 2 + NUM_EXTRA_INSNS)
 
 static uint8_t kvm_pmu_version;
 static bool kvm_has_perf_caps;
@@ -133,9 +133,8 @@  static void guest_assert_event_count(uint8_t idx,
  * doesn't need to be clobbered as the input value, @pmc_msr, is restored
  * before the end of the sequence.
  *
- * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the
- * start of the loop to force LLC references and misses, i.e. to allow testing
- * that those events actually count.
+ * If CLFUSH{,OPT} is supported, flush the cacheline containing the CLFUSH{,OPT}
+ * instruction on each loop iteration to ensure that LLC cache misses happen.
  *
  * If forced emulation is enabled (and specified), force emulation on a subset
  * of the measured code to verify that KVM correctly emulates instructions and
@@ -145,10 +144,9 @@  static void guest_assert_event_count(uint8_t idx,
 #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
 do {										\
 	__asm__ __volatile__("wrmsr\n\t"					\
-			     clflush "\n\t"					\
-			     "mfence\n\t"					\
-			     "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"	\
-			     FEP "loop .\n\t"					\
+			     " mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"	\
+			     "1: " clflush "\n\t"				\
+			     FEP "loop 1b\n\t"					\
 			     FEP "mov %%edi, %%ecx\n\t"				\
 			     FEP "xor %%eax, %%eax\n\t"				\
 			     FEP "xor %%edx, %%edx\n\t"				\
@@ -163,9 +161,9 @@  do {										\
 	wrmsr(pmc_msr, 0);							\
 										\
 	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
-		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP);	\
+		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);	\
 	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
-		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP);	\
+		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);	\
 	else									\
 		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
 										\