diff mbox series

[v4,14/57] tcg/i386: Add have_atomic16

Message ID 20230503070656.1746170-15-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg: Improve atomicity support | expand

Commit Message

Richard Henderson May 3, 2023, 7:06 a.m. UTC
Notice when Intel or AMD have guaranteed that vmovdqa is atomic.
The new variable will also be used in generated code.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/cpuid.h      | 18 ++++++++++++++++++
 tcg/i386/tcg-target.h     |  1 +
 tcg/i386/tcg-target.c.inc | 27 +++++++++++++++++++++++++++
 3 files changed, 46 insertions(+)

Comments

Peter Maydell May 5, 2023, 10:34 a.m. UTC | #1
On Wed, 3 May 2023 at 08:10, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Notice when Intel or AMD have guaranteed that vmovdqa is atomic.
> The new variable will also be used in generated code.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/qemu/cpuid.h      | 18 ++++++++++++++++++
>  tcg/i386/tcg-target.h     |  1 +
>  tcg/i386/tcg-target.c.inc | 27 +++++++++++++++++++++++++++
>  3 files changed, 46 insertions(+)
>
> diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
> index 1451e8ef2f..35325f1995 100644
> --- a/include/qemu/cpuid.h
> +++ b/include/qemu/cpuid.h
> @@ -71,6 +71,24 @@
>  #define bit_LZCNT       (1 << 5)
>  #endif
>
> +/*
> + * Signatures for different CPU implementations as returned from Leaf 0.
> + */
> +
> +#ifndef signature_INTEL_ecx
> +/* "Genu" "ineI" "ntel" */
> +#define signature_INTEL_ebx     0x756e6547
> +#define signature_INTEL_edx     0x49656e69
> +#define signature_INTEL_ecx     0x6c65746e
> +#endif
> +
> +#ifndef signature_AMD_ecx
> +/* "Auth" "enti" "cAMD" */
> +#define signature_AMD_ebx       0x68747541
> +#define signature_AMD_edx       0x69746e65
> +#define signature_AMD_ecx       0x444d4163
> +#endif

> @@ -4024,6 +4025,32 @@ static void tcg_target_init(TCGContext *s)
>                      have_avx512dq = (b7 & bit_AVX512DQ) != 0;
>                      have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
>                  }
> +
> +                /*
> +                 * The Intel SDM has added:
> +                 *   Processors that enumerate support for Intel® AVX
> +                 *   (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
> +                 *   guarantee that the 16-byte memory operations performed
> +                 *   by the following instructions will always be carried
> +                 *   out atomically:
> +                 *   - MOVAPD, MOVAPS, and MOVDQA.
> +                 *   - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
> +                 *   - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
> +                 *     with EVEX.128 and k0 (masking disabled).
> +                 * Note that these instructions require the linear addresses
> +                 * of their memory operands to be 16-byte aligned.
> +                 *
> +                 * AMD has provided an even stronger guarantee that processors
> +                 * with AVX provide 16-byte atomicity for all cacheable,
> +                 * naturally aligned single loads and stores, e.g. MOVDQU.
> +                 *
> +                 * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
> +                 */
> +                if (have_avx1) {
> +                    __cpuid(0, a, b, c, d);
> +                    have_atomic16 = (c == signature_INTEL_ecx ||
> +                                     c == signature_AMD_ecx);
> +                }

If the signature is 3 words why are we only checking one here?

thanks
-- PMM
Richard Henderson May 8, 2023, 1:41 p.m. UTC | #2
On 5/5/23 11:34, Peter Maydell wrote:
> On Wed, 3 May 2023 at 08:10, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> Notice when Intel or AMD have guaranteed that vmovdqa is atomic.
>> The new variable will also be used in generated code.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   include/qemu/cpuid.h      | 18 ++++++++++++++++++
>>   tcg/i386/tcg-target.h     |  1 +
>>   tcg/i386/tcg-target.c.inc | 27 +++++++++++++++++++++++++++
>>   3 files changed, 46 insertions(+)
>>
>> diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
>> index 1451e8ef2f..35325f1995 100644
>> --- a/include/qemu/cpuid.h
>> +++ b/include/qemu/cpuid.h
>> @@ -71,6 +71,24 @@
>>   #define bit_LZCNT       (1 << 5)
>>   #endif
>>
>> +/*
>> + * Signatures for different CPU implementations as returned from Leaf 0.
>> + */
>> +
>> +#ifndef signature_INTEL_ecx
>> +/* "Genu" "ineI" "ntel" */
>> +#define signature_INTEL_ebx     0x756e6547
>> +#define signature_INTEL_edx     0x49656e69
>> +#define signature_INTEL_ecx     0x6c65746e
>> +#endif
>> +
>> +#ifndef signature_AMD_ecx
>> +/* "Auth" "enti" "cAMD" */
>> +#define signature_AMD_ebx       0x68747541
>> +#define signature_AMD_edx       0x69746e65
>> +#define signature_AMD_ecx       0x444d4163
>> +#endif
> 
>> @@ -4024,6 +4025,32 @@ static void tcg_target_init(TCGContext *s)
>>                       have_avx512dq = (b7 & bit_AVX512DQ) != 0;
>>                       have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
>>                   }
>> +
>> +                /*
>> +                 * The Intel SDM has added:
>> +                 *   Processors that enumerate support for Intel® AVX
>> +                 *   (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
>> +                 *   guarantee that the 16-byte memory operations performed
>> +                 *   by the following instructions will always be carried
>> +                 *   out atomically:
>> +                 *   - MOVAPD, MOVAPS, and MOVDQA.
>> +                 *   - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
>> +                 *   - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
>> +                 *     with EVEX.128 and k0 (masking disabled).
>> +                 * Note that these instructions require the linear addresses
>> +                 * of their memory operands to be 16-byte aligned.
>> +                 *
>> +                 * AMD has provided an even stronger guarantee that processors
>> +                 * with AVX provide 16-byte atomicity for all cacheable,
>> +                 * naturally aligned single loads and stores, e.g. MOVDQU.
>> +                 *
>> +                 * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
>> +                 */
>> +                if (have_avx1) {
>> +                    __cpuid(0, a, b, c, d);
>> +                    have_atomic16 = (c == signature_INTEL_ecx ||
>> +                                     c == signature_AMD_ecx);
>> +                }
> 
> If the signature is 3 words why are we only checking one here?

Because one is sufficient.  I don't know why the signature is 3 words and not 1.


r~
diff mbox series

Patch

diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
index 1451e8ef2f..35325f1995 100644
--- a/include/qemu/cpuid.h
+++ b/include/qemu/cpuid.h
@@ -71,6 +71,24 @@ 
 #define bit_LZCNT       (1 << 5)
 #endif
 
+/*
+ * Signatures for different CPU implementations as returned from Leaf 0.
+ */
+
+#ifndef signature_INTEL_ecx
+/* "Genu" "ineI" "ntel" */
+#define signature_INTEL_ebx     0x756e6547
+#define signature_INTEL_edx     0x49656e69
+#define signature_INTEL_ecx     0x6c65746e
+#endif
+
+#ifndef signature_AMD_ecx
+/* "Auth" "enti" "cAMD" */
+#define signature_AMD_ebx       0x68747541
+#define signature_AMD_edx       0x69746e65
+#define signature_AMD_ecx       0x444d4163
+#endif
+
 static inline unsigned xgetbv_low(unsigned c)
 {
     unsigned a, d;
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index d4f2a6f8c2..0421776cb8 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -120,6 +120,7 @@  extern bool have_avx512dq;
 extern bool have_avx512vbmi2;
 extern bool have_avx512vl;
 extern bool have_movbe;
+extern bool have_atomic16;
 
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32         1
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index bb603e7968..f838683fc3 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -185,6 +185,7 @@  bool have_avx512dq;
 bool have_avx512vbmi2;
 bool have_avx512vl;
 bool have_movbe;
+bool have_atomic16;
 
 #ifdef CONFIG_CPUID_H
 static bool have_bmi2;
@@ -4024,6 +4025,32 @@  static void tcg_target_init(TCGContext *s)
                     have_avx512dq = (b7 & bit_AVX512DQ) != 0;
                     have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
                 }
+
+                /*
+                 * The Intel SDM has added:
+                 *   Processors that enumerate support for Intel® AVX
+                 *   (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
+                 *   guarantee that the 16-byte memory operations performed
+                 *   by the following instructions will always be carried
+                 *   out atomically:
+                 *   - MOVAPD, MOVAPS, and MOVDQA.
+                 *   - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
+                 *   - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
+                 *     with EVEX.128 and k0 (masking disabled).
+                 * Note that these instructions require the linear addresses
+                 * of their memory operands to be 16-byte aligned.
+                 *
+                 * AMD has provided an even stronger guarantee that processors
+                 * with AVX provide 16-byte atomicity for all cacheable,
+                 * naturally aligned single loads and stores, e.g. MOVDQU.
+                 *
+                 * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
+                 */
+                if (have_avx1) {
+                    __cpuid(0, a, b, c, d);
+                    have_atomic16 = (c == signature_INTEL_ecx ||
+                                     c == signature_AMD_ecx);
+                }
             }
         }
     }