diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
--- a/include/qemu/atomic128.h
+++ b/include/qemu/atomic128.h
@@ -127,6 +127,50 @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
: [l] "r"(l), [h] "r"(h));
}
+# define HAVE_ATOMIC128 1
+#elif !defined(CONFIG_USER_ONLY) && defined(__x86_64__)
+/*
+ * The latest Intel SDM has added:
+ * Processors that enumerate support for Intel® AVX (by setting
+ * the feature flag CPUID.01H:ECX.AVX[bit 28]) guarantee that the
+ * 16-byte memory operations performed by the following instructions
+ * will always be carried out atomically:
+ * - MOVAPD, MOVAPS, and MOVDQA.
+ * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
+ * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
+ * with EVEX.128 and k0 (masking disabled).
+ * Note that these instructions require the linear addresses of their
+ * memory operands to be 16-byte aligned.
+ *
+ * We do not yet have a similar guarantee from AMD, so we detect this
+ * at runtime rather than assuming it when __AVX__ is defined.
+ */
+extern bool have_atomic128;
+
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+ Int128 ret;
+ if (have_atomic128) {
+ asm("vmovdqa %1, %0" : "=x" (ret) : "m" (*ptr));
+ } else {
+ ret = atomic16_cmpxchg(ptr, 0, 0);
+ }
+ return ret;
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+ if (have_atomic128) {
+ asm("vmovdqa %1, %0" : "=m" (*ptr) : "x" (val));
+ } else {
+ Int128 old = *ptr, cmp;
+ do {
+ cmp = old;
+ old = atomic16_cmpxchg(ptr, cmp, val);
+ } while (old != cmp);
+ }
+}
+
# define HAVE_ATOMIC128 1
#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
static inline Int128 atomic16_read(Int128 *ptr)
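(Illustrative aside, not part of the patch: a caller would normally gate
on HAVE_ATOMIC128 at compile time and let atomic16_read() pick between
the vmovdqa fast path and the cmpxchg16b fallback at runtime.  A minimal
sketch assuming QEMU's environment; the helper name is hypothetical:)

    /* Hypothetical QEMU-style caller: HAVE_ATOMIC128 is the
     * compile-time gate, have_atomic128 the runtime one (tested
     * inside atomic16_read itself). */
    static Int128 load16(Int128 *ptr)
    {
        if (HAVE_ATOMIC128) {
            return atomic16_read(ptr);
        }
        g_assert_not_reached();  /* caller must fall back to a lock */
    }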
diff --git a/util/atomic128.c b/util/atomic128.c
new file mode 100644
--- /dev/null
+++ b/util/atomic128.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2022, Linaro Ltd.
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/atomic128.h"
+
+#ifdef __x86_64__
+#include "qemu/cpuid.h"
+
+#ifndef signature_INTEL_ecx
+/* "Genu ineI ntel" */
+#define signature_INTEL_ebx 0x756e6547
+#define signature_INTEL_edx 0x49656e69
+#define signature_INTEL_ecx 0x6c65746e
+#endif
+
+/*
+ * The latest Intel SDM has added:
+ * Processors that enumerate support for Intel® AVX (by setting
+ * the feature flag CPUID.01H:ECX.AVX[bit 28]) guarantee that the
+ * 16-byte memory operations performed by the following instructions
+ * will always be carried out atomically:
+ * - MOVAPD, MOVAPS, and MOVDQA.
+ * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
+ * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
+ * with EVEX.128 and k0 (masking disabled).
+ * Note that these instructions require the linear addresses of their
+ * memory operands to be 16-byte aligned.
+ *
+ * We do not yet have a similar guarantee from AMD, so we detect this
+ * at runtime rather than assuming it when __AVX__ is defined.
+ */
+bool have_atomic128;
+
+static void __attribute__((constructor))
+init_have_atomic128(void)
+{
+ unsigned int a, b, c, d, xcrl, xcrh;
+
+ __cpuid(0, a, b, c, d);
+ if (a < 1) {
+ return; /* CPUID leaf 1 (feature flags, incl. AVX) not present */
+ }
+ if (c != signature_INTEL_ecx) {
+ return; /* Not an Intel product */
+ }
+
+ __cpuid(1, a, b, c, d);
+ if ((c & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) {
+ return; /* AVX not present or XSAVE not enabled by OS */
+ }
+
+ /*
+ * The xgetbv instruction is not available to older versions of
+ * the assembler, so we encode the instruction manually.
+ */
+ asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
+ if ((xcrl & 6) != 6) {
+ return; /* AVX not enabled by OS */
+ }
+
+ have_atomic128 = true;
+}
+#endif /* __x86_64__ */
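(Illustrative aside, not part of the patch: the same detection sequence
can be compiled standalone to sanity-check a host.  This sketch assumes
GCC's <cpuid.h>, which provides __get_cpuid, bit_AVX and bit_OSXSAVE;
the output text is arbitrary.)

    #include <stdio.h>
    #include <stdbool.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned a, b, c, d, xcrl, xcrh;
        bool ok = false;

        /* __get_cpuid(1, ...) returns 0 when leaf 1 is unsupported,
         * which covers the max-leaf check done in the patch. */
        if (__get_cpuid(0, &a, &b, &c, &d)
            && c == 0x6c65746e                  /* "ntel" */
            && __get_cpuid(1, &a, &b, &c, &d)
            && (c & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE)) {
            /* xgetbv(0): XCR0 bit 1 = SSE state, bit 2 = AVX state. */
            asm(".byte 0x0f, 0x01, 0xd0"
                : "=a" (xcrl), "=d" (xcrh) : "c" (0));
            ok = (xcrl & 6) == 6;
        }
        (void)xcrh;
        printf("16-byte vmovdqa atomicity usable: %s\n",
               ok ? "yes" : "no");
        return 0;
    }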
diff --git a/util/meson.build b/util/meson.build
--- a/util/meson.build
+++ b/util/meson.build
@@ -2,6 +2,7 @@ util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
if not config_host_data.get('CONFIG_ATOMIC64')
util_ss.add(files('atomic64.c'))
endif
+util_ss.add(when: 'CONFIG_SOFTMMU', if_true: files('atomic128.c'))
util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
if config_host_data.get('CONFIG_EPOLL_CREATE1')
Intel has now given guarantees about the atomicity of SSE read and
write instructions on cpus supporting AVX.  We can use these instead
of the much slower cmpxchg16b.

Derived from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/atomic128.h | 44 ++++++++++++++++++++++++++
 util/atomic128.c         | 67 ++++++++++++++++++++++++++++++++++++++++
 util/meson.build         |  1 +
 3 files changed, 112 insertions(+)
 create mode 100644 util/atomic128.c
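(Illustrative aside on the "much slower" claim: even uncontended,
lock cmpxchg16b is a full read-modify-write, so emulating a 16-byte
load with it takes the cache line exclusive, while a vmovdqa load does
not.  A rough single-threaded micro-benchmark sketch, not part of the
patch; the iteration count is arbitrary, and it assumes GCC on an
AVX-capable host, built with "gcc -O2 -mavx -mcx16 bench.c":)

    #include <stdio.h>
    #include <time.h>

    static __int128 slot __attribute__((aligned(16)));

    static double secs(struct timespec a, struct timespec b)
    {
        return (b.tv_sec - a.tv_sec) + (b.tv_nsec - a.tv_nsec) * 1e-9;
    }

    int main(void)
    {
        enum { N = 100000000 };
        struct timespec t0, t1, t2;
        __int128 acc = 0;

        clock_gettime(CLOCK_MONOTONIC, &t0);
        for (long i = 0; i < N; i++) {
            __int128 v;
            /* Atomic 16-byte load per the AVX guarantee. */
            asm volatile("vmovdqa %1, %0" : "=x" (v) : "m" (slot));
            acc += v;
        }
        clock_gettime(CLOCK_MONOTONIC, &t1);
        for (long i = 0; i < N; i++) {
            /* Emulate the same load with a locked compare-and-swap;
             * -mcx16 lets GCC expand this to cmpxchg16b inline. */
            acc += __sync_val_compare_and_swap(&slot, 0, 0);
        }
        clock_gettime(CLOCK_MONOTONIC, &t2);

        printf("vmovdqa:    %.3f s\n", secs(t0, t1));
        printf("cmpxchg16b: %.3f s\n", secs(t1, t2));
        return (int)acc;  /* keep the loops live */
    }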