new file mode 100644
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Load/store for 128-bit atomic operations, LoongArch version.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef LOONGARCH_ATOMIC128_LDST_H
+#define LOONGARCH_ATOMIC128_LDST_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+#define HAVE_ATOMIC128_RO likely(cpuinfo & CPUINFO_LSX)
+#define HAVE_ATOMIC128_RW HAVE_ATOMIC128_RO
+
+/*
+ * As of gcc 13 and clang 16, there is no compiler support for LSX at all.
+ * Use inline assembly throughout.
+ */
+
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
+{
+ uint64_t l, h;
+
+ tcg_debug_assert(HAVE_ATOMIC128_RO);
+ asm("vld $vr0, %2, 0\n\t"
+ "vpickve2gr.d %0, $vr0, 0\n\t"
+ "vpickve2gr.d %1, $vr0, 1"
+ : "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "f0");
+
+ return int128_make128(l, h);
+}
+
+static inline Int128 atomic16_read_rw(Int128 *ptr)
+{
+ return atomic16_read_ro(ptr);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+ uint64_t l = int128_getlo(val), h = int128_gethi(val);
+
+ tcg_debug_assert(HAVE_ATOMIC128_RW);
+ asm("vinsgr2vr.d $vr0, %1, 0\n\t"
+ "vinsgr2vr.d $vr0, %2, 1\n\t"
+ "vst $vr0, %3, 0"
+ : "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "f0");
+}
+
+#endif /* LOONGARCH_ATOMIC128_LDST_H */
new file mode 100644
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, LoongArch version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+#define LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
+{
+ uintptr_t pi = (uintptr_t)pv;
+ Int128 *ptr_align = (Int128 *)(pi & ~7);
+ int shr = (pi & 7) * 8;
+ uint64_t l, h;
+
+ tcg_debug_assert(HAVE_ATOMIC128_RO);
+ asm("vld $vr0, %2, 0\n\t"
+ "vpickve2gr.d %0, $vr0, 0\n\t"
+ "vpickve2gr.d %1, $vr0, 1"
+ : "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0");
+
+ return (l >> shr) | (h << (-shr & 63));
+}
+
+#endif /* LOONGARCH_LOAD_EXTRACT_AL16_AL8_H */
new file mode 100644
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, LoongArch version.
+ */
+
+#ifndef LOONGARCH_STORE_INSERT_AL16_H
+#define LOONGARCH_STORE_INSERT_AL16_H
+
+void store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+ QEMU_ERROR("unsupported atomic");
+
+#endif /* LOONGARCH_STORE_INSERT_AL16_H */
While loongarch64 does not have a 128-bit cmpxchg, it does have 128-bit atomic load and store via the vector unit. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20230916220151.526140-6-richard.henderson@linaro.org> --- .../include/loongarch64/host/atomic128-ldst.h | 52 +++++++++++++++++++ .../loongarch64/host/load-extract-al16-al8.h | 39 ++++++++++++++ .../loongarch64/host/store-insert-al16.h | 12 +++++ 3 files changed, 103 insertions(+) create mode 100644 host/include/loongarch64/host/atomic128-ldst.h create mode 100644 host/include/loongarch64/host/load-extract-al16-al8.h create mode 100644 host/include/loongarch64/host/store-insert-al16.h