[v3,4/5] tcg: Add host memory barriers to cpu_ldst.h interfaces

Message ID 20230619142333.429028-5-richard.henderson@linaro.org
State Superseded
Series: tcg: Issue memory barriers for guest memory model

Commit Message

Richard Henderson June 19, 2023, 2:23 p.m. UTC
Bring the helpers into line with the rest of tcg in respecting
guest memory ordering.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/internal.h  | 34 ++++++++++++++++++++++++++++++++++
 accel/tcg/cputlb.c    | 10 ++++++++++
 accel/tcg/user-exec.c | 10 ++++++++++
 3 files changed, 54 insertions(+)
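
For context, a minimal standalone sketch of the filtering idea behind tcg_req_mo()/cpu_req_mo(): keep only the ordering bits the guest memory model demands that the host does not already guarantee, and issue a full barrier only if any survive. The MO_* bit values and the guest/host masks below are illustrative placeholders (modelled on an x86-like host, which only permits store-to-load reordering), not the real tcg.h / tcg-target.h definitions.

/* Standalone sketch; MO_* values and the example masks are stand-ins
 * for the real TCG_MO_*, TCG_GUEST_DEFAULT_MO and TCG_TARGET_DEFAULT_MO. */
#include <stdio.h>

#define MO_LD_LD  0x01    /* order load  -> load  */
#define MO_ST_LD  0x02    /* order store -> load  */
#define MO_LD_ST  0x04    /* order load  -> store */
#define MO_ST_ST  0x08    /* order store -> store */
#define MO_ALL    0x0f

/* Hypothetical case: guest requires strict ordering, host is x86-like
 * and already enforces everything except store -> load. */
#define GUEST_MO  MO_ALL
#define HOST_MO   (MO_ALL & ~MO_ST_LD)

/* Mirrors tcg_req_mo(): guest-required bits not covered by the host. */
#define req_mo(type)   ((type) & GUEST_MO & ~HOST_MO)

/* Mirrors cpu_req_mo(): issue a host barrier only when required. */
#define cpu_mo(type)   do { if (req_mo(type)) puts("smp_mb()"); } while (0)

int main(void)
{
    cpu_mo(MO_LD_LD | MO_ST_LD);  /* before a load: prints "smp_mb()" */
    cpu_mo(MO_LD_ST | MO_ST_ST);  /* before a store: no barrier needed */
    return 0;
}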

Comments

Philippe Mathieu-Daudé June 20, 2023, 2:22 p.m. UTC | #1
On 19/6/23 16:23, Richard Henderson wrote:
> Bring the helpers into line with the rest of tcg in respecting
> guest memory ordering.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   accel/tcg/internal.h  | 34 ++++++++++++++++++++++++++++++++++
>   accel/tcg/cputlb.c    | 10 ++++++++++
>   accel/tcg/user-exec.c | 10 ++++++++++
>   3 files changed, 54 insertions(+)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>

Patch

diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 24f225cac7..be0c7753fb 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -78,4 +78,38 @@  extern int64_t max_advance;
 
 extern bool one_insn_per_tb;
 
+/**
+ * tcg_req_mo:
+ * @type: TCGBar
+ *
+ * Filter @type to the barrier that is required for the guest
+ * memory ordering vs the host memory ordering.  A non-zero
+ * result indicates that some barrier is required.
+ *
+ * If TCG_GUEST_DEFAULT_MO is not defined, assume that the
+ * guest requires strict ordering.
+ *
+ * This is a macro so that it's constant even without optimization.
+ */
+#ifdef TCG_GUEST_DEFAULT_MO
+# define tcg_req_mo(type) \
+    ((type) & TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO)
+#else
+# define tcg_req_mo(type) ((type) & ~TCG_TARGET_DEFAULT_MO)
+#endif
+
+/**
+ * cpu_req_mo:
+ * @type: TCGBar
+ *
+ * If tcg_req_mo indicates a barrier for @type is required
+ * for the guest memory model, issue a host memory barrier.
+ */
+#define cpu_req_mo(type)          \
+    do {                          \
+        if (tcg_req_mo(type)) {   \
+            smp_mb();             \
+        }                         \
+    } while (0)
+
 #endif /* ACCEL_TCG_INTERNAL_H */
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 5e2ca47243..a48e1c9693 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2342,6 +2342,7 @@  static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
     MMULookupLocals l;
     bool crosspage;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
     tcg_debug_assert(!crosspage);
 
@@ -2363,6 +2364,7 @@  static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
     uint16_t ret;
     uint8_t a, b;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
     if (likely(!crosspage)) {
         return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
@@ -2393,6 +2395,7 @@  static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
     bool crosspage;
     uint32_t ret;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
     if (likely(!crosspage)) {
         return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
@@ -2420,6 +2423,7 @@  static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
     bool crosspage;
     uint64_t ret;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
     if (likely(!crosspage)) {
         return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
@@ -2472,6 +2476,7 @@  static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
     Int128 ret;
     int first;
 
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
     if (likely(!crosspage)) {
         /* Perform the load host endian. */
@@ -2804,6 +2809,7 @@  void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
     bool crosspage;
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     tcg_debug_assert(!crosspage);
 
@@ -2817,6 +2823,7 @@  static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
     bool crosspage;
     uint8_t a, b;
 
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     if (likely(!crosspage)) {
         do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
@@ -2845,6 +2852,7 @@  static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
     MMULookupLocals l;
     bool crosspage;
 
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     if (likely(!crosspage)) {
         do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
@@ -2872,6 +2880,7 @@  static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
     MMULookupLocals l;
     bool crosspage;
 
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     if (likely(!crosspage)) {
         do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
@@ -2901,6 +2910,7 @@  static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
     uint64_t a, b;
     int first;
 
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
     if (likely(!crosspage)) {
         /* Swap to host endian if necessary, then store. */
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index dc8d6b5d40..ce65021cd4 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -914,6 +914,7 @@  static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
     uint8_t ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_8);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = ldub_p(haddr);
     clear_helper_retaddr();
@@ -947,6 +948,7 @@  static uint16_t do_ld2_mmu(CPUArchState *env, abi_ptr addr,
     uint16_t ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_16);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = load_atom_2(env, ra, haddr, mop);
     clear_helper_retaddr();
@@ -984,6 +986,7 @@  static uint32_t do_ld4_mmu(CPUArchState *env, abi_ptr addr,
     uint32_t ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_32);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = load_atom_4(env, ra, haddr, mop);
     clear_helper_retaddr();
@@ -1021,6 +1024,7 @@  static uint64_t do_ld8_mmu(CPUArchState *env, abi_ptr addr,
     uint64_t ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_64);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = load_atom_8(env, ra, haddr, mop);
     clear_helper_retaddr();
@@ -1052,6 +1056,7 @@  static Int128 do_ld16_mmu(CPUArchState *env, abi_ptr addr,
     Int128 ret;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_128);
+    cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
     ret = load_atom_16(env, ra, haddr, mop);
     clear_helper_retaddr();
@@ -1087,6 +1092,7 @@  static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_8);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
     stb_p(haddr, val);
     clear_helper_retaddr();
@@ -1111,6 +1117,7 @@  static void do_st2_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_16);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
 
     if (mop & MO_BSWAP) {
@@ -1139,6 +1146,7 @@  static void do_st4_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_32);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
 
     if (mop & MO_BSWAP) {
@@ -1167,6 +1175,7 @@  static void do_st8_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_64);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
 
     if (mop & MO_BSWAP) {
@@ -1195,6 +1204,7 @@  static void do_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
     void *haddr;
 
     tcg_debug_assert((mop & MO_SIZE) == MO_128);
+    cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
 
     if (mop & MO_BSWAP) {