diff mbox series

[v1,02/19] target/arm: Use tcg_gen_qemu_ld_i128 for LDXP

Message ID 20230216030854.1212208-3-richard.henderson@linaro.org
State New
Headers show
Series target/arm: Implement FEAT_LSE2 | expand

Commit Message

Richard Henderson Feb. 16, 2023, 3:08 a.m. UTC
While we don't require 16-byte atomicity here, using
a single larger load simplifies the code and makes it
a closer match to STXP.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 78a2141224..d7d4b68328 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -2545,30 +2545,27 @@  static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
             }
         } else {
-            /* The pair must be single-copy atomic for *each* doubleword, not
-               the entire quadword, however it must be quadword aligned.  */
-            TCGv_i64 t0 = tcg_temp_new_i64();
-            TCGv_i64 t1 = tcg_temp_new_i64();
+            /*
+             * The pair must be single-copy atomic for *each* doubleword, not
+             * the entire quadword, however it must be quadword aligned.
+             * Expose the complete load to tcg, for ease of tlb lookup,
+             * but indicate that only 8-byte atomicity is required.
+             */
+            TCGv_i128 t16 = tcg_temp_new_i128();
 
-            memop |= MO_64;
-            tcg_gen_qemu_ld_i64(t0, addr, idx, memop | MO_ALIGN_16);
+            memop |= MO_128 | MO_ALIGN_16 | MO_ATMAX_8;
+            tcg_gen_qemu_ld_i128(t16, addr, idx, memop);
 
-            tcg_gen_addi_i64(t1, addr, 8);
-            tcg_gen_qemu_ld_i64(t1, t1, idx, memop);
+            tcg_gen_extr_i128_i64(cpu_exclusive_val, cpu_exclusive_high, t16);
+            tcg_temp_free_i128(t16);
 
             if (s->be_data == MO_LE) {
-                tcg_gen_mov_i64(cpu_exclusive_val, t0);
-                tcg_gen_mov_i64(cpu_exclusive_high, t1);
+                tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
+                tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
             } else {
-                tcg_gen_mov_i64(cpu_exclusive_high, t0);
-                tcg_gen_mov_i64(cpu_exclusive_val, t1);
+                tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_high);
+                tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_val);
             }
-
-            tcg_gen_mov_i64(cpu_reg(s, rt), t0);
-            tcg_gen_mov_i64(cpu_reg(s, rt2), t1);
-
-            tcg_temp_free_i64(t0);
-            tcg_temp_free_i64(t1);
         }
     } else {
         memop |= size | MO_ALIGN;