diff mbox series

[v1,09/19] target/arm: Load/store integer pair with one tcg operation

Message ID 20230216030854.1212208-10-richard.henderson@linaro.org
State New
Headers show
Series target/arm: Implement FEAT_LSE2 | expand

Commit Message

Richard Henderson Feb. 16, 2023, 3:08 a.m. UTC
This is required for LSE2, where the pair must be treated
atomically if it does not cross a 16-byte boundary.  However,
it simplifies the code to do this always, and to just use the
unpaired atomicity when LSE2 is not present.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 77 ++++++++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 16 deletions(-)

Comments

Peter Maydell Feb. 23, 2023, 3:57 p.m. UTC | #1
On Thu, 16 Feb 2023 at 03:10, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> This is required for LSE2, where the pair must be treated
> atomically if it does not cross a 16-byte boundary.  However,
> it simplifies the code to do this always, and to just use the
> unpaired atomicity when LSE2 is not present.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
diff mbox series

Patch

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index fa793485c3..c0d55c9204 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3089,27 +3089,72 @@  static void disas_ldst_pair(DisasContext *s, uint32_t insn)
     } else {
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
+        MemOp mop = (size + 1) | s->be_data;
+
+        /*
+         * With LSE2, non-sign-extending pairs are treated atomically if
+         * aligned, and if unaligned one of the pair will be completely
+         * within a 16-byte block and that element will be atomic.
+         * Otherwise each element is separately atomic.
+         * In all cases, issue one operation with the correct atomicity.
+         *
+         * This treats sign-extending loads like zero-extending loads,
+         * since that reuses the most code below.
+         */
+        mop |= size << MO_ATMAX_SHIFT;
+        mop |= s->atom_data;
+        if (s->align_mem) {
+            mop |= (size == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
+        }
 
         if (is_load) {
-            TCGv_i64 tmp = tcg_temp_new_i64();
+            if (size == 2) {
+                TCGv_i64 tmp = tcg_temp_new_i64();
 
-            /* Do not modify tcg_rt before recognizing any exception
-             * from the second load.
-             */
-            do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
-                      false, false, 0, false, false);
-            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
-            do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
-                      false, false, 0, false, false);
+                tcg_gen_qemu_ld_i64(tmp, clean_addr, get_mem_index(s), mop);
+                if (s->be_data == MO_LE) {
+                    tcg_gen_extr32_i64(tcg_rt, tcg_rt2, tmp);
+                } else {
+                    tcg_gen_extr32_i64(tcg_rt2, tcg_rt, tmp);
+                }
+                if (is_signed) {
+                    tcg_gen_ext32s_i64(tcg_rt, tcg_rt);
+                    tcg_gen_ext32s_i64(tcg_rt2, tcg_rt2);
+                }
+                tcg_temp_free_i64(tmp);
+            } else {
+                TCGv_i128 tmp = tcg_temp_new_i128();
 
-            tcg_gen_mov_i64(tcg_rt, tmp);
-            tcg_temp_free_i64(tmp);
+                tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
+                if (s->be_data == MO_LE) {
+                    tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
+                } else {
+                    tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
+                }
+                tcg_temp_free_i128(tmp);
+            }
         } else {
-            do_gpr_st(s, tcg_rt, clean_addr, size,
-                      false, 0, false, false);
-            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
-            do_gpr_st(s, tcg_rt2, clean_addr, size,
-                      false, 0, false, false);
+            if (size == 2) {
+                TCGv_i64 tmp = tcg_temp_new_i64();
+
+                if (s->be_data == MO_LE) {
+                    tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
+                } else {
+                    tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
+                }
+                tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
+                tcg_temp_free_i64(tmp);
+            } else {
+                TCGv_i128 tmp = tcg_temp_new_i128();
+
+                if (s->be_data == MO_LE) {
+                    tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
+                } else {
+                    tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
+                }
+                tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
+                tcg_temp_free_i128(tmp);
+            }
         }
     }