@@ -3089,27 +3089,72 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
     } else {
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
+        MemOp mop = (size + 1) | s->be_data;
+
+        /*
+         * With LSE2, non-sign-extending pairs are treated atomically if
+         * aligned, and if unaligned one of the pair will be completely
+         * within a 16-byte block and that element will be atomic.
+         * Otherwise each element is separately atomic.
+         * In all cases, issue one operation with the correct atomicity.
+         *
+         * This treats sign-extending loads like zero-extending loads,
+         * since that reuses the most code below.
+         */
+        mop |= size << MO_ATMAX_SHIFT;
+        mop |= s->atom_data;
+        if (s->align_mem) {
+            mop |= (size == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
+        }
 
         if (is_load) {
-            TCGv_i64 tmp = tcg_temp_new_i64();
+            if (size == 2) {
+                TCGv_i64 tmp = tcg_temp_new_i64();
 
-            /* Do not modify tcg_rt before recognizing any exception
-             * from the second load.
-             */
-            do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
-                      false, false, 0, false, false);
-            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
-            do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
-                      false, false, 0, false, false);
+                tcg_gen_qemu_ld_i64(tmp, clean_addr, get_mem_index(s), mop);
+                if (s->be_data == MO_LE) {
+                    tcg_gen_extr32_i64(tcg_rt, tcg_rt2, tmp);
+                } else {
+                    tcg_gen_extr32_i64(tcg_rt2, tcg_rt, tmp);
+                }
+                if (is_signed) {
+                    tcg_gen_ext32s_i64(tcg_rt, tcg_rt);
+                    tcg_gen_ext32s_i64(tcg_rt2, tcg_rt2);
+                }
+                tcg_temp_free_i64(tmp);
+            } else {
+                TCGv_i128 tmp = tcg_temp_new_i128();
 
-            tcg_gen_mov_i64(tcg_rt, tmp);
-            tcg_temp_free_i64(tmp);
+                tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
+                if (s->be_data == MO_LE) {
+                    tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
+                } else {
+                    tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
+                }
+                tcg_temp_free_i128(tmp);
+            }
         } else {
-            do_gpr_st(s, tcg_rt, clean_addr, size,
-                      false, 0, false, false);
-            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
-            do_gpr_st(s, tcg_rt2, clean_addr, size,
-                      false, 0, false, false);
+            if (size == 2) {
+                TCGv_i64 tmp = tcg_temp_new_i64();
+
+                if (s->be_data == MO_LE) {
+                    tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
+                } else {
+                    tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
+                }
+                tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
+                tcg_temp_free_i64(tmp);
+            } else {
+                TCGv_i128 tmp = tcg_temp_new_i128();
+
+                if (s->be_data == MO_LE) {
+                    tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
+                } else {
+                    tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
+                }
+                tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
+                tcg_temp_free_i128(tmp);
+            }
         }
     }
 
This is required for LSE2, where the pair must be treated atomically if
it does not cross a 16-byte boundary. But it simplifies the code to do
this always; without LSE2, just use the unpaired atomicity.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 77 ++++++++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 16 deletions(-)
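
Note on the comment in the hunk: it relies on a small arithmetic fact,
namely that for a pair of 4- or 8-byte elements at any address, at most
one of the two elements can cross a 16-byte boundary, so the other one
is always contained in a single 16-byte block and LSE2 can make that
element atomic. A minimal standalone C check of that property (an
illustration only, not QEMU code; crosses16 is a hypothetical helper
written just for this note):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* True if an access of 'size' bytes at 'addr' spans a 16-byte line. */
    static bool crosses16(uint64_t addr, unsigned size)
    {
        return (addr & 15) + size > 16;
    }

    int main(void)
    {
        /* Crossing depends only on addr % 16, so 16 offsets cover all cases. */
        for (unsigned size = 4; size <= 8; size *= 2) {
            for (uint64_t a = 0; a < 16; a++) {
                bool c0 = crosses16(a, size);        /* first element */
                bool c1 = crosses16(a + size, size); /* second element */
                assert(!(c0 && c1));                 /* never both cross */
            }
        }
        printf("ok\n");
        return 0;
    }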
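
A second note, on the register routing: once the pair is one wide value,
the load path only has to split it so that the lower-numbered element
reaches tcg_rt, and the store path is the exact inverse. For
little-endian the first element is the low half of the wide value; for
big-endian it is the high half, hence the swapped operands above. A
sketch of the 32-bit case in plain C (concat32 and extr32 are
hypothetical stand-ins mirroring what tcg_gen_concat32_i64 and
tcg_gen_extr32_i64 compute):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirror tcg_gen_concat32_i64: lo goes to bits [31:0], hi to [63:32]. */
    static uint64_t concat32(uint32_t lo, uint32_t hi)
    {
        return (uint64_t)hi << 32 | lo;
    }

    /* Mirror tcg_gen_extr32_i64: unpack bits [31:0] and [63:32]. */
    static void extr32(uint32_t *lo, uint32_t *hi, uint64_t v)
    {
        *lo = (uint32_t)v;
        *hi = (uint32_t)(v >> 32);
    }

    int main(void)
    {
        uint32_t rt = 0xaaaaaaaa, rt2 = 0x11111111, a, b;

        /* STP, little-endian: element 0 (rt) is the low half. */
        uint64_t le = concat32(rt, rt2);
        /* STP, big-endian: element 0 is the high half, so swap. */
        uint64_t be = concat32(rt2, rt);

        /* LDP reverses the routing and recovers both registers. */
        extr32(&a, &b, le);
        printf("LE: rt=%08x rt2=%08x\n", a, b); /* aaaaaaaa 11111111 */
        extr32(&b, &a, be);
        printf("BE: rt=%08x rt2=%08x\n", a, b); /* aaaaaaaa 11111111 */
        return 0;
    }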