@@ -121,11 +121,11 @@
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_muls2_i64 1
-#define TCG_TARGET_HAS_add2_i32 0
-#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
-#define TCG_TARGET_HAS_add2_i64 0
-#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
@@ -189,7 +189,6 @@ static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
*c5 = extract32(insn, 28, 4);
}
-#if TCG_TARGET_REG_BITS == 32
static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5)
{
@@ -200,7 +199,6 @@ static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
*r4 = extract32(insn, 24, 4);
*r5 = extract32(insn, 28, 4);
}
-#endif
static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
{
@@ -368,17 +366,14 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
for (;;) {
uint32_t insn;
TCGOpcode opc;
- TCGReg r0, r1, r2, r3, r4;
+ TCGReg r0, r1, r2, r3, r4, r5;
tcg_target_ulong t1;
TCGCond condition;
target_ulong taddr;
uint8_t pos, len;
uint32_t tmp32;
uint64_t tmp64;
-#if TCG_TARGET_REG_BITS == 32
- TCGReg r5;
uint64_t T1, T2;
-#endif
TCGMemOpIdx oi;
int32_t ofs;
void *ptr;
@@ -665,20 +660,22 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tb_ptr = ptr;
}
break;
-#if TCG_TARGET_REG_BITS == 32
+#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32
case INDEX_op_add2_i32:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = tci_uint64(regs[r3], regs[r2]);
T2 = tci_uint64(regs[r5], regs[r4]);
tci_write_reg64(regs, r1, r0, T1 + T2);
break;
+#endif
+#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32
case INDEX_op_sub2_i32:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = tci_uint64(regs[r3], regs[r2]);
T2 = tci_uint64(regs[r5], regs[r4]);
tci_write_reg64(regs, r1, r0, T1 - T2);
break;
-#endif /* TCG_TARGET_REG_BITS == 32 */
+#endif
#if TCG_TARGET_HAS_mulu2_i32
case INDEX_op_mulu2_i32:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
@@ -808,6 +805,24 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
muls64(®s[r0], ®s[r1], regs[r2], regs[r3]);
break;
#endif
+#if TCG_TARGET_HAS_add2_i64
+ case INDEX_op_add2_i64:
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
+ T1 = regs[r2] + regs[r4];
+ T2 = regs[r3] + regs[r5] + (T1 < regs[r2]);
+ regs[r0] = T1;
+ regs[r1] = T2;
+ break;
+#endif
+#if TCG_TARGET_HAS_add2_i64
+ case INDEX_op_sub2_i64:
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
+ T1 = regs[r2] - regs[r4];
+ T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]);
+ regs[r0] = T1;
+ regs[r1] = T2;
+ break;
+#endif
/* Shift/rotate operations (64 bit). */
@@ -1124,10 +1139,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
const char *op_name;
uint32_t insn;
TCGOpcode op;
- TCGReg r0, r1, r2, r3, r4;
-#if TCG_TARGET_REG_BITS == 32
- TCGReg r5;
-#endif
+ TCGReg r0, r1, r2, r3, r4, r5;
tcg_target_ulong i1;
int32_t s2;
TCGCond c;
@@ -1325,15 +1337,15 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
str_r(r2), str_r(r3));
break;
-#if TCG_TARGET_REG_BITS == 32
case INDEX_op_add2_i32:
+ case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
+ case INDEX_op_sub2_i64:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
info->fprintf_func(info->stream, "%-12s %s,%s,%s,%s,%s,%s",
op_name, str_r(r0), str_r(r1), str_r(r2),
str_r(r3), str_r(r4), str_r(r5));
break;
-#endif
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:
@@ -134,11 +134,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_brcond_i64:
return C_O0_I2(r, r);
-#if TCG_TARGET_REG_BITS == 32
- /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */
case INDEX_op_add2_i32:
+ case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
+ case INDEX_op_sub2_i64:
return C_O2_I4(r, r, r, r, r, r);
+
+#if TCG_TARGET_REG_BITS == 32
case INDEX_op_brcond2_i32:
return C_O0_I4(r, r, r, r);
#endif
@@ -467,7 +469,6 @@ static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
tcg_out32(s, insn);
}
-#if TCG_TARGET_REG_BITS == 32
static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2,
TCGReg r3, TCGReg r4, TCGReg r5)
@@ -483,7 +484,6 @@ static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
insn = deposit32(insn, 28, 4, r5);
tcg_out32(s, insn);
}
-#endif
static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val,
TCGReg base, intptr_t offset)
@@ -719,12 +719,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_op_rr(s, opc, args[0], args[1]);
break;
-#if TCG_TARGET_REG_BITS == 32
- case INDEX_op_add2_i32:
- case INDEX_op_sub2_i32:
+ CASE_32_64(add2)
+ CASE_32_64(sub2)
tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2],
args[3], args[4], args[5]);
break;
+
+#if TCG_TARGET_REG_BITS == 32
case INDEX_op_brcond2_i32:
tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP,
args[0], args[1], args[2], args[3], args[4]);
We already had the 32-bit versions for a 32-bit host; expand this to 64-bit hosts as well. The 64-bit opcodes are new. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/tci/tcg-target.h | 8 ++++---- tcg/tci.c | 40 ++++++++++++++++++++++++++-------------- tcg/tci/tcg-target.c.inc | 15 ++++++++------- 3 files changed, 38 insertions(+), 25 deletions(-) -- 2.25.1