@@ -115,8 +115,7 @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
-static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
- __attribute__((unused));
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
@@ -365,9 +364,8 @@ void tcg_raise_tb_overflow(TCGContext *s)
*
* Move or extend @src into @dst, depending on @src_ext and the types.
*/
-static void __attribute__((unused))
-tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
- TCGType src_type, MemOp src_ext, TCGReg src)
+static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
+ TCGType src_type, MemOp src_ext, TCGReg src)
{
switch (src_ext) {
case MO_UB:
@@ -413,6 +411,48 @@ tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
}
}
+/**
+ * tcg_out_movext2 -- move and extend two pairs
+ * @s: tcg context
+ * @d1_type: integral type for first destination
+ * @d1: first destination register
+ * @s1_type: integral type for first source
+ * @s1_ext: extension to apply to first source
+ * @s1: first source register
+ * @d2_type: integral type for second destination
+ * @d2: second destination register
+ * @s2_type: integral type for second source
+ * @s2_ext: extension to apply to second source
+ * @s2: second source register
+ * @scratch: temporary register, or -1 for none; only consulted when the
+ * two pairs form a swap (d1 == s2 and d2 == s1) and tcg_out_xchg does
+ * not succeed, in which case it must be >= 0 (checked by debug assert)
+ *
+ * As tcg_out_movext, for both s1->d1 and s2->d2, caring for overlap
+ * between the sources and destinations.
+ *
+ * Ordering of the emitted moves:
+ * - If d1 != s2, writing d1 cannot destroy s2, so the moves are emitted
+ * in argument order: s1->d1, then s2->d2.
+ * - Otherwise d1 aliases s2, so s2->d2 is emitted first and s1->d1 last.
+ * If in addition d2 == s1 (a full swap), the sources are first
+ * exchanged with tcg_out_xchg, using the larger of the two source
+ * types; on success both values already sit in their destination
+ * registers and only the extensions remain. If tcg_out_xchg returns
+ * false, s1 is copied to @scratch and that copy is used as the source
+ * of the final s1->d1 move.
+ *
+ * NOTE(review): d1 == d2 is not checked here; presumably callers never
+ * pass identical destinations -- confirm against callers.
+ */
+static void __attribute__((unused))
+tcg_out_movext2(TCGContext *s, TCGType d1_type, TCGReg d1, TCGType s1_type,
+ MemOp s1_ext, TCGReg s1, TCGType d2_type, TCGReg d2,
+ TCGType s2_type, MemOp s2_ext, TCGReg s2, int scratch)
+{
+ if (d1 != s2) { /* writing d1 leaves s2 intact */
+ tcg_out_movext(s, d1_type, d1, s1_type, s1_ext, s1);
+ tcg_out_movext(s, d2_type, d2, s2_type, s2_ext, s2);
+ return;
+ }
+ if (d2 == s1) { /* d1 == s2 as well: a full swap */
+ if (tcg_out_xchg(s, MAX(s1_type, s2_type), s1, s2)) {
+ /* The data is now in the correct registers, now extend. */
+ s1 = d1, s2 = d2;
+ } else {
+ tcg_debug_assert(scratch >= 0);
+ tcg_out_mov(s, s1_type, scratch, s1); /* save s1 before d2 is written */
+ s1 = scratch;
+ }
+ }
+ tcg_out_movext(s, d2_type, d2, s2_type, s2_ext, s2); /* s2 first: d1 aliases it */
+ tcg_out_movext(s, d1_type, d1, s1_type, s1_ext, s1);
+}
+
#define C_PFX1(P, A) P##A
#define C_PFX2(P, A, B) P##A##_##B
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
@@ -1545,7 +1545,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
- TCGReg argreg, datalo, datahi;
+ TCGReg argreg;
MemOpIdx oi = lb->oi;
MemOp opc = get_memop(oi);
@@ -1565,20 +1565,11 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
/* Use the canonical unsigned helpers and minimize icache usage. */
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
- datalo = lb->datalo_reg;
- datahi = lb->datahi_reg;
if ((opc & MO_SIZE) == MO_64) {
- if (datalo != TCG_REG_R1) {
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
- } else if (datahi != TCG_REG_R0) {
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
- } else {
- tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
- }
+ tcg_out_movext2(s, TCG_TYPE_I32, lb->datalo_reg,
+ TCG_TYPE_I32, MO_UL, TCG_REG_R0,
+ TCG_TYPE_I32, lb->datahi_reg,
+ TCG_TYPE_I32, MO_UL, TCG_REG_R1, TCG_REG_TMP);
} else {
tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
@@ -1663,17 +1654,10 @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
if (TARGET_LONG_BITS == 64) {
/* 64-bit target address is aligned into R2:R3. */
- if (l->addrhi_reg != TCG_REG_R2) {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
- } else if (l->addrlo_reg != TCG_REG_R3) {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
- } else {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1);
- }
+ tcg_out_movext2(s, TCG_TYPE_I32, TCG_REG_R2,
+ TCG_TYPE_I32, MO_UL, l->addrlo_reg,
+ TCG_TYPE_I32, TCG_REG_R3,
+ TCG_TYPE_I32, MO_UL, l->addrhi_reg, TCG_REG_TMP);
} else {
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
}
@@ -1916,7 +1916,6 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
- TCGReg data_reg;
tcg_insn_unit **label_ptr = &l->label_ptr[0];
/* resolve label address */
@@ -1953,18 +1952,13 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
- data_reg = l->datalo_reg;
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
- if (data_reg == TCG_REG_EDX) {
- /* xchg %edx, %eax */
- tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
- } else {
- tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
- }
+ tcg_out_movext2(s, TCG_TYPE_I32, l->datalo_reg,
+ TCG_TYPE_I32, MO_UL, TCG_REG_EAX,
+ TCG_TYPE_I32, l->datahi_reg,
+ TCG_TYPE_I32, MO_UL, TCG_REG_EDX, -1);
} else {
- tcg_out_movext(s, l->type, data_reg,
+ tcg_out_movext(s, l->type, l->datalo_reg,
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
}
This is common code in most qemu_{ld,st} slow paths, moving two registers when there may be overlap between sources and destinations. At present, this is only used by 32-bit hosts for 64-bit data, but will shortly be used for more than that. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/tcg.c | 50 +++++++++++++++++++++++++++++++++++---- tcg/arm/tcg-target.c.inc | 34 +++++++------------------- tcg/i386/tcg-target.c.inc | 16 ++++--------- 3 files changed, 59 insertions(+), 41 deletions(-)