@@ -508,9 +508,15 @@ static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
return x <= y;
case TCG_COND_GTU:
return x > y;
- default:
- g_assert_not_reached();
+ case TCG_COND_TSTEQ:
+ return (x & y) == 0;
+ case TCG_COND_TSTNE:
+ return (x & y) != 0;
+ case TCG_COND_ALWAYS:
+ case TCG_COND_NEVER:
+ break;
}
+ g_assert_not_reached();
}
static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
@@ -536,12 +542,18 @@ static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
return x <= y;
case TCG_COND_GTU:
return x > y;
- default:
- g_assert_not_reached();
+ case TCG_COND_TSTEQ:
+ return (x & y) == 0;
+ case TCG_COND_TSTNE:
+ return (x & y) != 0;
+ case TCG_COND_ALWAYS:
+ case TCG_COND_NEVER:
+ break;
}
+ g_assert_not_reached();
}
-static bool do_constant_folding_cond_eq(TCGCond c)
+static int do_constant_folding_cond_eq(TCGCond c)
{
switch (c) {
case TCG_COND_GT:
@@ -556,9 +568,14 @@ static bool do_constant_folding_cond_eq(TCGCond c)
case TCG_COND_LEU:
case TCG_COND_EQ:
return 1;
- default:
- g_assert_not_reached();
+ case TCG_COND_TSTEQ:
+ case TCG_COND_TSTNE:
+ return -1;
+ case TCG_COND_ALWAYS:
+ case TCG_COND_NEVER:
+ break;
}
+ g_assert_not_reached();
}
/*
@@ -660,7 +677,27 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGArg dest,
}
r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
- return r;
+ if (r >= 0) {
+ return r;
+ }
+ if (!is_tst_cond(cond)) {
+ return -1;
+ }
+
+ /* TSTNE x,x -> NE x,0 */
+ if (args_are_copies(*p1, *p2)) {
+ *p2 = arg_new_constant(ctx, 0);
+ *pcond = tcg_tst_eqne_cond(cond);
+ return -1;
+ }
+
+ /* TSTNE x,sign -> LT x,0 */
+ if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32
+ ? INT32_MIN : INT64_MIN))) {
+ *p2 = arg_new_constant(ctx, 0);
+ *pcond = tcg_tst_ltge_cond(cond);
+ }
+ return -1;
}
static int do_constant_folding_cond2(OptContext *ctx, TCGArg *args)
@@ -668,6 +705,7 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGArg *args)
TCGArg al, ah, bl, bh;
TCGCond c;
bool swap;
+ int r;
swap = swap_commutative2(args, args + 2);
c = args[4];
@@ -689,8 +727,13 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGArg *args)
tcg_target_ulong alv = arg_info(al)->val;
tcg_target_ulong ahv = arg_info(ah)->val;
uint64_t a = deposit64(alv, 32, 32, ahv);
- return do_constant_folding_cond_64(a, b, c);
+
+ r = do_constant_folding_cond_64(a, b, c);
+ if (r >= 0) {
+ return r;
+ }
}
+
if (b == 0) {
switch (c) {
case TCG_COND_LTU:
@@ -701,9 +744,28 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGArg *args)
break;
}
}
+
+ /* TSTNE x,sign -> LT x,0 */
+ if (b == INT64_MIN && is_tst_cond(c)) {
+ /* bl must be 0, so copy that to bh */
+ args[3] = bl;
+ args[4] = tcg_tst_ltge_cond(c);
+ return -1;
+ }
}
+
if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
- return do_constant_folding_cond_eq(c);
+ r = do_constant_folding_cond_eq(c);
+ if (r >= 0) {
+ return r;
+ }
+
+ /* TSTNE x,x -> NE x,0 */
+ if (is_tst_cond(c)) {
+ args[3] = args[2] = arg_new_constant(ctx, 0);
+ args[4] = tcg_tst_eqne_cond(c);
+ return -1;
+ }
}
return -1;
}
@@ -1151,17 +1213,30 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
case 0:
goto do_brcond_const;
case 1:
- op->opc = INDEX_op_brcond_i32;
- op->args[1] = op->args[2];
- op->args[2] = cond;
- op->args[3] = label;
- break;
+ goto do_brcond_low;
+ }
+ break;
+
+ case TCG_COND_TSTEQ:
+ case TCG_COND_TSTNE:
+ if (arg_is_const_val(op->args[2], 0)) {
+ goto do_brcond_high;
+ }
+ if (arg_is_const_val(op->args[3], 0)) {
+ goto do_brcond_low;
}
break;
default:
break;
+ do_brcond_low:
+ op->opc = INDEX_op_brcond_i32;
+ op->args[1] = op->args[2];
+ op->args[2] = cond;
+ op->args[3] = label;
+ break;
+
do_brcond_high:
op->opc = INDEX_op_brcond_i32;
op->args[0] = op->args[1];
@@ -1829,6 +1904,100 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
return false;
}
+static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
+{
+ TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc, uext_opc, sext_opc;
+ TCGCond cond = op->args[3];
+ TCGArg ret, src1, src2;
+ TCGOp *op2;
+ uint64_t val;
+ int sh;
+ bool inv;
+
+ if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
+ return;
+ }
+
+ src2 = op->args[2];
+ val = arg_info(src2)->val;
+ if (!is_power_of_2(val)) {
+ return;
+ }
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ and_opc = INDEX_op_and_i32;
+ sub_opc = INDEX_op_sub_i32;
+ xor_opc = INDEX_op_xor_i32;
+ shr_opc = INDEX_op_shr_i32;
+ neg_opc = TCG_TARGET_HAS_neg_i32 ? INDEX_op_neg_i32 : 0;
+ uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
+ sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
+ break;
+ case TCG_TYPE_I64:
+ and_opc = INDEX_op_and_i64;
+ sub_opc = INDEX_op_sub_i64;
+ xor_opc = INDEX_op_xor_i64;
+ shr_opc = INDEX_op_shr_i64;
+ neg_opc = TCG_TARGET_HAS_neg_i64 ? INDEX_op_neg_i64 : 0;
+ uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
+ sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ sh = ctz64(val);
+ ret = op->args[0];
+ src1 = op->args[1];
+ inv = cond == TCG_COND_TSTEQ;
+
+ if (neg && !inv && sext_opc) {
+ op->opc = sext_opc;
+ op->args[1] = src1;
+ op->args[2] = sh;
+ op->args[3] = 1;
+ neg = false;
+ } else if (uext_opc) {
+ op->opc = uext_opc;
+ op->args[1] = src1;
+ op->args[2] = sh;
+ op->args[3] = 1;
+ } else {
+ if (sh) {
+ op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3);
+ op2->args[0] = ret;
+ op2->args[1] = src1;
+ op2->args[2] = arg_new_constant(ctx, sh);
+ src1 = ret;
+ }
+ op->opc = and_opc;
+ op->args[1] = src1;
+ op->args[2] = arg_new_constant(ctx, 1);
+ }
+
+ if (neg && inv) {
+ op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3);
+ op2->args[0] = ret;
+ op2->args[1] = ret;
+ op2->args[2] = arg_new_constant(ctx, 1);
+ } else if (inv) {
+ op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3);
+ op2->args[0] = ret;
+ op2->args[1] = ret;
+ op2->args[2] = arg_new_constant(ctx, 1);
+ } else if (neg && neg_opc) {
+ op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2);
+ op2->args[0] = ret;
+ op2->args[1] = ret;
+ } else if (neg) {
+ op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3);
+ op2->args[0] = ret;
+ op2->args[1] = arg_new_constant(ctx, 0);
+ op2->args[2] = ret;
+ }
+}
+
static bool fold_setcond(OptContext *ctx, TCGOp *op)
{
int i = do_constant_folding_cond1(ctx, op->args[0], &op->args[1],
@@ -1836,6 +2005,7 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
+ fold_setcond_tst_pow2(ctx, op, false);
ctx->z_mask = 1;
ctx->s_mask = smask_from_zmask(1);
@@ -1849,13 +2019,13 @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
}
+ fold_setcond_tst_pow2(ctx, op, true);
/* Value is {0,-1} so all bits are repetitions of the sign. */
ctx->s_mask = -1;
return false;
}
-
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
{
TCGCond cond;
@@ -1903,21 +2073,36 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
case 0:
goto do_setcond_const;
case 1:
- op->args[2] = op->args[3];
- op->args[3] = cond;
- op->opc = INDEX_op_setcond_i32;
- break;
+ goto do_setcond_low;
+ }
+ break;
+
+ case TCG_COND_TSTEQ:
+ case TCG_COND_TSTNE:
+ if (arg_is_const_val(op->args[2], 0)) {
+ goto do_setcond_high;
+ }
+ if (arg_is_const_val(op->args[4], 0)) {
+ goto do_setcond_low;
}
break;
default:
break;
+ do_setcond_low:
+ op->args[2] = op->args[3];
+ op->args[3] = cond;
+ op->opc = INDEX_op_setcond_i32;
+ fold_setcond_tst_pow2(ctx, op, false);
+ break;
+
do_setcond_high:
op->args[1] = op->args[2];
op->args[2] = op->args[4];
op->args[3] = cond;
op->opc = INDEX_op_setcond_i32;
+ fold_setcond_tst_pow2(ctx, op, false);
break;
}
Fold constant comparisons. Canonicalize "tst x,x" to equality vs zero. Canonicalize "tst x,sign" to sign test vs zero. Fold double-word comparisons with zero parts. Fold setcond of "tst x,pow2" to a bit extract. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/optimize.c | 225 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 205 insertions(+), 20 deletions(-)