Message ID | 20231026013945.1152174-5-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | tcg: Canonicalize SUBI to ANDI | expand |
On 10/25/23 18:39, Richard Henderson wrote: > A release build should simply accept and emit the subtract. > I'm not even sure if this is reasonable to keep for debug. > > Not-Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > tcg/tcg.c | 49 ++++++++++++++++++++++++++------------- > tcg/i386/tcg-target.c.inc | 13 ++++++++--- > 2 files changed, 43 insertions(+), 19 deletions(-) Oops, accidental merge of two patches. The tcg.c change *is* required. r~ > > diff --git a/tcg/tcg.c b/tcg/tcg.c > index a507c111cf..408647af7e 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -3618,23 +3618,40 @@ liveness_pass_1(TCGContext *s) > do_addsub2: > nb_iargs = 4; > nb_oargs = 2; > - /* Test if the high part of the operation is dead, but not > - the low part. The result can be optimized to a simple > - add or sub. This happens often for x86_64 guest when the > - cpu mode is set to 32 bit. */ > - if (arg_temp(op->args[1])->state == TS_DEAD) { > - if (arg_temp(op->args[0])->state == TS_DEAD) { > - goto do_remove; > - } > - /* Replace the opcode and adjust the args in place, > - leaving 3 unused args at the end. */ > - op->opc = opc = opc_new; > - op->args[1] = op->args[2]; > - op->args[2] = op->args[4]; > - /* Fall through and mark the single-word operation live. */ > - nb_iargs = 2; > - nb_oargs = 1; > + /* > + * Test if the high part of the operation is dead, but the low > + * part is still live. The result can be optimized to a simple > + * add or sub. > + */ > + if (arg_temp(op->args[1])->state != TS_DEAD) { > + goto do_not_remove; > } > + if (arg_temp(op->args[0])->state == TS_DEAD) { > + goto do_remove; > + } > + /* > + * Replace the opcode and adjust the args in place, leaving 3 > + * unused args at the end. Canonicalize subi to andi. > + */ > + op->args[1] = op->args[2]; > + { > + TCGTemp *src2 = arg_temp(op->args[4]); > + if (src2->kind == TEMP_CONST) { > + if (opc_new == INDEX_op_sub_i32) { > + src2 = tcg_constant_internal(TCG_TYPE_I32, > + (int32_t)-src2->val); > + opc_new = INDEX_op_add_i32; > + } else if (opc_new == INDEX_op_sub_i64) { > + src2 = tcg_constant_internal(TCG_TYPE_I64, -src2->val); > + opc_new = INDEX_op_add_i64; > + } > + } > + op->args[2] = temp_arg(src2); > + } > + op->opc = opc = opc_new; > + /* Mark the single-word operation live. */ > + nb_iargs = 2; > + nb_oargs = 1; > goto do_not_remove; > > case INDEX_op_mulu2_i32: > diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc > index 0d97864174..39d03fa698 100644 > --- a/tcg/i386/tcg-target.c.inc > +++ b/tcg/i386/tcg-target.c.inc > @@ -2544,8 +2544,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, > c = ARITH_ADD; > goto gen_arith; > OP_32_64(sub): > - c = ARITH_SUB; > - goto gen_arith; > + /* > + * Should have canonicalized all constants to add. > + * Keep the constraint allowing any constant so that we catch this > + * case without register allocation loading the constant first. > + */ > + tcg_debug_assert(!const_a2); > + tgen_arithr(s, ARITH_SUB + rexw, a0, a2); > + break; > OP_32_64(and): > c = ARITH_AND; > goto gen_arith; > @@ -3325,7 +3331,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) > return C_O1_I2(r, r, re); > > case INDEX_op_sub_i32: > - case INDEX_op_sub_i64: > case INDEX_op_mul_i32: > case INDEX_op_mul_i64: > case INDEX_op_or_i32: > @@ -3333,6 +3338,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) > case INDEX_op_xor_i32: > case INDEX_op_xor_i64: > return C_O1_I2(r, 0, re); > + case INDEX_op_sub_i64: > + return C_O1_I2(r, 0, ri); > > case INDEX_op_and_i32: > case INDEX_op_and_i64:
On 26/10/23 03:39, Richard Henderson wrote: > A release build should simply accept and emit the subtract. > I'm not even sure if this is reasonable to keep for debug. > > Not-Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > tcg/tcg.c | 49 ++++++++++++++++++++++++++------------- > tcg/i386/tcg-target.c.inc | 13 ++++++++--- > 2 files changed, 43 insertions(+), 19 deletions(-) > > diff --git a/tcg/tcg.c b/tcg/tcg.c > index a507c111cf..408647af7e 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -3618,23 +3618,40 @@ liveness_pass_1(TCGContext *s) > do_addsub2: > nb_iargs = 4; > nb_oargs = 2; > - /* Test if the high part of the operation is dead, but not > - the low part. The result can be optimized to a simple > - add or sub. This happens often for x86_64 guest when the > - cpu mode is set to 32 bit. */ > - if (arg_temp(op->args[1])->state == TS_DEAD) { > - if (arg_temp(op->args[0])->state == TS_DEAD) { > - goto do_remove; > - } > - /* Replace the opcode and adjust the args in place, > - leaving 3 unused args at the end. */ > - op->opc = opc = opc_new; > - op->args[1] = op->args[2]; > - op->args[2] = op->args[4]; > - /* Fall through and mark the single-word operation live. */ > - nb_iargs = 2; > - nb_oargs = 1; > + /* > + * Test if the high part of the operation is dead, but the low > + * part is still live. The result can be optimized to a simple > + * add or sub. > + */ > + if (arg_temp(op->args[1])->state != TS_DEAD) { > + goto do_not_remove; > } > + if (arg_temp(op->args[0])->state == TS_DEAD) { > + goto do_remove; > + } > + /* > + * Replace the opcode and adjust the args in place, leaving 3 > + * unused args at the end. Canonicalize subi to andi. Typo s/andi/addi/. > + */ > + op->args[1] = op->args[2]; > + { > + TCGTemp *src2 = arg_temp(op->args[4]); > + if (src2->kind == TEMP_CONST) { > + if (opc_new == INDEX_op_sub_i32) { > + src2 = tcg_constant_internal(TCG_TYPE_I32, > + (int32_t)-src2->val); > + opc_new = INDEX_op_add_i32; > + } else if (opc_new == INDEX_op_sub_i64) { > + src2 = tcg_constant_internal(TCG_TYPE_I64, -src2->val); > + opc_new = INDEX_op_add_i64; > + } > + } > + op->args[2] = temp_arg(src2); > + } > + op->opc = opc = opc_new; > + /* Mark the single-word operation live. */ > + nb_iargs = 2; > + nb_oargs = 1; > goto do_not_remove; For tcg/tcg.c with your S-o-b and a reworded description: Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
diff --git a/tcg/tcg.c b/tcg/tcg.c index a507c111cf..408647af7e 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3618,23 +3618,40 @@ liveness_pass_1(TCGContext *s) do_addsub2: nb_iargs = 4; nb_oargs = 2; - /* Test if the high part of the operation is dead, but not - the low part. The result can be optimized to a simple - add or sub. This happens often for x86_64 guest when the - cpu mode is set to 32 bit. */ - if (arg_temp(op->args[1])->state == TS_DEAD) { - if (arg_temp(op->args[0])->state == TS_DEAD) { - goto do_remove; - } - /* Replace the opcode and adjust the args in place, - leaving 3 unused args at the end. */ - op->opc = opc = opc_new; - op->args[1] = op->args[2]; - op->args[2] = op->args[4]; - /* Fall through and mark the single-word operation live. */ - nb_iargs = 2; - nb_oargs = 1; + /* + * Test if the high part of the operation is dead, but the low + * part is still live. The result can be optimized to a simple + * add or sub. + */ + if (arg_temp(op->args[1])->state != TS_DEAD) { + goto do_not_remove; } + if (arg_temp(op->args[0])->state == TS_DEAD) { + goto do_remove; + } + /* + * Replace the opcode and adjust the args in place, leaving 3 + * unused args at the end. Canonicalize subi to andi. + */ + op->args[1] = op->args[2]; + { + TCGTemp *src2 = arg_temp(op->args[4]); + if (src2->kind == TEMP_CONST) { + if (opc_new == INDEX_op_sub_i32) { + src2 = tcg_constant_internal(TCG_TYPE_I32, + (int32_t)-src2->val); + opc_new = INDEX_op_add_i32; + } else if (opc_new == INDEX_op_sub_i64) { + src2 = tcg_constant_internal(TCG_TYPE_I64, -src2->val); + opc_new = INDEX_op_add_i64; + } + } + op->args[2] = temp_arg(src2); + } + op->opc = opc = opc_new; + /* Mark the single-word operation live. */ + nb_iargs = 2; + nb_oargs = 1; goto do_not_remove; case INDEX_op_mulu2_i32: diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 0d97864174..39d03fa698 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2544,8 +2544,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, c = ARITH_ADD; goto gen_arith; OP_32_64(sub): - c = ARITH_SUB; - goto gen_arith; + /* + * Should have canonicalized all constants to add. + * Keep the constraint allowing any constant so that we catch this + * case without register allocation loading the constant first. + */ + tcg_debug_assert(!const_a2); + tgen_arithr(s, ARITH_SUB + rexw, a0, a2); + break; OP_32_64(and): c = ARITH_AND; goto gen_arith; @@ -3325,7 +3331,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) return C_O1_I2(r, r, re); case INDEX_op_sub_i32: - case INDEX_op_sub_i64: case INDEX_op_mul_i32: case INDEX_op_mul_i64: case INDEX_op_or_i32: @@ -3333,6 +3338,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_xor_i32: case INDEX_op_xor_i64: return C_O1_I2(r, 0, re); + case INDEX_op_sub_i64: + return C_O1_I2(r, 0, ri); case INDEX_op_and_i32: case INDEX_op_and_i64: