diff mbox series

[4/4] NOTFORMERGE tcg/i386: Assert sub of immediate has been folded

Message ID 20231026013945.1152174-5-richard.henderson@linaro.org
State New
Headers show
Series tcg: Canonicalize SUBI to ADDI | expand

Commit Message

Richard Henderson Oct. 26, 2023, 1:39 a.m. UTC
A release build should simply accept and emit the subtract.
I'm not even sure if this is reasonable to keep for debug.

Not-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c                 | 49 ++++++++++++++++++++++++++-------------
 tcg/i386/tcg-target.c.inc | 13 ++++++++---
 2 files changed, 43 insertions(+), 19 deletions(-)

Comments

Richard Henderson Oct. 26, 2023, 1:53 a.m. UTC | #1
On 10/25/23 18:39, Richard Henderson wrote:
> A release build should simply accept and emit the subtract.
> I'm not even sure if this is reasonable to keep for debug.
> 
> Not-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/tcg.c                 | 49 ++++++++++++++++++++++++++-------------
>   tcg/i386/tcg-target.c.inc | 13 ++++++++---
>   2 files changed, 43 insertions(+), 19 deletions(-)

Oops, accidental merge of two patches.
The tcg.c change *is* required.


r~

> 
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index a507c111cf..408647af7e 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -3618,23 +3618,40 @@ liveness_pass_1(TCGContext *s)
>           do_addsub2:
>               nb_iargs = 4;
>               nb_oargs = 2;
> -            /* Test if the high part of the operation is dead, but not
> -               the low part.  The result can be optimized to a simple
> -               add or sub.  This happens often for x86_64 guest when the
> -               cpu mode is set to 32 bit.  */
> -            if (arg_temp(op->args[1])->state == TS_DEAD) {
> -                if (arg_temp(op->args[0])->state == TS_DEAD) {
> -                    goto do_remove;
> -                }
> -                /* Replace the opcode and adjust the args in place,
> -                   leaving 3 unused args at the end.  */
> -                op->opc = opc = opc_new;
> -                op->args[1] = op->args[2];
> -                op->args[2] = op->args[4];
> -                /* Fall through and mark the single-word operation live.  */
> -                nb_iargs = 2;
> -                nb_oargs = 1;
> +            /*
> +             * Test if the high part of the operation is dead, but the low
> +             * part is still live.  The result can be optimized to a simple
> +             * add or sub.
> +             */
> +            if (arg_temp(op->args[1])->state != TS_DEAD) {
> +                goto do_not_remove;
>               }
> +            if (arg_temp(op->args[0])->state == TS_DEAD) {
> +                goto do_remove;
> +            }
> +            /*
> +             * Replace the opcode and adjust the args in place, leaving 3
> +             * unused args at the end.  Canonicalize subi to addi.
> +             */
> +            op->args[1] = op->args[2];
> +            {
> +                TCGTemp *src2 = arg_temp(op->args[4]);
> +                if (src2->kind == TEMP_CONST) {
> +                    if (opc_new == INDEX_op_sub_i32) {
> +                        src2 = tcg_constant_internal(TCG_TYPE_I32,
> +                                                     (int32_t)-src2->val);
> +                        opc_new = INDEX_op_add_i32;
> +                    } else if (opc_new == INDEX_op_sub_i64) {
> +                        src2 = tcg_constant_internal(TCG_TYPE_I64, -src2->val);
> +                        opc_new = INDEX_op_add_i64;
> +                    }
> +                }
> +                op->args[2] = temp_arg(src2);
> +            }
> +            op->opc = opc = opc_new;
> +            /* Mark the single-word operation live.  */
> +            nb_iargs = 2;
> +            nb_oargs = 1;
>               goto do_not_remove;
>   
>           case INDEX_op_mulu2_i32:
> diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
> index 0d97864174..39d03fa698 100644
> --- a/tcg/i386/tcg-target.c.inc
> +++ b/tcg/i386/tcg-target.c.inc
> @@ -2544,8 +2544,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>           c = ARITH_ADD;
>           goto gen_arith;
>       OP_32_64(sub):
> -        c = ARITH_SUB;
> -        goto gen_arith;
> +        /*
> +         * Should have canonicalized all constants to add.
> +         * Keep the constraint allowing any constant so that we catch this
> +         * case without register allocation loading the constant first.
> +         */
> +        tcg_debug_assert(!const_a2);
> +        tgen_arithr(s, ARITH_SUB + rexw, a0, a2);
> +        break;
>       OP_32_64(and):
>           c = ARITH_AND;
>           goto gen_arith;
> @@ -3325,7 +3331,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>           return C_O1_I2(r, r, re);
>   
>       case INDEX_op_sub_i32:
> -    case INDEX_op_sub_i64:
>       case INDEX_op_mul_i32:
>       case INDEX_op_mul_i64:
>       case INDEX_op_or_i32:
> @@ -3333,6 +3338,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>       case INDEX_op_xor_i32:
>       case INDEX_op_xor_i64:
>           return C_O1_I2(r, 0, re);
> +    case INDEX_op_sub_i64:
> +        return C_O1_I2(r, 0, ri);
>   
>       case INDEX_op_and_i32:
>       case INDEX_op_and_i64:
Philippe Mathieu-Daudé Nov. 6, 2023, 2:49 p.m. UTC | #2
On 26/10/23 03:39, Richard Henderson wrote:
> A release build should simply accept and emit the subtract.
> I'm not even sure if this is reasonable to keep for debug.
> 
> Not-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/tcg.c                 | 49 ++++++++++++++++++++++++++-------------
>   tcg/i386/tcg-target.c.inc | 13 ++++++++---
>   2 files changed, 43 insertions(+), 19 deletions(-)
> 
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index a507c111cf..408647af7e 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -3618,23 +3618,40 @@ liveness_pass_1(TCGContext *s)
>           do_addsub2:
>               nb_iargs = 4;
>               nb_oargs = 2;
> -            /* Test if the high part of the operation is dead, but not
> -               the low part.  The result can be optimized to a simple
> -               add or sub.  This happens often for x86_64 guest when the
> -               cpu mode is set to 32 bit.  */
> -            if (arg_temp(op->args[1])->state == TS_DEAD) {
> -                if (arg_temp(op->args[0])->state == TS_DEAD) {
> -                    goto do_remove;
> -                }
> -                /* Replace the opcode and adjust the args in place,
> -                   leaving 3 unused args at the end.  */
> -                op->opc = opc = opc_new;
> -                op->args[1] = op->args[2];
> -                op->args[2] = op->args[4];
> -                /* Fall through and mark the single-word operation live.  */
> -                nb_iargs = 2;
> -                nb_oargs = 1;
> +            /*
> +             * Test if the high part of the operation is dead, but the low
> +             * part is still live.  The result can be optimized to a simple
> +             * add or sub.
> +             */
> +            if (arg_temp(op->args[1])->state != TS_DEAD) {
> +                goto do_not_remove;
>               }
> +            if (arg_temp(op->args[0])->state == TS_DEAD) {
> +                goto do_remove;
> +            }
> +            /*
> +             * Replace the opcode and adjust the args in place, leaving 3
> +             * unused args at the end.  Canonicalize subi to andi.

Typo s/andi/addi/.

> +             */
> +            op->args[1] = op->args[2];
> +            {
> +                TCGTemp *src2 = arg_temp(op->args[4]);
> +                if (src2->kind == TEMP_CONST) {
> +                    if (opc_new == INDEX_op_sub_i32) {
> +                        src2 = tcg_constant_internal(TCG_TYPE_I32,
> +                                                     (int32_t)-src2->val);
> +                        opc_new = INDEX_op_add_i32;
> +                    } else if (opc_new == INDEX_op_sub_i64) {
> +                        src2 = tcg_constant_internal(TCG_TYPE_I64, -src2->val);
> +                        opc_new = INDEX_op_add_i64;
> +                    }
> +                }
> +                op->args[2] = temp_arg(src2);
> +            }
> +            op->opc = opc = opc_new;
> +            /* Mark the single-word operation live.  */
> +            nb_iargs = 2;
> +            nb_oargs = 1;
>               goto do_not_remove;

For tcg/tcg.c with your S-o-b and a reworded description:

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
diff mbox series

Patch

diff --git a/tcg/tcg.c b/tcg/tcg.c
index a507c111cf..408647af7e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -3618,23 +3618,40 @@  liveness_pass_1(TCGContext *s)
         do_addsub2:
             nb_iargs = 4;
             nb_oargs = 2;
-            /* Test if the high part of the operation is dead, but not
-               the low part.  The result can be optimized to a simple
-               add or sub.  This happens often for x86_64 guest when the
-               cpu mode is set to 32 bit.  */
-            if (arg_temp(op->args[1])->state == TS_DEAD) {
-                if (arg_temp(op->args[0])->state == TS_DEAD) {
-                    goto do_remove;
-                }
-                /* Replace the opcode and adjust the args in place,
-                   leaving 3 unused args at the end.  */
-                op->opc = opc = opc_new;
-                op->args[1] = op->args[2];
-                op->args[2] = op->args[4];
-                /* Fall through and mark the single-word operation live.  */
-                nb_iargs = 2;
-                nb_oargs = 1;
+            /*
+             * Test if the high part of the operation is dead, but the low
+             * part is still live.  The result can be optimized to a simple
+             * add or sub.
+             */
+            if (arg_temp(op->args[1])->state != TS_DEAD) {
+                goto do_not_remove;
             }
+            if (arg_temp(op->args[0])->state == TS_DEAD) {
+                goto do_remove;
+            }
+            /*
+             * Replace the opcode and adjust the args in place, leaving 3
+             * unused args at the end.  Canonicalize subi to addi.
+             */
+            op->args[1] = op->args[2];
+            {
+                TCGTemp *src2 = arg_temp(op->args[4]);
+                if (src2->kind == TEMP_CONST) {
+                    if (opc_new == INDEX_op_sub_i32) {
+                        src2 = tcg_constant_internal(TCG_TYPE_I32,
+                                                     (int32_t)-src2->val);
+                        opc_new = INDEX_op_add_i32;
+                    } else if (opc_new == INDEX_op_sub_i64) {
+                        src2 = tcg_constant_internal(TCG_TYPE_I64, -src2->val);
+                        opc_new = INDEX_op_add_i64;
+                    }
+                }
+                op->args[2] = temp_arg(src2);
+            }
+            op->opc = opc = opc_new;
+            /* Mark the single-word operation live.  */
+            nb_iargs = 2;
+            nb_oargs = 1;
             goto do_not_remove;
 
         case INDEX_op_mulu2_i32:
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 0d97864174..39d03fa698 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -2544,8 +2544,14 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         c = ARITH_ADD;
         goto gen_arith;
     OP_32_64(sub):
-        c = ARITH_SUB;
-        goto gen_arith;
+        /*
+         * Should have canonicalized all constants to add.
+         * Keep the constraint allowing any constant so that we catch this
+         * case without register allocation loading the constant first.
+         */
+        tcg_debug_assert(!const_a2);
+        tgen_arithr(s, ARITH_SUB + rexw, a0, a2);
+        break;
     OP_32_64(and):
         c = ARITH_AND;
         goto gen_arith;
@@ -3325,7 +3331,6 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I2(r, r, re);
 
     case INDEX_op_sub_i32:
-    case INDEX_op_sub_i64:
     case INDEX_op_mul_i32:
     case INDEX_op_mul_i64:
     case INDEX_op_or_i32:
@@ -3333,6 +3338,8 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_xor_i32:
     case INDEX_op_xor_i64:
         return C_O1_I2(r, 0, re);
+    case INDEX_op_sub_i64:
+        return C_O1_I2(r, 0, ri);
 
     case INDEX_op_and_i32:
     case INDEX_op_and_i64: