diff mbox series

[07/28] tcg/ppc: Split out tcg_out_bswap16

Message ID 20210614083800.1166166-8-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg: bswap improvements | expand

Commit Message

Richard Henderson June 14, 2021, 8:37 a.m. UTC
With the use of a suitable temporary, we can use the same
algorithm when src overlaps dst.  The result is the same
number of instructions either way.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 tcg/ppc/tcg-target.c.inc | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

-- 
2.25.1

Comments

Peter Maydell June 21, 2021, 2:29 p.m. UTC | #1
On Mon, 14 Jun 2021 at 09:47, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> With the use of a suitable temporary, we can use the same

> algorithm when src overlaps dst.  The result is the same

> number of instructions either way.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  tcg/ppc/tcg-target.c.inc | 26 +++++++++++---------------

>  1 file changed, 11 insertions(+), 15 deletions(-)

>

> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc

> index b49204f707..64c24382a8 100644

> --- a/tcg/ppc/tcg-target.c.inc

> +++ b/tcg/ppc/tcg-target.c.inc

> @@ -788,6 +788,16 @@ static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)

>      tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));

>  }

>

> +static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src)

> +{

> +    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

> +

> +                                                   /* src = abcd */

> +    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);  /* tmp = 000c */

> +    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);   /* tmp = 00dc */

> +    tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);

> +}


TCG_REG_R0 is implicitly available as a temp because it's not
listed in tcg_target_reg_alloc_order[], right? (There's a comment
in the definition of that array that says V0 and V1 are reserved
as temporaries, but not a comment about R0.)

>  /* Emit a move into ret of arg, if it can be done in one insn.  */

>  static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)

>  {

> @@ -2779,21 +2789,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,

>

>      case INDEX_op_bswap16_i32:

>      case INDEX_op_bswap16_i64:

> -        a0 = args[0], a1 = args[1];

> -        /* a1 = abcd */

> -        if (a0 != a1) {

> -            /* a0 = (a1 r<< 24) & 0xff # 000c */

> -            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);

> -            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */

> -            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);


Would be nice to keep these comments about what operations we think
the insns are doing, given that RLWINM and RLWIMI are pretty confusing.

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>


thanks
-- PMM
Richard Henderson June 21, 2021, 2:41 p.m. UTC | #2
On 6/21/21 7:29 AM, Peter Maydell wrote:
>> +static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src)

>> +{

>> +    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

>> +

>> +                                                   /* src = abcd */

>> +    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);  /* tmp = 000c */

>> +    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);   /* tmp = 00dc */

>> +    tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);

>> +}

> 

> TCG_REG_R0 is implicitly available as a temp because it's not

> listed in tcg_target_reg_alloc_order[], right?


Slightly better than that:

     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */


> (There's a comment

> in the definition of that array that says V0 and V1 are reserved

> as temporaries, but not a comment about R0.)


Yeah, tcg/ppc/ is due for a bit of cleanup.

> Would be nice to keep these comments about what operations we think

> the insns are doing, given that RLWINM and RLWIMI are pretty confusing.


Hmm, I guess.  I found them intrusive.


r~
diff mbox series

Patch

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index b49204f707..64c24382a8 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -788,6 +788,16 @@  static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
     tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
 }
 
+static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src)
+{
+    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
+
+                                                   /* src = abcd */
+    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);  /* tmp = 000c */
+    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);   /* tmp = 00dc */
+    tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
+}
+
 /* Emit a move into ret of arg, if it can be done in one insn.  */
 static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
 {
@@ -2779,21 +2789,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_bswap16_i32:
     case INDEX_op_bswap16_i64:
-        a0 = args[0], a1 = args[1];
-        /* a1 = abcd */
-        if (a0 != a1) {
-            /* a0 = (a1 r<< 24) & 0xff # 000c */
-            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
-            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
-            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
-        } else {
-            /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
-            tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
-            /* a0 = (a1 r<< 24) & 0xff # 000c */
-            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
-            /* a0 = a0 | r0 # 00dc */
-            tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
-        }
+        tcg_out_bswap16(s, args[0], args[1]);
         break;
 
     case INDEX_op_bswap32_i32: