diff mbox series

[1/7] tcg/tcg-op: Document bswap16() byte pattern

Message ID 20230822093712.38922-2-philmd@linaro.org
State New
Headers show
Series tcg: Document *swap/deposit helpers | expand

Commit Message

Philippe Mathieu-Daudé Aug. 22, 2023, 9:37 a.m. UTC
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
 tcg/tcg-op.c | 48 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)

Comments

Richard Henderson Aug. 22, 2023, 3:58 p.m. UTC | #1
On 8/22/23 02:37, Philippe Mathieu-Daudé wrote:
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
>   tcg/tcg-op.c | 48 ++++++++++++++++++++++++++++++++----------------
>   1 file changed, 32 insertions(+), 16 deletions(-)
> 
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index 7aadb37756..f164ddc95e 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -1021,6 +1021,13 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
>       }
>   }
>   
> +/*
> + * bswap16_i32: 16-bit byte swap on the low bits of a 32-bit value.
> + *
> + * Byte pattern:  bswap16_i32(xxab) -> ..ba             (TCG_BSWAP_OZ)
> + *                bswap16_i32(xxab) -> ssba             (TCG_BSWAP_OS)
> + *                bswap16_i32(xxab) -> xxba
> + */

Don't forget TCG_BSWAP_IZ, which means the input is already zero-extended.
Which makes

> +                                            /* arg = xxab */
> +        tcg_gen_shri_i32(t0, arg, 8);       /*  t0 = .xxa */

this

>           if (!(flags & TCG_BSWAP_IZ)) {
> -            tcg_gen_ext8u_i32(t0, t0);
> +            tcg_gen_ext8u_i32(t0, t0);      /*  t0 = ...a */
>           }
>   
>           if (flags & TCG_BSWAP_OS) {
> -            tcg_gen_shli_i32(t1, arg, 24);
> -            tcg_gen_sari_i32(t1, t1, 16);
> +            tcg_gen_shli_i32(t1, arg, 24);  /*  t1 = b... */
> +            tcg_gen_sari_i32(t1, t1, 16);   /*  t1 = ssb. */
>           } else if (flags & TCG_BSWAP_OZ) {
> -            tcg_gen_ext8u_i32(t1, arg);
> -            tcg_gen_shli_i32(t1, t1, 8);
> +            tcg_gen_ext8u_i32(t1, arg);     /*  t1 = ...b */
> +            tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = ..b. */
>           } else {
> -            tcg_gen_shli_i32(t1, arg, 8);
> +            tcg_gen_shli_i32(t1, arg, 8);   /*  t1 = xab. */

and this slightly inaccurate.

>           }
>   
> -        tcg_gen_or_i32(ret, t0, t1);
> +        tcg_gen_or_i32(ret, t0, t1);        /* ret = ssba */

This one is just confusing, since each of the three cases above have different outputs.


r~




r~
Philippe Mathieu-Daudé Aug. 22, 2023, 5:22 p.m. UTC | #2
On 22/8/23 17:58, Richard Henderson wrote:
> On 8/22/23 02:37, Philippe Mathieu-Daudé wrote:
>> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>> ---
>>   tcg/tcg-op.c | 48 ++++++++++++++++++++++++++++++++----------------
>>   1 file changed, 32 insertions(+), 16 deletions(-)
>>
>> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
>> index 7aadb37756..f164ddc95e 100644
>> --- a/tcg/tcg-op.c
>> +++ b/tcg/tcg-op.c
>> @@ -1021,6 +1021,13 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 
>> arg)
>>       }
>>   }
>> +/*
>> + * bswap16_i32: 16-bit byte swap on the low bits of a 32-bit value.
>> + *
>> + * Byte pattern:  bswap16_i32(xxab) -> ..ba             (TCG_BSWAP_OZ)
>> + *                bswap16_i32(xxab) -> ssba             (TCG_BSWAP_OS)
>> + *                bswap16_i32(xxab) -> xxba
>> + */
> 
> Don't forget TCG_BSWAP_IZ, which means the input is already zero-extended.
> Which makes
> 
>> +                                            /* arg = xxab */
>> +        tcg_gen_shri_i32(t0, arg, 8);       /*  t0 = .xxa */
> 
> this
> 
>>           if (!(flags & TCG_BSWAP_IZ)) {
>> -            tcg_gen_ext8u_i32(t0, t0);
>> +            tcg_gen_ext8u_i32(t0, t0);      /*  t0 = ...a */
>>           }
>>           if (flags & TCG_BSWAP_OS) {
>> -            tcg_gen_shli_i32(t1, arg, 24);
>> -            tcg_gen_sari_i32(t1, t1, 16);
>> +            tcg_gen_shli_i32(t1, arg, 24);  /*  t1 = b... */
>> +            tcg_gen_sari_i32(t1, t1, 16);   /*  t1 = ssb. */
>>           } else if (flags & TCG_BSWAP_OZ) {
>> -            tcg_gen_ext8u_i32(t1, arg);
>> -            tcg_gen_shli_i32(t1, t1, 8);
>> +            tcg_gen_ext8u_i32(t1, arg);     /*  t1 = ...b */
>> +            tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = ..b. */
>>           } else {
>> -            tcg_gen_shli_i32(t1, arg, 8);
>> +            tcg_gen_shli_i32(t1, arg, 8);   /*  t1 = xab. */
> 
> and this slightly inaccurate.
> 
>>           }
>> -        tcg_gen_or_i32(ret, t0, t1);
>> +        tcg_gen_or_i32(ret, t0, t1);        /* ret = ssba */
> 
> This one is just confusing, since each of the three cases above have 
> different outputs.

Is that formatting OK with you?

/*
  * bswap16_i32: 16-bit byte swap on the low bits of a 32-bit value.
  *
  * Byte pattern:  bswap16_i32(..ab) -> ..ba             (TCG_BSWAP_IZ)
  *                bswap16_i32(xxab) -> ..ba             (TCG_BSWAP_OZ)
  *                bswap16_i32(xxab) -> ssba             (TCG_BSWAP_OS)
  *                bswap16_i32(xxab) -> xxba
  */
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
{
     /* Only one extension flag may be present. */
     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));

     if (TCG_TARGET_HAS_bswap16_i32) {
         tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
     } else {
         TCGv_i32 t0 = tcg_temp_ebb_new_i32();
         TCGv_i32 t1 = tcg_temp_ebb_new_i32();

                                             /* arg = xxab (IZ=0) */
                                             /*       ..ab (IZ=1) */
         tcg_gen_shri_i32(t0, arg, 8);       /*  t0 = .xxa (IZ=0) */
                                             /*       ...a (IZ=1) */
         if (!(flags & TCG_BSWAP_IZ)) {
             tcg_gen_ext8u_i32(t0, t0);      /*  t0 = ...a */
         }

         if (flags & TCG_BSWAP_OS) {
             tcg_gen_shli_i32(t1, arg, 24);  /*  t1 = b... */
             tcg_gen_sari_i32(t1, t1, 16);   /*  t1 = ssb. */
         } else if (flags & TCG_BSWAP_OZ) {
             tcg_gen_ext8u_i32(t1, arg);     /*  t1 = ...b */
             tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = ..b. */
         } else {
             tcg_gen_shli_i32(t1, arg, 8);   /*  t1 = xab. (IZ=0) */
                                             /*       .ab. (IZ=1) */
         }

         tcg_gen_or_i32(ret, t0, t1);        /* ret = ..ba (IZ=1 or OZ=1) */
                                             /*     = ssba (OS=1)         */
                                             /*     = xxba (no flag)      */
         tcg_temp_free_i32(t0);
         tcg_temp_free_i32(t1);
     }
}

---
Richard Henderson Aug. 22, 2023, 5:29 p.m. UTC | #3
On 8/22/23 10:22, Philippe Mathieu-Daudé wrote:
>          } else {
>              tcg_gen_shli_i32(t1, arg, 8);   /*  t1 = xab. (IZ=0) */
>                                              /*       .ab. (IZ=1) */
>          }
> 
>          tcg_gen_or_i32(ret, t0, t1);        /* ret = ..ba (IZ=1 or OZ=1) */
>                                              /*     = ssba (OS=1)         */
>                                              /*     = xxba (no flag)      */

Clearer with xaba for no flag?

Otherwise it looks ok.


r~
Philippe Mathieu-Daudé Aug. 22, 2023, 10:04 p.m. UTC | #4
On 22/8/23 19:29, Richard Henderson wrote:
> On 8/22/23 10:22, Philippe Mathieu-Daudé wrote:
>>          } else {
>>              tcg_gen_shli_i32(t1, arg, 8);   /*  t1 = xab. (IZ=0) */
>>                                              /*       .ab. (IZ=1) */
>>          }
>>
>>          tcg_gen_or_i32(ret, t0, t1);        /* ret = ..ba (IZ=1 or 
>> OZ=1) */
>>                                              /*     = ssba 
>> (OS=1)         */
>>                                              /*     = xxba (no 
>> flag)      */
> 
> Clearer with xaba for no flag?

Right :)

> Otherwise it looks ok.

Thanks!
diff mbox series

Patch

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 7aadb37756..f164ddc95e 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1021,6 +1021,13 @@  void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
     }
 }
 
+/*
+ * bswap16_i32: 16-bit byte swap on the low bits of a 32-bit value.
+ *
+ * Byte pattern:  bswap16_i32(xxab) -> ..ba             (TCG_BSWAP_OZ)
+ *                bswap16_i32(xxab) -> ssba             (TCG_BSWAP_OS)
+ *                bswap16_i32(xxab) -> xxba
+ */
 void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
 {
     /* Only one extension flag may be present. */
@@ -1032,22 +1039,23 @@  void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
         TCGv_i32 t0 = tcg_temp_ebb_new_i32();
         TCGv_i32 t1 = tcg_temp_ebb_new_i32();
 
-        tcg_gen_shri_i32(t0, arg, 8);
+                                            /* arg = xxab */
+        tcg_gen_shri_i32(t0, arg, 8);       /*  t0 = .xxa */
         if (!(flags & TCG_BSWAP_IZ)) {
-            tcg_gen_ext8u_i32(t0, t0);
+            tcg_gen_ext8u_i32(t0, t0);      /*  t0 = ...a */
         }
 
         if (flags & TCG_BSWAP_OS) {
-            tcg_gen_shli_i32(t1, arg, 24);
-            tcg_gen_sari_i32(t1, t1, 16);
+            tcg_gen_shli_i32(t1, arg, 24);  /*  t1 = b... */
+            tcg_gen_sari_i32(t1, t1, 16);   /*  t1 = ssb. */
         } else if (flags & TCG_BSWAP_OZ) {
-            tcg_gen_ext8u_i32(t1, arg);
-            tcg_gen_shli_i32(t1, t1, 8);
+            tcg_gen_ext8u_i32(t1, arg);     /*  t1 = ...b */
+            tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = ..b. */
         } else {
-            tcg_gen_shli_i32(t1, arg, 8);
+            tcg_gen_shli_i32(t1, arg, 8);   /*  t1 = xab. */
         }
 
-        tcg_gen_or_i32(ret, t0, t1);
+        tcg_gen_or_i32(ret, t0, t1);        /* ret = ssba */
         tcg_temp_free_i32(t0);
         tcg_temp_free_i32(t1);
     }
@@ -1721,6 +1729,13 @@  void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
     }
 }
 
+/*
+ * bswap16_i64: 16-bit byte swap on the low bits of a 64-bit value.
+ *
+ * Byte pattern:  bswap16_i32(xxxxxxab) -> ......ba     (TCG_BSWAP_OZ)
+ *                bswap16_i32(xxxxxxab) -> ssssssba     (TCG_BSWAP_OS)
+ *                bswap16_i32(xxxxxxab) -> xxxxxxba
+ */
 void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
 {
     /* Only one extension flag may be present. */
@@ -1739,22 +1754,23 @@  void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
 
-        tcg_gen_shri_i64(t0, arg, 8);
+                                            /* arg = xxxxxxab */
+        tcg_gen_shri_i64(t0, arg, 8);       /*  t0 = .xxxxxxa */
         if (!(flags & TCG_BSWAP_IZ)) {
-            tcg_gen_ext8u_i64(t0, t0);
+            tcg_gen_ext8u_i64(t0, t0);      /*  t0 = .......a */
         }
 
         if (flags & TCG_BSWAP_OS) {
-            tcg_gen_shli_i64(t1, arg, 56);
-            tcg_gen_sari_i64(t1, t1, 48);
+            tcg_gen_shli_i64(t1, arg, 56);  /*  t1 = b....... */
+            tcg_gen_sari_i64(t1, t1, 48);   /*  t1 = ssssssb. */
         } else if (flags & TCG_BSWAP_OZ) {
-            tcg_gen_ext8u_i64(t1, arg);
-            tcg_gen_shli_i64(t1, t1, 8);
+            tcg_gen_ext8u_i64(t1, arg);     /*  t1 = .......b */
+            tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = ......b. */
         } else {
-            tcg_gen_shli_i64(t1, arg, 8);
+            tcg_gen_shli_i64(t1, arg, 8);   /*  t1 = xxxxxxb. */
         }
 
-        tcg_gen_or_i64(ret, t0, t1);
+        tcg_gen_or_i64(ret, t0, t1);        /* ret = ssssssba */
         tcg_temp_free_i64(t0);
         tcg_temp_free_i64(t1);
     }