diff mbox series

target/s390x: Use tcg_gen_gvec_bitsel

Message ID 20190603165735.8934-1-richard.henderson@linaro.org
State New
Headers show
Series target/s390x: Use tcg_gen_gvec_bitsel | expand

Commit Message

Richard Henderson June 3, 2019, 4:57 p.m. UTC
This replaces the target-specific implementations for VSEL.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/s390x/translate_vx.inc.c | 38 ++++++---------------------------
 1 file changed, 6 insertions(+), 32 deletions(-)

-- 
2.17.1

Comments

David Hildenbrand June 3, 2019, 7:41 p.m. UTC | #1
On 03.06.19 18:57, Richard Henderson wrote:
> This replaces the target-specific implementations for VSEL.

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/s390x/translate_vx.inc.c | 38 ++++++---------------------------

>  1 file changed, 6 insertions(+), 32 deletions(-)

> 

> diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c

> index 7e0bfcb190..a8603cbfd6 100644

> --- a/target/s390x/translate_vx.inc.c

> +++ b/target/s390x/translate_vx.inc.c

> @@ -233,6 +233,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,

>  #define gen_gvec_fn_3(fn, es, v1, v2, v3) \

>      tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \

>                        vec_full_reg_offset(v3), 16, 16)

> +#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \

> +    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \

> +                      vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16)

>  

>  /*

>   * Helper to carry out a 128 bit vector computation using 2 i64 values per

> @@ -903,40 +906,11 @@ static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)

>      return DISAS_NEXT;

>  }

>  

> -static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)

> -{

> -    TCGv_i64 t = tcg_temp_new_i64();

> -

> -    /* bit in c not set -> copy bit from b */

> -    tcg_gen_andc_i64(t, b, c);

> -    /* bit in c set -> copy bit from a */

> -    tcg_gen_and_i64(d, a, c);

> -    /* merge the results */

> -    tcg_gen_or_i64(d, d, t);

> -    tcg_temp_free_i64(t);

> -}

> -

> -static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,

> -                        TCGv_vec c)

> -{

> -    TCGv_vec t = tcg_temp_new_vec_matching(d);

> -

> -    tcg_gen_andc_vec(vece, t, b, c);

> -    tcg_gen_and_vec(vece, d, a, c);

> -    tcg_gen_or_vec(vece, d, d, t);

> -    tcg_temp_free_vec(t);

> -}

> -


Comparing against tcg_gen_bitsel_i64()

1. a and c are switched
2. b is _not_ switched (and() and andc() are switched)

Shouldn't this be

gen_gvec_fn_4(bitsel, ES_8, get_field(s->fields, v1),
              get_field(s->fields, v4), get_field(s->fields, v3),
              get_field(s->fields, v2));

?

Maybe I am missing something. It was a long day :)

Should I send this patch with the next s390x/tcg pull request?

-- 

Thanks,

David / dhildenb
Richard Henderson June 3, 2019, 9:59 p.m. UTC | #2
On 6/3/19 2:41 PM, David Hildenbrand wrote:
>> -static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,

>> -                        TCGv_vec c)

>> -{

>> -    TCGv_vec t = tcg_temp_new_vec_matching(d);

>> -

>> -    tcg_gen_andc_vec(vece, t, b, c);

>> -    tcg_gen_and_vec(vece, d, a, c);

>> -    tcg_gen_or_vec(vece, d, d, t);

>> -    tcg_temp_free_vec(t);

>> -}

>> -

> 

> Comparing against tcg_gen_bitsel_i64()

> 

> 1. a and c are switched

> 2. b is _not_ switched (and() and andc() are switched)


Not quite.  {a,b,c} from your s390 implementation becomes {c,a,b} for tcg.

Running tests would show for sure; I guess you have those later in your vx
patch set?

> Should I send this patch with the next s390x/tcg pull request?


Yes please.


r~
David Hildenbrand June 4, 2019, 7:44 a.m. UTC | #3
On 03.06.19 23:59, Richard Henderson wrote:
> On 6/3/19 2:41 PM, David Hildenbrand wrote:

>>> -static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,

>>> -                        TCGv_vec c)

>>> -{

>>> -    TCGv_vec t = tcg_temp_new_vec_matching(d);

>>> -

>>> -    tcg_gen_andc_vec(vece, t, b, c);

>>> -    tcg_gen_and_vec(vece, d, a, c);

>>> -    tcg_gen_or_vec(vece, d, d, t);

>>> -    tcg_temp_free_vec(t);

>>> -}

>>> -

>>

>> Comparing against tcg_gen_bitsel_i64()

>>

>> 1. a and c are switched

>> 2. b is _not_ switched (and() and andc() are switched)

> 

> Not quite.  {a,b,c} from your s390 implementation becomes {c,a,b} for tcg.

> 

> Running tests would show for sure; I guess you have those later in your vx

> patch set?


I only have a small selection of tests for now

84fa6254e4 tests/tcg: target/s390x: Test VECTOR ADD *
9e9a9e5246 tests/tcg: target/s390x: Test VECTOR UNPACK *
9d2b7184c6 tests/tcg: target/s390x: Test VECTOR PACK *
4872c1b6bd tests/tcg: target/s390x: Test VECTOR MERGE (HIGH|LOW)
914502d1d1 tests/tcg: target/s390x: Test VECTOR LOAD AND REPLICATE
7d01bbca17 tests/tcg: target/s390x: Test VECTOR GENERATE MASK
dc107ebdf7 tests/tcg: target/s390x: Test VECTOR GENERATE BYTE MASK
17212a732d tests/tcg: target/s390x: Test VECTOR LOAD GR FROM VR ELEMENT
f120e93c15 tests/tcg: target/s390x: Test VECTOR GATHER ELEMENT

But I just added a simple

6c43fe6a8c tests/tcg: target/s390x: Test VECTOR SELECT

That test confirmed that your implementation works correctly. :)

> 

>> Should I send this patch with the next s390x/tcg pull request?

> 

> Yes please.


I'll change the subject to "s390x/tcg: Use tcg_gen_gvec_bitsel for
VECTOR SELECT" if you don't object. Thanks!

> 

> 

> r~

> 



-- 

Thanks,

David / dhildenb
diff mbox series

Patch

diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 7e0bfcb190..a8603cbfd6 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -233,6 +233,9 @@  static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
 #define gen_gvec_fn_3(fn, es, v1, v2, v3) \
     tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16)
+#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \
+    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                      vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16)
 
 /*
  * Helper to carry out a 128 bit vector computation using 2 i64 values per
@@ -903,40 +906,11 @@  static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
-static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    /* bit in c not set -> copy bit from b */
-    tcg_gen_andc_i64(t, b, c);
-    /* bit in c set -> copy bit from a */
-    tcg_gen_and_i64(d, a, c);
-    /* merge the results */
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
-                        TCGv_vec c)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_andc_vec(vece, t, b, c);
-    tcg_gen_and_vec(vece, d, a, c);
-    tcg_gen_or_vec(vece, d, d, t);
-    tcg_temp_free_vec(t);
-}
-
 static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
 {
-    static const GVecGen4 gvec_op = {
-        .fni8 = gen_sel_i64,
-        .fniv = gen_sel_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-    };
-
-    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
-               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
+    gen_gvec_fn_4(bitsel, ES_8, get_field(s->fields, v1),
+                  get_field(s->fields, v4), get_field(s->fields, v2),
+                  get_field(s->fields, v3));
     return DISAS_NEXT;
 }