diff mbox series

[21/31] target/arm: Implement SVE2 integer absolute difference and accumulate long

Message ID 20200326230838.31112-22-richard.henderson@linaro.org
State New
Headers show
Series target/arm: SVE2, part 1 | expand

Commit Message

Richard Henderson March 26, 2020, 11:08 p.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/arm/helper-sve.h    | 14 ++++++++++
 target/arm/sve.decode      | 12 +++++++++
 target/arm/sve_helper.c    | 24 +++++++++++++++++
 target/arm/translate-sve.c | 54 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 104 insertions(+)

-- 
2.20.1

Comments

Laurent Desnogues April 13, 2020, 4:15 p.m. UTC | #1
On Fri, Mar 27, 2020 at 12:18 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
[...]
> diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c

> index a3653007ac..a0995d95c7 100644

> --- a/target/arm/sve_helper.c

> +++ b/target/arm/sve_helper.c

> @@ -1216,6 +1216,30 @@ DO_ZZZ_NTB(sve2_eoril_d, uint64_t,     , DO_EOR)

>

>  #undef DO_ZZZ_NTB

>

> +#define DO_ABAL(NAME, TYPE, TYPEN) \

> +void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, uint32_t desc) \

> +{                                                              \

> +    intptr_t i, opr_sz = simd_oprsz(desc);                     \

> +    int sel1 = (simd_data(desc) & 1) * sizeof(TYPE);           \

> +    int sel2 = (simd_data(desc) & 2) * (sizeof(TYPE) / 2);     \

> +    for (i = 0; i < opr_sz; i += sizeof(TYPE)) {               \

> +        TYPE nn = (TYPEN)(*(TYPE *)(vn + i) >> sel1);          \

> +        TYPE mm = (TYPEN)(*(TYPE *)(vm + i) >> sel2);          \

> +        TYPE aa = *(TYPE *)(va + i);                           \

> +        *(TYPE *)(vd + i) = DO_ABD(nn, mm) + aa;               \

> +    }                                                          \

> +}


ABAL is either top or bottom not a mix of two.  So only sel1 is needed
and its multiplicand should be the number of bits of TYPEN.
vd is both a source and a destination so a temporary should be used.

Laurent
Richard Henderson April 13, 2020, 11:19 p.m. UTC | #2
On 4/13/20 9:15 AM, Laurent Desnogues wrote:
> On Fri, Mar 27, 2020 at 12:18 AM Richard Henderson

> <richard.henderson@linaro.org> wrote:

> [...]

>> diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c

>> index a3653007ac..a0995d95c7 100644

>> --- a/target/arm/sve_helper.c

>> +++ b/target/arm/sve_helper.c

>> @@ -1216,6 +1216,30 @@ DO_ZZZ_NTB(sve2_eoril_d, uint64_t,     , DO_EOR)

>>

>>  #undef DO_ZZZ_NTB

>>

>> +#define DO_ABAL(NAME, TYPE, TYPEN) \

>> +void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, uint32_t desc) \

>> +{                                                              \

>> +    intptr_t i, opr_sz = simd_oprsz(desc);                     \

>> +    int sel1 = (simd_data(desc) & 1) * sizeof(TYPE);           \

>> +    int sel2 = (simd_data(desc) & 2) * (sizeof(TYPE) / 2);     \

>> +    for (i = 0; i < opr_sz; i += sizeof(TYPE)) {               \

>> +        TYPE nn = (TYPEN)(*(TYPE *)(vn + i) >> sel1);          \

>> +        TYPE mm = (TYPEN)(*(TYPE *)(vm + i) >> sel2);          \

>> +        TYPE aa = *(TYPE *)(va + i);                           \

>> +        *(TYPE *)(vd + i) = DO_ABD(nn, mm) + aa;               \

>> +    }                                                          \

>> +}

> 

> ABAL is either top or bottom not a mix of two.  So only sel1 is needed

> and its multiplicand should be the number of bits of TYPEN.


Yep.

> vd is both a source and a destination so a temporary should be used.


In what way am I not?  Both sources are read before the write.  The operands
are all in columns of the wide type (unlike the addp case you pointed out).


r~
Laurent Desnogues April 14, 2020, 7:04 a.m. UTC | #3
On Tue, Apr 14, 2020 at 1:19 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> On 4/13/20 9:15 AM, Laurent Desnogues wrote:

> > On Fri, Mar 27, 2020 at 12:18 AM Richard Henderson

> > <richard.henderson@linaro.org> wrote:

> > [...]

> >> diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c

> >> index a3653007ac..a0995d95c7 100644

> >> --- a/target/arm/sve_helper.c

> >> +++ b/target/arm/sve_helper.c

> >> @@ -1216,6 +1216,30 @@ DO_ZZZ_NTB(sve2_eoril_d, uint64_t,     , DO_EOR)

> >>

> >>  #undef DO_ZZZ_NTB

> >>

> >> +#define DO_ABAL(NAME, TYPE, TYPEN) \

> >> +void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, uint32_t desc) \

> >> +{                                                              \

> >> +    intptr_t i, opr_sz = simd_oprsz(desc);                     \

> >> +    int sel1 = (simd_data(desc) & 1) * sizeof(TYPE);           \

> >> +    int sel2 = (simd_data(desc) & 2) * (sizeof(TYPE) / 2);     \

> >> +    for (i = 0; i < opr_sz; i += sizeof(TYPE)) {               \

> >> +        TYPE nn = (TYPEN)(*(TYPE *)(vn + i) >> sel1);          \

> >> +        TYPE mm = (TYPEN)(*(TYPE *)(vm + i) >> sel2);          \

> >> +        TYPE aa = *(TYPE *)(va + i);                           \

> >> +        *(TYPE *)(vd + i) = DO_ABD(nn, mm) + aa;               \

> >> +    }                                                          \

> >> +}

> >

> > ABAL is either top or bottom not a mix of two.  So only sel1 is needed

> > and its multiplicand should be the number of bits of TYPEN.

>

> Yep.

>

> > vd is both a source and a destination so a temporary should be used.

>

> In what way am I not?  Both sources are read before the write.  The operands

> are all in columns of the wide type (unlike the addp case you pointed out).


You're right, sorry.

Laurent
diff mbox series

Patch

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 0e4b4c48da..b48a88135f 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2410,3 +2410,17 @@  DEF_HELPER_FLAGS_4(sve2_sqcadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve2_sqcadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve2_sqcadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve2_sqcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sabal_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sabal_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sabal_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uabal_h, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uabal_s, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uabal_d, TCG_CALL_NO_RWG,
+                   void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 5fb4b5f977..f66a6c242f 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -70,6 +70,7 @@ 
 &rpr_s          rd pg rn s
 &rprr_s         rd pg rn rm s
 &rprr_esz       rd pg rn rm esz
+&rrrr_esz       rd ra rn rm esz
 &rprrr_esz      rd pg rn rm ra esz
 &rpri_esz       rd pg rn imm esz
 &ptrue          rd esz pat s
@@ -120,6 +121,10 @@ 
 @rdn_i8s        ........ esz:2 ...... ... imm:s8 rd:5 \
                 &rri_esz rn=%reg_movprfx
 
+# Four operand, vector element size
+@rda_rn_rm      ........ esz:2 . rm:5 ... ... rn:5 rd:5 \
+                &rrrr_esz ra=%reg_movprfx
+
 # Three operand with "memory" size, aka immediate left shift
 @rd_rn_msz_rm   ........ ... rm:5 .... imm:2 rn:5 rd:5          &rrri
 
@@ -1235,3 +1240,10 @@  CADD_rot90      01000101 .. 00000 0 11011 0 ..... .....  @rdn_rm
 CADD_rot270     01000101 .. 00000 0 11011 1 ..... .....  @rdn_rm
 SQCADD_rot90    01000101 .. 00000 1 11011 0 ..... .....  @rdn_rm
 SQCADD_rot270   01000101 .. 00000 1 11011 1 ..... .....  @rdn_rm
+
+## SVE2 integer absolute difference and accumulate long
+
+SABALB          01000101 .. 0 ..... 1100 00 ..... .....  @rda_rn_rm
+SABALT          01000101 .. 0 ..... 1100 01 ..... .....  @rda_rn_rm
+UABALB          01000101 .. 0 ..... 1100 10 ..... .....  @rda_rn_rm
+UABALT          01000101 .. 0 ..... 1100 11 ..... .....  @rda_rn_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index a3653007ac..a0995d95c7 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1216,6 +1216,30 @@  DO_ZZZ_NTB(sve2_eoril_d, uint64_t,     , DO_EOR)
 
 #undef DO_ZZZ_NTB
 
+/* [SU]ABAL[BT]: d = a + |n - m|; desc data bit 0 selects the top halves. */
+#define DO_ABAL(NAME, TYPE, TYPEN) \
+void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, uint32_t desc) \
+{ /* sel: bit shift, 0 (bottom) or width of TYPEN (top) */     \
+    intptr_t i, opr_sz = simd_oprsz(desc);                     \
+    int sel = (simd_data(desc) & 1) * sizeof(TYPEN) * 8;       \
+    for (i = 0; i < opr_sz; i += sizeof(TYPE)) {               \
+        TYPE nn = (TYPEN)(*(TYPE *)(vn + i) >> sel);           \
+        TYPE mm = (TYPEN)(*(TYPE *)(vm + i) >> sel);           \
+        TYPE aa = *(TYPE *)(va + i);                           \
+        *(TYPE *)(vd + i) = DO_ABD(nn, mm) + aa;               \
+    }                                                          \
+}
+
+DO_ABAL(sve2_sabal_h, int16_t, int8_t)
+DO_ABAL(sve2_sabal_s, int32_t, int16_t)
+DO_ABAL(sve2_sabal_d, int64_t, int32_t)
+
+DO_ABAL(sve2_uabal_h, uint16_t, uint8_t)
+DO_ABAL(sve2_uabal_s, uint32_t, uint16_t)
+DO_ABAL(sve2_uabal_d, uint64_t, uint32_t)
+
+#undef DO_ABAL
+
 #define DO_BITPERM(NAME, TYPE, OP) \
 void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
 {                                                              \
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 3b0aa86e79..c6161d2ce2 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -6240,3 +6240,57 @@  static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
 {
     return do_cadd(s, a, true, true);
 }
+
+static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
+                             gen_helper_gvec_4 *fn, int data)
+{   /* Emit a call to out-of-line helper fn on registers rd, ra, rn, rm. */
+    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
+        return false;   /* no helper supplied, or aa64_sve2 feature absent */
+    }
+    if (sve_access_check(s)) {  /* if the check fails, emit nothing */
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->ra),
+                           vec_full_reg_offset(s, a->rn),
+                           vec_full_reg_offset(s, a->rm),
+                           vsz, vsz, data, fn);  /* data forwarded unchanged */
+    }
+    return true;    /* also returned when the access check failed */
+}
+
+static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
+{   /* Pick the [su]abal helper for a->esz; sel is passed on as helper data. */
+    static gen_helper_gvec_4 * const fns[2][3] = {  /* [uns][esz - 1] */
+        { gen_helper_sve2_sabal_h,
+          gen_helper_sve2_sabal_s,
+          gen_helper_sve2_sabal_d },
+        { gen_helper_sve2_uabal_h,
+          gen_helper_sve2_uabal_s,
+          gen_helper_sve2_uabal_d },
+    };
+
+    if (a->esz == 0) {  /* no _b helper exists: the table starts at _h */
+        return false;
+    }
+    return do_sve2_zzzz_ool(s, a, fns[uns][a->esz - 1], sel);
+}
+
+static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
+{
+    return do_abal(s, a, false, false);  /* signed helpers, sel = 0 ("B") */
+}
+
+static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
+{
+    return do_abal(s, a, false, true);   /* signed helpers, sel = 1 ("T") */
+}
+
+static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
+{
+    return do_abal(s, a, true, false);   /* unsigned helpers, sel = 0 ("B") */
+}
+
+static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
+{
+    return do_abal(s, a, true, true);    /* unsigned helpers, sel = 1 ("T") */
+}