diff mbox series

[v1,06/12] target/arm: Decode aa32 armv8.1 three same

Message ID 20171004184325.24157-7-richard.henderson@linaro.org
State New
Headers show
Series ARM v8.1 simd + v8.3 complex insns | expand

Commit Message

Richard Henderson Oct. 4, 2017, 6:43 p.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/arm/translate.c | 83 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 64 insertions(+), 19 deletions(-)

-- 
2.13.6

Comments

Alex Bennée Nov. 13, 2017, 4:55 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/arm/translate.c | 83 ++++++++++++++++++++++++++++++++++++++------------

>  1 file changed, 64 insertions(+), 19 deletions(-)

>

> diff --git a/target/arm/translate.c b/target/arm/translate.c

> index ab1a12a1b8..0cd58710b3 100644

> --- a/target/arm/translate.c

> +++ b/target/arm/translate.c

> @@ -25,6 +25,7 @@

>  #include "disas/disas.h"

>  #include "exec/exec-all.h"

>  #include "tcg-op.h"

> +#include "tcg-op-gvec.h"

>  #include "qemu/log.h"

>  #include "qemu/bitops.h"

>  #include "arm_ldst.h"

> @@ -5334,9 +5335,9 @@ static void gen_neon_narrow_op(int op, int u, int size,

>  #define NEON_3R_VPMAX 20

>  #define NEON_3R_VPMIN 21

>  #define NEON_3R_VQDMULH_VQRDMULH 22

> -#define NEON_3R_VPADD 23

> +#define NEON_3R_VPADD_VQRDMLAH 23

>  #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */

> -#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */

> +#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS : float fused multiply-add */

>  #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */

>  #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */

>  #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */

> @@ -5368,9 +5369,9 @@ static const uint8_t neon_3r_sizes[] = {

>      [NEON_3R_VPMAX] = 0x7,

>      [NEON_3R_VPMIN] = 0x7,

>      [NEON_3R_VQDMULH_VQRDMULH] = 0x6,

> -    [NEON_3R_VPADD] = 0x7,

> +    [NEON_3R_VPADD_VQRDMLAH] = 0x7,

>      [NEON_3R_SHA] = 0xf, /* size field encodes op type */

> -    [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */

> +    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */

>      [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */

>      [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */

>      [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */

> @@ -5556,6 +5557,7 @@ static const uint8_t neon_2rm_sizes[] = {

>

>  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>  {

> +    void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

>      int op;

>      int q;

>      int rd, rn, rm;

> @@ -5600,12 +5602,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>          if (q && ((rd | rn | rm) & 1)) {

>              return 1;

>          }

> -        /*

> -         * The SHA-1/SHA-256 3-register instructions require special treatment

> -         * here, as their size field is overloaded as an op type selector, and

> -         * they all consume their input in a single pass.

> -         */

> -        if (op == NEON_3R_SHA) {

> +        switch (op) {

> +        case NEON_3R_SHA:

> +            /* The SHA-1/SHA-256 3-register instructions require special

> +             * treatment here, as their size field is overloaded as an

> +             * op type selector, and they all consume their input in a

> +             * single pass.  */

>              if (!q) {

>                  return 1;

>              }

> @@ -5642,6 +5644,53 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>              tcg_temp_free_i32(tmp2);

>              tcg_temp_free_i32(tmp3);

>              return 0;

> +

> +        case NEON_3R_VPADD_VQRDMLAH:

> +            if (!u) {

> +                break;  /* VPADD */

> +            }

> +            /* VQRDMLAH */

> +            switch (size) {

> +            case 1:

> +                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s16;

> +                break;

> +            case 2:

> +                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s32;

> +                break;

> +            default:

> +                return 1;

> +            }

> +        do_vqrdmlx:

> +            if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {

> +                int opr_sz = (1 + q) * 8;

> +                tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),

> +                                   vfp_reg_offset(1, rn),

> +                                   vfp_reg_offset(1, rm), cpu_env,

> +                                   opr_sz, opr_sz, 0, fn_gvec_ptr);

> +                return 0;

> +            }

> +            return 1;

> +

> +        case NEON_3R_VFM_VQRDMLSH:

> +            if (!u) {

> +                /* VFM, VFMS */

> +                if ((5 & (1 << size)) == 0) {

> +                    return 1;

> +                }

> +                break;

> +            }

> +            /* VQRDMLSH */

> +            switch (size) {

> +            case 1:

> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;

> +                break;

> +            case 2:

> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;

> +                break;

> +            default:

> +                return 1;

> +            }

> +            goto do_vqrdmlx;


Could we not take the opportunity to re-factor out the common bit rather
than make this mega function even more byzantine?

>          }

>          if (size == 3 && op != NEON_3R_LOGIC) {

>              /* 64-bit element instructions. */

> @@ -5727,11 +5776,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>                  rm = rtmp;

>              }

>              break;

> -        case NEON_3R_VPADD:

> -            if (u) {

> -                return 1;

> -            }

> -            /* Fall through */

> +        case NEON_3R_VPADD_VQRDMLAH:

>          case NEON_3R_VPMAX:

>          case NEON_3R_VPMIN:

>              pairwise = 1;

> @@ -5765,8 +5810,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>                  return 1;

>              }

>              break;

> -        case NEON_3R_VFM:

> -            if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {

> +        case NEON_3R_VFM_VQRDMLSH:

> +            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {

>                  return 1;

>              }

>              break;

> @@ -5963,7 +6008,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>                  }

>              }

>              break;

> -        case NEON_3R_VPADD:

> +        case NEON_3R_VPADD_VQRDMLAH:

>              switch (size) {

>              case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;

>              case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;

> @@ -6062,7 +6107,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>                }

>              }

>              break;

> -        case NEON_3R_VFM:

> +        case NEON_3R_VFM_VQRDMLSH:

>          {

>              /* VFMA, VFMS: fused multiply-add */

>              TCGv_ptr fpstatus = get_fpstatus_ptr(1);



--
Alex Bennée
Richard Henderson Nov. 14, 2017, 8:46 a.m. UTC | #2
On 11/13/2017 05:55 PM, Alex Bennée wrote:
>> +        case NEON_3R_VFM_VQRDMLSH:

>> +            if (!u) {

>> +                /* VFM, VFMS */

>> +                if ((5 & (1 << size)) == 0) {

>> +                    return 1;

>> +                }

>> +                break;

>> +            }

>> +            /* VQRDMLSH */

>> +            switch (size) {

>> +            case 1:

>> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;

>> +                break;

>> +            case 2:

>> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;

>> +                break;

>> +            default:

>> +                return 1;

>> +            }

>> +            goto do_vqrdmlx;

> Could we not take the opportunity to re-factor out the common bit rather

> than make this mega function even more byzantine?


What, specifically, did you have in mind?


r~
Alex Bennée Nov. 14, 2017, 10:06 a.m. UTC | #3
Richard Henderson <richard.henderson@linaro.org> writes:

> On 11/13/2017 05:55 PM, Alex Bennée wrote:

>>> +        case NEON_3R_VFM_VQRDMLSH:

>>> +            if (!u) {

>>> +                /* VFM, VFMS */

>>> +                if ((5 & (1 << size)) == 0) {

>>> +                    return 1;

>>> +                }

>>> +                break;

>>> +            }

>>> +            /* VQRDMLSH */

>>> +            switch (size) {

>>> +            case 1:

>>> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;

>>> +                break;

>>> +            case 2:

>>> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;

>>> +                break;

>>> +            default:

>>> +                return 1;

>>> +            }

>>> +            goto do_vqrdmlx;

>> Could we not take the opportunity to re-factor out the common bit rather

>> than make this mega function even more byzantine?

>

> What, specifically, did you have in mind?


Something like:

translate: use helper to avoid goto shenanigans

1 file changed, 18 insertions(+), 17 deletions(-)
target/arm/translate.c | 35 ++++++++++++++++++-----------------

modified   target/arm/translate.c
@@ -5576,6 +5576,20 @@ static const uint8_t neon_2rm_sizes[] = {
     [NEON_2RM_VCVT_UF] = 0x4,
 };

+/* expand v8.1 simd helper */
+static int do_qrdml(DisasContext *s, gen_helper_gvec_3_ptr *fn, int q, int rd, int rn, int rm)
+{
+    if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {
+        int opr_sz = (1 + q) * 8;
+        tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
+                           vfp_reg_offset(1, rn),
+                           vfp_reg_offset(1, rm), cpu_env,
+                           opr_sz, opr_sz, 0, fn);
+        return 0;
+    }
+    return 1;
+}
+
 /* Translate a NEON data processing instruction.  Return nonzero if the
    instruction is invalid.
    We process data in a mixture of 32-bit and 64-bit chunks.
@@ -5583,7 +5597,6 @@ static const uint8_t neon_2rm_sizes[] = {

 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
 {
-    void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
     int op;
     int q;
     int rd, rn, rm;
@@ -5678,24 +5691,13 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             /* VQRDMLAH */
             switch (size) {
             case 1:
-                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s16;
-                break;
+                return do_qrdml(s, gen_helper_gvec_qrdmlah_s16, q, rd, rn, rm);
             case 2:
-                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s32;
+                return do_qrdml(s, gen_helper_gvec_qrdmlah_s32, q, rd, rn, rm);
                 break;
             default:
                 return 1;
             }
-        do_vqrdmlx:
-            if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {
-                int opr_sz = (1 + q) * 8;
-                tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
-                                   vfp_reg_offset(1, rn),
-                                   vfp_reg_offset(1, rm), cpu_env,
-                                   opr_sz, opr_sz, 0, fn_gvec_ptr);
-                return 0;
-            }
-            return 1;

         case NEON_3R_VFM_VQRDMLSH:
             if (!u) {
@@ -5708,15 +5710,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             /* VQRDMLSH */
             switch (size) {
             case 1:
-                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;
+                return do_qrdml(s, gen_helper_gvec_qrdmlsh_s16, q, rd, rn, rm);
                 break;
             case 2:
-                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;
+                return do_qrdml(s, gen_helper_gvec_qrdmlsh_s32, q, rd, rn, rm);
                 break;
             default:
                 return 1;
             }
-            goto do_vqrdmlx;
         }
         if (size == 3 && op != NEON_3R_LOGIC) {
             /* 64-bit element instructions. */


--
Alex Bennée
Richard Henderson Nov. 14, 2017, 10:46 a.m. UTC | #4
On 11/14/2017 11:06 AM, Alex Bennée wrote:
> 

> Richard Henderson <richard.henderson@linaro.org> writes:

> 

>> On 11/13/2017 05:55 PM, Alex Bennée wrote:

>>>> +        case NEON_3R_VFM_VQRDMLSH:

>>>> +            if (!u) {

>>>> +                /* VFM, VFMS */

>>>> +                if ((5 & (1 << size)) == 0) {

>>>> +                    return 1;

>>>> +                }

>>>> +                break;

>>>> +            }

>>>> +            /* VQRDMLSH */

>>>> +            switch (size) {

>>>> +            case 1:

>>>> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;

>>>> +                break;

>>>> +            case 2:

>>>> +                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;

>>>> +                break;

>>>> +            default:

>>>> +                return 1;

>>>> +            }

>>>> +            goto do_vqrdmlx;

>>> Could we not take the opportunity to re-factor out the common bit rather

>>> than make this mega function even more byzantine?

>>

>> What, specifically, did you have in mind?

> 

> Something like:

> 

> translate: use helper to avoid goto shenanigans


Thanks, this certainly looks better.


r~

> 

> 1 file changed, 18 insertions(+), 17 deletions(-)

> target/arm/translate.c | 35 ++++++++++++++++++-----------------

> 

> modified   target/arm/translate.c

> @@ -5576,6 +5576,20 @@ static const uint8_t neon_2rm_sizes[] = {

>      [NEON_2RM_VCVT_UF] = 0x4,

>  };

> 

> +/* expand v8.1 simd helper */

> +static int do_qrdml(DisasContext *s, gen_helper_gvec_3_ptr *fn, int q, int rd, int rn, int rm)

> +{

> +    if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {

> +        int opr_sz = (1 + q) * 8;

> +        tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),

> +                           vfp_reg_offset(1, rn),

> +                           vfp_reg_offset(1, rm), cpu_env,

> +                           opr_sz, opr_sz, 0, fn);

> +        return 0;

> +    }

> +    return 1;

> +}

> +

>  /* Translate a NEON data processing instruction.  Return nonzero if the

>     instruction is invalid.

>     We process data in a mixture of 32-bit and 64-bit chunks.

> @@ -5583,7 +5597,6 @@ static const uint8_t neon_2rm_sizes[] = {

> 

>  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>  {

> -    void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

>      int op;

>      int q;

>      int rd, rn, rm;

> @@ -5678,24 +5691,13 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>              /* VQRDMLAH */

>              switch (size) {

>              case 1:

> -                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s16;

> -                break;

> +                return do_qrdml(s, gen_helper_gvec_qrdmlah_s16, q, rd, rn, rm);

>              case 2:

> -                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s32;

> +                return do_qrdml(s, gen_helper_gvec_qrdmlah_s32, q, rd, rn, rm);

>                  break;

>              default:

>                  return 1;

>              }

> -        do_vqrdmlx:

> -            if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {

> -                int opr_sz = (1 + q) * 8;

> -                tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),

> -                                   vfp_reg_offset(1, rn),

> -                                   vfp_reg_offset(1, rm), cpu_env,

> -                                   opr_sz, opr_sz, 0, fn_gvec_ptr);

> -                return 0;

> -            }

> -            return 1;

> 

>          case NEON_3R_VFM_VQRDMLSH:

>              if (!u) {

> @@ -5708,15 +5710,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>              /* VQRDMLSH */

>              switch (size) {

>              case 1:

> -                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;

> +                return do_qrdml(s, gen_helper_gvec_qrdmlsh_s16, q, rd, rn, rm);

>                  break;

>              case 2:

> -                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;

> +                return do_qrdml(s, gen_helper_gvec_qrdmlsh_s32, q, rd, rn, rm);

>                  break;

>              default:

>                  return 1;

>              }

> -            goto do_vqrdmlx;

>          }

>          if (size == 3 && op != NEON_3R_LOGIC) {

>              /* 64-bit element instructions. */

> 

> 

> --

> Alex Bennée

>
diff mbox series

Patch

diff --git a/target/arm/translate.c b/target/arm/translate.c
index ab1a12a1b8..0cd58710b3 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -25,6 +25,7 @@ 
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg-op.h"
+#include "tcg-op-gvec.h"
 #include "qemu/log.h"
 #include "qemu/bitops.h"
 #include "arm_ldst.h"
@@ -5334,9 +5335,9 @@  static void gen_neon_narrow_op(int op, int u, int size,
 #define NEON_3R_VPMAX 20
 #define NEON_3R_VPMIN 21
 #define NEON_3R_VQDMULH_VQRDMULH 22
-#define NEON_3R_VPADD 23
+#define NEON_3R_VPADD_VQRDMLAH 23
 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
-#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
+#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS (float fused multiply-add), VQRDMLSH */
 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
@@ -5368,9 +5369,9 @@  static const uint8_t neon_3r_sizes[] = {
     [NEON_3R_VPMAX] = 0x7,
     [NEON_3R_VPMIN] = 0x7,
     [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
-    [NEON_3R_VPADD] = 0x7,
+    [NEON_3R_VPADD_VQRDMLAH] = 0x7,
     [NEON_3R_SHA] = 0xf, /* size field encodes op type */
-    [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
+    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
     [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
@@ -5556,6 +5557,7 @@  static const uint8_t neon_2rm_sizes[] = {
 
 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
 {
+    void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
     int op;
     int q;
     int rd, rn, rm;
@@ -5600,12 +5602,12 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         if (q && ((rd | rn | rm) & 1)) {
             return 1;
         }
-        /*
-         * The SHA-1/SHA-256 3-register instructions require special treatment
-         * here, as their size field is overloaded as an op type selector, and
-         * they all consume their input in a single pass.
-         */
-        if (op == NEON_3R_SHA) {
+        switch (op) {
+        case NEON_3R_SHA:
+            /* The SHA-1/SHA-256 3-register instructions require special
+             * treatment here, as their size field is overloaded as an
+             * op type selector, and they all consume their input in a
+             * single pass.  */
             if (!q) {
                 return 1;
             }
@@ -5642,6 +5644,53 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             tcg_temp_free_i32(tmp2);
             tcg_temp_free_i32(tmp3);
             return 0;
+
+        case NEON_3R_VPADD_VQRDMLAH:
+            if (!u) {
+                break;  /* VPADD */
+            }
+            /* VQRDMLAH */
+            switch (size) {
+            case 1:
+                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s16;
+                break;
+            case 2:
+                fn_gvec_ptr = gen_helper_gvec_qrdmlah_s32;
+                break;
+            default:
+                return 1;
+            }
+        do_vqrdmlx:
+            if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {
+                int opr_sz = (1 + q) * 8;
+                tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
+                                   vfp_reg_offset(1, rn),
+                                   vfp_reg_offset(1, rm), cpu_env,
+                                   opr_sz, opr_sz, 0, fn_gvec_ptr);
+                return 0;
+            }
+            return 1;
+
+        case NEON_3R_VFM_VQRDMLSH:
+            if (!u) {
+                /* VFM, VFMS */
+                if ((5 & (1 << size)) == 0) {
+                    return 1;
+                }
+                break;
+            }
+            /* VQRDMLSH */
+            switch (size) {
+            case 1:
+                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s16;
+                break;
+            case 2:
+                fn_gvec_ptr = gen_helper_gvec_qrdmlsh_s32;
+                break;
+            default:
+                return 1;
+            }
+            goto do_vqrdmlx;
         }
         if (size == 3 && op != NEON_3R_LOGIC) {
             /* 64-bit element instructions. */
@@ -5727,11 +5776,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 rm = rtmp;
             }
             break;
-        case NEON_3R_VPADD:
-            if (u) {
-                return 1;
-            }
-            /* Fall through */
+        case NEON_3R_VPADD_VQRDMLAH:
         case NEON_3R_VPMAX:
         case NEON_3R_VPMIN:
             pairwise = 1;
@@ -5765,8 +5810,8 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 return 1;
             }
             break;
-        case NEON_3R_VFM:
-            if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {
+        case NEON_3R_VFM_VQRDMLSH:
+            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
                 return 1;
             }
             break;
@@ -5963,7 +6008,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 }
             }
             break;
-        case NEON_3R_VPADD:
+        case NEON_3R_VPADD_VQRDMLAH:
             switch (size) {
             case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
             case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
@@ -6062,7 +6107,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
               }
             }
             break;
-        case NEON_3R_VFM:
+        case NEON_3R_VFM_VQRDMLSH:
         {
             /* VFMA, VFMS: fused multiply-add */
             TCGv_ptr fpstatus = get_fpstatus_ptr(1);