@@ -584,6 +584,20 @@ DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
@@ -3136,6 +3136,50 @@ DO_ZZI(UMIN, umin)
#undef DO_ZZI
+/*
+ *** SVE Floating Point Multiply-Add Indexed Group
+ */
+
+static void trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
+{
+ static gen_helper_gvec_4_ptr * const fns[3] = {
+ gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s,
+ gen_helper_gvec_fmla_idx_d,
+ };
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
+
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vec_full_reg_offset(s, a->ra),
+ status, vsz, vsz, a->index * 2 + a->sub,
+ fns[a->esz - 1]);
+ tcg_temp_free_ptr(status);
+}
+
+/*
+ *** SVE Floating Point Multiply Indexed Group
+ */
+
+static void trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
+{
+ static gen_helper_gvec_3_ptr * const fns[3] = {
+ gen_helper_gvec_fmul_idx_h,
+ gen_helper_gvec_fmul_idx_s,
+ gen_helper_gvec_fmul_idx_d,
+ };
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
+
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ status, vsz, vsz, a->index, fns[a->esz - 1]);
+ tcg_temp_free_ptr(status);
+}
+
/*
*** SVE Floating Point Accumulating Reduction Group
*/
@@ -24,6 +24,22 @@
#include "fpu/softfloat.h"
+/* Note that vector data is stored in host-endian 64-bit chunks,
+ so addressing units smaller than that needs a host-endian fixup. */
+#ifdef HOST_WORDS_BIGENDIAN
+#define H1(x) ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x) ((x) ^ 3)
+#define H4(x) ((x) ^ 1)
+#else
+#define H1(x) (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x) (x)
+#define H4(x) (x)
+#endif
+
/* Floating-point trigonometric starting value.
* See the ARM ARM pseudocode function FPTrigSMul.
*/
@@ -92,3 +108,51 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
#endif
#undef DO_3OP
+
+/* For the indexed ops, SVE applies the index per 128-bit vector segment.
+ * For AdvSIMD, there is of course only one such vector segment.
+ */
+
+#define DO_MUL_IDX(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = TYPE##_mul(n[i + j], mm, stat); \
+ } \
+ } \
+}
+
+DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
+DO_MUL_IDX(gvec_fmul_idx_s, float32, H4)
+DO_MUL_IDX(gvec_fmul_idx_d, float64, )
+
+#undef DO_MUL_IDX
+
+#define DO_FMLA_IDX(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
+ void *stat, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
+ intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
+ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
+ op1_neg <<= (8 * sizeof(TYPE) - 1); \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
+ mm, a[i + j], 0, stat); \
+ } \
+ } \
+}
+
+DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
+DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
+DO_FMLA_IDX(gvec_fmla_idx_d, float64, )
+
+#undef DO_FMLA_IDX
@@ -30,6 +30,7 @@
%preg4_5 5:4
%size_23 23:2
%dtype_23_13 23:2 13:2
+%index3_22_19 22:1 19:2
# A combination of tsz:imm3 -- extract esize.
%tszimm_esz 22:2 5:5 !function=tszimm_esz
@@ -720,6 +721,24 @@ UMIN_zzi 00100101 .. 101 011 110 ........ ..... @rdn_i8u
# SVE integer multiply immediate (unpredicated)
MUL_zzi 00100101 .. 110 000 110 ........ ..... @rdn_i8s
+### SVE FP Multiply-Add Indexed Group
+
+# SVE floating-point multiply-add (indexed)
+FMLA_zzxz 01100100 0.1 .. rm:3 00000 sub:1 rn:5 rd:5 \
+ ra=%reg_movprfx index=%index3_22_19 esz=1
+FMLA_zzxz 01100100 101 index:2 rm:3 00000 sub:1 rn:5 rd:5 \
+ ra=%reg_movprfx esz=2
+FMLA_zzxz 01100100 111 index:1 rm:4 00000 sub:1 rn:5 rd:5 \
+ ra=%reg_movprfx esz=3
+
+### SVE FP Multiply Indexed Group
+
+# SVE floating-point multiply (indexed)
+FMUL_zzx 01100100 0.1 .. rm:3 001000 rn:5 rd:5 \
+ index=%index3_22_19 esz=1
+FMUL_zzx 01100100 101 index:2 rm:3 001000 rn:5 rd:5 esz=2
+FMUL_zzx 01100100 111 index:1 rm:4 001000 rn:5 rd:5 esz=3
+
### SVE FP Accumulating Reduction Group
# SVE floating-point serial reduction (predicated)
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/helper.h | 14 ++++++++++ target/arm/translate-sve.c | 44 +++++++++++++++++++++++++++++++ target/arm/vec_helper.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++ target/arm/sve.decode | 19 ++++++++++++++ 4 files changed, 141 insertions(+) -- 2.14.3