@@ -1475,6 +1475,27 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
@@ -4802,7 +4802,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint16_t neg1, uint16_t neg3)
+ uint16_t neg1, uint16_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4817,7 +4817,7 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
e2 = *(uint16_t *)(vm + H1_2(i));
e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
- r = float16_muladd(e1, e2, e3, 0, status);
+ r = float16_muladd(e1, e2, e3, flags, status);
*(uint16_t *)(vd + H1_2(i)) = r;
}
} while (i & 63);
@@ -4827,30 +4827,51 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
}
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint32_t neg1, uint32_t neg3)
+ uint32_t neg1, uint32_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4865,7 +4886,7 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
e2 = *(uint32_t *)(vm + H1_4(i));
e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
- r = float32_muladd(e1, e2, e3, 0, status);
+ r = float32_muladd(e1, e2, e3, flags, status);
*(uint32_t *)(vd + H1_4(i)) = r;
}
} while (i & 63);
@@ -4875,30 +4896,51 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0);
}
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint64_t neg1, uint64_t neg3)
+ uint64_t neg1, uint64_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4913,7 +4955,7 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
e1 = *(uint64_t *)(vn + i) ^ neg1;
e2 = *(uint64_t *)(vm + i);
e3 = *(uint64_t *)(va + i) ^ neg3;
- r = float64_muladd(e1, e2, e3, 0, status);
+ r = float64_muladd(e1, e2, e3, flags, status);
*(uint64_t *)(vd + i) = r;
}
} while (i & 63);
@@ -4923,25 +4965,46 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0);
}
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
/* Two operand floating-point comparison controlled by a predicate.
@@ -3927,19 +3927,25 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
-#define DO_FMLA(NAME, name) \
+#define DO_FMLA(NAME, name, ah_name) \
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
NULL, gen_helper_sve_##name##_h, \
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
}; \
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
+ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
+ NULL, gen_helper_sve_##ah_name##_h, \
+ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
-DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
-DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
-DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
-DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
+/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
+DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
+DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz)
+DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz)
+DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz)
#undef DO_FMLA