@@ -1475,6 +1475,27 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
@@ -4802,7 +4802,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint16_t neg1, uint16_t neg3)
+ uint16_t neg1, uint16_t neg3, bool fpcr_ah)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4814,9 +4814,15 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float16 e1, e2, e3, r;
- e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
+ e1 = *(uint16_t *)(vn + H1_2(i));
e2 = *(uint16_t *)(vm + H1_2(i));
- e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
+ e3 = *(uint16_t *)(va + H1_2(i));
+ if (neg1 && !(fpcr_ah && float16_is_any_nan(e1))) {
+ e1 ^= neg1;
+ }
+ if (neg3 && !(fpcr_ah && float16_is_any_nan(e3))) {
+ e3 ^= neg3;
+ }
r = float16_muladd(e1, e2, e3, 0, status);
*(uint16_t *)(vd + H1_2(i)) = r;
}
@@ -4827,30 +4833,48 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, false);
}
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, false);
}
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, false);
}
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, false);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, true);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, true);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, true);
}
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint32_t neg1, uint32_t neg3)
+ uint32_t neg1, uint32_t neg3, bool fpcr_ah)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4862,9 +4886,15 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float32 e1, e2, e3, r;
- e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
+ e1 = *(uint32_t *)(vn + H1_4(i));
e2 = *(uint32_t *)(vm + H1_4(i));
- e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
+ e3 = *(uint32_t *)(va + H1_4(i));
+ if (neg1 && !(fpcr_ah && float32_is_any_nan(e1))) {
+ e1 ^= neg1;
+ }
+ if (neg3 && !(fpcr_ah && float32_is_any_nan(e3))) {
+ e3 ^= neg3;
+ }
r = float32_muladd(e1, e2, e3, 0, status);
*(uint32_t *)(vd + H1_4(i)) = r;
}
@@ -4875,30 +4905,48 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, false);
}
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, false);
}
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, false);
}
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, false);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, true);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, true);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, true);
}
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint64_t neg1, uint64_t neg3)
+ uint64_t neg1, uint64_t neg3, bool fpcr_ah)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4910,9 +4958,15 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float64 e1, e2, e3, r;
- e1 = *(uint64_t *)(vn + i) ^ neg1;
+ e1 = *(uint64_t *)(vn + i);
e2 = *(uint64_t *)(vm + i);
- e3 = *(uint64_t *)(va + i) ^ neg3;
+ e3 = *(uint64_t *)(va + i);
+ if (neg1 && !(fpcr_ah && float64_is_any_nan(e1))) {
+ e1 ^= neg1;
+ }
+ if (neg3 && !(fpcr_ah && float64_is_any_nan(e3))) {
+ e3 ^= neg3;
+ }
r = float64_muladd(e1, e2, e3, 0, status);
*(uint64_t *)(vd + i) = r;
}
@@ -4923,25 +4977,43 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, false);
}
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, false);
}
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, false);
}
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, false);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, true);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, true);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, true);
}
/* Two operand floating-point comparison controlled by a predicate.
@@ -3924,19 +3924,25 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
-#define DO_FMLA(NAME, name) \
+#define DO_FMLA(NAME, name, ah_name) \
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
NULL, gen_helper_sve_##name##_h, \
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
}; \
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
+ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
+ NULL, gen_helper_sve_##ah_name##_h, \
+ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
-DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
-DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
-DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
-DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
+/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
+DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
+DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz)
+DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz)
+DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz)
#undef DO_FMLA
Handle the FPCR.AH "don't negate the sign of a NaN" semantics fro the SVE FMLS (vector) insns, by providing new helpers for the AH=1 case which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions that do the work. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> --- target/arm/tcg/helper-sve.h | 21 ++++++ target/arm/tcg/sve_helper.c | 114 +++++++++++++++++++++++++++------ target/arm/tcg/translate-sve.c | 18 ++++-- 3 files changed, 126 insertions(+), 27 deletions(-)