@@ -67,6 +67,13 @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst)
DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst)
DEF_HELPER_2(advsimd_rinth, f16, f16, fpst)
+DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst)
+DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst)
+DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst)
+DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst)
+
DEF_HELPER_2(exception_return, void, env, i64)
DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64)
@@ -399,6 +399,42 @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
return r;
}
+/*
+ * AH=1 min/max have some odd special cases:
+ * comparing two zeroes (even of different sign), (NaN, anything),
+ * or (anything, NaN) should return the second argument (possibly
+ * squashed to zero).
+ * Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
+ */
+#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
+ { \
+ bool save; \
+ CTYPE r; \
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
+ if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \
+ return b; \
+ } \
+ if (FLOATTYPE ## _is_any_nan(a) || \
+ FLOATTYPE ## _is_any_nan(b)) { \
+ float_raise(float_flag_invalid, fpst); \
+ return b; \
+ } \
+ save = get_flush_to_zero(fpst); \
+ set_flush_to_zero(false, fpst); \
+ r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \
+ set_flush_to_zero(save, fpst); \
+ return r; \
+ }
+
+AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
+AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
+AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
+AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
+AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
+AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)
+
/* 64-bit versions of the CRC helpers. Note that although the operation
* (and the prototypes of crc32c() and crc32() mean that only the bottom
* 32 bits of the accumulator and result are used, we pass and return
@@ -5152,6 +5152,15 @@ static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
select_fpst(s, a->esz));
}
+/* Some insns need to call different helpers when FPCR.AH == 1 */
+static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
+ const FPScalar *fnormal,
+ const FPScalar *fah,
+ int mergereg)
+{
+ return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
+}
+
static const FPScalar f_scalar_fadd = {
gen_helper_vfp_addh,
gen_helper_vfp_adds,
@@ -5185,14 +5194,24 @@ static const FPScalar f_scalar_fmax = {
gen_helper_vfp_maxs,
gen_helper_vfp_maxd,
};
-TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax, a->rn)
+static const FPScalar f_scalar_fmax_ah = {
+ gen_helper_vfp_ah_maxh,
+ gen_helper_vfp_ah_maxs,
+ gen_helper_vfp_ah_maxd,
+};
+TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
static const FPScalar f_scalar_fmin = {
gen_helper_vfp_minh,
gen_helper_vfp_mins,
gen_helper_vfp_mind,
};
-TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin, a->rn)
+static const FPScalar f_scalar_fmin_ah = {
+ gen_helper_vfp_ah_minh,
+ gen_helper_vfp_ah_mins,
+ gen_helper_vfp_ah_mind,
+};
+TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
static const FPScalar f_scalar_fmaxnm = {
gen_helper_vfp_maxnumh,
When FPCR.AH == 1, floating point FMIN and FMAX have some odd special cases: * comparing two zeroes (even of different sign) or comparing a NaN with anything always returns the second argument (possibly squashed to zero) * denormal outputs are not squashed to zero regardless of FZ or FZ16 Implement these semantics in new helper functions and select them at translate time if FPCR.AH is 1 for the scalar FMAX and FMIN insns. (We will convert the other FMAX and FMIN insns in subsequent commits.) Note that FMINNM and FMAXNM are not affected. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> --- target/arm/tcg/helper-a64.h | 7 +++++++ target/arm/tcg/helper-a64.c | 36 ++++++++++++++++++++++++++++++++++ target/arm/tcg/translate-a64.c | 23 ++++++++++++++++++++-- 3 files changed, 64 insertions(+), 2 deletions(-)