@@ -38,9 +38,15 @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst)
DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
+DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
+DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
+DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
@@ -208,88 +208,66 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
return -float64_lt(b, a, fpst);
}
-/* Reciprocal step and sqrt step. Note that unlike the A32/T32
+static float16 float16_ah_chs(float16 a)
+{
+ return float16_is_any_nan(a) ? a : float16_chs(a);
+}
+
+static float32 float32_ah_chs(float32 a)
+{
+ return float32_is_any_nan(a) ? a : float32_chs(a);
+}
+
+static float64 float64_ah_chs(float64 a)
+{
+ return float64_is_any_nan(a) ? a : float64_chs(a);
+}
+/*
+ * Reciprocal step and sqrt step. Note that unlike the A32/T32
* versions, these do a fully fused multiply-add or
* multiply-add-and-halve.
+ * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
*/
-
-uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
-{
- a = float16_squash_input_denormal(a, fpst);
- b = float16_squash_input_denormal(b, fpst);
-
- a = float16_chs(a);
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
- (float16_is_infinity(b) && float16_is_zero(a))) {
- return float16_two;
+#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
+ { \
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
+ a = FLOATTYPE ## _ ## CHSFN(a); \
+ if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
+ (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
+ return FLOATTYPE ## _two; \
+ } \
+ return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \
}
- return float16_muladd(a, b, float16_two, 0, fpst);
-}
-float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst)
-{
- a = float32_squash_input_denormal(a, fpst);
- b = float32_squash_input_denormal(b, fpst);
+DO_RECPS(recpsf_f16, uint32_t, float16, chs)
+DO_RECPS(recpsf_f32, float32, float32, chs)
+DO_RECPS(recpsf_f64, float64, float64, chs)
+DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
+DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
+DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)
- a = float32_chs(a);
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
- (float32_is_infinity(b) && float32_is_zero(a))) {
- return float32_two;
- }
- return float32_muladd(a, b, float32_two, 0, fpst);
-}
+#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
+ { \
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
+ a = FLOATTYPE ## _ ## CHSFN(a); \
+ if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
+ (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
+ return FLOATTYPE ## _one_point_five; \
+ } \
+ return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \
+ -1, 0, fpst); \
+ } \
-float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst)
-{
- a = float64_squash_input_denormal(a, fpst);
- b = float64_squash_input_denormal(b, fpst);
-
- a = float64_chs(a);
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_infinity(b) && float64_is_zero(a))) {
- return float64_two;
- }
- return float64_muladd(a, b, float64_two, 0, fpst);
-}
-
-uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
-{
- a = float16_squash_input_denormal(a, fpst);
- b = float16_squash_input_denormal(b, fpst);
-
- a = float16_chs(a);
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
- (float16_is_infinity(b) && float16_is_zero(a))) {
- return float16_one_point_five;
- }
- return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
-}
-
-float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
-{
- a = float32_squash_input_denormal(a, fpst);
- b = float32_squash_input_denormal(b, fpst);
-
- a = float32_chs(a);
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
- (float32_is_infinity(b) && float32_is_zero(a))) {
- return float32_one_point_five;
- }
- return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
-}
-
-float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
-{
- a = float64_squash_input_denormal(a, fpst);
- b = float64_squash_input_denormal(b, fpst);
-
- a = float64_chs(a);
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_infinity(b) && float64_is_zero(a))) {
- return float64_one_point_five;
- }
- return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
-}
+DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
+DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
+DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
+DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
+DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
+DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
@@ -5250,11 +5250,12 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
FPST_FPCR_F16_A64 : FPST_FPCR_A64);
}
-static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
- int mergereg)
+static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
+ const FPScalar *fnormal, const FPScalar *fah,
+ int mergereg)
{
- return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
- select_fpst(s, a->esz));
+ return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
+ mergereg, select_fpst(s, a->esz));
}
/* Some insns need to call different helpers when FPCR.AH == 1 */
@@ -5475,14 +5476,26 @@ static const FPScalar f_scalar_frecps = {
gen_helper_recpsf_f32,
gen_helper_recpsf_f64,
};
-TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn)
+static const FPScalar f_scalar_ah_frecps = {
+ gen_helper_recpsf_ah_f16,
+ gen_helper_recpsf_ah_f32,
+ gen_helper_recpsf_ah_f64,
+};
+TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
+ &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
static const FPScalar f_scalar_frsqrts = {
gen_helper_rsqrtsf_f16,
gen_helper_rsqrtsf_f32,
gen_helper_rsqrtsf_f64,
};
-TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn)
+static const FPScalar f_scalar_ah_frsqrts = {
+ gen_helper_rsqrtsf_ah_f16,
+ gen_helper_rsqrtsf_ah_f32,
+ gen_helper_rsqrtsf_ah_f64,
+};
+TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
+ &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
const FPScalar *f, bool swap)
Handle the FPCR.AH semantics that we do not change the sign of an input NaN in the FRECPS and FRSQRTS scalar insns, by providing new helper functions that do the CHS part of the operation differently. Since the extra helper functions would be very repetitive if written out longhand, we condense them and the existing non-AH helpers into being emitted via macros. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> --- target/arm/tcg/helper-a64.h | 6 ++ target/arm/tcg/helper-a64.c | 128 ++++++++++++++------------------- target/arm/tcg/translate-a64.c | 25 +++++-- 3 files changed, 78 insertions(+), 81 deletions(-)