@@ -38,9 +38,15 @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst)
DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
+DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
+DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
+DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
+DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
@@ -267,6 +267,24 @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
*/
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
+/*
+ * Negate as for FPCR.AH == 1: a NaN input is returned unchanged.
+ */
+static inline float16 float16_ah_chs(float16 a)
+{
+ return float16_is_any_nan(a) ? a : float16_chs(a);
+}
+
+static inline float32 float32_ah_chs(float32 a)
+{
+ return float32_is_any_nan(a) ? a : float32_chs(a);
+}
+
+static inline float64 float64_ah_chs(float64 a)
+{
+ return float64_is_any_nan(a) ? a : float64_chs(a);
+}
+
static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah)
{
return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);
@@ -38,6 +38,7 @@
#ifdef CONFIG_USER_ONLY
#include "user/page-protection.h"
#endif
+#include "vec_internal.h"
/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -208,88 +209,52 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
return -float64_lt(b, a, fpst);
}
-/* Reciprocal step and sqrt step. Note that unlike the A32/T32
+/*
+ * Reciprocal step and reciprocal sqrt step. Unlike the A32/T32
* versions, these do a fully fused multiply-add or
* multiply-add-and-halve.
+ * The FPCR.AH == 1 (_ah) versions must not flip the sign of a NaN input.
*/
-
-uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
-{
- a = float16_squash_input_denormal(a, fpst);
- b = float16_squash_input_denormal(b, fpst);
-
- a = float16_chs(a);
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
- (float16_is_infinity(b) && float16_is_zero(a))) {
- return float16_two;
+#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \
+ CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
+ { \
+ a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
+ b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
+ a = FLOATTYPE ## _ ## CHSFN(a); /* ah_chs leaves NaNs alone */ \
+ if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
+ (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
+ return FLOATTYPE ## _two; /* infinity * zero */ \
+ } \
+ return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \
}
- return float16_muladd(a, b, float16_two, 0, fpst);
-}
-float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst)
-{
- a = float32_squash_input_denormal(a, fpst);
- b = float32_squash_input_denormal(b, fpst);
+DO_RECPS(recpsf_f16, uint32_t, float16, chs)
+DO_RECPS(recpsf_f32, float32, float32, chs)
+DO_RECPS(recpsf_f64, float64, float64, chs)
+DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
+DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
+DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)
- a = float32_chs(a);
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
- (float32_is_infinity(b) && float32_is_zero(a))) {
- return float32_two;
- }
- return float32_muladd(a, b, float32_two, 0, fpst);
-}
+#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN)                           \
+    CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst)                \
+    {                                                                       \
+        a = FLOATTYPE ## _squash_input_denormal(a, fpst);                   \
+        b = FLOATTYPE ## _squash_input_denormal(b, fpst);                   \
+        a = FLOATTYPE ## _ ## CHSFN(a); /* ah_chs leaves NaNs alone */      \
+        if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) ||   \
+            (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) {   \
+            return FLOATTYPE ## _one_point_five; /* infinity * zero */      \
+        }                                                                   \
+        return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three,       \
+                                           -1, 0, fpst);                    \
+    } /* no continuation: this line ends the macro */
-float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst)
-{
- a = float64_squash_input_denormal(a, fpst);
- b = float64_squash_input_denormal(b, fpst);
-
- a = float64_chs(a);
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_infinity(b) && float64_is_zero(a))) {
- return float64_two;
- }
- return float64_muladd(a, b, float64_two, 0, fpst);
-}
-
-uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
-{
- a = float16_squash_input_denormal(a, fpst);
- b = float16_squash_input_denormal(b, fpst);
-
- a = float16_chs(a);
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
- (float16_is_infinity(b) && float16_is_zero(a))) {
- return float16_one_point_five;
- }
- return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
-}
-
-float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
-{
- a = float32_squash_input_denormal(a, fpst);
- b = float32_squash_input_denormal(b, fpst);
-
- a = float32_chs(a);
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
- (float32_is_infinity(b) && float32_is_zero(a))) {
- return float32_one_point_five;
- }
- return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
-}
-
-float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
-{
- a = float64_squash_input_denormal(a, fpst);
- b = float64_squash_input_denormal(b, fpst);
-
- a = float64_chs(a);
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_infinity(b) && float64_is_zero(a))) {
- return float64_one_point_five;
- }
- return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
-}
+DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
+DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
+DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
+DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
+DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
+DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
@@ -5250,11 +5250,12 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
FPST_A64_F16 : FPST_A64);
}
-static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
- int mergereg)
+static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
+ const FPScalar *fnormal, const FPScalar *fah,
+ int mergereg)
{
- return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
- select_ah_fpst(s, a->esz));
+ return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
+ mergereg, select_ah_fpst(s, a->esz));
}
/* Some insns need to call different helpers when FPCR.AH == 1 */
@@ -5475,14 +5476,26 @@ static const FPScalar f_scalar_frecps = {
gen_helper_recpsf_f32,
gen_helper_recpsf_f64,
};
-TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn)
+static const FPScalar f_scalar_ah_frecps = { /* FPCR.AH == 1 helpers */
+ gen_helper_recpsf_ah_f16,
+ gen_helper_recpsf_ah_f32,
+ gen_helper_recpsf_ah_f64,
+};
+TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
+ &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
static const FPScalar f_scalar_frsqrts = {
gen_helper_rsqrtsf_f16,
gen_helper_rsqrtsf_f32,
gen_helper_rsqrtsf_f64,
};
-TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn)
+static const FPScalar f_scalar_ah_frsqrts = { /* FPCR.AH == 1 helpers */
+ gen_helper_rsqrtsf_ah_f16,
+ gen_helper_rsqrtsf_ah_f32,
+ gen_helper_rsqrtsf_ah_f64,
+};
+TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
+ &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
const FPScalar *f, bool swap)