diff mbox series

[60/76] target/arm: Handle FPCR.AH in FMLSL

Message ID 20250124162836.2332150-61-peter.maydell@linaro.org
State New
Headers show
Series target/arm: Implement FEAT_AFP and FEAT_RPRES | expand

Commit Message

Peter Maydell Jan. 24, 2025, 4:28 p.m. UTC
Honour the FPCR.AH "don't negate the sign of a NaN" semantics in
FMLSL. We pass in the value of FPCR.AH in the SIMD data field, and
use this to determine whether we should suppress the negation for
NaN inputs.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/tcg/translate-a64.c |  4 ++--
 target/arm/tcg/vec_helper.c    | 28 ++++++++++++++++++++++++----
 2 files changed, 26 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 0827dff16b2..e22c2a148ab 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5968,7 +5968,7 @@  TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
 {
     if (fp_access_check(s)) {
-        int data = (is_2 << 1) | is_s;
+        int data = (s->fpcr_ah << 2) | (is_2 << 1) | is_s;
         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm), tcg_env,
@@ -6738,7 +6738,7 @@  TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
 {
     if (fp_access_check(s)) {
-        int data = (a->idx << 2) | (is_2 << 1) | is_s;
+        int data = (s->fpcr_ah << 5) | (a->idx << 2) | (is_2 << 1) | is_s;
         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm), tcg_env,
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 382b5da4a9c..aa42c50f9fe 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2083,6 +2083,26 @@  static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
     return ptr[is_q & is_2] >> ((is_2 & ~is_q) << 5);
 }
 
+static uint64_t neg4_f16(uint64_t v, bool fpcr_ah)
+{
+    /*
+     * Negate the four f16 values packed in v (the FMLSL inputs) at once.
+     * With FPCR.AH == 1 a NaN keeps its sign, so its bit is cut from mask.
+     */
+    uint64_t mask = 0x8000800080008000ull; /* sign bit of every 16-bit lane */
+    if (fpcr_ah) {
+        uint64_t tmp = v, signbit = 0x8000; /* start at the lowest lane */
+        for (int i = 0; i < 4; i++) {
+            if (float16_is_any_nan(extract64(tmp, 0, 16))) {
+                mask ^= signbit; /* clear this lane's bit: sign is not flipped */
+            }
+            tmp >>= 16; /* bring the next lane down to bits [15:0] */
+            signbit <<= 16; /* and move to that lane's sign position */
+        }
+    }
+    return v ^ mask; /* flip the sign of every lane still set in mask */
+}
+
 /*
  * Note that FMLAL requires oprsz == 8 or oprsz == 16,
  * as there is not yet SVE versions that might use blocking.
@@ -2094,6 +2114,7 @@  static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
     intptr_t i, oprsz = simd_oprsz(desc);
     int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
     int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+    bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
     int is_q = oprsz == 16;
     uint64_t n_4, m_4;
 
@@ -2101,9 +2122,8 @@  static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
     n_4 = load4_f16(vn, is_q, is_2);
     m_4 = load4_f16(vm, is_q, is_2);
 
-    /* Negate all inputs for FMLSL at once.  */
     if (is_s) {
-        n_4 ^= 0x8000800080008000ull;
+        n_4 = neg4_f16(n_4, fpcr_ah);
     }
 
     for (i = 0; i < oprsz / 4; i++) {
@@ -2155,6 +2175,7 @@  static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
     int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
     int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
     int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
+    bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 5, 1);
     int is_q = oprsz == 16;
     uint64_t n_4;
     float32 m_1;
@@ -2162,9 +2183,8 @@  static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
     /* Pre-load all of the f16 data, avoiding overlap issues.  */
     n_4 = load4_f16(vn, is_q, is_2);
 
-    /* Negate all inputs for FMLSL at once.  */
     if (is_s) {
-        n_4 ^= 0x8000800080008000ull;
+        n_4 = neg4_f16(n_4, fpcr_ah);
     }
 
     m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);