@@ -6922,7 +6922,7 @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
if (fp_access_check(s)) {
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
- (a->idx << 2) | a->rot, fn);
+ (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
}
return true;
}
@@ -995,29 +995,33 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va,
uintptr_t opr_sz = simd_oprsz(desc);
float16 *d = vd, *n = vn, *m = vm, *a = va;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
+ uint32_t negf_real = flip ^ negf_imag;
intptr_t elements = opr_sz / sizeof(float16);
intptr_t eltspersegment = MIN(16 / sizeof(float16), elements);
+ float16 negx_imag, negx_real;
intptr_t i, j;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 15;
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < elements; i += eltspersegment) {
float16 mr = m[H2(i + 2 * index + 0)];
float16 mi = m[H2(i + 2 * index + 1)];
- float16 e1 = neg_real ^ (flip ? mi : mr);
- float16 e3 = neg_imag ^ (flip ? mr : mi);
+ float16 e1 = negx_real ^ (flip ? mi : mr);
+ float16 e3 = negx_imag ^ (flip ? mr : mi);
for (j = i; j < i + eltspersegment; j += 2) {
float16 e2 = n[H2(j + flip)];
float16 e4 = e2;
- d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst);
- d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst);
+ d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst);
+ d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst);
}
}
clear_tail(d, opr_sz, simd_maxsz(desc));
@@ -1059,29 +1063,33 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va,
uintptr_t opr_sz = simd_oprsz(desc);
float32 *d = vd, *n = vn, *m = vm, *a = va;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
+ uint32_t negf_real = flip ^ negf_imag;
intptr_t elements = opr_sz / sizeof(float32);
intptr_t eltspersegment = MIN(16 / sizeof(float32), elements);
+ float32 negx_imag, negx_real;
intptr_t i, j;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 31;
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < elements; i += eltspersegment) {
float32 mr = m[H4(i + 2 * index + 0)];
float32 mi = m[H4(i + 2 * index + 1)];
- float32 e1 = neg_real ^ (flip ? mi : mr);
- float32 e3 = neg_imag ^ (flip ? mr : mi);
+ float32 e1 = negx_real ^ (flip ? mi : mr);
+ float32 e3 = negx_imag ^ (flip ? mr : mi);
for (j = i; j < i + eltspersegment; j += 2) {
float32 e2 = n[H4(j + flip)];
float32 e4 = e2;
- d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst);
- d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst);
+ d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst);
+ d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst);
}
}
clear_tail(d, opr_sz, simd_maxsz(desc));
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/translate-a64.c | 2 +- target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 19 deletions(-)