@@ -6794,6 +6794,47 @@ TRANS(FNMADD, do_fmadd, a, true, true)
TRANS(FMSUB, do_fmadd, a, false, true)
TRANS(FNMSUB, do_fmadd, a, true, false)
+/*
+ * Advanced SIMD Across Lanes
+ */
+
+static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
+ MemOp src_sign, NeonGenTwo64OpFn *fn)
+{
+ TCGv_i64 tcg_res, tcg_elt;
+ MemOp src_mop = a->esz | src_sign;
+ int elements = (a->q ? 16 : 8) >> a->esz;
+
+ /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
+ if (elements < 4) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ tcg_res = tcg_temp_new_i64();
+ tcg_elt = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_res, a->rn, 0, src_mop);
+ for (int i = 1; i < elements; i++) {
+ read_vec_element(s, tcg_elt, a->rn, i, src_mop);
+ fn(tcg_res, tcg_res, tcg_elt);
+ }
+
+ tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
+ write_fp_dreg(s, a->rd, tcg_res);
+ return true;
+}
+
+TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
+TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
+TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
+TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
+TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
+TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
+TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
+
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
* Note that it is the caller's responsibility to ensure that the
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -9092,27 +9133,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 12, 5);
bool is_q = extract32(insn, 30, 1);
bool is_u = extract32(insn, 29, 1);
- bool is_fp = false;
bool is_min = false;
int elements;
- int i;
- TCGv_i64 tcg_res, tcg_elt;
switch (opcode) {
- case 0x1b: /* ADDV */
- if (is_u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x3: /* SADDLV, UADDLV */
- case 0xa: /* SMAXV, UMAXV */
- case 0x1a: /* SMINV, UMINV */
- if (size == 3 || (size == 2 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
- break;
case 0xc: /* FMAXNMV, FMINNMV */
case 0xf: /* FMAXV, FMINV */
/* Bit 1 of size field encodes min vs max and the actual size
@@ -9121,7 +9145,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
* precision.
*/
is_min = extract32(size, 1, 1);
- is_fp = true;
if (!is_u && dc_isar_feature(aa64_fp16, s)) {
size = 1;
} else if (!is_u || !is_q || extract32(size, 0, 1)) {
@@ -9132,6 +9155,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
}
break;
default:
+ case 0x3: /* SADDLV, UADDLV */
+ case 0xa: /* SMAXV, UMAXV */
+ case 0x1a: /* SMINV, UMINV */
+ case 0x1b: /* ADDV */
unallocated_encoding(s);
return;
}
@@ -9142,52 +9169,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
elements = (is_q ? 16 : 8) >> size;
- tcg_res = tcg_temp_new_i64();
- tcg_elt = tcg_temp_new_i64();
-
- /* These instructions operate across all lanes of a vector
- * to produce a single result. We can guarantee that a 64
- * bit intermediate is sufficient:
- * + for [US]ADDLV the maximum element size is 32 bits, and
- * the result type is 64 bits
- * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
- * same as the element size, which is 32 bits at most
- * For the integer operations we can choose to work at 64
- * or 32 bits and truncate at the end; for simplicity
- * we use 64 bits always. The floating point
- * ops do require 32 bit intermediates, though.
- */
- if (!is_fp) {
- read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
-
- for (i = 1; i < elements; i++) {
- read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
-
- switch (opcode) {
- case 0x03: /* SADDLV / UADDLV */
- case 0x1b: /* ADDV */
- tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
- break;
- case 0x0a: /* SMAXV / UMAXV */
- if (is_u) {
- tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
- } else {
- tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
- }
- break;
- case 0x1a: /* SMINV / UMINV */
- if (is_u) {
- tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
- } else {
- tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
- }
- break;
- default:
- g_assert_not_reached();
- }
-
- }
- } else {
+ {
/* Floating point vector reduction ops which work across 32
* bit (single) or 16 bit (half-precision) intermediates.
* Note that correct NaN propagation requires that we do these
@@ -9195,34 +9177,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
*/
TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
int fpopcode = opcode | is_min << 4 | is_u << 5;
- TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, size,
- 0, elements, fpst);
- tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
+ TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size,
+ 0, elements, fpst);
+ write_fp_sreg(s, rd, tcg_res);
}
-
- /* Now truncate the result to the width required for the final output */
- if (opcode == 0x03) {
- /* SADDLV, UADDLV: result is 2*esize */
- size++;
- }
-
- switch (size) {
- case 0:
- tcg_gen_ext8u_i64(tcg_res, tcg_res);
- break;
- case 1:
- tcg_gen_ext16u_i64(tcg_res, tcg_res);
- break;
- case 2:
- tcg_gen_ext32u_i64(tcg_res, tcg_res);
- break;
- case 3:
- break;
- default:
- g_assert_not_reached();
- }
-
- write_fp_dreg(s, rd, tcg_res);
}
/* AdvSIMD modified immediate
@@ -59,6 +59,8 @@
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
+@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
+
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
@qrrr_s . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=2
@@ -1154,3 +1156,13 @@ TRN1 0.00 1110 .. 0 ..... 0 010 10 ..... ..... @qrrr_e
TRN2 0.00 1110 .. 0 ..... 0 110 10 ..... ..... @qrrr_e
ZIP1 0.00 1110 .. 0 ..... 0 011 10 ..... ..... @qrrr_e
ZIP2 0.00 1110 .. 0 ..... 0 111 10 ..... ..... @qrrr_e
+
+# Advanced SIMD Across Lanes
+
+ADDV 0.00 1110 .. 11000 11011 10 ..... ..... @qrr_e
+SADDLV 0.00 1110 .. 11000 00011 10 ..... ..... @qrr_e
+UADDLV 0.10 1110 .. 11000 00011 10 ..... ..... @qrr_e
+SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
+UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
+SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
+UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e