@@ -123,6 +123,32 @@ uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
return float_rel_to_flags(float64_compare(x, y, fp_status));
}
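+/* FMULX differs from an ordinary multiply only when one operand is a
+ * zero and the other an infinity: instead of the default NaN that
+ * float32_mul() would produce, the result is 2.0 with the sign bit set
+ * to sign(A) XOR sign(B), so for example FMULX(-0.0, +Inf) == -2.0.
+ */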
+float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ if ((float32_is_zero(a) && float32_is_infinity(b)) ||
+ (float32_is_infinity(a) && float32_is_zero(b))) {
+ /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+ return make_float32((1U << 30) |
+ ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
+ }
+ return float32_mul(a, b, fpst);
+}
+
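+/* Double-precision FMULX: the same special case, on the 64-bit pattern. */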
+float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ if ((float64_is_zero(a) && float64_is_infinity(b)) ||
+ (float64_is_infinity(a) && float64_is_zero(b))) {
+ /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+ return make_float64((1ULL << 62) |
+ ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
+ }
+ return float64_mul(a, b, fpst);
+}
+
uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
uint32_t rn, uint32_t numregs)
{
@@ -27,3 +27,5 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
+DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
+DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
@@ -7813,7 +7813,253 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
*/
static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
{
- unsupported_encoding(s, insn);
+ /* This encoding has two kinds of instruction:
+ * normal, where we perform elt x idxelt => elt for each
+ * element in the vector
+ * long, where we perform elt x idxelt and generate a result of
+ * double the width of the input element
+ * The long ops have a 'part' specifier (i.e. they come in INSN, INSN2 pairs).
+ */
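+ /* Encoding (per the ARM ARM): 0 Q U 01111 size L M Rm opcode H 0 Rn Rd,
+ * where H:L:M jointly encode the element index; how many of the three
+ * bits are actually used depends on the element size.
+ */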
+ bool is_q = extract32(insn, 30, 1);
+ bool u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int l = extract32(insn, 21, 1);
+ int m = extract32(insn, 20, 1);
+ /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
+ int rm = extract32(insn, 16, 4);
+ int opcode = extract32(insn, 12, 4);
+ int h = extract32(insn, 11, 1);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ bool is_long = false;
+ bool is_fp = false;
+ int index;
+ TCGv_ptr fpst;
+
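+ /* Classify the opcode and weed out unallocated U/opcode combinations. */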
+ switch (opcode) {
+ case 0x0: /* MLA */
+ case 0x4: /* MLS */
+ if (!u) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
+ is_long = true;
+ break;
+ case 0x3: /* SQDMLAL, SQDMLAL2 */
+ case 0x7: /* SQDMLSL, SQDMLSL2 */
+ case 0xb: /* SQDMULL, SQDMULL2 */
+ is_long = true;
+ /* fall through */
+ case 0xc: /* SQDMULH */
+ case 0xd: /* SQRDMULH */
+ case 0x8: /* MUL */
+ if (u) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x1: /* FMLA */
+ case 0x5: /* FMLS */
+ if (u) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x9: /* FMUL, FMULX */
+ if (!extract32(size, 1, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+ is_fp = true;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
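+ /* Decode the element index from H:L:M; for the wider element sizes
+ * the spare M bit instead becomes bit 4 of the Rm register number.
+ */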
+ if (is_fp) {
+ /* low bit of size indicates single/double */
+ size = extract32(size, 0, 1) ? 3 : 2;
+ if (size == 2) {
+ index = h << 1 | l;
+ } else {
+ if (l || !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ index = h;
+ }
+ rm |= (m << 4);
+ } else {
+ switch (size) {
+ case 1:
+ index = h << 2 | l << 1 | m;
+ break;
+ case 2:
+ index = h << 1 | l;
+ rm |= (m << 4);
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ }
+
+ if (is_long) {
+ unsupported_encoding(s, insn);
+ return;
+ }
+
+ if (is_fp) {
+ fpst = get_fpstatus_ptr();
+ } else {
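+ /* Integer ops never consult the FP status, so leave fpst unset. */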
+ TCGV_UNUSED_PTR(fpst);
+ }
+
+ if (size == 3) {
+ TCGv_i64 tcg_idx = tcg_temp_new_i64();
+ int pass;
+
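+ /* Only the double-precision FP ops use 64-bit elements here,
+ * and those encodings require Q=1.
+ */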
+ assert(is_fp && is_q && !is_long);
+
+ read_vec_element(s, tcg_idx, rm, index, MO_64);
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+
+ switch (opcode) {
+ case 0x5: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negd(tcg_op, tcg_op);
+ /* fall through */
+ case 0x1: /* FMLA */
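+ /* Fused multiply-accumulate into the existing Vd element. */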
+ read_vec_element(s, tcg_res, rd, pass, MO_64);
+ gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+ break;
+ case 0x9: /* FMUL, FMULX */
+ if (u) {
+ gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
+ } else {
+ gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_op);
+ tcg_temp_free_i64(tcg_res);
+ }
+
+ tcg_temp_free_i64(tcg_idx);
+ } else if (!is_long) {
+ /* 32 bit floating point, or 16 or 32 bit integer */
+ TCGv_i32 tcg_idx = tcg_temp_new_i32();
+ int pass;
+
+ read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+ if (size == 1) {
+ /* The simplest way to handle the 16x16 indexed ops is to duplicate
+ * the index into both halves of the 32 bit tcg_idx and then use
+ * the usual Neon helpers.
+ */
+ tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
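+ /* tcg_idx now holds the 16-bit index value in both halves. */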
+ }
+
+ for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+
+ switch (opcode) {
+ case 0x0: /* MLA */
+ case 0x4: /* MLS */
+ case 0x8: /* MUL */
+ {
+ static NeonGenTwoOpFn * const fns[2][2] = {
+ { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
+ { tcg_gen_add_i32, tcg_gen_sub_i32 },
+ };
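+ /* Row 0 holds the 16-bit lane helpers, row 1 the plain 32-bit
+ * ops; the column selects add (MLA) versus sub (MLS).
+ */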
+ NeonGenTwoOpFn *genfn;
+ bool is_sub = opcode == 0x4;
+
+ if (size == 1) {
+ gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
+ } else {
+ tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
+ }
+ if (opcode == 0x8) {
+ break;
+ }
+ read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+ genfn = fns[size - 1][is_sub];
+ genfn(tcg_res, tcg_op, tcg_res);
+ break;
+ }
+ case 0x5: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negs(tcg_op, tcg_op);
+ /* fall through */
+ case 0x1: /* FMLA */
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+ break;
+ case 0x9: /* FMUL, FMULX */
+ if (u) {
+ gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
+ } else {
+ gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+ }
+ break;
+ case 0xc: /* SQDMULH */
+ if (size == 1) {
+ gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ } else {
+ gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ }
+ break;
+ case 0xd: /* SQRDMULH */
+ if (size == 1) {
+ gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ } else {
+ gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ tcg_temp_free_i32(tcg_op);
+ tcg_temp_free_i32(tcg_res);
+ }
+
+ tcg_temp_free_i32(tcg_idx);
+
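+ /* AArch64 requires a Q=0 vector write to zero the high half of Vd. */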
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+ } else {
+ /* long ops: 16x16->32 or 32x32->64.  Not reached yet, since
+ * is_long took the unsupported_encoding() path above.
+ */
+ }
+
+ if (!TCGV_IS_UNUSED_PTR(fpst)) {
+ tcg_temp_free_ptr(fpst);
+ }
}
/* C3.6.19 Crypto AES