@@ -721,6 +721,16 @@ DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
@@ -285,8 +285,6 @@ extern const GVecGen3 mls_op[4];
extern const GVecGen3 cmtst_op[4];
extern const GVecGen3 sshl_op[4];
extern const GVecGen3 ushl_op[4];
-extern const GVecGen2i sri_op[4];
-extern const GVecGen2i sli_op[4];
extern const GVecGen4 uqadd_op[4];
extern const GVecGen4 sqadd_op[4];
extern const GVecGen4 uqsub_op[4];
@@ -311,6 +309,11 @@ void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
/*
* Forward to the isar_feature_* tests given a DisasContext pointer.
*/
@@ -585,16 +585,6 @@ static void gen_gvec_op2(DisasContext *s, bool is_q, int rd,
is_q ? 16 : 8, vec_full_reg_size(s), gvec_op);
}
-/* Expand a 2-operand + immediate AdvSIMD vector operation using
- * an op descriptor.
- */
-static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
- int rn, int64_t imm, const GVecGen2i *gvec_op)
-{
- tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
- is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
-}
-
/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
int rn, int rm, const GVecGen3 *gvec_op)
@@ -10191,12 +10181,9 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
gen_gvec_fn2i(s, is_q, rd, rn, shift,
is_u ? gen_gvec_usra : gen_gvec_ssra, size);
return;
+
case 0x08: /* SRI */
- /* Shift count same as element size is valid but does nothing. */
- if (shift == 8 << size) {
- goto done;
- }
- gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sri, size);
return;
case 0x00: /* SSHR / USHR */
@@ -10247,7 +10234,6 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
}
tcg_temp_free_i64(tcg_round);
- done:
clear_vec_high(s, is_q, rd);
}
@@ -10272,7 +10258,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
}
if (insert) {
- gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
} else {
gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
}
@@ -4454,47 +4454,62 @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
- if (sh == 0) {
- tcg_gen_mov_vec(d, a);
- } else {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
- tcg_gen_shri_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
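+    /* d keeps its top sh bits; the low bits are filled from a >> sh. */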
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
+ tcg_gen_shri_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
- }
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
}
-static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shr8_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shr16_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shr32_ins_i32,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shr64_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
-const GVecGen2i sri_op[4] = {
- { .fni8 = gen_shr8_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_8 },
- { .fni8 = gen_shr16_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_16 },
- { .fni4 = gen_shr32_ins_i32,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_32 },
- { .fni8 = gen_shr64_ins_i64,
- .fniv = gen_shr_ins_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_64 },
-};
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /* Shift of esize leaves destination unchanged. */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
@@ -4532,47 +4547,60 @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
- if (sh == 0) {
- tcg_gen_mov_vec(d, a);
- } else {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
- tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
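+    /* d keeps its low sh bits; the upper bits are filled from a << sh. */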
+ tcg_gen_shli_vec(vece, t, a, sh);
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
- }
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
}
-static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shl8_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shl16_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shl32_ins_i32,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shl64_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
-const GVecGen2i sli_op[4] = {
- { .fni8 = gen_shl8_ins_i64,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_8 },
- { .fni8 = gen_shl16_ins_i64,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_16 },
- { .fni4 = gen_shl32_ins_i32,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_32 },
- { .fni8 = gen_shl64_ins_i64,
- .fniv = gen_shl_ins_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_64 },
-};
+ /* tszimm encoding produces immediates in the range [0..esize-1]. */
+ tcg_debug_assert(shift >= 0);
+ tcg_debug_assert(shift < (8 << vece));
+
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
@@ -5715,20 +5743,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
/* Right shift comes here negative. */
shift = -shift;
- /* Shift out of range leaves destination unchanged. */
- if (shift < 8 << size) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- shift, &sri_op[size]);
- }
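+            /* gen_gvec_sri handles shift == esize (a no-op) itself. */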
+ gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
return 0;
case 5: /* VSHL, VSLI */
if (u) { /* VSLI */
- /* Shift out of range leaves destination unchanged. */
- if (shift < 8 << size) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
- vec_size, shift, &sli_op[size]);
- }
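+                /* gen_gvec_sli asserts 0 <= shift < esize. */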
+ gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
} else { /* VSHL */
/* Shifts larger than the element size are
* architecturally valid and results in zero.
@@ -974,6 +974,44 @@ DO_RSRA(gvec_ursra_d, uint64_t)
#undef DO_RSRA
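+/*
+ * SRI: shift each element of n right by 'shift' and insert the result
+ * into d, leaving the top 'shift' bits of each d element unchanged.
+ * E.g. for 8-bit elements, d = 0xab, n = 0xf0, shift = 4 yields 0xaf.
+ */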
+#define DO_SRI(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SRI(gvec_sri_b, uint8_t)
+DO_SRI(gvec_sri_h, uint16_t)
+DO_SRI(gvec_sri_s, uint32_t)
+DO_SRI(gvec_sri_d, uint64_t)
+
+#undef DO_SRI
+
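+/*
+ * SLI: shift each element of n left by 'shift' and insert the result
+ * into d, leaving the low 'shift' bits of each d element unchanged.
+ * E.g. for 8-bit elements, d = 0xab, n = 0x0f, shift = 4 yields 0xfb.
+ */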
+#define DO_SLI(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SLI(gvec_sli_b, uint8_t)
+DO_SLI(gvec_sli_h, uint16_t)
+DO_SLI(gvec_sli_s, uint32_t)
+DO_SLI(gvec_sli_d, uint64_t)
+
+#undef DO_SLI
+
/*
* Convert float16 to float32, raising no exceptions and
* preserving exceptional values, including SNaN.