@@ -691,6 +691,16 @@ DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
@@ -285,8 +285,6 @@ extern const GVecGen3 mls_op[4];
extern const GVecGen3 cmtst_op[4];
extern const GVecGen3 sshl_op[4];
extern const GVecGen3 ushl_op[4];
-extern const GVecGen2i ssra_op[4];
-extern const GVecGen2i usra_op[4];
extern const GVecGen2i sri_op[4];
extern const GVecGen2i sli_op[4];
extern const GVecGen4 uqadd_op[4];
@@ -299,6 +297,11 @@ void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
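+/*
+ * Expand SSRA/USRA (shift right and accumulate) as a gvec operation;
+ * @shift is the immediate right shift applied to the source elements
+ * before they are accumulated into the destination.
+ */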
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
/*
* Forward to the isar_feature_* tests given a DisasContext pointer.
*/
@@ -10188,19 +10188,8 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
switch (opcode) {
case 0x02: /* SSRA / USRA (accumulate) */
- if (is_u) {
- /* Shift count same as element size produces zero to add. */
- if (shift == 8 << size) {
- goto done;
- }
- gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
- } else {
- /* Shift count same as element size produces all sign to add. */
- if (shift == 8 << size) {
- shift -= 1;
- }
- gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
- }
+ gen_gvec_fn2i(s, is_q, rd, rn, shift,
+ is_u ? gen_gvec_usra : gen_gvec_ssra, size);
return;
case 0x08: /* SRI */
/* Shift count same as element size is valid but does nothing. */
@@ -3874,33 +3874,51 @@ static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
tcg_gen_add_vec(vece, d, d, a);
}
-static const TCGOpcode vecop_list_ssra[] = {
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
-};
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ssra8_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_ssra16_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ssra32_i32,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ssra64_i64,
+ .fniv = gen_ssra_vec,
+          .fno = gen_helper_gvec_ssra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
-const GVecGen2i ssra_op[4] = {
- { .fni8 = gen_ssra8_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_8 },
- { .fni8 = gen_ssra16_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_16 },
- { .fni4 = gen_ssra32_i32,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_32 },
- { .fni8 = gen_ssra64_i64,
- .fniv = gen_ssra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list_ssra,
- .load_dest = true,
- .vece = MO_64 },
-};
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits.
+ */
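+    /*
+     * For example, at MO_8 an immediate shift of 8 is clamped to 7,
+     * which replicates the sign bit across the whole byte before the
+     * addition.
+     */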
+ shift = MIN(shift, (8 << vece) - 1);
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
@@ -3932,33 +3950,55 @@ static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
tcg_gen_add_vec(vece, d, d, a);
}
-static const TCGOpcode vecop_list_usra[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
-};
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_usra8_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8, },
+ { .fni8 = gen_usra16_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16, },
+ { .fni4 = gen_usra32_i32,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32, },
+ { .fni8 = gen_usra64_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64, },
+ };
-const GVecGen2i usra_op[4] = {
- { .fni8 = gen_usra8_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_8, },
- { .fni8 = gen_usra16_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_16, },
- { .fni4 = gen_usra32_i32,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_32, },
- { .fni8 = gen_usra64_i64,
- .fniv = gen_usra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_64, },
-};
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Unsigned results in all zeros as input to accumulate: nop.
+ */
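+    /*
+     * For example, at MO_8 an immediate shift of 8 moves every bit out
+     * of the element, so there is nothing to add and only the tail of
+     * the vector needs to be cleared.
+     */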
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
@@ -5220,19 +5260,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case 1: /* VSRA */
/* Right shift comes here negative. */
shift = -shift;
- /* Shifts larger than the element size are architecturally
- * valid. Unsigned results in all zeros; signed results
- * in all sign bits.
- */
- if (!u) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- MIN(shift, (8 << size) - 1),
- &ssra_op[size]);
- } else if (shift >= 8 << size) {
- /* rd += 0 */
+ if (u) {
+ gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
} else {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- shift, &usra_op[size]);
+ gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
}
return 0;
@@ -899,6 +899,31 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
clear_tail(d, oprsz, simd_maxsz(desc));
}
+
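+/*
+ * Shift-right-accumulate helpers: each element of @vn is shifted right
+ * by the immediate carried in the descriptor and added into the
+ * corresponding element of @vd; the shift is arithmetic or logical
+ * according to the signedness of TYPE.
+ */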
+#define DO_SRA(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] += n[i] >> shift; \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SRA(gvec_ssra_b, int8_t)
+DO_SRA(gvec_ssra_h, int16_t)
+DO_SRA(gvec_ssra_s, int32_t)
+DO_SRA(gvec_ssra_d, int64_t)
+
+DO_SRA(gvec_usra_b, uint8_t)
+DO_SRA(gvec_usra_h, uint16_t)
+DO_SRA(gvec_usra_s, uint32_t)
+DO_SRA(gvec_usra_d, uint64_t)
+
+#undef DO_SRA
+
/*
* Convert float16 to float32, raising no exceptions and
* preserving exceptional values, including SNaN.