@@ -334,6 +334,14 @@ DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -467,6 +467,10 @@ void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
@@ -102,25 +102,12 @@ VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev
VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
-
-# Insns operating on 64-bit elements (size!=0b11 handled elsewhere)
-# The _rev suffix indicates that Vn and Vm are reversed (as explained
-# by the comment for the @3same_rev format).
-@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \
- &3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3
-
VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev
VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev
VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev
VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev
-{
- VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
- VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev
-}
-{
- VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
- VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev
-}
+VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev
+VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
@@ -1263,6 +1263,30 @@ void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
opr_sz, max_sz, 0, fns[vece]);
}
+void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3_ptr * const fns[] = {
+ gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h,
+ gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
+ opr_sz, max_sz, 0, fns[vece]);
+}
+
+void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3_ptr * const fns[] = {
+ gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h,
+ gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
+ opr_sz, max_sz, 0, fns[vece]);
+}
+
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
@@ -435,11 +435,23 @@ uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
+NEON_GVEC_VOP2_ENV(neon_uqrshl_b, uint8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
+NEON_GVEC_VOP2_ENV(neon_uqrshl_h, uint16_t)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, true, env->vfp.qc))
+NEON_GVEC_VOP2_ENV(neon_uqrshl_s, uint32_t)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_d(src1, (int8_t)src2, true, env->vfp.qc))
+NEON_GVEC_VOP2_ENV(neon_uqrshl_d, uint64_t)
#undef NEON_FN
uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift)
@@ -455,11 +467,23 @@ uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift)
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
+NEON_GVEC_VOP2_ENV(neon_sqrshl_b, int8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
+NEON_GVEC_VOP2_ENV(neon_sqrshl_h, int16_t)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, true, env->vfp.qc))
+NEON_GVEC_VOP2_ENV(neon_sqrshl_s, int32_t)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_d(src1, (int8_t)src2, true, env->vfp.qc))
+NEON_GVEC_VOP2_ENV(neon_sqrshl_d, int64_t)
#undef NEON_FN
uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
@@ -10959,6 +10959,13 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
}
switch (opcode) {
+ case 0x0b: /* SQRSHL, UQRSHL */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_neon_uqrshl, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_neon_sqrshl, size);
+ }
+ return;
case 0x0c: /* SMAX, UMAX */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
@@ -11101,16 +11108,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
- case 0xb: /* SQRSHL, UQRSHL */
- {
- static NeonGenTwoOpEnvFn * const fns[3][2] = {
- { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
- { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
- { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
- };
- genenvfn = fns[size][u];
- break;
- }
default:
g_assert_not_reached();
}
@@ -798,6 +798,8 @@ DO_3SAME(VRSHL_S, gen_gvec_srshl)
DO_3SAME(VRSHL_U, gen_gvec_urshl)
DO_3SAME(VQSHL_S, gen_neon_sqshl)
DO_3SAME(VQSHL_U, gen_neon_uqshl)
+DO_3SAME(VQRSHL_S, gen_neon_sqrshl)
+DO_3SAME(VQRSHL_U, gen_neon_uqrshl)
/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
@@ -916,26 +918,6 @@ DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
-#define DO_3SAME_64(INSN, FUNC) \
- static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
- uint32_t rn_ofs, uint32_t rm_ofs, \
- uint32_t oprsz, uint32_t maxsz) \
- { \
- static const GVecGen3 op = { .fni8 = FUNC }; \
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
- } \
- DO_3SAME(INSN, gen_##INSN##_3s)
-
-#define DO_3SAME_64_ENV(INSN, FUNC) \
- static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
- { \
- FUNC(d, tcg_env, n, m); \
- } \
- DO_3SAME_64(INSN, gen_##INSN##_elt)
-
-DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
-DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
-
#define DO_3SAME_32(INSN, FUNC) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
uint32_t rn_ofs, uint32_t rm_ofs, \
@@ -969,30 +951,6 @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
FUNC(d, tcg_env, n, m); \
}
-#define DO_3SAME_32_ENV(INSN, FUNC) \
- WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
- WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
- WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
- static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
- uint32_t rn_ofs, uint32_t rm_ofs, \
- uint32_t oprsz, uint32_t maxsz) \
- { \
- static const GVecGen3 ops[4] = { \
- { .fni4 = gen_##INSN##_tramp8 }, \
- { .fni4 = gen_##INSN##_tramp16 }, \
- { .fni4 = gen_##INSN##_tramp32 }, \
- { 0 }, \
- }; \
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
- } \
- static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
- { \
- if (a->size > 2) { \
- return false; \
- } \
- return do_3same(s, a, gen_##INSN##_3s); \
- }
-
DO_3SAME_32(VHADD_S, hadd_s)
DO_3SAME_32(VHADD_U, hadd_u)
DO_3SAME_32(VHSUB_S, hsub_s)
@@ -1000,9 +958,6 @@ DO_3SAME_32(VHSUB_U, hsub_u)
DO_3SAME_32(VRHADD_S, rhadd_s)
DO_3SAME_32(VRHADD_U, rhadd_u)
-DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
-DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
-
#define DO_3SAME_VQDMULH(INSN, FUNC) \
WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/helper.h | 8 ++++++ target/arm/tcg/translate.h | 4 +++ target/arm/tcg/neon-dp.decode | 17 ++---------- target/arm/tcg/gengvec.c | 24 ++++++++++++++++ target/arm/tcg/neon_helper.c | 24 ++++++++++++++++ target/arm/tcg/translate-a64.c | 17 +++++------- target/arm/tcg/translate-neon.c | 49 ++------------------------------- 7 files changed, 71 insertions(+), 72 deletions(-)