@@ -29,6 +29,7 @@ C_O1_I2(w, w, w)
C_O1_I2(w, w, wO)
C_O1_I2(w, w, wV)
C_O1_I2(w, w, wZ)
+C_O1_I3(w, w, w, w)
C_O1_I4(r, r, r, rI, rI)
C_O1_I4(r, r, rIN, rIK, 0)
C_O2_I1(r, r, l)
@@ -168,7 +168,7 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_bitsel_vec 1
#define TCG_TARGET_HAS_cmpsel_vec 0
#define TCG_TARGET_DEFAULT_MO (0)
@@ -216,6 +216,10 @@ typedef enum {
INSN_VSARI = 0xf2800010, /* VSHR.S */
INSN_VSHRI = 0xf3800010, /* VSHR.U */
+ INSN_VBSL = 0xf3100110,
+ INSN_VBIT = 0xf3200110,
+ INSN_VBIF = 0xf3300110,
+
INSN_VTST = 0xf2000810,
INSN_VDUP_G = 0xee800b10, /* VDUP (ARM core register) */
@@ -2400,7 +2404,8 @@ static int tcg_target_op_def(TCGOpcode op)
return C_O1_I2(w, w, wV);
case INDEX_op_cmp_vec:
return C_O1_I2(w, w, wZ);
-
+ case INDEX_op_bitsel_vec:
+ return C_O1_I3(w, w, w, w);
default:
g_assert_not_reached();
}
@@ -2721,7 +2726,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
{
TCGType type = vecl + TCG_TYPE_V64;
unsigned q = vecl;
- TCGArg a0, a1, a2;
+ TCGArg a0, a1, a2, a3;
int cmode, imm8;
a0 = args[0];
@@ -2872,6 +2877,18 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
}
return;
+ case INDEX_op_bitsel_vec:
+ a3 = args[3];
+ if (a0 == a3) {
+ tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
+ } else if (a0 == a2) {
+ tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
+ } else {
+ tcg_out_mov(s, type, a0, a1);
+ tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
+ }
+ return;
+
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
@@ -2897,6 +2914,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_sssub_vec:
case INDEX_op_usadd_vec:
case INDEX_op_ussub_vec:
+ case INDEX_op_bitsel_vec:
return 1;
case INDEX_op_abs_vec:
case INDEX_op_cmp_vec:
NEON has 3 instructions implementing this 4-argument operation,
with each insn overlapping a different logical input onto the
destination register.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target-con-set.h |  1 +
 tcg/arm/tcg-target.h         |  2 +-
 tcg/arm/tcg-target.c.inc     | 22 ++++++++++++++++++++--
 3 files changed, 22 insertions(+), 3 deletions(-)

--
2.25.1