@@ -3255,6 +3255,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
case INDEX_op_sars_vec:
+ case INDEX_op_rotls_vec:
case INDEX_op_cmp_vec:
case INDEX_op_x86_shufps_vec:
case INDEX_op_x86_blend_vec:
@@ -3293,6 +3294,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
return 1;
+ case INDEX_op_rotli_vec:
case INDEX_op_cmp_vec:
case INDEX_op_cmpsel_vec:
return -1;
@@ -3316,6 +3318,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
+ case INDEX_op_rotls_vec:
return vece >= MO_16;
case INDEX_op_sars_vec:
return vece >= MO_16 && vece <= MO_32;
@@ -3353,7 +3356,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
}
}
-static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
+static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
TCGv_vec t1, t2;
@@ -3363,26 +3366,31 @@ static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);
- /* Unpack to W, shift, and repack. Tricky bits:
- (1) Use punpck*bw x,x to produce DDCCBBAA,
- i.e. duplicate in other half of the 16-bit lane.
- (2) For right-shift, add 8 so that the high half of
- the lane becomes zero. For left-shift, we must
- shift up and down again.
- (3) Step 2 leaves high half zero such that PACKUSWB
- (pack with unsigned saturation) does not modify
- the quantity. */
+ /*
+ * Unpack to W, shift, and repack. Tricky bits:
+ * (1) Use punpck*bw x,x to produce DDCCBBAA,
+ * i.e. duplicate in other half of the 16-bit lane.
+ * (2) For right-shift, add 8 so that the high half of the lane
+ * becomes zero. For left-shift and left-rotate, we must
+ * shift up and down again.
+ * (3) Step 2 leaves high half zero such that PACKUSWB
+ * (pack with unsigned saturation) does not modify
+ * the quantity.
+ */
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
- if (shr) {
- tcg_gen_shri_vec(MO_16, t1, t1, imm + 8);
- tcg_gen_shri_vec(MO_16, t2, t2, imm + 8);
+ if (opc != INDEX_op_rotli_vec) {
+ imm += 8;
+ }
+ if (opc == INDEX_op_shri_vec) {
+ tcg_gen_shri_vec(MO_16, t1, t1, imm);
+ tcg_gen_shri_vec(MO_16, t2, t2, imm);
} else {
- tcg_gen_shli_vec(MO_16, t1, t1, imm + 8);
- tcg_gen_shli_vec(MO_16, t2, t2, imm + 8);
+ tcg_gen_shli_vec(MO_16, t1, t1, imm);
+ tcg_gen_shli_vec(MO_16, t2, t2, imm);
tcg_gen_shri_vec(MO_16, t1, t1, 8);
tcg_gen_shri_vec(MO_16, t2, t2, 8);
}
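
(Reviewer sketch, not part of the patch: a scalar C model of the MO_8
trick described in the comment above. The names model_byte_lane and
byte_op are illustrative only; "b" stands for one byte lane, and the
uint16_t value stands for the widened lane after punpck*bw x,x.)

#include <stdint.h>

enum byte_op { SHRI, SHLI, ROTLI };

/* One byte lane of expand_vec_shi: widen b to (b << 8) | b, shift in
   16 bits, and keep the low byte.  In every case the high half of the
   result is zero, so PACKUSWB cannot saturate.  imm is in [0, 7]. */
static uint8_t model_byte_lane(uint8_t b, unsigned imm, enum byte_op op)
{
    uint16_t w = ((uint16_t)b << 8) | b;

    switch (op) {
    case SHRI:
        /* Shifting down by imm + 8 shifts the byte and clears the
           high half in one step. */
        return w >> (imm + 8);
    case SHLI:
        /* Shift up by imm + 8, then down by 8. */
        return (uint16_t)(w << (imm + 8)) >> 8;
    default: /* ROTLI */
        /* Shift up by imm only: the duplicate copy in the high half
           supplies the bits that rotate around the bottom. */
        return (uint16_t)(w << imm) >> 8;
    }
}
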
@@ -3449,6 +3457,43 @@ static void expand_vec_sari(TCGType type, unsigned vece,
}
}
+static void expand_vec_rotli(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGArg imm)
+{
+ TCGv_vec t;
+
+ if (vece == MO_8) {
+ expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
+ return;
+ }
+
+ t = tcg_temp_new_vec(type);
+ tcg_gen_shli_vec(vece, t, v1, imm);
+ tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+}
+
+static void expand_vec_rotls(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
+{
+ TCGv_i32 rsh;
+ TCGv_vec t;
+
+ tcg_debug_assert(vece != MO_8);
+
+ t = tcg_temp_new_vec(type);
+ rsh = tcg_temp_new_i32();
+
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
+ tcg_gen_shls_vec(vece, t, v1, lsh);
+ tcg_gen_shrs_vec(vece, v0, v1, rsh);
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+ tcg_temp_free_i32(rsh);
+}
+
static void expand_vec_mul(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
{
@@ -3658,13 +3703,21 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
switch (opc) {
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
- expand_vec_shi(type, vece, opc == INDEX_op_shri_vec, v0, v1, a2);
+ expand_vec_shi(type, vece, opc, v0, v1, a2);
break;
case INDEX_op_sari_vec:
expand_vec_sari(type, vece, v0, v1, a2);
break;
+ case INDEX_op_rotli_vec:
+ expand_vec_rotli(type, vece, v0, v1, a2);
+ break;
+
+ case INDEX_op_rotls_vec:
+ expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2)));
+ break;
+
case INDEX_op_mul_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_mul(type, vece, v0, v1, v2);
We must continue the special casing of 8-bit elements and the other element sizes are trivially implemented with shifts. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/i386/tcg-target.inc.c | 85 +++++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 16 deletions(-) -- 2.20.1