[PULL,12/16] tcg/aarch64: Split up is_fimm

Message ID	20190522222821.23850-13-richard.henderson@linaro.org
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of qemu-devel-bounces+patch=linaro.org@nongnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; From: Richard Henderson <richard.henderson@linaro.org> To: qemu-devel@nongnu.org Date: Wed, 22 May 2019 18:28:17 -0400 Message-Id: <20190522222821.23850-13-richard.henderson@linaro.org> In-Reply-To: <20190522222821.23850-1-richard.henderson@linaro.org> References: <20190522222821.23850-1-richard.henderson@linaro.org> Subject: [Qemu-devel] [PULL 12/16] tcg/aarch64: Split up is_fimm Precedence: list Cc: peter.maydell@linaro.org Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>
Series	tcg queued patches | expand [PULL,00/16] tcg queued patches [PULL,01/16] tcg/i386: Fix dupi/dupm for avx1 and 32-bit hosts [PULL,02/16] tcg: Fix missing checks and clears in tcg_gen_gvec_dup_mem [PULL,03/16] tcg: Add support for vector bitwise select [PULL,04/16] tcg: Add support for vector compare select [PULL,05/16] tcg: Introduce do_op3_nofail for vector expansion [PULL,06/16] tcg: Expand vector minmax using cmp+cmpsel [PULL,07/16] tcg: Add TCG_OPF_NOT_PRESENT if TCG_TARGET_HAS_foo is negative [PULL,08/16] tcg/i386: Support vector comparison select value [PULL,09/16] tcg/i386: Remove expansion for missing minmax [PULL,10/16] tcg/i386: Use umin/umax in expanding unsigned compare [PULL,11/16] tcg/aarch64: Support vector bitwise select value [PULL,12/16] tcg/aarch64: Split up is_fimm [PULL,13/16] tcg/aarch64: Use MVNI in tcg_out_dupi_vec [PULL,14/16] tcg/aarch64: Build vector immediates with two insns [PULL,15/16] tcg/aarch64: Allow immediates for vector ORR and BIC [PULL,16/16] tcg/i386: Use MOVDQA for TCG_TYPE_V128 load/store

Message ID

20190522222821.23850-13-richard.henderson@linaro.org

State

New

Headers

Received-SPF: pass (google.com: domain of
	qemu-devel-bounces+patch=linaro.org@nongnu.org designates
	209.51.188.17 as permitted sender) client-ip=209.51.188.17; 
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Date: Wed, 22 May 2019 18:28:17 -0400
Message-Id: <20190522222821.23850-13-richard.henderson@linaro.org>
In-Reply-To: <20190522222821.23850-1-richard.henderson@linaro.org>
References: <20190522222821.23850-1-richard.henderson@linaro.org>
Subject: [Qemu-devel] [PULL 12/16] tcg/aarch64: Split up is_fimm
Precedence: list
Cc: peter.maydell@linaro.org
Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org
Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>

Series

tcg queued patches | expand

Commit Message

Richard Henderson May 22, 2019, 10:28 p.m. UTC

There are several sub-classes of vector immediate, and only MOVI
can use them all.  Splitting the matcher by sub-class will enable
usage of MVNI and ORRI, which use progressively fewer sub-classes.

This patch adds no new functionality, merely splits the function
and moves part of the logic into tcg_out_dupi_vec.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 tcg/aarch64/tcg-target.inc.c | 205 ++++++++++++++++++++---------------
 1 file changed, 120 insertions(+), 85 deletions(-)

-- 
2.17.1

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index e99149cda7..1422dfebe2 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -190,103 +190,86 @@  static inline bool is_limm(uint64_t val)
     return (val & (val - 1)) == 0;
 }
 
-/* Match a constant that is valid for vectors.  */
-static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
+/* Return true if v16 is a valid 16-bit shifted immediate.  */
+static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
 {
-    int i;
-
-    *op = 0;
-    /* Match replication across 8 bits.  */
-    if (v64 == dup_const(MO_8, v64)) {
-        *cmode = 0xe;
-        *imm8 = v64 & 0xff;
+    if (v16 == (v16 & 0xff)) {
+        *cmode = 0x8;
+        *imm8 = v16 & 0xff;
+        return true;
+    } else if (v16 == (v16 & 0xff00)) {
+        *cmode = 0xa;
+        *imm8 = v16 >> 8;
         return true;
     }
-    /* Match replication across 16 bits.  */
-    if (v64 == dup_const(MO_16, v64)) {
-        uint16_t v16 = v64;
+    return false;
+}
 
-        if (v16 == (v16 & 0xff)) {
-            *cmode = 0x8;
-            *imm8 = v16 & 0xff;
-            return true;
-        } else if (v16 == (v16 & 0xff00)) {
-            *cmode = 0xa;
-            *imm8 = v16 >> 8;
-            return true;
-        }
+/* Return true if v32 is a valid 32-bit shifted immediate.  */
+static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
+{
+    if (v32 == (v32 & 0xff)) {
+        *cmode = 0x0;
+        *imm8 = v32 & 0xff;
+        return true;
+    } else if (v32 == (v32 & 0xff00)) {
+        *cmode = 0x2;
+        *imm8 = (v32 >> 8) & 0xff;
+        return true;
+    } else if (v32 == (v32 & 0xff0000)) {
+        *cmode = 0x4;
+        *imm8 = (v32 >> 16) & 0xff;
+        return true;
+    } else if (v32 == (v32 & 0xff000000)) {
+        *cmode = 0x6;
+        *imm8 = v32 >> 24;
+        return true;
     }
-    /* Match replication across 32 bits.  */
-    if (v64 == dup_const(MO_32, v64)) {
-        uint32_t v32 = v64;
+    return false;
+}
 
-        if (v32 == (v32 & 0xff)) {
-            *cmode = 0x0;
-            *imm8 = v32 & 0xff;
-            return true;
-        } else if (v32 == (v32 & 0xff00)) {
-            *cmode = 0x2;
-            *imm8 = (v32 >> 8) & 0xff;
-            return true;
-        } else if (v32 == (v32 & 0xff0000)) {
-            *cmode = 0x4;
-            *imm8 = (v32 >> 16) & 0xff;
-            return true;
-        } else if (v32 == (v32 & 0xff000000)) {
-            *cmode = 0x6;
-            *imm8 = v32 >> 24;
-            return true;
-        } else if ((v32 & 0xffff00ff) == 0xff) {
-            *cmode = 0xc;
-            *imm8 = (v32 >> 8) & 0xff;
-            return true;
-        } else if ((v32 & 0xff00ffff) == 0xffff) {
-            *cmode = 0xd;
-            *imm8 = (v32 >> 16) & 0xff;
-            return true;
-        }
-        /* Match forms of a float32.  */
-        if (extract32(v32, 0, 19) == 0
-            && (extract32(v32, 25, 6) == 0x20
-                || extract32(v32, 25, 6) == 0x1f)) {
-            *cmode = 0xf;
-            *imm8 = (extract32(v32, 31, 1) << 7)
-                  | (extract32(v32, 25, 1) << 6)
-                  | extract32(v32, 19, 6);
-            return true;
-        }
+/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
+static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
+{
+    if ((v32 & 0xffff00ff) == 0xff) {
+        *cmode = 0xc;
+        *imm8 = (v32 >> 8) & 0xff;
+        return true;
+    } else if ((v32 & 0xff00ffff) == 0xffff) {
+        *cmode = 0xd;
+        *imm8 = (v32 >> 16) & 0xff;
+        return true;
     }
-    /* Match forms of a float64.  */
+    return false;
+}
+
+/* Return true if v32 is a valid float32 immediate.  */
+static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
+{
+    if (extract32(v32, 0, 19) == 0
+        && (extract32(v32, 25, 6) == 0x20
+            || extract32(v32, 25, 6) == 0x1f)) {
+        *cmode = 0xf;
+        *imm8 = (extract32(v32, 31, 1) << 7)
+              | (extract32(v32, 25, 1) << 6)
+              | extract32(v32, 19, 6);
+        return true;
+    }
+    return false;
+}
+
+/* Return true if v64 is a valid float64 immediate.  */
+static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
+{
     if (extract64(v64, 0, 48) == 0
         && (extract64(v64, 54, 9) == 0x100
             || extract64(v64, 54, 9) == 0x0ff)) {
         *cmode = 0xf;
-        *op = 1;
         *imm8 = (extract64(v64, 63, 1) << 7)
               | (extract64(v64, 54, 1) << 6)
               | extract64(v64, 48, 6);
         return true;
     }
-    /* Match bytes of 0x00 and 0xff.  */
-    for (i = 0; i < 64; i += 8) {
-        uint64_t byte = extract64(v64, i, 8);
-        if (byte != 0 && byte != 0xff) {
-            break;
-        }
-    }
-    if (i == 64) {
-        *cmode = 0xe;
-        *op = 1;
-        *imm8 = (extract64(v64, 0, 1) << 0)
-              | (extract64(v64, 8, 1) << 1)
-              | (extract64(v64, 16, 1) << 2)
-              | (extract64(v64, 24, 1) << 3)
-              | (extract64(v64, 32, 1) << 4)
-              | (extract64(v64, 40, 1) << 5)
-              | (extract64(v64, 48, 1) << 6)
-              | (extract64(v64, 56, 1) << 7);
-        return true;
-    }
     return false;
 }
 
@@ -817,11 +800,63 @@  static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                              TCGReg rd, tcg_target_long v64)
 {
-    int op, cmode, imm8;
+    bool q = type == TCG_TYPE_V128;
+    int cmode, imm8, i;
 
-    if (is_fimm(v64, &op, &cmode, &imm8)) {
-        tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
-    } else if (type == TCG_TYPE_V128) {
+    /* Test all bytes equal first.  */
+    if (v64 == dup_const(MO_8, v64)) {
+        imm8 = (uint8_t)v64;
+        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
+        return;
+    }
+
+    /*
+     * Test all bytes 0x00 or 0xff second.  This can match cases that
+     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
+     */
+    for (i = imm8 = 0; i < 8; i++) {
+        uint8_t byte = v64 >> (i * 8);
+        if (byte == 0xff) {
+            imm8 |= 1 << i;
+        } else if (byte != 0) {
+            goto fail_bytes;
+        }
+    }
+    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
+    return;
+ fail_bytes:
+
+    /*
+     * Tests for various replications.  For each element width, if we
+     * cannot find an expansion there's no point checking a larger
+     * width because we already know by replication it cannot match.
+     */
+    if (v64 == dup_const(MO_16, v64)) {
+        uint16_t v16 = v64;
+
+        if (is_shimm16(v16, &cmode, &imm8)) {
+            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
+            return;
+        }
+    } else if (v64 == dup_const(MO_32, v64)) {
+        uint32_t v32 = v64;
+
+        if (is_shimm32(v32, &cmode, &imm8) ||
+            is_soimm32(v32, &cmode, &imm8) ||
+            is_fimm32(v32, &cmode, &imm8)) {
+            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
+            return;
+        }
+    } else if (is_fimm64(v64, &cmode, &imm8)) {
+        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
+        return;
+    }
+
+    /*
+     * As a last resort, load from the constant pool.  Sadly there
+     * is no LD1R (literal), so store the full 16-byte vector.
+     */
+    if (type == TCG_TYPE_V128) {
         new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
         tcg_out_insn(s, 3305, LDR_v128, 0, rd);
     } else {

[PULL,12/16] tcg/aarch64: Split up is_fimm

Commit Message

Patch