diff mbox series

[for-4.1,v2,08/13] tcg/ppc: Implement INDEX_op_dupm_vec

Message ID 20190317090834.5552-9-richard.henderson@linaro.org
State New
Headers show
Series tcg/ppc: Add vector opcodes | expand

Commit Message

Richard Henderson March 17, 2019, 9:08 a.m. UTC
This saves a round trip through an integer register and back to memory.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 tcg/ppc/tcg-target.h     |  2 +-
 tcg/ppc/tcg-target.inc.c | 57 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 57 insertions(+), 2 deletions(-)

-- 
2.17.2
diff mbox series

Patch

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 5143ee853a..8ba5668fae 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -152,7 +152,7 @@  extern bool have_isa_3_00;
 #define TCG_TARGET_HAS_mul_vec          1
 #define TCG_TARGET_HAS_sat_vec          1
 #define TCG_TARGET_HAS_minmax_vec       1
-#define TCG_TARGET_HAS_dupm_vec         0
+#define TCG_TARGET_HAS_dupm_vec         1
 
 void flush_icache_range(uintptr_t start, uintptr_t stop);
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 61a245b828..85e332fcd3 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -467,6 +467,8 @@  static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define NOP    ORI  /* ori 0,0,0 */
 
 #define LVX        XO31(103)
+#define LVEBX      XO31(7)    /* load vector element byte indexed */
+#define LVEHX      XO31(39)   /* load vector element halfword indexed */
 #define LVEWX      XO31(71)
 
 #define STVX       XO31(231)
@@ -2835,6 +2837,7 @@  int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_xor_vec:
     case INDEX_op_andc_vec:
     case INDEX_op_not_vec:
+    case INDEX_op_dupm_vec:
         return 1;
     case INDEX_op_add_vec:
     case INDEX_op_sub_vec:
@@ -2854,6 +2857,78 @@  int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     }
 }
 
+/*
+ * Emit code that loads one element of size VECE from BASE + OFFSET and
+ * replicates it into every element of vector register OUT.
+ *
+ * MO_8/16/32: an lve[bhw]x element load followed by a splat of the lane
+ * that received the datum.  MO_64: load the enclosing quadword, then use
+ * VPERM to copy the wanted doubleword into both halves; TCG_VEC_TMP1 is
+ * used to hold the permute control vector.
+ *
+ * The lane index is derived from OFFSET alone, so BASE is assumed to be
+ * 16-byte aligned -- NOTE(review): confirm against the callers.  Lane
+ * numbers in the splat/permute encodings are big-endian, hence the
+ * inversion on little-endian hosts.
+ */
+static void tcg_out_dupm_vec(TCGContext *s, unsigned vece, TCGReg out,
+                             TCGReg base, intptr_t offset)
+{
+    int elt;
+
+    /* Keep only the 5-bit register number used by the VR* fields. */
+    out &= 31;
+    switch (vece) {
+    case MO_8:
+        tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
+        elt = extract32(offset, 0, 4);
+#ifndef HOST_WORDS_BIGENDIAN
+        elt ^= 15;
+#endif
+        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
+        break;
+    case MO_16:
+        assert((offset & 1) == 0);
+        tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
+        elt = extract32(offset, 1, 3);
+#ifndef HOST_WORDS_BIGENDIAN
+        elt ^= 7;
+#endif
+        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
+        break;
+    case MO_32:
+        assert((offset & 3) == 0);
+        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
+        elt = extract32(offset, 2, 2);
+#ifndef HOST_WORDS_BIGENDIAN
+        elt ^= 3;
+#endif
+        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
+        break;
+    case MO_64:
+        assert((offset & 7) == 0);
+        tcg_out_mem_long(s, 0, LVX, out, base, offset);
+        /*
+         * Select the doubleword by its big-endian position in the register,
+         * exactly as the narrower cases do for their splat index.
+         */
+        elt = extract32(offset, 3, 1);
+#ifndef HOST_WORDS_BIGENDIAN
+        elt ^= 1;
+#endif
+        /* FIXME: 32-bit altivec */
+        tcg_out_dupi_vec(s, TCG_TYPE_V128, TCG_VEC_TMP1,
+                         elt
+                         ? 0x08090a0b0c0d0e0full
+                         : 0x0001020304050607ull);
+        tcg_out32(s, VPERM | VRT(out) | VRA(out) | VRB(out)
+                  | VRC(TCG_VEC_TMP1));
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            unsigned vecl, unsigned vece,
                            const TCGArg *args, const int *const_args)
@@ -2884,7 +2936,9 @@  static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_st_vec:
         tcg_out_st(s, type, a0, a1, a2);
         return;
-
+    case INDEX_op_dupm_vec:
+        tcg_out_dupm_vec(s, vece, a0, a1, a2);
+        return;
     case INDEX_op_add_vec:
         insn = add_op[vece];
         break;
@@ -3251,6 +3305,7 @@  static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
         return &v_v;
     case INDEX_op_ld_vec:
     case INDEX_op_st_vec:
+    case INDEX_op_dupm_vec:
         return &v_r;
 
     default: