Message ID | 20200422011722.13287-23-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | tcg 5.1 omnibus patch set | expand |
Richard Henderson <richard.henderson@linaro.org> writes: > While we don't store more than tcg_target_long in TCGTemp, > we shouldn't be limited to that for code generation. We will > be able to use this for INDEX_op_dup2_vec with 2 constants. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> > --- > tcg/aarch64/tcg-target.inc.c | 2 +- > tcg/i386/tcg-target.inc.c | 20 ++++++++++++-------- > tcg/ppc/tcg-target.inc.c | 15 ++++++++------- > tcg/tcg.c | 4 ++-- > 4 files changed, 23 insertions(+), 18 deletions(-) > > diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c > index e5c9ab70a9..3b5a5d78c7 100644 > --- a/tcg/aarch64/tcg-target.inc.c > +++ b/tcg/aarch64/tcg-target.inc.c > @@ -856,7 +856,7 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, > } > > static void tcg_out_dupi_vec(TCGContext *s, TCGType type, > - TCGReg rd, tcg_target_long v64) > + TCGReg rd, int64_t v64) > { > bool q = type == TCG_TYPE_V128; > int cmode, imm8, i; > diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c > index 07424f7ef9..9cb627d6eb 100644 > --- a/tcg/i386/tcg-target.inc.c > +++ b/tcg/i386/tcg-target.inc.c > @@ -945,7 +945,7 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, > } > > static void tcg_out_dupi_vec(TCGContext *s, TCGType type, > - TCGReg ret, tcg_target_long arg) > + TCGReg ret, int64_t arg) > { > int vex_l = (type == TCG_TYPE_V256 ? 
P_VEXL : 0); > > @@ -958,7 +958,14 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, > return; > } > > - if (TCG_TARGET_REG_BITS == 64) { > + if (TCG_TARGET_REG_BITS == 32 && arg == dup_const(MO_32, arg)) { > + if (have_avx2) { > + tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret); > + } else { > + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); > + } > + new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); > + } else { > if (type == TCG_TYPE_V64) { > tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret); > } else if (have_avx2) { > @@ -966,14 +973,11 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, > } else { > tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret); > } > - new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); > - } else { > - if (have_avx2) { > - tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret); > + if (TCG_TARGET_REG_BITS == 64) { > + new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); > } else { > - tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); > + new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32); > } > - new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); > } > } > > diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c > index 7ab1e32064..3333b55766 100644 > --- a/tcg/ppc/tcg-target.inc.c > +++ b/tcg/ppc/tcg-target.inc.c > @@ -913,7 +913,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, > } > > static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, > - tcg_target_long val) > + int64_t val) > { > uint32_t load_insn; > int rel, low; > @@ -921,20 +921,20 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, > > low = (int8_t)val; > if (low >= -16 && low < 16) { > - if (val == (tcg_target_long)dup_const(MO_8, low)) { > + if (val == dup_const(MO_8, low)) { > tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); > return; > } > - if (val == (tcg_target_long)dup_const(MO_16, low)) { > + if (val == dup_const(MO_16, low)) { > 
tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); > return; > } > - if (val == (tcg_target_long)dup_const(MO_32, low)) { > + if (val == dup_const(MO_32, low)) { > tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); > return; > } > } > - if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) { > + if (have_isa_3_00 && val == dup_const(MO_8, val)) { > tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11)); > return; > } > @@ -956,14 +956,15 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, > if (TCG_TARGET_REG_BITS == 64) { > new_pool_label(s, val, rel, s->code_ptr, add); > } else { > - new_pool_l2(s, rel, s->code_ptr, add, val, val); > + new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val); > } > } else { > load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); > if (TCG_TARGET_REG_BITS == 64) { > new_pool_l2(s, rel, s->code_ptr, add, val, val); > } else { > - new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); > + new_pool_l4(s, rel, s->code_ptr, add, > + val >> 32, val, val >> 32, val); > } > } > > diff --git a/tcg/tcg.c b/tcg/tcg.c > index 4f1ed1d2fe..fc1c97d586 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -117,7 +117,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, > static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, > TCGReg dst, TCGReg base, intptr_t offset); > static void tcg_out_dupi_vec(TCGContext *s, TCGType type, > - TCGReg dst, tcg_target_long arg); > + TCGReg dst, int64_t arg); > static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, > unsigned vece, const TCGArg *args, > const int *const_args); > @@ -133,7 +133,7 @@ static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, > g_assert_not_reached(); > } > static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, > - TCGReg dst, tcg_target_long arg) > + TCGReg dst, int64_t arg) > { > g_assert_not_reached(); > } -- Alex Bennée
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index e5c9ab70a9..3b5a5d78c7 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -856,7 +856,7 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, } static void tcg_out_dupi_vec(TCGContext *s, TCGType type, - TCGReg rd, tcg_target_long v64) + TCGReg rd, int64_t v64) { bool q = type == TCG_TYPE_V128; int cmode, imm8, i; diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 07424f7ef9..9cb627d6eb 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -945,7 +945,7 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, } static void tcg_out_dupi_vec(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) + TCGReg ret, int64_t arg) { int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); @@ -958,7 +958,14 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, return; } - if (TCG_TARGET_REG_BITS == 64) { + if (TCG_TARGET_REG_BITS == 32 && arg == dup_const(MO_32, arg)) { + if (have_avx2) { + tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret); + } else { + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); + } + new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); + } else { if (type == TCG_TYPE_V64) { tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret); } else if (have_avx2) { @@ -966,14 +973,11 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, } else { tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret); } - new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); - } else { - if (have_avx2) { - tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret); + if (TCG_TARGET_REG_BITS == 64) { + new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); } else { - tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); + new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32); } - new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); } } diff --git a/tcg/ppc/tcg-target.inc.c 
b/tcg/ppc/tcg-target.inc.c index 7ab1e32064..3333b55766 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -913,7 +913,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, - tcg_target_long val) + int64_t val) { uint32_t load_insn; int rel, low; @@ -921,20 +921,20 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, low = (int8_t)val; if (low >= -16 && low < 16) { - if (val == (tcg_target_long)dup_const(MO_8, low)) { + if (val == dup_const(MO_8, low)) { tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); return; } - if (val == (tcg_target_long)dup_const(MO_16, low)) { + if (val == dup_const(MO_16, low)) { tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); return; } - if (val == (tcg_target_long)dup_const(MO_32, low)) { + if (val == dup_const(MO_32, low)) { tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); return; } } - if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) { + if (have_isa_3_00 && val == dup_const(MO_8, val)) { tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11)); return; } @@ -956,14 +956,15 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, if (TCG_TARGET_REG_BITS == 64) { new_pool_label(s, val, rel, s->code_ptr, add); } else { - new_pool_l2(s, rel, s->code_ptr, add, val, val); + new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val); } } else { load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); if (TCG_TARGET_REG_BITS == 64) { new_pool_l2(s, rel, s->code_ptr, add, val, val); } else { - new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); + new_pool_l4(s, rel, s->code_ptr, add, + val >> 32, val, val >> 32, val); } } diff --git a/tcg/tcg.c b/tcg/tcg.c index 4f1ed1d2fe..fc1c97d586 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -117,7 +117,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, static bool tcg_out_dupm_vec(TCGContext *s, TCGType 
type, unsigned vece, TCGReg dst, TCGReg base, intptr_t offset); static void tcg_out_dupi_vec(TCGContext *s, TCGType type, - TCGReg dst, tcg_target_long arg); + TCGReg dst, int64_t arg); static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg *args, const int *const_args); @@ -133,7 +133,7 @@ static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, g_assert_not_reached(); } static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, - TCGReg dst, tcg_target_long arg) + TCGReg dst, int64_t arg) { g_assert_not_reached(); }
While we don't store more than tcg_target_long in TCGTemp, we shouldn't be limited to that for code generation. We will be able to use this for INDEX_op_dup2_vec with 2 constants.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.inc.c |  2 +-
 tcg/i386/tcg-target.inc.c    | 20 ++++++++++++--------
 tcg/ppc/tcg-target.inc.c     | 15 ++++++++-------
 tcg/tcg.c                    |  4 ++--
 4 files changed, 23 insertions(+), 18 deletions(-)

--
2.20.1