Message ID: 20240510091251.7975-1-richard.henderson@linaro.org
State:      Superseded
Series:     tcg/loongarch64: Fill out tcg_out_{ld,st} for vector regs
On 2024/5/10 at 5:12 PM, Richard Henderson wrote:
> TCG register spill/fill uses tcg_out_ld/st with all types,
> not necessarily going through INDEX_op_{ld,st}_vec.
>
> Cc: qemu-stable@nongnu.org
> Fixes: 16288ded944 ("tcg/loongarch64: Lower basic tcg vec ops to LSX")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2336
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/loongarch64/tcg-target.c.inc | 103 ++++++++++++++++++++++++-------
>  1 file changed, 80 insertions(+), 23 deletions(-)

Tested-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Song Gao <gaosong@loongson.cn>

Thanks.
Song Gao
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 69c5b8ac4f..06ca1ab11c 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -808,18 +808,88 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
     }
 }
 
-static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
-                       TCGReg arg1, intptr_t arg2)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg dest,
+                       TCGReg base, intptr_t offset)
 {
-    bool is_32bit = type == TCG_TYPE_I32;
-    tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2);
+    switch (type) {
+    case TCG_TYPE_I32:
+        if (dest < TCG_REG_V0) {
+            tcg_out_ldst(s, OPC_LD_W, dest, base, offset);
+        } else {
+            tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_32, dest, base, offset);
+        }
+        break;
+    case TCG_TYPE_I64:
+        if (dest < TCG_REG_V0) {
+            tcg_out_ldst(s, OPC_LD_D, dest, base, offset);
+        } else {
+            tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_64, dest, base, offset);
+        }
+        break;
+    case TCG_TYPE_V128:
+        if (-0x800 <= offset && offset <= 0x7ff) {
+            tcg_out_opc_vld(s, dest, base, offset);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+            tcg_out_opc_vldx(s, dest, base, TCG_REG_TMP0);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
 }
 
-static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
-                       TCGReg arg1, intptr_t arg2)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
+                       TCGReg base, intptr_t offset)
 {
-    bool is_32bit = type == TCG_TYPE_I32;
-    tcg_out_ldst(s, is_32bit ? OPC_ST_W : OPC_ST_D, arg, arg1, arg2);
+    switch (type) {
+    case TCG_TYPE_I32:
+        if (src < TCG_REG_V0) {
+            tcg_out_ldst(s, OPC_ST_W, src, base, offset);
+        } else {
+            /* TODO: Could use fst_s, fstx_s */
+            if (offset < -0x100 || offset > 0xff || (offset & 3)) {
+                if (-0x800 <= offset && offset <= 0x7ff) {
+                    tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
+                } else {
+                    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+                    tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
+                }
+                base = TCG_REG_TMP0;
+                offset = 0;
+            }
+            tcg_out_opc_vstelm_w(s, src, base, offset, 0);
+        }
+        break;
+    case TCG_TYPE_I64:
+        if (src < TCG_REG_V0) {
+            tcg_out_ldst(s, OPC_ST_D, src, base, offset);
+        } else {
+            /* TODO: Could use fst_d, fstx_d */
+            if (offset < -0x100 || offset > 0xff || (offset & 7)) {
+                if (-0x800 <= offset && offset <= 0x7ff) {
+                    tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
+                } else {
+                    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+                    tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
+                }
+                base = TCG_REG_TMP0;
+                offset = 0;
+            }
+            tcg_out_opc_vstelm_d(s, src, base, offset, 0);
+        }
+        break;
+    case TCG_TYPE_V128:
+        if (-0x800 <= offset && offset <= 0x7ff) {
+            tcg_out_opc_vst(s, src, base, offset);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+            tcg_out_opc_vstx(s, src, base, TCG_REG_TMP0);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
 }
 
 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -1740,7 +1810,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 {
     TCGType type = vecl + TCG_TYPE_V64;
     TCGArg a0, a1, a2, a3;
-    TCGReg temp = TCG_REG_TMP0;
     TCGReg temp_vec = TCG_VEC_TMP0;
 
     static const LoongArchInsn cmp_vec_insn[16][4] = {
@@ -1820,22 +1889,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 
     switch (opc) {
     case INDEX_op_st_vec:
-        /* Try to fit vst imm */
-        if (-0x800 <= a2 && a2 <= 0x7ff) {
-            tcg_out_opc_vst(s, a0, a1, a2);
-        } else {
-            tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
-            tcg_out_opc_vstx(s, a0, a1, temp);
-        }
+        tcg_out_st(s, type, a0, a1, a2);
         break;
     case INDEX_op_ld_vec:
-        /* Try to fit vld imm */
-        if (-0x800 <= a2 && a2 <= 0x7ff) {
-            tcg_out_opc_vld(s, a0, a1, a2);
-        } else {
-            tcg_out_movi(s, TCG_TYPE_I64, temp, a2);
-            tcg_out_opc_vldx(s, a0, a1, temp);
-        }
+        tcg_out_ld(s, type, a0, a1, a2);
         break;
     case INDEX_op_and_vec:
         tcg_out_opc_vand_v(s, a0, a1, a2);
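A note on the offset handling above: the -0x800..0x7ff checks correspond to the signed 12-bit byte offset accepted by the vld/vst immediate forms; when a spill-slot offset does not fit, the patch materializes the offset in TCG_REG_TMP0 and uses the register-indexed vldx/vstx, or folds the offset into the base address before vstelm. The following is a standalone sketch of that range test, not QEMU code; fits_simm and the printed messages are invented for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Does 'offset' fit a signed immediate field that is 'bits' wide? */
static bool fits_simm(intptr_t offset, unsigned bits)
{
    intptr_t lo = -((intptr_t)1 << (bits - 1));
    intptr_t hi = ((intptr_t)1 << (bits - 1)) - 1;
    return lo <= offset && offset <= hi;
}

int main(void)
{
    printf("0x7ff fits si12: %d\n", fits_simm(0x7ff, 12));  /* 1: vst immediate form ok */
    printf("0x800 fits si12: %d\n", fits_simm(0x800, 12));  /* 0: fall back to vstx */
    return 0;
}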
TCG register spill/fill uses tcg_out_ld/st with all types,
not necessarily going through INDEX_op_{ld,st}_vec.

Cc: qemu-stable@nongnu.org
Fixes: 16288ded944 ("tcg/loongarch64: Lower basic tcg vec ops to LSX")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2336
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 103 ++++++++++++++++++++++++-------
 1 file changed, 80 insertions(+), 23 deletions(-)
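The point of the commit message is that the register allocator may hold an I32 or I64 temp in an LSX vector register and later spill or reload it through the generic tcg_out_ld/st hooks, so those hooks must dispatch on both the TCG type and the register class rather than assume a GPR. Below is a self-contained toy program that mirrors only the shape of that dispatch; it is not QEMU code, and every name (demo_store, DEMO_REG_V0, the enum) is invented for the example.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum demo_type { DEMO_I32, DEMO_I64, DEMO_V128 };

/* Pretend register numbering: 0..31 are GPRs, 32..63 are vector registers. */
#define DEMO_REG_V0 32

static void demo_store(enum demo_type type, int src, int base, intptr_t offset)
{
    switch (type) {
    case DEMO_I32:
    case DEMO_I64:
        if (src < DEMO_REG_V0) {
            /* scalar temp in a GPR: plain integer store */
            printf("st.%c      r%d, r%d, %ld\n",
                   type == DEMO_I32 ? 'w' : 'd', src, base, (long)offset);
        } else {
            /* scalar temp spilled from a vector register: store element 0 */
            printf("vstelm.%c  v%d, r%d, %ld, 0\n",
                   type == DEMO_I32 ? 'w' : 'd', src - DEMO_REG_V0,
                   base, (long)offset);
        }
        break;
    case DEMO_V128:
        /* full 128-bit vector store */
        printf("vst       v%d, r%d, %ld\n",
               src - DEMO_REG_V0, base, (long)offset);
        break;
    default:
        assert(0);
    }
}

int main(void)
{
    demo_store(DEMO_I64, 12, 3, 16);              /* integer temp in a GPR */
    demo_store(DEMO_I64, DEMO_REG_V0 + 5, 3, 16); /* integer temp living in v5 */
    demo_store(DEMO_V128, DEMO_REG_V0 + 5, 3, 16);
    return 0;
}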