Message ID | 20220806170800.373219-1-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | target/loongarch: Remove cpu_fcsr0 | expand |
> On Aug 7, 2022, at 01:09, Richard Henderson <richard.henderson@linaro.org> wrote: > All of the fpu operations are defined with TCG_CALL_NO_WG, but they > all modify FCSR0. The most efficient way to fix this is to remove > cpu_fcsr0, and instead use explicit load and store operations for the > two instructions that manipulate that value. > > Cc: Qi Hu <huqi@loongson.cn> > Cc: Song Gao <gaosong@loongson.cn> > Reported-by: Feiyang Chen <chenfeiyang@loongson.cn> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/loongarch/helper.h | 2 +- > target/loongarch/fpu_helper.c | 4 +-- > target/loongarch/translate.c | 3 -- > tests/tcg/loongarch64/test_fcsr.c | 15 +++++++++ > target/loongarch/insn_trans/trans_fmov.c.inc | 33 ++++++++++---------- > tests/tcg/loongarch64/Makefile.target | 1 + > 6 files changed, 36 insertions(+), 22 deletions(-) > create mode 100644 tests/tcg/loongarch64/test_fcsr.c > > diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h > index cbbe008f32..9c01823a26 100644 > --- a/target/loongarch/helper.h > +++ b/target/loongarch/helper.h > @@ -91,7 +91,7 @@ DEF_HELPER_2(ftint_w_d, i64, env, i64) > DEF_HELPER_2(frint_s, i64, env, i64) > DEF_HELPER_2(frint_d, i64, env, i64) > > -DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_RWG, void, env, i32) > +DEF_HELPER_FLAGS_1(set_rounding_mode, TCG_CALL_NO_RWG, void, env) > > DEF_HELPER_1(rdtime_d, i64, env) > > diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c > index bd76529219..4b9637210a 100644 > --- a/target/loongarch/fpu_helper.c > +++ b/target/loongarch/fpu_helper.c > @@ -872,8 +872,8 @@ uint64_t helper_ftint_w_d(CPULoongArchState *env, uint64_t fj) > return fd; > } > > -void helper_set_rounding_mode(CPULoongArchState *env, uint32_t fcsr0) > +void helper_set_rounding_mode(CPULoongArchState *env) > { > - set_float_rounding_mode(ieee_rm[(fcsr0 >> FCSR0_RM) & 0x3], > + set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3], > 
&env->fp_status); > } > diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c > index c9afd11420..51ba291430 100644 > --- a/target/loongarch/translate.c > +++ b/target/loongarch/translate.c > @@ -22,7 +22,6 @@ > /* Global register indices */ > TCGv cpu_gpr[32], cpu_pc; > static TCGv cpu_lladdr, cpu_llval; > -TCGv_i32 cpu_fcsr0; > TCGv_i64 cpu_fpr[32]; > > #include "exec/gen-icount.h" > @@ -266,8 +265,6 @@ void loongarch_translate_init(void) > } > > cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPULoongArchState, pc), "pc"); > - cpu_fcsr0 = tcg_global_mem_new_i32(cpu_env, > - offsetof(CPULoongArchState, fcsr0), "fcsr0"); > cpu_lladdr = tcg_global_mem_new(cpu_env, > offsetof(CPULoongArchState, lladdr), "lladdr"); > cpu_llval = tcg_global_mem_new(cpu_env, > diff --git a/tests/tcg/loongarch64/test_fcsr.c b/tests/tcg/loongarch64/test_fcsr.c > new file mode 100644 > index 0000000000..ad3609eb99 > --- /dev/null > +++ b/tests/tcg/loongarch64/test_fcsr.c > @@ -0,0 +1,15 @@ > +#include <assert.h> > + > +int main() > +{ > + unsigned fcsr; > + > + asm("movgr2fcsr $r0,$r0\n\t" > + "movgr2fr.d $f0,$r0\n\t" > + "fdiv.d $f0,$f0,$f0\n\t" > + "movfcsr2gr %0,$r0" > + : "=r"(fcsr) : : "f0"); > + > + assert(fcsr & (16 << 16)); /* Invalid */ > + return 0; > +} > diff --git a/target/loongarch/insn_trans/trans_fmov.c.inc b/target/loongarch/insn_trans/trans_fmov.c.inc > index 24753d4568..5537e3dd35 100644 > --- a/target/loongarch/insn_trans/trans_fmov.c.inc > +++ b/target/loongarch/insn_trans/trans_fmov.c.inc > @@ -60,38 +60,39 @@ static bool trans_movgr2fcsr(DisasContext *ctx, arg_movgr2fcsr *a) > TCGv Rj = gpr_src(ctx, a->rj, EXT_NONE); > > if (mask == UINT32_MAX) { > - tcg_gen_extrl_i64_i32(cpu_fcsr0, Rj); > + tcg_gen_st32_i64(Rj, cpu_env, offsetof(CPULoongArchState, fcsr0)); > } else { > + TCGv_i32 fcsr0 = tcg_temp_new_i32(); > TCGv_i32 temp = tcg_temp_new_i32(); > > + tcg_gen_ld_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0)); > tcg_gen_extrl_i64_i32(temp, 
Rj); > tcg_gen_andi_i32(temp, temp, mask); > - tcg_gen_andi_i32(cpu_fcsr0, cpu_fcsr0, ~mask); > - tcg_gen_or_i32(cpu_fcsr0, cpu_fcsr0, temp); > + tcg_gen_andi_i32(fcsr0, fcsr0, ~mask); > + tcg_gen_or_i32(fcsr0, fcsr0, temp); > + tcg_gen_st_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0)); > + > tcg_temp_free_i32(temp); > - > - /* > - * Install the new rounding mode to fpu_status, if changed. > - * Note that FCSR3 is exactly the rounding mode field. > - */ > - if (mask != FCSR0_M3) { > - return true; > - } > + tcg_temp_free_i32(fcsr0); > } > - gen_helper_set_rounding_mode(cpu_env, cpu_fcsr0); > > + /* > + * Install the new rounding mode to fpu_status, if changed. > + * Note that FCSR3 is exactly the rounding mode field. > + */ > + if (mask & FCSR0_M3) { > + gen_helper_set_rounding_mode(cpu_env); > + } > return true; > } > > static bool trans_movfcsr2gr(DisasContext *ctx, arg_movfcsr2gr *a) > { > - TCGv_i32 temp = tcg_temp_new_i32(); > TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); > > - tcg_gen_andi_i32(temp, cpu_fcsr0, fcsr_mask[a->fcsrs]); > - tcg_gen_ext_i32_i64(dest, temp); > + tcg_gen_ld32u_i64(dest, cpu_env, offsetof(CPULoongArchState, fcsr0)); > + tcg_gen_andi_i64(dest, dest, fcsr_mask[a->fcsrs]); > gen_set_gpr(a->rd, dest, EXT_NONE); > - tcg_temp_free_i32(temp); > > return true; > } > diff --git a/tests/tcg/loongarch64/Makefile.target b/tests/tcg/loongarch64/Makefile.target > index 0115de78ef..00030a1026 100644 > --- a/tests/tcg/loongarch64/Makefile.target > +++ b/tests/tcg/loongarch64/Makefile.target > @@ -15,5 +15,6 @@ LOONGARCH64_TESTS += test_div > LOONGARCH64_TESTS += test_fclass > LOONGARCH64_TESTS += test_fpcom > LOONGARCH64_TESTS += test_pcadd > +LOONGARCH64_TESTS += test_fcsr > > TESTS += $(LOONGARCH64_TESTS) > -- > 2.34.1 Acked-by: Qi Hu <huqi@loongson.cn>
On 2022/8/7 上午1:08, Richard Henderson wrote: > All of the fpu operations are defined with TCG_CALL_NO_WG, but they > all modify FCSR0. The most efficient way to fix this is to remove > cpu_fcsr0, and instead use explicit load and store operations for the > two instructions that manipulate that value. > > Cc: Qi Hu<huqi@loongson.cn> > Cc: Song Gao<gaosong@loongson.cn> > Reported-by: Feiyang Chen<chenfeiyang@loongson.cn> > Signed-off-by: Richard Henderson<richard.henderson@linaro.org> > --- > target/loongarch/helper.h | 2 +- > target/loongarch/fpu_helper.c | 4 +-- > target/loongarch/translate.c | 3 -- > tests/tcg/loongarch64/test_fcsr.c | 15 +++++++++ > target/loongarch/insn_trans/trans_fmov.c.inc | 33 ++++++++++---------- > tests/tcg/loongarch64/Makefile.target | 1 + > 6 files changed, 36 insertions(+), 22 deletions(-) > create mode 100644 tests/tcg/loongarch64/test_fcsr.c Reviewed-by: Song Gao <gaosong@loongson.cn> Thanks. Song Gao
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h index cbbe008f32..9c01823a26 100644 --- a/target/loongarch/helper.h +++ b/target/loongarch/helper.h @@ -91,7 +91,7 @@ DEF_HELPER_2(ftint_w_d, i64, env, i64) DEF_HELPER_2(frint_s, i64, env, i64) DEF_HELPER_2(frint_d, i64, env, i64) -DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_RWG, void, env, i32) +DEF_HELPER_FLAGS_1(set_rounding_mode, TCG_CALL_NO_RWG, void, env) DEF_HELPER_1(rdtime_d, i64, env) diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c index bd76529219..4b9637210a 100644 --- a/target/loongarch/fpu_helper.c +++ b/target/loongarch/fpu_helper.c @@ -872,8 +872,8 @@ uint64_t helper_ftint_w_d(CPULoongArchState *env, uint64_t fj) return fd; } -void helper_set_rounding_mode(CPULoongArchState *env, uint32_t fcsr0) +void helper_set_rounding_mode(CPULoongArchState *env) { - set_float_rounding_mode(ieee_rm[(fcsr0 >> FCSR0_RM) & 0x3], + set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3], &env->fp_status); } diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c index c9afd11420..51ba291430 100644 --- a/target/loongarch/translate.c +++ b/target/loongarch/translate.c @@ -22,7 +22,6 @@ /* Global register indices */ TCGv cpu_gpr[32], cpu_pc; static TCGv cpu_lladdr, cpu_llval; -TCGv_i32 cpu_fcsr0; TCGv_i64 cpu_fpr[32]; #include "exec/gen-icount.h" @@ -266,8 +265,6 @@ void loongarch_translate_init(void) } cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPULoongArchState, pc), "pc"); - cpu_fcsr0 = tcg_global_mem_new_i32(cpu_env, - offsetof(CPULoongArchState, fcsr0), "fcsr0"); cpu_lladdr = tcg_global_mem_new(cpu_env, offsetof(CPULoongArchState, lladdr), "lladdr"); cpu_llval = tcg_global_mem_new(cpu_env, diff --git a/tests/tcg/loongarch64/test_fcsr.c b/tests/tcg/loongarch64/test_fcsr.c new file mode 100644 index 0000000000..ad3609eb99 --- /dev/null +++ b/tests/tcg/loongarch64/test_fcsr.c @@ -0,0 +1,15 @@ +#include <assert.h> + +int main() +{ + unsigned 
fcsr; + + asm("movgr2fcsr $r0,$r0\n\t" + "movgr2fr.d $f0,$r0\n\t" + "fdiv.d $f0,$f0,$f0\n\t" + "movfcsr2gr %0,$r0" + : "=r"(fcsr) : : "f0"); + + assert(fcsr & (16 << 16)); /* Invalid */ + return 0; +} diff --git a/target/loongarch/insn_trans/trans_fmov.c.inc b/target/loongarch/insn_trans/trans_fmov.c.inc index 24753d4568..5537e3dd35 100644 --- a/target/loongarch/insn_trans/trans_fmov.c.inc +++ b/target/loongarch/insn_trans/trans_fmov.c.inc @@ -60,38 +60,39 @@ static bool trans_movgr2fcsr(DisasContext *ctx, arg_movgr2fcsr *a) TCGv Rj = gpr_src(ctx, a->rj, EXT_NONE); if (mask == UINT32_MAX) { - tcg_gen_extrl_i64_i32(cpu_fcsr0, Rj); + tcg_gen_st32_i64(Rj, cpu_env, offsetof(CPULoongArchState, fcsr0)); } else { + TCGv_i32 fcsr0 = tcg_temp_new_i32(); TCGv_i32 temp = tcg_temp_new_i32(); + tcg_gen_ld_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0)); tcg_gen_extrl_i64_i32(temp, Rj); tcg_gen_andi_i32(temp, temp, mask); - tcg_gen_andi_i32(cpu_fcsr0, cpu_fcsr0, ~mask); - tcg_gen_or_i32(cpu_fcsr0, cpu_fcsr0, temp); + tcg_gen_andi_i32(fcsr0, fcsr0, ~mask); + tcg_gen_or_i32(fcsr0, fcsr0, temp); + tcg_gen_st_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0)); + tcg_temp_free_i32(temp); - - /* - * Install the new rounding mode to fpu_status, if changed. - * Note that FCSR3 is exactly the rounding mode field. - */ - if (mask != FCSR0_M3) { - return true; - } + tcg_temp_free_i32(fcsr0); } - gen_helper_set_rounding_mode(cpu_env, cpu_fcsr0); + /* + * Install the new rounding mode to fpu_status, if changed. + * Note that FCSR3 is exactly the rounding mode field. 
+ */ + if (mask & FCSR0_M3) { + gen_helper_set_rounding_mode(cpu_env); + } return true; } static bool trans_movfcsr2gr(DisasContext *ctx, arg_movfcsr2gr *a) { - TCGv_i32 temp = tcg_temp_new_i32(); TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - tcg_gen_andi_i32(temp, cpu_fcsr0, fcsr_mask[a->fcsrs]); - tcg_gen_ext_i32_i64(dest, temp); + tcg_gen_ld32u_i64(dest, cpu_env, offsetof(CPULoongArchState, fcsr0)); + tcg_gen_andi_i64(dest, dest, fcsr_mask[a->fcsrs]); gen_set_gpr(a->rd, dest, EXT_NONE); - tcg_temp_free_i32(temp); return true; } diff --git a/tests/tcg/loongarch64/Makefile.target b/tests/tcg/loongarch64/Makefile.target index 0115de78ef..00030a1026 100644 --- a/tests/tcg/loongarch64/Makefile.target +++ b/tests/tcg/loongarch64/Makefile.target @@ -15,5 +15,6 @@ LOONGARCH64_TESTS += test_div LOONGARCH64_TESTS += test_fclass LOONGARCH64_TESTS += test_fpcom LOONGARCH64_TESTS += test_pcadd +LOONGARCH64_TESTS += test_fcsr TESTS += $(LOONGARCH64_TESTS)
All of the FPU helper operations are defined with TCG_CALL_NO_WG (a promise that the helper does not write any TCG globals), but they all modify FCSR0, which is currently exposed to TCG as the global cpu_fcsr0. The most efficient way to fix this is to remove cpu_fcsr0, and instead use explicit load and store operations on env->fcsr0 for the two instructions that manipulate that value (movgr2fcsr and movfcsr2gr). Cc: Qi Hu <huqi@loongson.cn> Cc: Song Gao <gaosong@loongson.cn> Reported-by: Feiyang Chen <chenfeiyang@loongson.cn> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/loongarch/helper.h | 2 +- target/loongarch/fpu_helper.c | 4 +-- target/loongarch/translate.c | 3 -- tests/tcg/loongarch64/test_fcsr.c | 15 +++++++++ target/loongarch/insn_trans/trans_fmov.c.inc | 33 ++++++++++---------- tests/tcg/loongarch64/Makefile.target | 1 + 6 files changed, 36 insertions(+), 22 deletions(-) create mode 100644 tests/tcg/loongarch64/test_fcsr.c