Message ID | 20200724002807.441147-6-richard.henderson@linaro.org |
---|---|
State | Accepted |
Commit | ffe70e4dfc9cf2a6934e674b81b69c847b403c4b |
Series | target/riscv: NaN-boxing for multiple precision |
On 2020/7/24 8:28, Richard Henderson wrote:
> If a 32-bit input is not properly nanboxed, then the input is replaced
> with the default qnan. The only inline expansion is for the sign-changing
> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
>  target/riscv/translate.c                | 18 +++++++
>  2 files changed, 73 insertions(+), 16 deletions(-)
> [...]

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>

Zhiwei
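As an aside, here is a small host-side C model (illustrative only, not QEMU code) of what the deposit-based FSGNJ expansion computes, and why the comment says it retains the nan-boxing of rs2: tcg_gen_deposit_i64(rd, rs2, rs1, 0, 31) takes bits [30:0] from rs1 and everything else, including the sign bit and the upper 32 box bits, from rs2.

    #include <assert.h>
    #include <stdint.h>

    /* Model of tcg_gen_deposit_i64(rd, rs2, rs1, 0, 31): bits [30:0]
     * come from rs1 (magnitude), bit 31 and the upper 32 box bits
     * from rs2. */
    static uint64_t fsgnj_s(uint64_t rs1, uint64_t rs2)
    {
        const uint64_t low31 = (1ull << 31) - 1;
        return (rs2 & ~low31) | (rs1 & low31);
    }

    int main(void)
    {
        /* +1.0f with the sign of -2.0f gives -1.0f; the box is rs2's. */
        assert(fsgnj_s(0xffffffff3f800000ull, 0xffffffffc0000000ull)
               == 0xffffffffbf800000ull);
        return 0;
    }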
On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <richard.henderson@linaro.org> wrote:
> If a 32-bit input is not properly nanboxed, then the input is replaced
> with the default qnan. The only inline expansion is for the sign-changing
> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> [...]

Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com>

Chih-Min Chao
On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <richard.henderson@linaro.org> wrote:
> [...]
> static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
> [...]
> +        /*
> +         * Replace bit 31 in rs1 with inverse in rs2.
> +         * This formulation retains the nanboxing of rs1.
> +         */
> +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
> +        tcg_gen_andc_i64(rs2, mask, rs2);

should be

    tcg_gen_not_i64(rs2, rs2);        /* forgot to invert rs2 */
    tcg_gen_andc_i64(rs2, rs2, mask); /* mask must be inverted to keep only the sign bit */

Chih-Min Chao
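To see the reported problem concretely, here is a small host-side C sketch (illustrative only, not QEMU code), where ANDC(a, b) mirrors the a & ~b semantics of tcg_gen_andc_i64:

    #include <assert.h>
    #include <stdint.h>

    #define BIT31      (1ull << 31)
    #define MASK       (~BIT31)         /* ~MAKE_64BIT_MASK(31, 1) */
    #define ANDC(a, b) ((a) & ~(b))     /* tcg_gen_andc_i64: a & ~b */

    int main(void)
    {
        uint64_t rs1 = 0xffffffff3f800000ull; /* nan-boxed +1.0f */
        uint64_t rs2 = 0xffffffff40000000ull; /* nan-boxed +2.0f, sign 0 */

        /* Formulation as posted: mask has bit 31 clear, so
         * ANDC(mask, rs2) can never set bit 31, and the low bits of
         * ~rs2 leak into the result. */
        uint64_t rd = (rs1 & MASK) | ANDC(MASK, rs2);

        /* FSGNJN wants rs1's magnitude with the inverted sign of rs2,
         * i.e. -1.0f = 0xffffffffbf800000 here -- which we do not get. */
        assert(rd != 0xffffffffbf800000ull);
        return 0;
    }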
On 2020/8/8 4:24, Chih-Min Chao wrote:
> On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <richard.henderson@linaro.org> wrote:
>> [...]
>> +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
>> +        tcg_gen_andc_i64(rs2, mask, rs2);
>
> should be
>
>     tcg_gen_not_i64(rs2, rs2);        /* forgot to invert rs2 */
>     tcg_gen_andc_i64(rs2, rs2, mask); /* mask must be inverted to keep only the sign bit */

Hi Chih-Min,

Thanks for pointing it out. It's a bug here. However, I think it should be

    tcg_gen_andc_i64(rs2, rs2, mask); /* keep only bit 31 of rs2 */
    tcg_gen_not_i64(rs2, rs2);        /* invert rs2 */

Best Regards,
Zhiwei
On 2020/8/8 22:18, LIU Zhiwei wrote:
> On 2020/8/8 4:24, Chih-Min Chao wrote:
>> should be
>>
>>     tcg_gen_not_i64(rs2, rs2);        /* forgot to invert rs2 */
>>     tcg_gen_andc_i64(rs2, rs2, mask); /* mask must be inverted to keep only the sign bit */
>
> Thanks for pointing it out. It's a bug here. However, I think it should be
>
>     tcg_gen_andc_i64(rs2, rs2, mask); /* keep only bit 31 of rs2 */
>     tcg_gen_not_i64(rs2, rs2);        /* invert rs2 */

Hi Chih-Min,

Sorry, your code is right.

Zhiwei
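A companion sketch (again host-side C, illustrative only) showing why the two proposed orderings differ, and why the thread settles on Chih-Min Chao's version:

    #include <assert.h>
    #include <stdint.h>

    #define BIT31      (1ull << 31)
    #define MASK       (~BIT31)         /* ~MAKE_64BIT_MASK(31, 1) */
    #define ANDC(a, b) ((a) & ~(b))     /* tcg_gen_andc_i64: a & ~b */

    int main(void)
    {
        uint64_t rs1 = 0xffffffff3f800000ull;    /* nan-boxed +1.0f */
        uint64_t rs2 = 0xffffffff40000000ull;    /* nan-boxed +2.0f */
        uint64_t expect = 0xffffffffbf800000ull; /* -1.0f */

        /* Chih-Min Chao's order: invert rs2 first, then ANDC against
         * the mask, so exactly the inverted bit 31 survives. */
        uint64_t good = (rs1 & MASK) | ANDC(~rs2, MASK);
        assert(good == expect);

        /* The retracted order (andc, then not): isolating bit 31 first
         * and then inverting the whole word sets every other bit too. */
        uint64_t bad = (rs1 & MASK) | ~ANDC(rs2, MASK);
        assert(bad != expect);
        return 0;
    }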
diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
index 264d3139f1..f9a9e0643a 100644
--- a/target/riscv/insn_trans/trans_rvf.inc.c
+++ b/target/riscv/insn_trans/trans_rvf.inc.c
@@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
 {
     REQUIRE_FPU;
     REQUIRE_EXT(ctx, RVF);
+
     if (a->rs1 == a->rs2) { /* FMOV */
-        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
     } else { /* FSGNJ */
-        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
-                            0, 31);
+        TCGv_i64 rs1 = tcg_temp_new_i64();
+        TCGv_i64 rs2 = tcg_temp_new_i64();
+
+        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /* This formulation retains the nanboxing of rs2. */
+        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
+        tcg_temp_free_i64(rs1);
+        tcg_temp_free_i64(rs2);
     }
-    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
     mark_fs_dirty(ctx);
     return true;
 }
 
 static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
 {
+    TCGv_i64 rs1, rs2, mask;
+
     REQUIRE_FPU;
     REQUIRE_EXT(ctx, RVF);
+
+    rs1 = tcg_temp_new_i64();
+    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
     if (a->rs1 == a->rs2) { /* FNEG */
-        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
+        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
     } else {
-        TCGv_i64 t0 = tcg_temp_new_i64();
-        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
-        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
-        tcg_temp_free_i64(t0);
+        rs2 = tcg_temp_new_i64();
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /*
+         * Replace bit 31 in rs1 with inverse in rs2.
+         * This formulation retains the nanboxing of rs1.
+         */
+        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
+        tcg_gen_andc_i64(rs2, mask, rs2);
+        tcg_gen_and_i64(rs1, mask, rs1);
+        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
+
+        tcg_temp_free_i64(mask);
+        tcg_temp_free_i64(rs2);
     }
-    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+    tcg_temp_free_i64(rs1);
+
     mark_fs_dirty(ctx);
     return true;
 }
 
 static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
 {
+    TCGv_i64 rs1, rs2;
+
     REQUIRE_FPU;
     REQUIRE_EXT(ctx, RVF);
+
+    rs1 = tcg_temp_new_i64();
+    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
     if (a->rs1 == a->rs2) { /* FABS */
-        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);
+        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));
     } else {
-        TCGv_i64 t0 = tcg_temp_new_i64();
-        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);
-        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
-        tcg_temp_free_i64(t0);
+        rs2 = tcg_temp_new_i64();
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /*
+         * Xor bit 31 in rs1 with that in rs2.
+         * This formulation retains the nanboxing of rs1.
+         */
+        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));
+        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
+
+        tcg_temp_free_i64(rs2);
     }
-    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+    tcg_temp_free_i64(rs1);
+
     mark_fs_dirty(ctx);
     return true;
 }
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 12a746da97..bf35182776 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
     tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
 }
 
+/*
+ * A narrow n-bit operation, where n < FLEN, checks that input operands
+ * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
+ * If so, the least-significant bits of the input are used, otherwise the
+ * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).
+ *
+ * Here, the result is always nan-boxed, even the canonical nan.
+ */
+static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
+{
+    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);
+    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);
+
+    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);
+    tcg_temp_free_i64(t_max);
+    tcg_temp_free_i64(t_nan);
+}
+
 static void generate_exception(DisasContext *ctx, int excp)
 {
     tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
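As a sanity check on the single-movcond trick, here is a host-side model of gen_check_nanbox_s (illustrative C, not TCG): an input compares unsigned-greater-or-equal to 0xffffffff00000000 exactly when its upper 32 bits are all ones, so one comparison both validates the box and selects the nan-boxed default qNaN on failure.

    #include <assert.h>
    #include <stdint.h>

    /* Host model of movcond(GEU): in >= 0xffffffff00000000 holds iff
     * the upper 32 bits are all ones; otherwise substitute the
     * nan-boxed canonical qNaN 0x7fc00000. */
    static uint64_t check_nanbox_s(uint64_t in)
    {
        return in >= 0xffffffff00000000ull ? in : 0xffffffff7fc00000ull;
    }

    int main(void)
    {
        /* Properly boxed +1.0f is kept unchanged. */
        assert(check_nanbox_s(0xffffffff3f800000ull) == 0xffffffff3f800000ull);
        /* An unboxed input is replaced with the boxed default qNaN. */
        assert(check_nanbox_s(0x000000003f800000ull) == 0xffffffff7fc00000ull);
        return 0;
    }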
If a 32-bit input is not properly nanboxed, then the input is replaced
with the default qnan. The only inline expansion is for the sign-changing
set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
 target/riscv/translate.c                | 18 +++++++
 2 files changed, 73 insertions(+), 16 deletions(-)

--
2.25.1
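One detail the thread does not call out: the FNEG/FABS constants change from INT32_MIN to MAKE_64BIT_MASK(31, 1) because INT32_MIN sign-extends as a 64-bit immediate and so would also flip the nan-box bits. The old code could get away with that since it re-boxed the result with gen_nanbox_s afterwards; the new code drops the re-boxing, so it must touch only bit 31. A quick host-side check (illustrative C):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* INT32_MIN sign-extends to a 64-bit immediate that also flips
         * the nan-box; MAKE_64BIT_MASK(31, 1) touches only the sign. */
        assert((uint64_t)(int64_t)INT32_MIN == 0xffffffff80000000ull);
        assert((1ull << 31)                 == 0x0000000080000000ull);

        /* With the box-preserving constant, FNEG keeps the upper bits. */
        uint64_t rs1 = 0xffffffff3f800000ull;  /* nan-boxed +1.0f */
        assert((rs1 ^ (1ull << 31)) == 0xffffffffbf800000ull);
        return 0;
    }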