Message ID | 20200724002807.441147-2-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | target/riscv: NaN-boxing for multiple precision | expand |
On 2020/7/24 8:28, Richard Henderson wrote: > Make sure that all results from single-precision scalar helpers > are properly nan-boxed to 64-bits. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/riscv/internals.h | 5 +++++ > target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------ > 2 files changed, 28 insertions(+), 19 deletions(-) > > diff --git a/target/riscv/internals.h b/target/riscv/internals.h > index 37d33820ad..9f4ba7d617 100644 > --- a/target/riscv/internals.h > +++ b/target/riscv/internals.h > @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1); > #define SEW32 2 > #define SEW64 3 > > +static inline uint64_t nanbox_s(float32 f) > +{ > + return f | MAKE_64BIT_MASK(32, 32); > +} > + If define it here, we can also define a more general function with flen. +static inline uint64_t nanbox_s(float32 f, uint32_t flen) +{ + return f | MAKE_64BIT_MASK(flen, 64 - flen); +} + So we can reuse it in fp16 or bf16 scalar instruction and in vector instructions. 
Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com> Zhiwei > #endif > diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c > index 4379756dc4..72541958a7 100644 > --- a/target/riscv/fpu_helper.c > +++ b/target/riscv/fpu_helper.c > @@ -81,10 +81,16 @@ void helper_set_rounding_mode(CPURISCVState *env, uint32_t rm) > set_float_rounding_mode(softrm, &env->fp_status); > } > > +static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > + uint64_t frs3, int flags) > +{ > + return nanbox_s(float32_muladd(frs1, frs2, frs3, flags, &env->fp_status)); > +} > + > uint64_t helper_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > uint64_t frs3) > { > - return float32_muladd(frs1, frs2, frs3, 0, &env->fp_status); > + return do_fmadd_s(env, frs1, frs2, frs3, 0); > } > > uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > @@ -96,8 +102,7 @@ uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > uint64_t helper_fmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > uint64_t frs3) > { > - return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c, > - &env->fp_status); > + return do_fmadd_s(env, frs1, frs2, frs3, float_muladd_negate_c); > } > > uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > @@ -110,8 +115,7 @@ uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > uint64_t helper_fnmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > uint64_t frs3) > { > - return float32_muladd(frs1, frs2, frs3, float_muladd_negate_product, > - &env->fp_status); > + return do_fmadd_s(env, frs1, frs2, frs3, float_muladd_negate_product); > } > > uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > @@ -124,8 +128,8 @@ uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > uint64_t helper_fnmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > uint64_t frs3) > { > - return float32_muladd(frs1, frs2, frs3, 
float_muladd_negate_c | > - float_muladd_negate_product, &env->fp_status); > + return do_fmadd_s(env, frs1, frs2, frs3, > + float_muladd_negate_c | float_muladd_negate_product); > } > > uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > @@ -137,37 +141,37 @@ uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, > > uint64_t helper_fadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) > { > - return float32_add(frs1, frs2, &env->fp_status); > + return nanbox_s(float32_add(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) > { > - return float32_sub(frs1, frs2, &env->fp_status); > + return nanbox_s(float32_sub(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fmul_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) > { > - return float32_mul(frs1, frs2, &env->fp_status); > + return nanbox_s(float32_mul(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fdiv_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) > { > - return float32_div(frs1, frs2, &env->fp_status); > + return nanbox_s(float32_div(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fmin_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) > { > - return float32_minnum(frs1, frs2, &env->fp_status); > + return nanbox_s(float32_minnum(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fmax_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) > { > - return float32_maxnum(frs1, frs2, &env->fp_status); > + return nanbox_s(float32_maxnum(frs1, frs2, &env->fp_status)); > } > > uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t frs1) > { > - return float32_sqrt(frs1, &env->fp_status); > + return nanbox_s(float32_sqrt(frs1, &env->fp_status)); > } > > target_ulong helper_fle_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) > @@ -209,23 +213,23 @@ uint64_t helper_fcvt_lu_s(CPURISCVState *env, uint64_t frs1) > > uint64_t helper_fcvt_s_w(CPURISCVState *env, target_ulong rs1) > { > - 
return int32_to_float32((int32_t)rs1, &env->fp_status); > + return nanbox_s(int32_to_float32((int32_t)rs1, &env->fp_status)); > } > > uint64_t helper_fcvt_s_wu(CPURISCVState *env, target_ulong rs1) > { > - return uint32_to_float32((uint32_t)rs1, &env->fp_status); > + return nanbox_s(uint32_to_float32((uint32_t)rs1, &env->fp_status)); > } > > #if defined(TARGET_RISCV64) > uint64_t helper_fcvt_s_l(CPURISCVState *env, uint64_t rs1) > { > - return int64_to_float32(rs1, &env->fp_status); > + return nanbox_s(int64_to_float32(rs1, &env->fp_status)); > } > > uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1) > { > - return uint64_to_float32(rs1, &env->fp_status); > + return nanbox_s(uint64_to_float32(rs1, &env->fp_status)); > } > #endif > > @@ -266,7 +270,7 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) > > uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1) > { > - return float64_to_float32(rs1, &env->fp_status); > + return nanbox_s(float64_to_float32(rs1, &env->fp_status)); > } > > uint64_t helper_fcvt_d_s(CPURISCVState *env, uint64_t rs1)
On 7/23/20 7:35 PM, LIU Zhiwei wrote: > > > On 2020/7/24 8:28, Richard Henderson wrote: >> Make sure that all results from single-precision scalar helpers >> are properly nan-boxed to 64-bits. >> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> target/riscv/internals.h | 5 +++++ >> target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------ >> 2 files changed, 28 insertions(+), 19 deletions(-) >> >> diff --git a/target/riscv/internals.h b/target/riscv/internals.h >> index 37d33820ad..9f4ba7d617 100644 >> --- a/target/riscv/internals.h >> +++ b/target/riscv/internals.h >> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1); >> #define SEW32 2 >> #define SEW64 3 >> +static inline uint64_t nanbox_s(float32 f) >> +{ >> + return f | MAKE_64BIT_MASK(32, 32); >> +} >> + > If define it here, we can also define a more general function with flen. > > +static inline uint64_t nanbox_s(float32 f, uint32_t flen) > +{ > + return f | MAKE_64BIT_MASK(flen, 64 - flen); > +} > + > > So we can reuse it in fp16 or bf16 scalar instruction and in vector instructions. While we could do that, we will not encounter all possible lengths. In the cover letter, I mentioned defining a second function, static inline uint64_t nanbox_h(float16 f) { return f | MAKE_64BIT_MASK(16, 48); } Having two separate functions will, I believe, be easier to use in practice. r~
On 2020/7/24 11:55, Richard Henderson wrote: > On 7/23/20 7:35 PM, LIU Zhiwei wrote: >> >> On 2020/7/24 8:28, Richard Henderson wrote: >>> Make sure that all results from single-precision scalar helpers >>> are properly nan-boxed to 64-bits. >>> >>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >>> --- >>> target/riscv/internals.h | 5 +++++ >>> target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------ >>> 2 files changed, 28 insertions(+), 19 deletions(-) >>> >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h >>> index 37d33820ad..9f4ba7d617 100644 >>> --- a/target/riscv/internals.h >>> +++ b/target/riscv/internals.h >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1); >>> #define SEW32 2 >>> #define SEW64 3 >>> +static inline uint64_t nanbox_s(float32 f) >>> +{ >>> + return f | MAKE_64BIT_MASK(32, 32); >>> +} >>> + >> If define it here, we can also define a more general function with flen. >> >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen) >> +{ >> + return f | MAKE_64BIT_MASK(flen, 64 - flen); >> +} >> + >> >> So we can reuse it in fp16 or bf16 scalar instruction and in vector instructions. > While we could do that, we will not encounter all possible lengths. In the > cover letter, I mentioned defining a second function, > > static inline uint64_t nanbox_h(float16 f) > { > return f | MAKE_64BIT_MASK(16, 48); > } > > Having two separate functions will, I believe, be easier to use in practice. > Get it. Thanks. Zhiwei > > r~
On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote: > > > On 2020/7/24 11:55, Richard Henderson wrote: > > On 7/23/20 7:35 PM, LIU Zhiwei wrote: > >> > >> On 2020/7/24 8:28, Richard Henderson wrote: > >>> Make sure that all results from single-precision scalar helpers > >>> are properly nan-boxed to 64-bits. > >>> > >>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > >>> --- > >>> target/riscv/internals.h | 5 +++++ > >>> target/riscv/fpu_helper.c | 42 > +++++++++++++++++++++------------------ > >>> 2 files changed, 28 insertions(+), 19 deletions(-) > >>> > >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h > >>> index 37d33820ad..9f4ba7d617 100644 > >>> --- a/target/riscv/internals.h > >>> +++ b/target/riscv/internals.h > >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1); > >>> #define SEW32 2 > >>> #define SEW64 3 > >>> +static inline uint64_t nanbox_s(float32 f) > >>> +{ > >>> + return f | MAKE_64BIT_MASK(32, 32); > >>> +} > >>> + > >> If define it here, we can also define a more general function with > flen. > >> > >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen) > >> +{ > >> + return f | MAKE_64BIT_MASK(flen, 64 - flen); > >> +} > >> + > >> > >> So we can reuse it in fp16 or bf16 scalar instruction and in vector > instructions. > > While we could do that, we will not encounter all possible lengths. In > the > > cover letter, I mentioned defining a second function, > > > > static inline uint64_t nanbox_h(float16 f) > > { > > return f | MAKE_64BIT_MASK(16, 48); > > } > > > > Having two separate functions will, I believe, be easier to use in > practice. > > > Get it. Thanks. > > Zhiwei > > > > r~ > > > That is what has been implemented in spike. It fills up the Nan-Box when value is stored back internal structure and unbox the value with difference floating type (half/single/double/quad). 
By the way, I prefer to keeping the suffix to tell different floating type rather than pass arbitrary since each floating type belong to each extension. Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com> <div dir="ltr"><div dir="ltr"><div><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr">On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <<a href="mailto:zhiwei_liu@c-sky.com">zhiwei_liu@c-sky.com</a>> wrote:<br></div></div></div></div><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><br> <br> On 2020/7/24 11:55, Richard Henderson wrote:<br> > On 7/23/20 7:35 PM, LIU Zhiwei wrote:<br> >><br> >> On 2020/7/24 8:28, Richard Henderson wrote:<br> >>> Make sure that all results from single-precision scalar helpers<br> >>> are properly nan-boxed to 64-bits.<br> >>><br> >>> Signed-off-by: Richard Henderson <<a href="mailto:richard.henderson@linaro.org" target="_blank">richard.henderson@linaro.org</a>><br> >>> ---<br> >>> target/riscv/internals.h | 5 +++++<br> >>> target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------<br> >>> 2 files changed, 28 insertions(+), 19 deletions(-)<br> >>><br> >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h<br> >>> index 37d33820ad..9f4ba7d617 100644<br> >>> --- a/target/riscv/internals.h<br> >>> +++ b/target/riscv/internals.h<br> >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);<br> >>> #define SEW32 2<br> >>> #define SEW64 3<br> >>> +static inline uint64_t nanbox_s(float32 f)<br> >>> +{<br> >>> + return f | MAKE_64BIT_MASK(32, 32);<br> >>> +}<br> >>> +<br> >> If define it here, we can also define a more general function with flen.<br> >><br> >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)<br> >> +{<br> >> + return f | MAKE_64BIT_MASK(flen, 64 - flen);<br> >> +}<br> >> +<br> >><br> >> So we can reuse it in fp16 or bf16 scalar instruction and in vector 
instructions.<br> > While we could do that, we will not encounter all possible lengths. In the<br> > cover letter, I mentioned defining a second function,<br> ><br> > static inline uint64_t nanbox_h(float16 f)<br> > {<br> > return f | MAKE_64BIT_MASK(16, 48);<br> > }<br> ><br> > Having two separate functions will, I believe, be easier to use in practice.<br> ><br> Get it. Thanks.<br> <br> Zhiwei<br> ><br> > r~<br> <br> <br></blockquote><div><br></div><div>That is what has been implemented in spike. It fills up the Nan-Box when value is stored back internal structure and </div><div>unbox the value with difference floating type (half/single/double/quad).<br></div><div><br></div><div>By the way, I prefer to keeping the suffix to tell different floating type rather than pass arbitrary </div><div>since each floating type belong to each extension.<br><br>Reviewed-by: Chih-Min Chao <<a href="mailto:chihmin.chao@sifive.com">chihmin.chao@sifive.com</a>><br></div></div></div>
On 2020/8/6 14:09, Chih-Min Chao wrote: > On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <zhiwei_liu@c-sky.com > <mailto:zhiwei_liu@c-sky.com>> wrote: > > > > On 2020/7/24 11:55, Richard Henderson wrote: > > On 7/23/20 7:35 PM, LIU Zhiwei wrote: > >> > >> On 2020/7/24 8:28, Richard Henderson wrote: > >>> Make sure that all results from single-precision scalar helpers > >>> are properly nan-boxed to 64-bits. > >>> > >>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org > <mailto:richard.henderson@linaro.org>> > >>> --- > >>> target/riscv/internals.h | 5 +++++ > >>> target/riscv/fpu_helper.c | 42 > +++++++++++++++++++++------------------ > >>> 2 files changed, 28 insertions(+), 19 deletions(-) > >>> > >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h > >>> index 37d33820ad..9f4ba7d617 100644 > >>> --- a/target/riscv/internals.h > >>> +++ b/target/riscv/internals.h > >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1); > >>> #define SEW32 2 > >>> #define SEW64 3 > >>> +static inline uint64_t nanbox_s(float32 f) > >>> +{ > >>> + return f | MAKE_64BIT_MASK(32, 32); > >>> +} > >>> + > >> If define it here, we can also define a more general function > with flen. > >> > >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen) > >> +{ > >> + return f | MAKE_64BIT_MASK(flen, 64 - flen); > >> +} > >> + > >> > >> So we can reuse it in fp16 or bf16 scalar instruction and in > vector instructions. > > While we could do that, we will not encounter all possible > lengths. In the > > cover letter, I mentioned defining a second function, > > > > static inline uint64_t nanbox_h(float16 f) > > { > > return f | MAKE_64BIT_MASK(16, 48); > > } > > > > Having two separate functions will, I believe, be easier to use > in practice. > > > Get it. Thanks. > > Zhiwei > > > > r~ > > > > That is what has been implemented in spike. 
It fills up the Nan-Box > when value is stored back internal structure and > unbox the value with difference floating type (half/single/double/quad). Hi Chih-Min, Has half-precision been a part of RVV? Or do you know the ISA abbreviation of half-precision? Thanks very much. Best Regards, Zhiwei > > By the way, I prefer to keeping the suffix to tell different floating > type rather than pass arbitrary > since each floating type belong to each extension. > > Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com > <mailto:chihmin.chao@sifive.com>> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> </head> <body> <br> <br> <div class="moz-cite-prefix">On 2020/8/6 14:09, Chih-Min Chao wrote:<br> </div> <blockquote type="cite" cite="mid:CAEiOBXXmz2APpmtwPrvikXUt5j_Q=k5ZqK9g2Fe4bdjeAbg_6g@mail.gmail.com"> <meta http-equiv="content-type" content="text/html; charset=UTF-8"> <div dir="ltr"> <div dir="ltr"> <div> <div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"> <div dir="ltr">On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <<a href="mailto:zhiwei_liu@c-sky.com" moz-do-not-send="true">zhiwei_liu@c-sky.com</a>> wrote:<br> </div> </div> </div> </div> <div class="gmail_quote"> <blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><br> <br> On 2020/7/24 11:55, Richard Henderson wrote:<br> > On 7/23/20 7:35 PM, LIU Zhiwei wrote:<br> >><br> >> On 2020/7/24 8:28, Richard Henderson wrote:<br> >>> Make sure that all results from single-precision scalar helpers<br> >>> are properly nan-boxed to 64-bits.<br> >>><br> >>> Signed-off-by: Richard Henderson <<a href="mailto:richard.henderson@linaro.org" target="_blank" moz-do-not-send="true">richard.henderson@linaro.org</a>><br> >>> ---<br> >>> target/riscv/internals.h | 5 +++++<br> >>> target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------<br> >>> 2 files changed, 28 insertions(+), 19 deletions(-)<br> >>><br> >>> 
diff --git a/target/riscv/internals.h b/target/riscv/internals.h<br> >>> index 37d33820ad..9f4ba7d617 100644<br> >>> --- a/target/riscv/internals.h<br> >>> +++ b/target/riscv/internals.h<br> >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);<br> >>> #define SEW32 2<br> >>> #define SEW64 3<br> >>> +static inline uint64_t nanbox_s(float32 f)<br> >>> +{<br> >>> + return f | MAKE_64BIT_MASK(32, 32);<br> >>> +}<br> >>> +<br> >> If define it here, we can also define a more general function with flen.<br> >><br> >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)<br> >> +{<br> >> + return f | MAKE_64BIT_MASK(flen, 64 - flen);<br> >> +}<br> >> +<br> >><br> >> So we can reuse it in fp16 or bf16 scalar instruction and in vector instructions.<br> > While we could do that, we will not encounter all possible lengths. In the<br> > cover letter, I mentioned defining a second function,<br> ><br> > static inline uint64_t nanbox_h(float16 f)<br> > {<br> > return f | MAKE_64BIT_MASK(16, 48);<br> > }<br> ><br> > Having two separate functions will, I believe, be easier to use in practice.<br> ><br> Get it. Thanks.<br> <br> Zhiwei<br> ><br> > r~<br> <br> <br> </blockquote> <div><br> </div> <div>That is what has been implemented in spike. It fills up the Nan-Box when value is stored back internal structure and </div> <div>unbox the value with difference floating type (half/single/double/quad).<br> </div> </div> </div> </blockquote> Hi Chih-Min,<br> <br> Has half-precision been a part of RVV? 
Or do you know the ISA abbreviation of half-precision?<br> <br> Thanks very much.<br> <br> Best Regards,<br> Zhiwei <br> <blockquote type="cite" cite="mid:CAEiOBXXmz2APpmtwPrvikXUt5j_Q=k5ZqK9g2Fe4bdjeAbg_6g@mail.gmail.com"> <div dir="ltr"> <div class="gmail_quote"> <div><br> </div> <div>By the way, I prefer to keeping the suffix to tell different floating type rather than pass arbitrary </div> <div>since each floating type belong to each extension.<br> <br> Reviewed-by: Chih-Min Chao <<a href="mailto:chihmin.chao@sifive.com" moz-do-not-send="true">chihmin.chao@sifive.com</a>><br> </div> </div> </div> </blockquote> <br> </body> </html>
On Thu, Aug 6, 2020 at 3:05 PM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote: > > > On 2020/8/6 14:09, Chih-Min Chao wrote: > > On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote: > >> >> >> On 2020/7/24 11:55, Richard Henderson wrote: >> > On 7/23/20 7:35 PM, LIU Zhiwei wrote: >> >> >> >> On 2020/7/24 8:28, Richard Henderson wrote: >> >>> Make sure that all results from single-precision scalar helpers >> >>> are properly nan-boxed to 64-bits. >> >>> >> >>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> >>> --- >> >>> target/riscv/internals.h | 5 +++++ >> >>> target/riscv/fpu_helper.c | 42 >> +++++++++++++++++++++------------------ >> >>> 2 files changed, 28 insertions(+), 19 deletions(-) >> >>> >> >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h >> >>> index 37d33820ad..9f4ba7d617 100644 >> >>> --- a/target/riscv/internals.h >> >>> +++ b/target/riscv/internals.h >> >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1); >> >>> #define SEW32 2 >> >>> #define SEW64 3 >> >>> +static inline uint64_t nanbox_s(float32 f) >> >>> +{ >> >>> + return f | MAKE_64BIT_MASK(32, 32); >> >>> +} >> >>> + >> >> If define it here, we can also define a more general function with >> flen. >> >> >> >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen) >> >> +{ >> >> + return f | MAKE_64BIT_MASK(flen, 64 - flen); >> >> +} >> >> + >> >> >> >> So we can reuse it in fp16 or bf16 scalar instruction and in vector >> instructions. >> > While we could do that, we will not encounter all possible lengths. In >> the >> > cover letter, I mentioned defining a second function, >> > >> > static inline uint64_t nanbox_h(float16 f) >> > { >> > return f | MAKE_64BIT_MASK(16, 48); >> > } >> > >> > Having two separate functions will, I believe, be easier to use in >> practice. >> > >> Get it. Thanks. >> >> Zhiwei >> > >> > r~ >> >> >> > That is what has been implemented in spike. 
It fills up the Nan-Box when > value is stored back internal structure and > unbox the value with difference floating type (half/single/double/quad). > > Hi Chih-Min, > > Has half-precision been a part of RVV? Or do you know the ISA abbreviation > of half-precision? > > Thanks very much. > > Best Regards, > Zhiwei > > > By the way, I prefer to keeping the suffix to tell different floating > type rather than pass arbitrary > since each floating type belong to each extension. > > Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com> > > Hi ZhiWei, It is still under branch https://github.com/riscv/riscv-isa-manual/tree/zfh and I am not sure about the working group progress. I have an implementation based on this draft and will send it as RFC patch next week. Thanks Chih-Min <div dir="ltr"><div dir="ltr"><br clear="all"><div><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><br></div></div></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Thu, Aug 6, 2020 at 3:05 PM LIU Zhiwei <<a href="mailto:zhiwei_liu@c-sky.com">zhiwei_liu@c-sky.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"> <div> <br> <br> <div>On 2020/8/6 14:09, Chih-Min Chao wrote:<br> </div> <blockquote type="cite"> <div dir="ltr"> <div dir="ltr"> <div> <div dir="ltr"> <div dir="ltr">On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <<a href="mailto:zhiwei_liu@c-sky.com" target="_blank">zhiwei_liu@c-sky.com</a>> wrote:<br> </div> </div> </div> </div> <div class="gmail_quote"> <blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><br> <br> On 2020/7/24 11:55, Richard Henderson wrote:<br> > On 7/23/20 7:35 PM, LIU Zhiwei wrote:<br> >><br> >> On 2020/7/24 8:28, Richard Henderson wrote:<br> >>> Make sure that all results from single-precision scalar helpers<br> >>> are properly nan-boxed to 
64-bits.<br> >>><br> >>> Signed-off-by: Richard Henderson <<a href="mailto:richard.henderson@linaro.org" target="_blank">richard.henderson@linaro.org</a>><br> >>> ---<br> >>> target/riscv/internals.h | 5 +++++<br> >>> target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------<br> >>> 2 files changed, 28 insertions(+), 19 deletions(-)<br> >>><br> >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h<br> >>> index 37d33820ad..9f4ba7d617 100644<br> >>> --- a/target/riscv/internals.h<br> >>> +++ b/target/riscv/internals.h<br> >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);<br> >>> #define SEW32 2<br> >>> #define SEW64 3<br> >>> +static inline uint64_t nanbox_s(float32 f)<br> >>> +{<br> >>> + return f | MAKE_64BIT_MASK(32, 32);<br> >>> +}<br> >>> +<br> >> If define it here, we can also define a more general function with flen.<br> >><br> >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)<br> >> +{<br> >> + return f | MAKE_64BIT_MASK(flen, 64 - flen);<br> >> +}<br> >> +<br> >><br> >> So we can reuse it in fp16 or bf16 scalar instruction and in vector instructions.<br> > While we could do that, we will not encounter all possible lengths. In the<br> > cover letter, I mentioned defining a second function,<br> ><br> > static inline uint64_t nanbox_h(float16 f)<br> > {<br> > return f | MAKE_64BIT_MASK(16, 48);<br> > }<br> ><br> > Having two separate functions will, I believe, be easier to use in practice.<br> ><br> Get it. Thanks.<br> <br> Zhiwei<br> ><br> > r~<br> <br> <br> </blockquote> <div><br> </div> <div>That is what has been implemented in spike. It fills up the Nan-Box when value is stored back internal structure and </div> <div>unbox the value with difference floating type (half/single/double/quad).<br> </div> </div> </div> </blockquote> Hi Chih-Min,<br> <br> Has half-precision been a part of RVV? 
Or do you know the ISA abbreviation of half-precision?<br> <br> Thanks very much.<br> <br> Best Regards,<br> Zhiwei <br> <blockquote type="cite"> <div dir="ltr"> <div class="gmail_quote"> <div><br> </div> <div>By the way, I prefer to keeping the suffix to tell different floating type rather than pass arbitrary </div> <div>since each floating type belong to each extension.<br> <br> Reviewed-by: Chih-Min Chao <<a href="mailto:chihmin.chao@sifive.com" target="_blank">chihmin.chao@sifive.com</a>></div></div></div></blockquote></div></blockquote><div><br></div><div>Hi ZhiWei,</div><div><br></div><div>It is still under branch <a href="https://github.com/riscv/riscv-isa-manual/tree/zfh">https://github.com/riscv/riscv-isa-manual/tree/zfh</a> and I am not sure about the working group progress.</div><div>I have an implementation based on this draft and will send it as RFC patch next week.<br><br>Thanks</div><div>Chih-Min</div></div></div>
On 2020/8/6 16:42, Chih-Min Chao wrote: > > > > On Thu, Aug 6, 2020 at 3:05 PM LIU Zhiwei <zhiwei_liu@c-sky.com > <mailto:zhiwei_liu@c-sky.com>> wrote: > > > > On 2020/8/6 14:09, Chih-Min Chao wrote: >> On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <zhiwei_liu@c-sky.com >> <mailto:zhiwei_liu@c-sky.com>> wrote: >> >> >> >> On 2020/7/24 11:55, Richard Henderson wrote: >> > On 7/23/20 7:35 PM, LIU Zhiwei wrote: >> >> >> >> On 2020/7/24 8:28, Richard Henderson wrote: >> >>> Make sure that all results from single-precision scalar >> helpers >> >>> are properly nan-boxed to 64-bits. >> >>> >> >>> Signed-off-by: Richard Henderson >> <richard.henderson@linaro.org >> <mailto:richard.henderson@linaro.org>> >> >>> --- >> >>> target/riscv/internals.h | 5 +++++ >> >>> target/riscv/fpu_helper.c | 42 >> +++++++++++++++++++++------------------ >> >>> 2 files changed, 28 insertions(+), 19 deletions(-) >> >>> >> >>> diff --git a/target/riscv/internals.h >> b/target/riscv/internals.h >> >>> index 37d33820ad..9f4ba7d617 100644 >> >>> --- a/target/riscv/internals.h >> >>> +++ b/target/riscv/internals.h >> >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1); >> >>> #define SEW32 2 >> >>> #define SEW64 3 >> >>> +static inline uint64_t nanbox_s(float32 f) >> >>> +{ >> >>> + return f | MAKE_64BIT_MASK(32, 32); >> >>> +} >> >>> + >> >> If define it here, we can also define a more general >> function with flen. >> >> >> >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen) >> >> +{ >> >> + return f | MAKE_64BIT_MASK(flen, 64 - flen); >> >> +} >> >> + >> >> >> >> So we can reuse it in fp16 or bf16 scalar instruction and >> in vector instructions. >> > While we could do that, we will not encounter all possible >> lengths. In the >> > cover letter, I mentioned defining a second function, >> > >> > static inline uint64_t nanbox_h(float16 f) >> > { >> > return f | MAKE_64BIT_MASK(16, 48); >> > } >> > >> > Having two separate functions will, I believe, be easier to >> use in practice. 
>> > >> Get it. Thanks. >> >> Zhiwei >> > >> > r~ >> >> >> >> That is what has been implemented in spike. It fills up the >> Nan-Box when value is stored back internal structure and >> unbox the value with difference floating type >> (half/single/double/quad). > Hi Chih-Min, > > Has half-precision been a part of RVV? Or do you know the ISA > abbreviation of half-precision? > > Thanks very much. > > Best Regards, > Zhiwei >> >> By the way, I prefer to keeping the suffix to tell >> different floating type rather than pass arbitrary >> since each floating type belong to each extension. >> >> Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com >> <mailto:chihmin.chao@sifive.com>> > > > Hi ZhiWei, > > It is still under branch > https://github.com/riscv/riscv-isa-manual/tree/zfh and I am not sure > about the working group progress. > I have an implementation based on this draft and will send it as RFC > patch next week. Hi Chih-Min, Thanks for your information. As Krste said once, as we don't have RV16, FP16 separated won't make sense. 
Obviously, it has changed.:-P I also have implemented a version of FP16 ,“obvious set including existing FP instructions with format field set to "half" (fmt=10)“ If you want to send the patch, I will not send it again.:-) Zhiwei > > Thanks > Chih-Min <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> </head> <body> <br> <br> <div class="moz-cite-prefix">On 2020/8/6 16:42, Chih-Min Chao wrote:<br> </div> <blockquote type="cite" cite="mid:CAEiOBXXUDECyBXxduJCgPkFx0=Rmtjgad0yu_irVcjw32aELFQ@mail.gmail.com"> <meta http-equiv="content-type" content="text/html; charset=UTF-8"> <div dir="ltr"> <div dir="ltr"><br clear="all"> <div> <div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"> <div dir="ltr"><br> </div> </div> </div> </div> <br> <div class="gmail_quote"> <div dir="ltr" class="gmail_attr">On Thu, Aug 6, 2020 at 3:05 PM LIU Zhiwei <<a href="mailto:zhiwei_liu@c-sky.com" moz-do-not-send="true">zhiwei_liu@c-sky.com</a>> wrote:<br> </div> <blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"> <div> <br> <br> <div>On 2020/8/6 14:09, Chih-Min Chao wrote:<br> </div> <blockquote type="cite"> <div dir="ltr"> <div dir="ltr"> <div> <div dir="ltr"> <div dir="ltr">On Fri, Jul 24, 2020 at 2:06 PM LIU Zhiwei <<a href="mailto:zhiwei_liu@c-sky.com" target="_blank" moz-do-not-send="true">zhiwei_liu@c-sky.com</a>> wrote:<br> </div> </div> </div> </div> <div class="gmail_quote"> <blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><br> <br> On 2020/7/24 11:55, Richard Henderson wrote:<br> > On 7/23/20 7:35 PM, LIU Zhiwei wrote:<br> >><br> >> On 2020/7/24 8:28, Richard Henderson wrote:<br> >>> Make sure that all results from single-precision scalar helpers<br> >>> are properly nan-boxed to 64-bits.<br> >>><br> >>> Signed-off-by: Richard Henderson <<a href="mailto:richard.henderson@linaro.org" 
target="_blank" moz-do-not-send="true">richard.henderson@linaro.org</a>><br> >>> ---<br> >>> target/riscv/internals.h | 5 +++++<br> >>> target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------<br> >>> 2 files changed, 28 insertions(+), 19 deletions(-)<br> >>><br> >>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h<br> >>> index 37d33820ad..9f4ba7d617 100644<br> >>> --- a/target/riscv/internals.h<br> >>> +++ b/target/riscv/internals.h<br> >>> @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1);<br> >>> #define SEW32 2<br> >>> #define SEW64 3<br> >>> +static inline uint64_t nanbox_s(float32 f)<br> >>> +{<br> >>> + return f | MAKE_64BIT_MASK(32, 32);<br> >>> +}<br> >>> +<br> >> If define it here, we can also define a more general function with flen.<br> >><br> >> +static inline uint64_t nanbox_s(float32 f, uint32_t flen)<br> >> +{<br> >> + return f | MAKE_64BIT_MASK(flen, 64 - flen);<br> >> +}<br> >> +<br> >><br> >> So we can reuse it in fp16 or bf16 scalar instruction and in vector instructions.<br> > While we could do that, we will not encounter all possible lengths. In the<br> > cover letter, I mentioned defining a second function,<br> ><br> > static inline uint64_t nanbox_h(float16 f)<br> > {<br> > return f | MAKE_64BIT_MASK(16, 48);<br> > }<br> ><br> > Having two separate functions will, I believe, be easier to use in practice.<br> ><br> Get it. Thanks.<br> <br> Zhiwei<br> ><br> > r~<br> <br> <br> </blockquote> <div><br> </div> <div>That is what has been implemented in spike. It fills up the Nan-Box when value is stored back internal structure and </div> <div>unbox the value with difference floating type (half/single/double/quad).<br> </div> </div> </div> </blockquote> Hi Chih-Min,<br> <br> Has half-precision been a part of RVV? 
Or do you know the ISA abbreviation of half-precision?<br> <br> Thanks very much.<br> <br> Best Regards,<br> Zhiwei <br> <blockquote type="cite"> <div dir="ltr"> <div class="gmail_quote"> <div><br> </div> <div>By the way, I prefer to keeping the suffix to tell different floating type rather than pass arbitrary </div> <div>since each floating type belong to each extension.<br> <br> Reviewed-by: Chih-Min Chao <<a href="mailto:chihmin.chao@sifive.com" target="_blank" moz-do-not-send="true">chihmin.chao@sifive.com</a>></div> </div> </div> </blockquote> </div> </blockquote> <div><br> </div> <div>Hi ZhiWei,</div> <div><br> </div> <div>It is still under branch <a href="https://github.com/riscv/riscv-isa-manual/tree/zfh" moz-do-not-send="true">https://github.com/riscv/riscv-isa-manual/tree/zfh</a> and I am not sure about the working group progress.</div> <div>I have an implementation based on this draft and will send it as RFC patch next week.<br> </div> </div> </div> </blockquote> Hi Chih-Min,<br> <br> Thanks for your information.<br> <br> As Krste said once, as we don't have RV16, FP16 separated won't make sense. 
Obviously, it has changed.<span class="moz-smiley-s4"><span>:-P</span></span><br> <br> I also have implemented a version of FP16 ,“<span style="color: rgb(36, 41, 46); font-size: 14px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; text-align: left; text-indent: 0px; text-transform: none; white-space: normal; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(255, 255, 255); text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;">obvious set including existing FP instructions with format field set to "half" (fmt=10)“<br> <br> If you want to send the patch, I will not send it again.<span class="moz-smiley-s1"><span>:-)</span></span><br> <br> <br> Zhiwei<br> </span> <blockquote type="cite" cite="mid:CAEiOBXXUDECyBXxduJCgPkFx0=Rmtjgad0yu_irVcjw32aELFQ@mail.gmail.com"> <div dir="ltr"> <div class="gmail_quote"> <div><br> Thanks</div> <div>Chih-Min</div> </div> </div> </blockquote> <br> </body> </html>
diff --git a/target/riscv/internals.h b/target/riscv/internals.h index 37d33820ad..9f4ba7d617 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -38,4 +38,9 @@ target_ulong fclass_d(uint64_t frs1); #define SEW32 2 #define SEW64 3 +static inline uint64_t nanbox_s(float32 f) +{ + return f | MAKE_64BIT_MASK(32, 32); +} + #endif diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index 4379756dc4..72541958a7 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -81,10 +81,16 @@ void helper_set_rounding_mode(CPURISCVState *env, uint32_t rm) set_float_rounding_mode(softrm, &env->fp_status); } +static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, + uint64_t frs3, int flags) +{ + return nanbox_s(float32_muladd(frs1, frs2, frs3, flags, &env->fp_status)); +} + uint64_t helper_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t frs3) { - return float32_muladd(frs1, frs2, frs3, 0, &env->fp_status); + return do_fmadd_s(env, frs1, frs2, frs3, 0); } uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, @@ -96,8 +102,7 @@ uint64_t helper_fmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t helper_fmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t frs3) { - return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c, - &env->fp_status); + return do_fmadd_s(env, frs1, frs2, frs3, float_muladd_negate_c); } uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, @@ -110,8 +115,7 @@ uint64_t helper_fmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t helper_fnmsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t frs3) { - return float32_muladd(frs1, frs2, frs3, float_muladd_negate_product, - &env->fp_status); + return do_fmadd_s(env, frs1, frs2, frs3, float_muladd_negate_product); } uint64_t helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, @@ -124,8 +128,8 @@ uint64_t 
helper_fnmsub_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t helper_fnmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t frs3) { - return float32_muladd(frs1, frs2, frs3, float_muladd_negate_c | - float_muladd_negate_product, &env->fp_status); + return do_fmadd_s(env, frs1, frs2, frs3, + float_muladd_negate_c | float_muladd_negate_product); } uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, @@ -137,37 +141,37 @@ uint64_t helper_fnmadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t helper_fadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) { - return float32_add(frs1, frs2, &env->fp_status); + return nanbox_s(float32_add(frs1, frs2, &env->fp_status)); } uint64_t helper_fsub_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) { - return float32_sub(frs1, frs2, &env->fp_status); + return nanbox_s(float32_sub(frs1, frs2, &env->fp_status)); } uint64_t helper_fmul_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) { - return float32_mul(frs1, frs2, &env->fp_status); + return nanbox_s(float32_mul(frs1, frs2, &env->fp_status)); } uint64_t helper_fdiv_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) { - return float32_div(frs1, frs2, &env->fp_status); + return nanbox_s(float32_div(frs1, frs2, &env->fp_status)); } uint64_t helper_fmin_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) { - return float32_minnum(frs1, frs2, &env->fp_status); + return nanbox_s(float32_minnum(frs1, frs2, &env->fp_status)); } uint64_t helper_fmax_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) { - return float32_maxnum(frs1, frs2, &env->fp_status); + return nanbox_s(float32_maxnum(frs1, frs2, &env->fp_status)); } uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t frs1) { - return float32_sqrt(frs1, &env->fp_status); + return nanbox_s(float32_sqrt(frs1, &env->fp_status)); } target_ulong helper_fle_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2) @@ -209,23 +213,23 @@ uint64_t helper_fcvt_lu_s(CPURISCVState 
*env, uint64_t frs1) uint64_t helper_fcvt_s_w(CPURISCVState *env, target_ulong rs1) { - return int32_to_float32((int32_t)rs1, &env->fp_status); + return nanbox_s(int32_to_float32((int32_t)rs1, &env->fp_status)); } uint64_t helper_fcvt_s_wu(CPURISCVState *env, target_ulong rs1) { - return uint32_to_float32((uint32_t)rs1, &env->fp_status); + return nanbox_s(uint32_to_float32((uint32_t)rs1, &env->fp_status)); } #if defined(TARGET_RISCV64) uint64_t helper_fcvt_s_l(CPURISCVState *env, uint64_t rs1) { - return int64_to_float32(rs1, &env->fp_status); + return nanbox_s(int64_to_float32(rs1, &env->fp_status)); } uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1) { - return uint64_to_float32(rs1, &env->fp_status); + return nanbox_s(uint64_to_float32(rs1, &env->fp_status)); } #endif @@ -266,7 +270,7 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1) { - return float64_to_float32(rs1, &env->fp_status); + return nanbox_s(float64_to_float32(rs1, &env->fp_status)); } uint64_t helper_fcvt_d_s(CPURISCVState *env, uint64_t rs1)
Make sure that all results from single-precision scalar helpers are properly NaN-boxed to 64 bits. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/riscv/internals.h | 5 +++++ target/riscv/fpu_helper.c | 42 +++++++++++++++++++++------------------ 2 files changed, 28 insertions(+), 19 deletions(-) -- 2.25.1