Message ID | 1297268850-5777-4-git-send-email-peter.maydell@linaro.org |
---|---|
State | Superseded |
Headers | show |
On Wed, Feb 09, 2011 at 04:27:27PM +0000, Peter Maydell wrote: > Fix various bugs in the single-to-half-precision conversion code: > * input NaNs not correctly converted in IEEE mode > (fixed by defining and using a commonNaNToFloat16()) > * wrong values returned when converting NaN/Inf into non-IEEE > half precision value > * wrong values returned for conversion of values which are > on the boundary between denormal and zero for the half > precision format > * zeroes not correctly identified > * excessively large results in non-IEEE mode should > generate InvalidOp, not Overflow > > Signed-off-by: Peter Maydell <peter.maydell@linaro.org> > --- > fpu/softfloat-specialize.h | 20 ++++++++++++++++++++ > fpu/softfloat.c | 30 ++++++++++++++++++------------ > 2 files changed, 38 insertions(+), 12 deletions(-) > > diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h > index 2f65b9d..4907484 100644 > --- a/fpu/softfloat-specialize.h > +++ b/fpu/softfloat-specialize.h > @@ -116,6 +116,26 @@ float16 float16_maybe_silence_nan(float16 a) > } > > /*---------------------------------------------------------------------------- > +| Returns the result of converting the canonical NaN `a' to the half- > +| precision floating-point format. > +*----------------------------------------------------------------------------*/ > + > +static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM) > +{ > + uint16_t mantissa = a.high>>54; > + > + if (STATUS(default_nan_mode)) { > + return float16_default_nan; > + } > + > + if (mantissa) { > + return ((((uint16_t) a.sign) << 15) | (0x1F << 10) | mantissa); > + } else { > + return float16_default_nan; > + } > +} > + > +/*---------------------------------------------------------------------------- > | The pattern for a default generated single-precision NaN. 
> *----------------------------------------------------------------------------*/ > #if defined(TARGET_SPARC) > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index c3058f4..4d51428 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -2767,25 +2767,31 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) > aExp = extractFloat32Exp( a ); > aSign = extractFloat32Sign( a ); > if ( aExp == 0xFF ) { > - if (aSig) { > - /* Make sure correct exceptions are raised. */ > - float32ToCommonNaN(a STATUS_VAR); > - aSig |= 0x00400000; > + if ( aSig ) { > + /* Input is a NaN */ > + bits16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); Here you should now use a float16 instead of bits16. > + if (!ieee) { > + return packFloat16(aSign, 0, 0); > + } > + return r; > } > - return packFloat16(aSign, 0x1f, aSig >> 13); > + /* Infinity */ > + if (!ieee) { > + float_raise(float_flag_invalid STATUS_VAR); > + return packFloat16(aSign, 0x1f, 0x3ff); > + } > + return packFloat16(aSign, 0x1f, 0); > } > - if (aExp == 0 && aSign == 0) { > + if (aExp == 0 && aSig == 0) { > return packFloat16(aSign, 0, 0); > } > /* Decimal point between bits 22 and 23. */ > aSig |= 0x00800000; > aExp -= 0x7f; > if (aExp < -14) { > - mask = 0x007fffff; > - if (aExp < -24) { > - aExp = -25; > - } else { > - mask >>= 24 + aExp; > + mask = 0x00ffffff; > + if (aExp >= -24) { > + mask >>= 25 + aExp; > } > } else { > mask = 0x00001fff; > @@ -2827,7 +2833,7 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) > } > } else { > if (aExp > 16) { > - float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); > + float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR); > return packFloat16(aSign, 0x1f, 0x3ff); > } > } Otherwise looks fine, nice fixes.
diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h index 2f65b9d..4907484 100644 --- a/fpu/softfloat-specialize.h +++ b/fpu/softfloat-specialize.h @@ -116,6 +116,26 @@ float16 float16_maybe_silence_nan(float16 a) } /*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the half- +| precision floating-point format. +*----------------------------------------------------------------------------*/ + +static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM) +{ + uint16_t mantissa = a.high>>54; + + if (STATUS(default_nan_mode)) { + return float16_default_nan; + } + + if (mantissa) { + return ((((uint16_t) a.sign) << 15) | (0x1F << 10) | mantissa); + } else { + return float16_default_nan; + } +} + +/*---------------------------------------------------------------------------- | The pattern for a default generated single-precision NaN. *----------------------------------------------------------------------------*/ #if defined(TARGET_SPARC) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index c3058f4..4d51428 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -2767,25 +2767,31 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { - if (aSig) { - /* Make sure correct exceptions are raised. 
*/ - float32ToCommonNaN(a STATUS_VAR); - aSig |= 0x00400000; + if ( aSig ) { + /* Input is a NaN */ + bits16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); + if (!ieee) { + return packFloat16(aSign, 0, 0); + } + return r; } - return packFloat16(aSign, 0x1f, aSig >> 13); + /* Infinity */ + if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); + return packFloat16(aSign, 0x1f, 0x3ff); + } + return packFloat16(aSign, 0x1f, 0); } - if (aExp == 0 && aSign == 0) { + if (aExp == 0 && aSig == 0) { return packFloat16(aSign, 0, 0); } /* Decimal point between bits 22 and 23. */ aSig |= 0x00800000; aExp -= 0x7f; if (aExp < -14) { - mask = 0x007fffff; - if (aExp < -24) { - aExp = -25; - } else { - mask >>= 24 + aExp; + mask = 0x00ffffff; + if (aExp >= -24) { + mask >>= 25 + aExp; } } else { mask = 0x00001fff; @@ -2827,7 +2833,7 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) } } else { if (aExp > 16) { - float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); + float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR); return packFloat16(aSign, 0x1f, 0x3ff); } }
Fix various bugs in the single-to-half-precision conversion code:
 * input NaNs not correctly converted in IEEE mode
   (fixed by defining and using a commonNaNToFloat16())
 * wrong values returned when converting NaN/Inf into non-IEEE
   half precision value
 * wrong values returned for conversion of values which are
   on the boundary between denormal and zero for the half
   precision format
 * zeroes not correctly identified
 * excessively large results in non-IEEE mode should
   generate InvalidOp, not Overflow

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 fpu/softfloat-specialize.h |   20 ++++++++++++++++++++
 fpu/softfloat.c            |   30 ++++++++++++++++++------------
 2 files changed, 38 insertions(+), 12 deletions(-)