@@ -533,6 +533,16 @@ typedef struct {
uint64_t frac_lo;
} FloatParts128;
+typedef struct {
+ FloatClass cls;
+ bool sign;
+ int32_t exp;
+ uint64_t frac_hi;
+ uint64_t frac_hm; /* high-middle */
+ uint64_t frac_lm; /* low-middle */
+ uint64_t frac_lo;
+} FloatParts256;
+
/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
@@ -769,6 +779,14 @@ static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
#define parts_addsub(A, B, S, Z) \
PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
+static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
+ float_status *s);
+static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
+ float_status *s);
+
+#define parts_mul(A, B, S) \
+ PARTS_GENERIC_64_128(mul, A)(A, B, S)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -859,6 +877,19 @@ static bool frac128_eqz(FloatParts128 *a)
#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
+static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
+{
+ mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
+}
+
+static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
+{
+ mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
+ &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
+}
+
+#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
+
static void frac64_neg(FloatParts64 *a)
{
a->frac = -a->frac;
@@ -955,23 +986,42 @@ static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)
+static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
+{
+ r->frac = a->frac_hi | (a->frac_lo != 0);
+}
+
+static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
+{
+ r->frac_hi = a->frac_hi;
+ r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
+}
+
+#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
+
#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN glue(FloatParts,N)
+#define FloatPartsW glue(FloatParts,W)
#define N 64
+#define W 128
#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
#undef N
+#undef W
#define N 128
+#define W 256
#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
#undef N
+#undef W
#undef partsN
#undef FloatPartsN
+#undef FloatPartsW
/*
* Pack/unpack routines with a specific FloatFmt.
@@ -1250,89 +1300,42 @@ float128 float128_sub(float128 a, float128 b, float_status *status)
}
/*
- * Returns the result of multiplying the floating-point values `a' and
- * `b'. The operation is performed according to the IEC/IEEE Standard
- * for Binary Floating-Point Arithmetic.
+ * Multiplication
*/
-static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
-{
- bool sign = a.sign ^ b.sign;
-
- if (a.cls == float_class_normal && b.cls == float_class_normal) {
- uint64_t hi, lo;
- int exp = a.exp + b.exp;
-
- mul64To128(a.frac, b.frac, &hi, &lo);
- if (hi & DECOMPOSED_IMPLICIT_BIT) {
- exp += 1;
- } else {
- hi <<= 1;
- }
- hi |= (lo != 0);
-
- /* Re-use a */
- a.exp = exp;
- a.sign = sign;
- a.frac = hi;
- return a;
- }
- /* handle all the NaN cases */
- if (is_nan(a.cls) || is_nan(b.cls)) {
- return *parts_pick_nan(&a, &b, s);
- }
- /* Inf * Zero == NaN */
- if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
- (a.cls == float_class_zero && b.cls == float_class_inf)) {
- float_raise(float_flag_invalid, s);
- parts_default_nan(&a, s);
- return a;
- }
- /* Multiply by 0 or Inf */
- if (a.cls == float_class_inf || a.cls == float_class_zero) {
- a.sign = sign;
- return a;
- }
- if (b.cls == float_class_inf || b.cls == float_class_zero) {
- b.sign = sign;
- return b;
- }
- g_assert_not_reached();
-}
-
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
- pr = mul_floats(pa, pb, status);
+ pr = parts_mul(&pa, &pb, status);
- return float16_round_pack_canonical(&pr, status);
+ return float16_round_pack_canonical(pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
- pr = mul_floats(pa, pb, status);
+ pr = parts_mul(&pa, &pb, status);
- return float32_round_pack_canonical(&pr, status);
+ return float32_round_pack_canonical(pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
- pr = mul_floats(pa, pb, status);
+ pr = parts_mul(&pa, &pb, status);
- return float64_round_pack_canonical(&pr, status);
+ return float64_round_pack_canonical(pr, status);
}
static float hard_f32_mul(float a, float b)
@@ -1359,20 +1362,28 @@ float64_mul(float64 a, float64 b, float_status *s)
f64_is_zon2, f64_addsubmul_post);
}
-/*
- * Returns the result of multiplying the bfloat16
- * values `a' and `b'.
- */
-
-bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
+bfloat16 QEMU_FLATTEN
+bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
- FloatParts64 pa, pb, pr;
+ FloatParts64 pa, pb, *pr;
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
- pr = mul_floats(pa, pb, status);
+ pr = parts_mul(&pa, &pb, status);
- return bfloat16_round_pack_canonical(&pr, status);
+ return bfloat16_round_pack_canonical(pr, status);
+}
+
+float128 QEMU_FLATTEN
+float128_mul(float128 a, float128 b, float_status *status)
+{
+ FloatParts128 pa, pb, *pr;
+
+ float128_unpack_canonical(&pa, a, status);
+ float128_unpack_canonical(&pb, b, status);
+ pr = parts_mul(&pa, &pb, status);
+
+ return float128_round_pack_canonical(pr, status);
}
/*
@@ -7068,69 +7079,6 @@ float128 float128_round_to_int(float128 a, float_status *status)
}
-/*----------------------------------------------------------------------------
-| Returns the result of multiplying the quadruple-precision floating-point
-| values `a' and `b'. The operation is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_mul(float128 a, float128 b, float_status *status)
-{
- bool aSign, bSign, zSign;
- int32_t aExp, bExp, zExp;
- uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- bSig1 = extractFloat128Frac1( b );
- bSig0 = extractFloat128Frac0( b );
- bExp = extractFloat128Exp( b );
- bSign = extractFloat128Sign( b );
- zSign = aSign ^ bSign;
- if ( aExp == 0x7FFF ) {
- if ( ( aSig0 | aSig1 )
- || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
- return propagateFloat128NaN(a, b, status);
- }
- if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( bExp == 0x7FFF ) {
- if (bSig0 | bSig1) {
- return propagateFloat128NaN(a, b, status);
- }
- if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return float128_default_nan(status);
- }
- return packFloat128( zSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
- normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
- }
- if ( bExp == 0 ) {
- if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
- normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
- }
- zExp = aExp + bExp - 0x4000;
- aSig0 |= UINT64_C(0x0001000000000000);
- shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
- mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
- add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
- zSig2 |= ( zSig3 != 0 );
- if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
- shift128ExtraRightJamming(
- zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
- ++zExp;
- }
- return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
-
-}
-
/*----------------------------------------------------------------------------
| Returns the result of dividing the quadruple-precision floating-point value
| `a' by the corresponding value `b'. The operation is performed according to
@@ -362,3 +362,54 @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
p_nan:
return parts_pick_nan(a, b, s);
}
+
+/*
+ * Returns the result of multiplying the floating-point values `a' and
+ * `b'. The operation is performed according to the IEC/IEEE Standard
+ * for Binary Floating-Point Arithmetic.
+ */
+static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
+ float_status *s)
+{
+ int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+ bool sign = a->sign ^ b->sign;
+
+ if (likely(ab_mask == float_cmask_normal)) {
+ FloatPartsW tmp;
+
+ frac_mulw(&tmp, a, b);
+ frac_truncjam(a, &tmp);
+
+ a->exp += b->exp + 1;
+ if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
+ frac_add(a, a, a);
+ a->exp -= 1;
+ }
+
+ a->sign = sign;
+ return a;
+ }
+
+ /* Inf * Zero == NaN */
+ if (unlikely(ab_mask == float_cmask_infzero)) {
+ float_raise(float_flag_invalid, s);
+ parts_default_nan(a, s);
+ return a;
+ }
+
+ if (unlikely(ab_mask & float_cmask_anynan)) {
+ return parts_pick_nan(a, b, s);
+ }
+
+ /* Multiply by 0 or Inf */
+ if (ab_mask & float_cmask_inf) {
+ a->cls = float_class_inf;
+ a->sign = sign;
+ return a;
+ }
+
+ g_assert(ab_mask & float_cmask_zero);
+ a->cls = float_class_zero;
+ a->sign = sign;
+ return a;
+}