Message ID | 20210508014802.892561-40-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | Convert floatx80 and float128 to FloatParts | expand |
Richard Henderson <richard.henderson@linaro.org> writes: > We can perform the operation in 6 total adds instead of 8. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Tested-by: Alex Bennée <alex.bennee@linaro.org> > --- > include/fpu/softfloat-macros.h | 37 +++++++++++----------------------- > 1 file changed, 12 insertions(+), 25 deletions(-) > > diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h > index f6dfbe108d..76327d844d 100644 > --- a/include/fpu/softfloat-macros.h > +++ b/include/fpu/softfloat-macros.h > @@ -511,34 +511,21 @@ static inline void > | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. > *----------------------------------------------------------------------------*/ > > -static inline void > - mul128To256( > - uint64_t a0, > - uint64_t a1, > - uint64_t b0, > - uint64_t b1, > - uint64_t *z0Ptr, > - uint64_t *z1Ptr, > - uint64_t *z2Ptr, > - uint64_t *z3Ptr > - ) > +static inline void mul128To256(uint64_t a0, uint64_t a1, > + uint64_t b0, uint64_t b1, > + uint64_t *z0Ptr, uint64_t *z1Ptr, > + uint64_t *z2Ptr, uint64_t *z3Ptr) > { > - uint64_t z0, z1, z2, z3; > - uint64_t more1, more2; > + uint64_t z0, z1, z2; > + uint64_t m0, m1, m2, n1, n2; > > - mul64To128( a1, b1, &z2, &z3 ); > - mul64To128( a1, b0, &z1, &more2 ); > - add128( z1, more2, 0, z2, &z1, &z2 ); > - mul64To128( a0, b0, &z0, &more1 ); > - add128( z0, more1, 0, z1, &z0, &z1 ); > - mul64To128( a0, b1, &more1, &more2 ); > - add128( more1, more2, 0, z2, &more1, &z2 ); > - add128( z0, z1, 0, more1, &z0, &z1 ); > - *z3Ptr = z3; > - *z2Ptr = z2; > - *z1Ptr = z1; > - *z0Ptr = z0; > + mul64To128(a1, b0, &m1, &m2); > + mul64To128(a0, b1, &n1, &n2); > + mul64To128(a1, b1, &z2, z3Ptr); > + mul64To128(a0, b0, &z0, &z1); > > + add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2); > + add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr); > } > > 
/*---------------------------------------------------------------------------- -- Alex Bennée
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h index f6dfbe108d..76327d844d 100644 --- a/include/fpu/softfloat-macros.h +++ b/include/fpu/softfloat-macros.h @@ -511,34 +511,21 @@ static inline void | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. *----------------------------------------------------------------------------*/ -static inline void - mul128To256( - uint64_t a0, - uint64_t a1, - uint64_t b0, - uint64_t b1, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr, - uint64_t *z3Ptr - ) +static inline void mul128To256(uint64_t a0, uint64_t a1, + uint64_t b0, uint64_t b1, + uint64_t *z0Ptr, uint64_t *z1Ptr, + uint64_t *z2Ptr, uint64_t *z3Ptr) { - uint64_t z0, z1, z2, z3; - uint64_t more1, more2; + uint64_t z0, z1, z2; + uint64_t m0, m1, m2, n1, n2; - mul64To128( a1, b1, &z2, &z3 ); - mul64To128( a1, b0, &z1, &more2 ); - add128( z1, more2, 0, z2, &z1, &z2 ); - mul64To128( a0, b0, &z0, &more1 ); - add128( z0, more1, 0, z1, &z0, &z1 ); - mul64To128( a0, b1, &more1, &more2 ); - add128( more1, more2, 0, z2, &more1, &z2 ); - add128( z0, z1, 0, more1, &z0, &z1 ); - *z3Ptr = z3; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; + mul64To128(a1, b0, &m1, &m2); + mul64To128(a0, b1, &n1, &n2); + mul64To128(a1, b1, &z2, z3Ptr); + mul64To128(a0, b0, &z0, &z1); + add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2); + add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr); } /*----------------------------------------------------------------------------
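We can perform the 128x128->256-bit multiplication in mul128To256 in 6 total adds instead of 8: the two cross products are summed with one add192 and the result is folded into the upper three words with a second add192, replacing the four add128 calls.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/fpu/softfloat-macros.h | 37 +++++++++++-----------------------
 1 file changed, 12 insertions(+), 25 deletions(-)

--
2.25.1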