diff mbox series

[39/72] softfloat: Use add192 in mul128To256

Message ID 20210508014802.892561-40-richard.henderson@linaro.org
State Superseded
Headers show
Series Convert floatx80 and float128 to FloatParts | expand

Commit Message

Richard Henderson May 8, 2021, 1:47 a.m. UTC
We can perform the operation in 6 total adds instead of 8.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 include/fpu/softfloat-macros.h | 37 +++++++++++-----------------------
 1 file changed, 12 insertions(+), 25 deletions(-)

-- 
2.25.1

Comments

Alex Bennée May 13, 2021, 10:49 a.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> We can perform the operation in 6 total adds instead of 8.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>


> ---

>  include/fpu/softfloat-macros.h | 37 +++++++++++-----------------------

>  1 file changed, 12 insertions(+), 25 deletions(-)

>

> diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h

> index f6dfbe108d..76327d844d 100644

> --- a/include/fpu/softfloat-macros.h

> +++ b/include/fpu/softfloat-macros.h

> @@ -511,34 +511,21 @@ static inline void

>  | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.

>  *----------------------------------------------------------------------------*/

>  

> -static inline void

> - mul128To256(

> -     uint64_t a0,

> -     uint64_t a1,

> -     uint64_t b0,

> -     uint64_t b1,

> -     uint64_t *z0Ptr,

> -     uint64_t *z1Ptr,

> -     uint64_t *z2Ptr,

> -     uint64_t *z3Ptr

> - )

> +static inline void mul128To256(uint64_t a0, uint64_t a1,

> +                               uint64_t b0, uint64_t b1,

> +                               uint64_t *z0Ptr, uint64_t *z1Ptr,

> +                               uint64_t *z2Ptr, uint64_t *z3Ptr)

>  {

> -    uint64_t z0, z1, z2, z3;

> -    uint64_t more1, more2;

> +    uint64_t z0, z1, z2;

> +    uint64_t m0, m1, m2, n1, n2;

>  

> -    mul64To128( a1, b1, &z2, &z3 );

> -    mul64To128( a1, b0, &z1, &more2 );

> -    add128( z1, more2, 0, z2, &z1, &z2 );

> -    mul64To128( a0, b0, &z0, &more1 );

> -    add128( z0, more1, 0, z1, &z0, &z1 );

> -    mul64To128( a0, b1, &more1, &more2 );

> -    add128( more1, more2, 0, z2, &more1, &z2 );

> -    add128( z0, z1, 0, more1, &z0, &z1 );

> -    *z3Ptr = z3;

> -    *z2Ptr = z2;

> -    *z1Ptr = z1;

> -    *z0Ptr = z0;

> +    mul64To128(a1, b0, &m1, &m2);

> +    mul64To128(a0, b1, &n1, &n2);

> +    mul64To128(a1, b1, &z2, z3Ptr);

> +    mul64To128(a0, b0, &z0, &z1);

>  

> +    add192( 0, m1, m2,  0, n1, n2, &m0, &m1, &m2);

> +    add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);

>  }

>  

>  /*----------------------------------------------------------------------------



-- 
Alex Bennée
diff mbox series

Patch

diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index f6dfbe108d..76327d844d 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -511,34 +511,21 @@  static inline void
 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- mul128To256(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr,
-     uint64_t *z3Ptr
- )
+static inline void mul128To256(uint64_t a0, uint64_t a1,
+                               uint64_t b0, uint64_t b1,
+                               uint64_t *z0Ptr, uint64_t *z1Ptr,
+                               uint64_t *z2Ptr, uint64_t *z3Ptr)
 {
-    uint64_t z0, z1, z2, z3;
-    uint64_t more1, more2;
+    uint64_t z0, z1, z2;
+    uint64_t m0, m1, m2, n1, n2;
 
-    mul64To128( a1, b1, &z2, &z3 );
-    mul64To128( a1, b0, &z1, &more2 );
-    add128( z1, more2, 0, z2, &z1, &z2 );
-    mul64To128( a0, b0, &z0, &more1 );
-    add128( z0, more1, 0, z1, &z0, &z1 );
-    mul64To128( a0, b1, &more1, &more2 );
-    add128( more1, more2, 0, z2, &more1, &z2 );
-    add128( z0, z1, 0, more1, &z0, &z1 );
-    *z3Ptr = z3;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
+    mul64To128(a1, b0, &m1, &m2);
+    mul64To128(a0, b1, &n1, &n2);
+    mul64To128(a1, b1, &z2, z3Ptr);
+    mul64To128(a0, b0, &z0, &z1);
 
+    add192( 0, m1, m2,  0, n1, n2, &m0, &m1, &m2);
+    add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
 }
 
 /*----------------------------------------------------------------------------