Message ID | 20241025182614.2022697-17-adhemerval.zanella@linaro.org |
---|---|
State | Accepted |
Commit | c28f8d7f1943433b1673369d7432cec8abe9ca03 |
Headers | show |
Series | Add more CORE-MATH on libm | expand |
On Fri, Oct 25, 2024 at 1:31 PM Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > > The CORE-MATH exp10m1f implementation showed slightly worse latency > when using the x86_64 baseline ABI. This patch adds an ifunc variant > with similar performance for x86_64-v3. > --- > sysdeps/ieee754/flt-32/s_exp10m1f.c | 2 ++ > sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++ > sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c | 4 +++ > sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c | 33 +++++++++++++++++++ > 4 files changed, 41 insertions(+) > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c > > diff --git a/sysdeps/ieee754/flt-32/s_exp10m1f.c b/sysdeps/ieee754/flt-32/s_exp10m1f.c > index 610f269b01..c918c905e8 100644 > --- a/sysdeps/ieee754/flt-32/s_exp10m1f.c > +++ b/sysdeps/ieee754/flt-32/s_exp10m1f.c > @@ -222,4 +222,6 @@ __exp10m1f (float x) > return (s - 1.0) + w * c0; > } > } > +#ifndef __exp10m1f > libm_alias_float (__exp10m1, exp10m1) > +#endif > diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile > index cbe09d49f4..dcff4df2f1 100644 > --- a/sysdeps/x86_64/fpu/multiarch/Makefile > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile > @@ -11,6 +11,7 @@ CFLAGS-s_log1p-fma.c = -mfma -mavx2 > CFLAGS-s_sin-fma.c = -mfma -mavx2 > CFLAGS-s_tan-fma.c = -mfma -mavx2 > CFLAGS-s_sincos-fma.c = -mfma -mavx2 > +CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2 > > CFLAGS-e_exp2f-fma.c = -mfma -mavx2 > CFLAGS-e_expf-fma.c = -mfma -mavx2 > @@ -72,6 +73,7 @@ libm-sysdep_routines += \ > s_ceilf-sse4_1 \ > s_cosf-fma \ > s_cosf-sse2 \ > + s_exp10m1f-fma \ > s_expm1-fma \ > s_floor-sse4_1 \ > s_floorf-sse4_1 \ > diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c > new file mode 100644 > index 0000000000..3dda04e2dd > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c > @@ -0,0 +1,4 @@ > +#define __exp10m1f __exp10m1f_fma > 
+#define SECTION __attribute__ ((section (".text.fma"))) > + > +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c > new file mode 100644 > index 0000000000..8040b7ed79 > --- /dev/null > +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c > @@ -0,0 +1,33 @@ > +/* Multiple versions of exp10m1. > + Copyright (C) 2024 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <sysdeps/x86/isa-level.h> > +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL > +# include <libm-alias-float.h> > + > +extern float __redirect_exp10m1f (float); > + > +# define SYMBOL_NAME exp10m1f > +# include "ifunc-fma.h" > + > +libc_ifunc_redirected (__redirect_exp10m1f, __exp10m1f, IFUNC_SELECTOR ()); > +libm_alias_float (__exp10m1, exp10m1) > + > +# define __exp10m1f __exp10m1f_sse2 > +#endif > +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c> > -- > 2.43.0 > LGTM Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
diff --git a/sysdeps/ieee754/flt-32/s_exp10m1f.c b/sysdeps/ieee754/flt-32/s_exp10m1f.c index 610f269b01..c918c905e8 100644 --- a/sysdeps/ieee754/flt-32/s_exp10m1f.c +++ b/sysdeps/ieee754/flt-32/s_exp10m1f.c @@ -222,4 +222,6 @@ __exp10m1f (float x) return (s - 1.0) + w * c0; } } +#ifndef __exp10m1f libm_alias_float (__exp10m1, exp10m1) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index cbe09d49f4..dcff4df2f1 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -11,6 +11,7 @@ CFLAGS-s_log1p-fma.c = -mfma -mavx2 CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 CFLAGS-s_sincos-fma.c = -mfma -mavx2 +CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2 CFLAGS-e_exp2f-fma.c = -mfma -mavx2 CFLAGS-e_expf-fma.c = -mfma -mavx2 @@ -72,6 +73,7 @@ libm-sysdep_routines += \ s_ceilf-sse4_1 \ s_cosf-fma \ s_cosf-sse2 \ + s_exp10m1f-fma \ s_expm1-fma \ s_floor-sse4_1 \ s_floorf-sse4_1 \ diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c new file mode 100644 index 0000000000..3dda04e2dd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f-fma.c @@ -0,0 +1,4 @@ +#define __exp10m1f __exp10m1f_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c new file mode 100644 index 0000000000..8040b7ed79 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_exp10m1f.c @@ -0,0 +1,33 @@ +/* Multiple versions of exp10m1. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL +# include <libm-alias-float.h> + +extern float __redirect_exp10m1f (float); + +# define SYMBOL_NAME exp10m1f +# include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_exp10m1f, __exp10m1f, IFUNC_SELECTOR ()); +libm_alias_float (__exp10m1, exp10m1) + +# define __exp10m1f __exp10m1f_sse2 +#endif +#include <sysdeps/ieee754/flt-32/s_exp10m1f.c>