Message ID | 20190206145850.22003-3-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | [1/7] wcsmbs: optimize wcpcpy | expand |
If no one opposes it, I will commit this shortly. On 06/02/2019 12:58, Adhemerval Zanella wrote: > This patch rewrites wcscat using wcslen and wcscpy. This is similar to > the optimization done on strcat by 6e46de42fe. > > The wcscpy changes are mainly to add the internal alias to avoid PLT > calls. > > Checked on x86_64-linux-gnu and a build against the affected > architectures. > > * include/wchar.h (__wcscpy): New prototype. > * sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c > (__wcscpy): Route internal symbol to generic implementation. > * sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c (wcscpy): > Add internal __wcscpy alias. > * sysdeps/powerpc/powerpc64/multiarch/wcscpy.c (wcscpy): Likewise. > * sysdeps/s390/wcscpy.c (wcscpy): Likewise. > * sysdeps/x86_64/multiarch/wcscpy.c (wcscpy): Likewise. > * wcsmbs/wcscpy.c (wcscpy): Add internal __wcscpy alias. > * sysdeps/x86_64/multiarch/wcscpy-c.c (WCSCPY): Adjust macro to > use generic implementation. > * wcsmbs/wcscat.c (wcscat): Rewrite using wcslen and wcscpy. 
> --- > include/wchar.h | 4 +++ > .../powerpc32/power4/multiarch/wcscpy-ppc32.c | 9 ++++--- > .../powerpc32/power4/multiarch/wcscpy.c | 13 ++++------ > sysdeps/powerpc/powerpc64/multiarch/wcscpy.c | 25 +++++++++---------- > sysdeps/s390/wcscpy.c | 4 ++- > sysdeps/x86_64/multiarch/wcscpy-c.c | 4 +-- > sysdeps/x86_64/multiarch/wcscpy.c | 11 +++++--- > wcsmbs/wcscat.c | 21 +--------------- > wcsmbs/wcscpy.c | 10 +++++--- > 9 files changed, 47 insertions(+), 54 deletions(-) > > diff --git a/include/wchar.h b/include/wchar.h > index 614073bcb3..2cb44954fc 100644 > --- a/include/wchar.h > +++ b/include/wchar.h > @@ -182,6 +182,10 @@ extern size_t __wcsnrtombs (char *__restrict __dst, > size_t __nwc, size_t __len, > __mbstate_t *__restrict __ps) > attribute_hidden; > +extern wchar_t *__wcscpy (wchar_t *__restrict __dest, > + const wchar_t *__restrict __src) > + attribute_hidden __nonnull ((1, 2)); > +libc_hidden_proto (__wcscpy) > extern wchar_t *__wcsncpy (wchar_t *__restrict __dest, > const wchar_t *__restrict __src, size_t __n); > extern wchar_t *__wcpcpy (wchar_t *__dest, const wchar_t *__src); > diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c b/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c > index 52b692b47b..31e0d81ef0 100644 > --- a/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c > +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c > @@ -17,10 +17,11 @@ > > #include <wchar.h> > > -#if IS_IN (libc) > -# define WCSCPY __wcscpy_ppc > -#endif > - > extern __typeof (wcscpy) __wcscpy_ppc; > > +#define WCSCPY __wcscpy_ppc > #include <wcsmbs/wcscpy.c> > + > +#ifdef SHARED > +__hidden_ver1 (__wcscpy_ppc, __GI___wcscpy, __wcscpy_ppc); > +#endif > diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c b/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c > index ecca37d5d6..eb95c856bd 100644 > --- a/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c > +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c 
> @@ -16,21 +16,18 @@ > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > -# include <wchar.h> > -# include <shlib-compat.h> > -# include "init-arch.h" > +#include <wchar.h> > +#include <shlib-compat.h> > +#include "init-arch.h" > > extern __typeof (wcscpy) __wcscpy_ppc attribute_hidden; > extern __typeof (wcscpy) __wcscpy_power6 attribute_hidden; > extern __typeof (wcscpy) __wcscpy_power7 attribute_hidden; > > -libc_ifunc (wcscpy, > +libc_ifunc (__wcscpy, > (hwcap & PPC_FEATURE_HAS_VSX) > ? __wcscpy_power7 : > (hwcap & PPC_FEATURE_ARCH_2_05) > ? __wcscpy_power6 > : __wcscpy_ppc); > -#else > -#include <wcsmbs/wcscpy.c> > -#endif > +weak_alias (__wcscpy, wcscpy) > diff --git a/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c b/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c > index 3cea9a489d..3f918b27c6 100644 > --- a/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c > +++ b/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c > @@ -16,21 +16,20 @@ > License along with the GNU C Library; if not, see > <http://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > -# include <wchar.h> > -# include <shlib-compat.h> > -# include "init-arch.h" > +#define __wcscpy __redirect___wcscpy > +#include <wchar.h> > +#undef __wcscpy > +#include <shlib-compat.h> > +#include "init-arch.h" > > extern __typeof (wcscpy) __wcscpy_ppc attribute_hidden; > extern __typeof (wcscpy) __wcscpy_power6 attribute_hidden; > extern __typeof (wcscpy) __wcscpy_power7 attribute_hidden; > > -libc_ifunc (wcscpy, > - (hwcap & PPC_FEATURE_HAS_VSX) > - ? __wcscpy_power7 : > - (hwcap & PPC_FEATURE_ARCH_2_05) > - ? __wcscpy_power6 > - : __wcscpy_ppc); > -#else > -#include <wcsmbs/wcscpy.c> > -#endif > +libc_ifunc_redirected (__redirect___wcscpy, __wcscpy, > + (hwcap & PPC_FEATURE_HAS_VSX) > + ? __wcscpy_power7 : > + (hwcap & PPC_FEATURE_ARCH_2_05) > + ? 
__wcscpy_power6 > + : __wcscpy_ppc); > +weak_alias (__wcscpy, wcscpy) > diff --git a/sysdeps/s390/wcscpy.c b/sysdeps/s390/wcscpy.c > index 2e8ef5024f..a569f917af 100644 > --- a/sysdeps/s390/wcscpy.c > +++ b/sysdeps/s390/wcscpy.c > @@ -30,9 +30,11 @@ extern __typeof (wcscpy) WCSCPY_C attribute_hidden; > extern __typeof (wcscpy) WCSCPY_Z13 attribute_hidden; > # endif > > -s390_libc_ifunc_expr (wcscpy, wcscpy, > +s390_libc_ifunc_expr (wcscpy, __wcscpy, > (HAVE_WCSCPY_Z13 && (hwcap & HWCAP_S390_VX)) > ? WCSCPY_Z13 > : WCSCPY_DEFAULT > ) > +weak_alias (__wcscpy, wcscpy) > +libc_hidden_def (__wcscpy) > #endif > diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c > index a51a83a9be..26d6984e9b 100644 > --- a/sysdeps/x86_64/multiarch/wcscpy-c.c > +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c > @@ -1,5 +1,5 @@ > #if IS_IN (libc) > -# define wcscpy __wcscpy_sse2 > +# define WCSCPY __wcscpy_sse2 > #endif > > -#include "wcsmbs/wcscpy.c" > +#include <wcsmbs/wcscpy.c> > diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c > index 101a585358..96151b4963 100644 > --- a/sysdeps/x86_64/multiarch/wcscpy.c > +++ b/sysdeps/x86_64/multiarch/wcscpy.c > @@ -19,9 +19,9 @@ > > /* Define multiple versions only for the definition in libc. 
*/ > #if IS_IN (libc) > -# define wcscpy __redirect_wcscpy > +# define __wcscpy __redirect_wcscpy > # include <wchar.h> > -# undef wcscpy > +# undef __wcscpy > > # define SYMBOL_NAME wcscpy > # include <init-arch.h> > @@ -40,5 +40,10 @@ IFUNC_SELECTOR (void) > return OPTIMIZE (sse2); > } > > -libc_ifunc_redirected (__redirect_wcscpy, wcscpy, IFUNC_SELECTOR ()); > +libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ()); > +weak_alias (__wcscpy, wcscpy) > +# ifdef SHARED > +__hidden_ver1 (__wcscpy, __GI___wcscpy, __redirect_wcscpy) > + __attribute__((visibility ("hidden"))) __attribute_copy__ (wcscpy); > +# endif > #endif > diff --git a/wcsmbs/wcscat.c b/wcsmbs/wcscat.c > index 6a25b20e31..1a9d667fda 100644 > --- a/wcsmbs/wcscat.c > +++ b/wcsmbs/wcscat.c > @@ -26,26 +26,7 @@ > wchar_t * > __wcscat (wchar_t *dest, const wchar_t *src) > { > - wchar_t *s1 = dest; > - const wchar_t *s2 = src; > - wchar_t c; > - > - /* Find the end of the string. */ > - do > - c = *s1++; > - while (c != L'\0'); > - > - /* Make S1 point before the next character, so we can increment > - it while memory is read (wins on pipelined cpus). */ > - s1 -= 2; > - > - do > - { > - c = *s2++; > - *++s1 = c; > - } > - while (c != L'\0'); > - > + __wcscpy (dest + __wcslen (dest), src); > return dest; > } > #ifndef WCSCAT > diff --git a/wcsmbs/wcscpy.c b/wcsmbs/wcscpy.c > index 7a34c77a9e..636bf6bd01 100644 > --- a/wcsmbs/wcscpy.c > +++ b/wcsmbs/wcscpy.c > @@ -20,13 +20,13 @@ > #include <wchar.h> > > > -#ifndef WCSCPY > -# define WCSCPY wcscpy > +#ifdef WCSCPY > +# define __wcscpy WCSCPY > #endif > > /* Copy SRC to DEST. */ > wchar_t * > -WCSCPY (wchar_t *dest, const wchar_t *src) > +__wcscpy (wchar_t *dest, const wchar_t *src) > { > wint_t c; > wchar_t *wcp; > @@ -58,3 +58,7 @@ WCSCPY (wchar_t *dest, const wchar_t *src) > > return dest; > } > +#ifndef WCSCPY > +weak_alias (__wcscpy, wcscpy) > +libc_hidden_def (__wcscpy) > +#endif >
A recent patch, probably this one, has introduced linknamespace test failures for powerpc (32-bit, --with-cpu=power4), as shown with build-many-glibcs.py for powerpc-linux-gnu-power4. FAIL: conform/POSIX/fnmatch.h/linknamespace FAIL: conform/POSIX/glob.h/linknamespace FAIL: conform/POSIX/wordexp.h/linknamespace FAIL: conform/XPG4/fnmatch.h/linknamespace FAIL: conform/XPG4/glob.h/linknamespace FAIL: conform/XPG4/wordexp.h/linknamespace FAIL: conform/XPG42/fnmatch.h/linknamespace FAIL: conform/XPG42/glob.h/linknamespace FAIL: conform/XPG42/wordexp.h/linknamespace [initial] wordexp -> [libc.a(wordexp.o)] fnmatch -> [libc.a(fnmatch.o)] __wcscat -> [libc.a(wcscat.o)] __wcscpy -> [libc.a(wcscpy.o)] wcscpy -- Joseph S. Myers joseph@codesourcery.com
Joseph Myers <joseph@codesourcery.com> writes: > A recent patch, probably this one, has introduced linknamespace test > failures for powerpc (32-bit, --with-cpu=power4), as shown with > build-many-glibcs.py for powerpc-linux-gnu-power4. > > FAIL: conform/POSIX/fnmatch.h/linknamespace > FAIL: conform/POSIX/glob.h/linknamespace > FAIL: conform/POSIX/wordexp.h/linknamespace > FAIL: conform/XPG4/fnmatch.h/linknamespace > FAIL: conform/XPG4/glob.h/linknamespace > FAIL: conform/XPG4/wordexp.h/linknamespace > FAIL: conform/XPG42/fnmatch.h/linknamespace > FAIL: conform/XPG42/glob.h/linknamespace > FAIL: conform/XPG42/wordexp.h/linknamespace > > [initial] wordexp -> [libc.a(wordexp.o)] fnmatch -> [libc.a(fnmatch.o)] __wcscat -> [libc.a(wcscat.o)] __wcscpy -> [libc.a(wcscpy.o)] wcscpy I can reproduce this issue with powerpc[|64|64le] too, using different values for --with-cpu. There is also a build failure when using --disable-multi-arch: vpath/libc_pic.os: In function `__wcscat': build/wcsmbs/wcscat.c:29: undefined reference to `__GI___wcscpy' collect2: error: ld returned 1 exit status -- Tulio Magno
On 27/02/2019 15:05, Joseph Myers wrote: > A recent patch, probably this one, has introduced linknamespace test > failures for powerpc (32-bit, --with-cpu=power4), as shown with > build-many-glibcs.py for powerpc-linux-gnu-power4. > > FAIL: conform/POSIX/fnmatch.h/linknamespace > FAIL: conform/POSIX/glob.h/linknamespace > FAIL: conform/POSIX/wordexp.h/linknamespace > FAIL: conform/XPG4/fnmatch.h/linknamespace > FAIL: conform/XPG4/glob.h/linknamespace > FAIL: conform/XPG4/wordexp.h/linknamespace > FAIL: conform/XPG42/fnmatch.h/linknamespace > FAIL: conform/XPG42/glob.h/linknamespace > FAIL: conform/XPG42/wordexp.h/linknamespace > > [initial] wordexp -> [libc.a(wordexp.o)] fnmatch -> [libc.a(fnmatch.o)] __wcscat -> [libc.a(wcscat.o)] __wcscpy -> [libc.a(wcscpy.o)] wcscpy > Fixed upstream by 6bd4d02ee997f5b073583c5d8638ffdefedb13bc. Thanks for catching it.
On Wed, 27 Feb 2019, Tulio Magno Quites Machado Filho wrote: > I can reproduce this issue with powerpc[|64|64le] too, using different values > for --with-cpu. > > There is also a build failure when using --disable-multi-arch: Since neither the 64-bit failures, nor the --disable-multi-arch one, showed up with my build-many-glibcs.py bot, that suggests we need more such powerpc variants added to build-many-glibcs.py. -- Joseph S. Myers joseph@codesourcery.com
diff --git a/include/wchar.h b/include/wchar.h index 614073bcb3..2cb44954fc 100644 --- a/include/wchar.h +++ b/include/wchar.h @@ -182,6 +182,10 @@ extern size_t __wcsnrtombs (char *__restrict __dst, size_t __nwc, size_t __len, __mbstate_t *__restrict __ps) attribute_hidden; +extern wchar_t *__wcscpy (wchar_t *__restrict __dest, + const wchar_t *__restrict __src) + attribute_hidden __nonnull ((1, 2)); +libc_hidden_proto (__wcscpy) extern wchar_t *__wcsncpy (wchar_t *__restrict __dest, const wchar_t *__restrict __src, size_t __n); extern wchar_t *__wcpcpy (wchar_t *__dest, const wchar_t *__src); diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c b/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c index 52b692b47b..31e0d81ef0 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c @@ -17,10 +17,11 @@ #include <wchar.h> -#if IS_IN (libc) -# define WCSCPY __wcscpy_ppc -#endif - extern __typeof (wcscpy) __wcscpy_ppc; +#define WCSCPY __wcscpy_ppc #include <wcsmbs/wcscpy.c> + +#ifdef SHARED +__hidden_ver1 (__wcscpy_ppc, __GI___wcscpy, __wcscpy_ppc); +#endif diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c b/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c index ecca37d5d6..eb95c856bd 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c @@ -16,21 +16,18 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#if IS_IN (libc) -# include <wchar.h> -# include <shlib-compat.h> -# include "init-arch.h" +#include <wchar.h> +#include <shlib-compat.h> +#include "init-arch.h" extern __typeof (wcscpy) __wcscpy_ppc attribute_hidden; extern __typeof (wcscpy) __wcscpy_power6 attribute_hidden; extern __typeof (wcscpy) __wcscpy_power7 attribute_hidden; -libc_ifunc (wcscpy, +libc_ifunc (__wcscpy, (hwcap & PPC_FEATURE_HAS_VSX) ? 
__wcscpy_power7 : (hwcap & PPC_FEATURE_ARCH_2_05) ? __wcscpy_power6 : __wcscpy_ppc); -#else -#include <wcsmbs/wcscpy.c> -#endif +weak_alias (__wcscpy, wcscpy) diff --git a/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c b/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c index 3cea9a489d..3f918b27c6 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c +++ b/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c @@ -16,21 +16,20 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#if IS_IN (libc) -# include <wchar.h> -# include <shlib-compat.h> -# include "init-arch.h" +#define __wcscpy __redirect___wcscpy +#include <wchar.h> +#undef __wcscpy +#include <shlib-compat.h> +#include "init-arch.h" extern __typeof (wcscpy) __wcscpy_ppc attribute_hidden; extern __typeof (wcscpy) __wcscpy_power6 attribute_hidden; extern __typeof (wcscpy) __wcscpy_power7 attribute_hidden; -libc_ifunc (wcscpy, - (hwcap & PPC_FEATURE_HAS_VSX) - ? __wcscpy_power7 : - (hwcap & PPC_FEATURE_ARCH_2_05) - ? __wcscpy_power6 - : __wcscpy_ppc); -#else -#include <wcsmbs/wcscpy.c> -#endif +libc_ifunc_redirected (__redirect___wcscpy, __wcscpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcscpy_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcscpy_power6 + : __wcscpy_ppc); +weak_alias (__wcscpy, wcscpy) diff --git a/sysdeps/s390/wcscpy.c b/sysdeps/s390/wcscpy.c index 2e8ef5024f..a569f917af 100644 --- a/sysdeps/s390/wcscpy.c +++ b/sysdeps/s390/wcscpy.c @@ -30,9 +30,11 @@ extern __typeof (wcscpy) WCSCPY_C attribute_hidden; extern __typeof (wcscpy) WCSCPY_Z13 attribute_hidden; # endif -s390_libc_ifunc_expr (wcscpy, wcscpy, +s390_libc_ifunc_expr (wcscpy, __wcscpy, (HAVE_WCSCPY_Z13 && (hwcap & HWCAP_S390_VX)) ? 
WCSCPY_Z13 : WCSCPY_DEFAULT ) +weak_alias (__wcscpy, wcscpy) +libc_hidden_def (__wcscpy) #endif diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c index a51a83a9be..26d6984e9b 100644 --- a/sysdeps/x86_64/multiarch/wcscpy-c.c +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c @@ -1,5 +1,5 @@ #if IS_IN (libc) -# define wcscpy __wcscpy_sse2 +# define WCSCPY __wcscpy_sse2 #endif -#include "wcsmbs/wcscpy.c" +#include <wcsmbs/wcscpy.c> diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c index 101a585358..96151b4963 100644 --- a/sysdeps/x86_64/multiarch/wcscpy.c +++ b/sysdeps/x86_64/multiarch/wcscpy.c @@ -19,9 +19,9 @@ /* Define multiple versions only for the definition in libc. */ #if IS_IN (libc) -# define wcscpy __redirect_wcscpy +# define __wcscpy __redirect_wcscpy # include <wchar.h> -# undef wcscpy +# undef __wcscpy # define SYMBOL_NAME wcscpy # include <init-arch.h> @@ -40,5 +40,10 @@ IFUNC_SELECTOR (void) return OPTIMIZE (sse2); } -libc_ifunc_redirected (__redirect_wcscpy, wcscpy, IFUNC_SELECTOR ()); +libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ()); +weak_alias (__wcscpy, wcscpy) +# ifdef SHARED +__hidden_ver1 (__wcscpy, __GI___wcscpy, __redirect_wcscpy) + __attribute__((visibility ("hidden"))) __attribute_copy__ (wcscpy); +# endif #endif diff --git a/wcsmbs/wcscat.c b/wcsmbs/wcscat.c index 6a25b20e31..1a9d667fda 100644 --- a/wcsmbs/wcscat.c +++ b/wcsmbs/wcscat.c @@ -26,26 +26,7 @@ wchar_t * __wcscat (wchar_t *dest, const wchar_t *src) { - wchar_t *s1 = dest; - const wchar_t *s2 = src; - wchar_t c; - - /* Find the end of the string. */ - do - c = *s1++; - while (c != L'\0'); - - /* Make S1 point before the next character, so we can increment - it while memory is read (wins on pipelined cpus). 
*/ - s1 -= 2; - - do - { - c = *s2++; - *++s1 = c; - } - while (c != L'\0'); - + __wcscpy (dest + __wcslen (dest), src); return dest; } #ifndef WCSCAT diff --git a/wcsmbs/wcscpy.c b/wcsmbs/wcscpy.c index 7a34c77a9e..636bf6bd01 100644 --- a/wcsmbs/wcscpy.c +++ b/wcsmbs/wcscpy.c @@ -20,13 +20,13 @@ #include <wchar.h> -#ifndef WCSCPY -# define WCSCPY wcscpy +#ifdef WCSCPY +# define __wcscpy WCSCPY #endif /* Copy SRC to DEST. */ wchar_t * -WCSCPY (wchar_t *dest, const wchar_t *src) +__wcscpy (wchar_t *dest, const wchar_t *src) { wint_t c; wchar_t *wcp; @@ -58,3 +58,7 @@ WCSCPY (wchar_t *dest, const wchar_t *src) return dest; } +#ifndef WCSCPY +weak_alias (__wcscpy, wcscpy) +libc_hidden_def (__wcscpy) +#endif