From patchwork Thu Mar 31 14:01:01 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella Netto X-Patchwork-Id: 64792 Delivered-To: patch@linaro.org Received: by 10.112.199.169 with SMTP id jl9csp175388lbc; Thu, 31 Mar 2016 07:01:48 -0700 (PDT) X-Received: by 10.67.7.1 with SMTP id cy1mr22477168pad.123.1459432908024; Thu, 31 Mar 2016 07:01:48 -0700 (PDT) Return-Path: Received: from sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id vx8si14358239pac.107.2016.03.31.07.01.47 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Thu, 31 Mar 2016 07:01:48 -0700 (PDT) Received-SPF: pass (google.com: domain of libc-alpha-return-68489-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@sourceware.org; spf=pass (google.com: domain of libc-alpha-return-68489-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom=libc-alpha-return-68489-patch=linaro.org@sourceware.org; dmarc=fail (p=NONE dis=NONE) header.from=linaro.org DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; q=dns; s=default; b=hKcolL8qVMEAUTbpaYlHqrz82UusGwF EuqAT6S5+Aam2gY7d0wlkaxDeFbWHpe2jiaesktFvSis8t0r+kkE0HN+Ah5KcbWC tUMI1yrIeyZQf/bnKEVuHSkbmB19cx4JojmuyjiaWu3eygOQVH92gUxSY3UnozmS WOe6/GhELxMI= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; s=default; bh=eZdjDgwmx+ku3VMtltGa7OZuE/s=; b=K8EBU fQZM8PEaFa8nBX0+pFjy3KEkC5xFJZH14hyNNu7aPSdkTPWkNLGf6/6PZL983UOo 0hmq9BfOTYRduetgFyO3119w6InJHcyQYjUv5h72gV2DZdsZdEueHZT6hf8GEB8n 
w2EccojeCuJxCyo06OSrYYI6+NidUJ92fSCgtI= Received: (qmail 116482 invoked by alias); 31 Mar 2016 14:01:23 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 116368 invoked by uid 89); 31 Mar 2016 14:01:22 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=91478 X-HELO: mail-yw0-f180.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to :references; bh=v3vDmEOFmTYjPOZGf7mAj3x51/FW/TjSYy8kvk6XVnk=; b=mMEtJX+v7DPx0SFM649VcanGEcoZm2zS3U7l8Skqk7UH3rntIiA6WnzxrmpRQxZlLu SBtortBcLpsPOxdClghc+/sHWHVvS/TvOOwA2JNRsiWRd8fpSJQoj3G9oyPL8913YCVK HcamtT2MSxcG5T/cFpfLiNDORMdQ2qDGs0Jk540rbltKs6THyMKa/Dbtfk2NPUPBsbKl mwc2z4+1S/di2V/2vvbv1XBcZEWusjYF1SfOHpFE9X5PvAoTVO7M2GgH9m28oapEbvGe 5Cmz4w4ttqN/2plR778+YRMV/OnezuDqmmbu7dwTa/YQVj7eVkFuxYLnfK+aM1fK1YuE gTrQ== X-Gm-Message-State: AD7BkJKTRe5taZn6/cUD6kCi1IMur9hE0w71np2viW2Hy/06Y807bt0VJRzmvC1KhoX9iQC9 X-Received: by 10.37.20.195 with SMTP id 186mr8007886ybu.60.1459432872589; Thu, 31 Mar 2016 07:01:12 -0700 (PDT) From: Adhemerval Zanella To: libc-alpha@sourceware.org Subject: [PATCH 2/4] Improve generic strspn performance Date: Thu, 31 Mar 2016 11:01:01 -0300 Message-Id: <1459432863-20749-3-git-send-email-adhemerval.zanella@linaro.org> In-Reply-To: <1459432863-20749-1-git-send-email-adhemerval.zanella@linaro.org> References: <1459432863-20749-1-git-send-email-adhemerval.zanella@linaro.org> As for strcspn, this patch improves strspn performance using a much faster algorithm. 
It first constructs a 256-entry table based on the accept string and then uses it as a lookup table for the input string. As with the strcspn optimization, it is generally at least 10 times faster than the existing implementation on bench-strspn on a few AArch64 implementations. Also the string/bits/string2.h inlines no longer make sense, as the current implementation already provides most of their optimizations. Tested on x86_64, i686, and aarch64. * string/strspn.c (strspn): Rewrite function. * string/bits/string2.h (strspn): Use __builtin_strspn. (__strspn_c1): Remove inline function. (__strspn_c2): Likewise. (__strspn_c3): Likewise. * string/string-inlines.c [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c1): Add compatibility symbol. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c2): Likewise. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strspn_c3): Likewise. --- ChangeLog | 15 ++++++++++ string/bits/string2.h | 74 ++----------------------------------------------- string/string-inlines.c | 36 ++++++++++++++++++++++++ string/strspn.c | 54 ++++++++++++++++++++++++++---------- 4 files changed, 94 insertions(+), 85 deletions(-) -- 1.9.1 diff --git a/string/bits/string2.h b/string/bits/string2.h index a8df0db..75a66a1 100644 --- a/string/bits/string2.h +++ b/string/bits/string2.h @@ -914,78 +914,10 @@ __stpcpy_small (char *__dest, /* Return the length of the initial segment of S which consists entirely of characters in ACCEPT. */ -#if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES -# ifndef _HAVE_STRING_ARCH_strspn -# if __GNUC_PREREQ (3, 2) -# define strspn(s, accept) \ - __extension__ \ - ({ char __a0, __a1, __a2; \ - (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ - ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ - ? __builtin_strspn (s, accept) \ - : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ - ? ((void) (s), (size_t) 0) \ - : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ - ? 
__strspn_c1 (s, __a0) \ - : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ - ? __strspn_c2 (s, __a0, __a1) \ - : (((const char *) (accept))[3] == '\0' \ - ? __strspn_c3 (s, __a0, __a1, __a2) \ - : __builtin_strspn (s, accept)))))) \ - : __builtin_strspn (s, accept)); }) -# else -# define strspn(s, accept) \ - __extension__ \ - ({ char __a0, __a1, __a2; \ - (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ - ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ - ? ((void) (s), (size_t) 0) \ - : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ - ? __strspn_c1 (s, __a0) \ - : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ - ? __strspn_c2 (s, __a0, __a1) \ - : (((const char *) (accept))[3] == '\0' \ - ? __strspn_c3 (s, __a0, __a1, __a2) \ - : strspn (s, accept))))) \ - : strspn (s, accept)); }) -# endif +#ifndef _HAVE_STRING_ARCH_strspn +# if __GNUC_PREREQ (3, 2) +# define strspn(s, accept) __builtin_strspn (s, accept) # endif - -__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept); -__STRING_INLINE size_t -__strspn_c1 (const char *__s, int __accept) -{ - size_t __result = 0; - /* Please note that __accept never can be '\0'. */ - while (__s[__result] == __accept) - ++__result; - return __result; -} - -__STRING_INLINE size_t __strspn_c2 (const char *__s, int __accept1, - int __accept2); -__STRING_INLINE size_t -__strspn_c2 (const char *__s, int __accept1, int __accept2) -{ - size_t __result = 0; - /* Please note that __accept1 and __accept2 never can be '\0'. */ - while (__s[__result] == __accept1 || __s[__result] == __accept2) - ++__result; - return __result; -} - -__STRING_INLINE size_t __strspn_c3 (const char *__s, int __accept1, - int __accept2, int __accept3); -__STRING_INLINE size_t -__strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3) -{ - size_t __result = 0; - /* Please note that __accept1 to __accept3 never can be '\0'. 
*/ - while (__s[__result] == __accept1 || __s[__result] == __accept2 - || __s[__result] == __accept3) - ++__result; - return __result; -} #endif diff --git a/string/string-inlines.c b/string/string-inlines.c index 83bdd6c..754b315 100644 --- a/string/string-inlines.c +++ b/string/string-inlines.c @@ -71,4 +71,40 @@ __old_strcspn_c3 (const char *__s, int __reject1, int __reject2, return __result; } compat_symbol (libc, __old_strcspn_c3, __strcspn_c3, GLIBC_2_1_1); + +size_t +__old_strspn_c1 (const char *__s, int __accept) +{ + size_t __result = 0; + /* Please note that __accept never can be '\0'. */ + while (__s[__result] == __accept) + ++__result; + return __result; +} +compat_symbol (libc, __old_strspn_c1, __strspn_c1, GLIBC_2_1_1); + +size_t +__old_strspn_c2 (const char *__s, int __accept1, int __accept2) +{ + size_t __result = 0; + /* Please note that __accept1 and __accept2 never can be '\0'. */ + while (__s[__result] == __accept1 || __s[__result] == __accept2) + ++__result; + return __result; +} +compat_symbol (libc, __old_strspn_c2, __strspn_c2, GLIBC_2_1_1); + +size_t +__old_strspn_c3 (const char *__s, int __accept1, int __accept2, + int __accept3) +{ + size_t __result = 0; + /* Please note that __accept1 to __accept3 never can be '\0'. */ + while (__s[__result] == __accept1 || __s[__result] == __accept2 + || __s[__result] == __accept3) + ++__result; + return __result; +} +compat_symbol (libc, __old_strspn_c3, __strspn_c3, GLIBC_2_1_1); + #endif diff --git a/string/strspn.c b/string/strspn.c index f0635c1..30f7747 100644 --- a/string/strspn.c +++ b/string/strspn.c @@ -25,23 +25,49 @@ /* Return the length of the maximum initial segment of S which contains only characters in ACCEPT. 
*/ size_t -STRSPN (const char *s, const char *accept) +STRSPN (const char *str, const char *accept) { - const char *p; - const char *a; - size_t count = 0; - - for (p = s; *p != '\0'; ++p) + if (accept[0] == '\0') + return 0; + if (__glibc_unlikely (accept[1] == '\0')) { - for (a = accept; *a != '\0'; ++a) - if (*p == *a) - break; - if (*a == '\0') - return count; - else - ++count; + const char *a = str; + for (; *str == *accept; str++); + return str - a; } - return count; + /* Use multiple small memsets to enable inlining on most targets. */ + unsigned char table[256]; + unsigned char *p = memset (table, 0, 64); + memset (p + 64, 0, 64); + memset (p + 128, 0, 64); + memset (p + 192, 0, 64); + + unsigned char *s = (unsigned char*) accept; + /* Different from strcspn it does not add the NULL on the table + so can avoid check if str[i] is NULL, since table['\0'] will + be 0 and thus stopping the loop check. */ + do + p[*s++] = 1; + while (*s); + + s = (unsigned char*) str; + if (!p[s[0]]) return 0; + if (!p[s[1]]) return 1; + if (!p[s[2]]) return 2; + if (!p[s[3]]) return 3; + + s = (unsigned char *) ((size_t)(s) & ~3); + unsigned int c0, c1, c2, c3; + do { + s += 4; + c0 = p[s[0]]; + c1 = p[s[1]]; + c2 = p[s[2]]; + c3 = p[s[3]]; + } while ((c0 & c1 & c2 & c3) != 0); + + size_t count = s - (unsigned char *) str; + return (c0 & c1) == 0 ? count + c0 : count + c2 + 2; } libc_hidden_builtin_def (strspn)