@@ -15,8 +15,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <stddef.h>
#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
+#include <string-extbyte.h>
+#include <memcopy.h>
#undef strcpy
@@ -28,6 +33,106 @@
char *
STRCPY (char *dest, const char *src)
{
- return memcpy (dest, src, strlen (src) + 1);
+ char *dst = dest;
+ const op_t *xs;
+ op_t *xd;
+ op_t ws;
+
+#if _STRING_ARCH_unaligned
+ /* For architectures which support unaligned memory operations: first copy
+ bytes until the source pointer is op_t-aligned, then read one op_t at a
+ time until a zero is found, storing unaligned to the destination. */
+ uintptr_t n = -(uintptr_t) src % sizeof (op_t);
+ for (uintptr_t i = 0; i < n; ++i)
+ {
+ unsigned c = *src++;
+ *dst++ = c;
+ if (c == '\0')
+ return dest;
+ }
+ xs = (const op_t *) src;
+ ws = *xs++;
+ xd = (op_t *) dst;
+ while (!has_zero (ws))
+ {
+ *xd++ = ws;
+ ws = *xs++;
+ }
+#else
+ /* For architectures which only support aligned accesses: first copy bytes
+ until the destination pointer is op_t-aligned. */
+ uintptr_t n = -(uintptr_t) dst % sizeof (op_t);
+ for (uintptr_t i = 0; i < n; ++i)
+ {
+ unsigned c = *src++;
+ *dst++ = c;
+ if (c == '\0')
+ return dest;
+ }
+ xd = (op_t *) dst;
+
+ /* Destination is now aligned to op_t while the source might not be. */
+ uintptr_t ofs = (uintptr_t) src % sizeof (op_t);
+ if (ofs == 0)
+ {
+ /* Aligned loop. Once a word with a zero is read, leave it in WS and
+ exit so that the final byte loop copies the remaining bytes. */
+ xs = (const op_t *) src;
+
+ ws = *xs++;
+ while (!has_zero (ws))
+ {
+ *xd++ = ws;
+ ws = *xs++;
+ }
+ }
+ else
+ {
+ /* Unaligned loop: round the source pointer down to an op_t boundary
+ and mask off the leading bytes which are not part of the string. */
+ op_t wsa, wsb;
+ uintptr_t sh_1, sh_2;
+
+ xs = (const op_t *)(src - ofs);
+ wsa = *xs++;
+ sh_1 = ofs * CHAR_BIT;
+ sh_2 = sizeof(op_t) * CHAR_BIT - sh_1;
+
+ /* Build the first partial op_t from the source, filling the rest of
+ the bytes with 0xff so that the has_zero test can also tell whether
+ EOS has already been reached. If it has, simply fall through to
+ the final byte copies. */
+ ws = MERGE (wsa, sh_1, (op_t)-1, sh_2);
+ if (!has_zero (ws))
+ {
+ while (1)
+ {
+ wsb = *xs++;
+ ws = MERGE (wsa, sh_1, wsb, sh_2);
+ if (has_zero (wsb))
+ break;
+ *xd++ = ws;
+ wsa = wsb;
+ }
+
+ /* WS may contain bytes that were not yet written to the destination.
+ Store them, then rebuild WS from the rest of WSB, the op_t which
+ contains the EOS byte. */
+ if (!has_zero (ws))
+ {
+ *xd++ = ws;
+ ws = MERGE (wsb, sh_1, ws, sh_2);
+ }
+ }
+ }
+#endif
+
+ /* Copy the final bytes from WS one at a time, through the first zero. */
+ dst = (char *) xd;
+ uintptr_t fz = index_first_zero (ws);
+ for (uintptr_t i = 0; i < fz + 1; i++)
+ *dst++ = extractbyte (ws, i);
+
+ return dest;
}
libc_hidden_builtin_def (strcpy)
@@ -207,7 +207,7 @@ do_random_tests (void)
int
test_main (void)
{
- size_t i;
+ size_t i, j;
test_init ();
@@ -222,12 +222,26 @@ test_main (void)
do_test (0, 0, i, BIG_CHAR);
do_test (0, i, i, SMALL_CHAR);
do_test (i, 0, i, BIG_CHAR);
+
+ for (j = 1; j < 16; ++j)
+ {
+ do_test (0, 0, i + j, SMALL_CHAR);
+ do_test (0, 0, i + j, BIG_CHAR);
+ do_test (0, i, i + j, SMALL_CHAR);
+ do_test (i, 0, i + j, BIG_CHAR);
+ }
}
for (i = 1; i < 8; ++i)
{
do_test (0, 0, 8 << i, SMALL_CHAR);
do_test (8 - i, 2 * i, 8 << i, SMALL_CHAR);
+
+ for (j = 1; j < 8; ++j)
+ {
+ do_test (0, 0, (8 << i) + j, SMALL_CHAR);
+ do_test (8 - i, 2 * i, (8 << i) + j, SMALL_CHAR);
+ }
}
for (i = 1; i < 8; ++i)
@@ -236,6 +250,14 @@ test_main (void)
do_test (2 * i, i, 8 << i, BIG_CHAR);
do_test (i, i, 8 << i, SMALL_CHAR);
do_test (i, i, 8 << i, BIG_CHAR);
+
+ for (j = 1; j < 8; ++j)
+ {
+ do_test (i, 2 * i, (8 << i) + j, SMALL_CHAR);
+ do_test (2 * i, i, (8 << i) + j, BIG_CHAR);
+ do_test (i, i, (8 << i) + j, SMALL_CHAR);
+ do_test (i, i, (8 << i) + j, BIG_CHAR);
+ }
}
do_random_tests ();
From: Adhemerval Zanella <adhemerval.zanella@linaro.com> The new generic implementation tries to use word operations along with the new string-fz{b,i} functions even for inputs with different alignments (it still uses aligned accesses plus a merge operation to get a correct word-by-word copy). Checked on x86_64-linux-gnu, i686-linux-gnu, sparc64-linux-gnu, and sparcv9-linux-gnu by removing the arch-specific assembly implementation and disabling multi-arch (it covers both LE and BE for 64 and 32 bits). Richard Henderson <rth@twiddle.net> Adhemerval Zanella <adhemerval.zanella@linaro.org> * string/strcpy.c: Rewrite using memcopy.h, string-fzb.h, string-fzi.h. * string/test-strcpy.c (test_main): Add more coverage. --- string/strcpy.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++- string/test-strcpy.c | 24 +++++++++++- 2 files changed, 130 insertions(+), 3 deletions(-) -- 2.7.4