@@ -80,6 +80,8 @@ Deprecated and removed features, and other changes affecting compatibility:
* The obsolete function ftime has been deprecated and will be removed from
a future version of glibc. Applications should use clock_gettime instead.
+* The 32-bit sparc v8 architecture configuration is no longer supported; building glibc for 32-bit sparc now requires a sparcv9-capable CPU.
+
Changes to build and runtime requirements:
[Add changes to build and runtime requirements here]
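The ftime entry above points applications at clock_gettime; a minimal migration sketch, illustrative only and not part of this patch:

/* Illustrative only: a millisecond-resolution wall-clock timestamp via
   clock_gettime, replacing a call to the deprecated ftime.  */
#include <time.h>
#include <stdio.h>

int
main (void)
{
  struct timespec ts;
  if (clock_gettime (CLOCK_REALTIME, &ts) != 0)
    return 1;
  printf ("%lld.%03ld\n", (long long) ts.tv_sec, ts.tv_nsec / 1000000);
  return 0;
}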
@@ -359,12 +359,12 @@ class Context(object):
os_name='linux-gnu',
glibcs=[{},
{'arch': 'sparcv9',
- 'ccopts': '-m32 -mlong-double-128'}],
+ 'ccopts': '-m32 -mlong-double-128 -mcpu=v9'}],
extra_glibcs=[{'variant': 'disable-multi-arch',
'cfg': ['--disable-multi-arch']},
{'variant': 'disable-multi-arch',
'arch': 'sparcv9',
- 'ccopts': '-m32 -mlong-double-128',
+ 'ccopts': '-m32 -mlong-double-128 -mcpu=v9',
'cfg': ['--disable-multi-arch']}])
self.add_config(arch='x86_64',
os_name='linux-gnu',
@@ -1,18 +1,14 @@
# preconfigure fragment for sparc.
case "$machine" in
-sparc | sparcv[67])
+sparc | sparcv8plus | sparcv8plusa | sparcv9)
base_machine=sparc machine=sparc/sparc32 ;;
-sparcv8 | supersparc | hypersparc)
- base_machine=sparc machine=sparc/sparc32/sparcv8 ;;
-sparcv8plus | sparcv8plusa | sparcv9)
- base_machine=sparc machine=sparc/sparc32/sparcv9 ;;
sparcv8plusb | sparcv9b)
- base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9b ;;
+ base_machine=sparc machine=sparc/sparc32/sparcv9b ;;
sparcv9v)
- base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9v ;;
+ base_machine=sparc machine=sparc/sparc32/sparcv9v ;;
sparcv9v2)
- base_machine=sparc machine=sparc/sparc32/sparcv9/sparcv9v2 ;;
+ base_machine=sparc machine=sparc/sparc32/sparcv9v2 ;;
sparc64)
base_machine=sparc machine=sparc/sparc64 ;;
sparc64b)
@@ -15,37 +15,19 @@
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
+sysdep-CFLAGS += -mcpu=ultrasparc -Wa,-Av9a -mvis
+
+ASFLAGS-.o += -Wa,-Av9d
+ASFLAGS-.os += -Wa,-Av9d
+ASFLAGS-.op += -Wa,-Av9d
+ASFLAGS-.oS += -Wa,-Av9d
+
ifeq ($(subdir),gnulib)
sysdep_routines = dotmul umul $(divrem) alloca
-endif # gnulib
+endif # gnulib
-# We distribute these files, even though they are generated,
-# so as to avoid the need for a functioning m4 to build the library.
divrem := sdiv udiv rem urem
-+divrem-NAME-sdiv := div
-+divrem-NAME-udiv := udiv
-+divrem-NAME-rem := rem
-+divrem-NAME-urem := urem
-+divrem-NAME = $(+divrem-NAME-$(basename $(notdir $@)))
-+divrem-OP-div := div
-+divrem-OP-udiv := div
-+divrem-OP-rem := rem
-+divrem-OP-urem := rem
-+divrem-S-div := true
-+divrem-S-rem := true
-+divrem-S-udiv := false
-+divrem-S-urem := false
-$(divrem:%=$(sysdep_dir)/sparc/sparc32/%.S): $(sysdep_dir)/sparc/sparc32/divrem.m4
- (echo -n "define(NAME,\`.$(+divrem-NAME)')"; \
- echo -n " define(OP,\`$(+divrem-OP-$(+divrem-NAME))')"; \
- echo -n " define(S,\`$(+divrem-S-$(+divrem-NAME))')"; \
- echo " /* This file is generated from divrem.m4; DO NOT EDIT! */"; \
- cat $<) | $(M4) > $@-tmp
-# Make it unwritable so noone will edit it by mistake.
- -chmod a-w $@-tmp
- mv -f $@-tmp $@
-
sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/sparc32/%.S)
# libgcc __divdi3 and __moddi3 uses .udiv and since it is also exported by
@@ -55,10 +37,18 @@ sysdep-realclean := $(sysdep-realclean) $(divrem:%=sysdeps/sparc/sparc32/%.S)
libc.so-gnulib += -Wl,--wrap=.udiv
ifeq ($(subdir),soft-fp)
-sparc32-quad-routines := q_add q_cmp q_cmpe q_div q_dtoq q_feq q_fge \
- q_fgt q_fle q_flt q_fne q_itoq q_mul q_neg q_qtod q_qtoi \
- q_qtos q_qtou q_qtoull q_qtoll q_sqrt q_stoq q_sub q_utoq \
- q_ulltoq q_lltoq q_util
+sparc32-quad-routines := q_add q_cmp q_cmpe q_div q_dtoq q_feq q_fge \
+ q_fgt q_fle q_flt q_fne q_itoq q_mul q_neg q_qtod q_qtoi \
+ q_qtos q_qtou q_qtoull q_qtoll q_sqrt q_stoq q_sub q_utoq \
+ q_ulltoq q_lltoq q_util
sysdep_routines += $(sparc32-quad-routines)
+endif
+
+# nscd uses atomic_spin_nop which in turn requires cpu_relax
+ifeq ($(subdir),nscd)
+routines += cpu_relax
+endif
+ifeq ($(subdir), nptl)
+libpthread-routines += cpu_relax
endif
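The nscd and nptl hunks above wire in cpu_relax because atomic_spin_nop expands to __cpu_relax on this port (see the atomic-machine.h change below). The shape of that dependency, as a hedged sketch rather than glibc source:

/* Sketch only, not glibc source: a spin-wait loop issues __cpu_relax
   (what atomic_spin_nop expands to on this port) between polls, which
   is why nscd and libpthread now need the cpu_relax object.
   spin_until_nonzero is a hypothetical helper for illustration.  */
extern void __cpu_relax (void);

static void
spin_until_nonzero (volatile int *flag)
{
  while (*flag == 0)
    __cpu_relax ();	/* busy-wait hint to the CPU */
}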
@@ -1,146 +1,81 @@
-! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
-! the result to a second limb vector.
+! SPARC v9 32-bit __mpn_addmul_1 -- Multiply a limb vector with a limb
+! and add the result to a second limb vector.
!
-! Copyright (C) 1992-2019 Free Software Foundation, Inc.
+! Copyright (C) 2013-2019 Free Software Foundation, Inc.
+! This file is part of the GNU C Library.
+! Contributed by David S. Miller <davem@davemloft.net>
!
-! This file is part of the GNU MP Library.
+! The GNU C Library is free software; you can redistribute it and/or
+! modify it under the terms of the GNU Lesser General Public
+! License as published by the Free Software Foundation; either
+! version 2.1 of the License, or (at your option) any later version.
!
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
+! The GNU C Library is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! Lesser General Public License for more details.
!
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-!
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not,
-! see <https://www.gnu.org/licenses/>.
-
-
-! INPUT PARAMETERS
-! RES_PTR o0
-! S1_PTR o1
-! SIZE o2
-! S2_LIMB o3
+! You should have received a copy of the GNU Lesser General Public
+! License along with the GNU C Library; if not, see
+! <https://www.gnu.org/licenses/>.
#include <sysdep.h>
-ENTRY(__mpn_addmul_1)
- ! Make S1_PTR and RES_PTR point at the end of their blocks
- ! and put (- 4 x SIZE) in index/loop counter.
- sll %o2,2,%o2
- add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
- add %o1,%o2,%o1
- sub %g0,%o2,%o2
-
- cmp %o3,0xfff
- bgu LOC(large)
- nop
-
- ld [%o1+%o2],%o5
- mov 0,%o0
- b LOC(0)
- add %o4,-4,%o4
-LOC(loop0):
- addcc %o5,%g1,%g1
- ld [%o1+%o2],%o5
- addx %o0,%g0,%o0
- st %g1,[%o4+%o2]
-LOC(0): wr %g0,%o3,%y
- sra %o5,31,%g2
- and %o3,%g2,%g2
- andcc %g1,0,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,0,%g1
- sra %g1,20,%g4
- sll %g1,12,%g1
- rd %y,%g3
- srl %g3,20,%g3
- or %g1,%g3,%g1
+#define res_ptr %i0
+#define s1_ptr %i1
+#define sz_arg %i2
+#define s2l_arg %i3
+#define sz %o4
+#define carry %o5
+#define s2_limb %g1
+#define tmp1 %l0
+#define tmp2 %l1
+#define tmp3 %l2
+#define tmp4 %l3
+#define tmp64_1 %g3
+#define tmp64_2 %o3
- addcc %g1,%o0,%g1
- addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
- addcc %o2,4,%o2 ! loop counter
- bne LOC(loop0)
- ld [%o4+%o2],%o5
-
- addcc %o5,%g1,%g1
- addx %o0,%g0,%o0
- retl
- st %g1,[%o4+%o2]
+ENTRY(__mpn_addmul_1)
+ save %sp, -96, %sp
+ srl sz_arg, 0, sz
+ srl s2l_arg, 0, s2_limb
+ subcc sz, 1, sz
+ be,pn %icc, .Lfinal_limb
+ clr carry
+.Lloop:
+ lduw [s1_ptr + 0x00], tmp1
+ lduw [res_ptr + 0x00], tmp3
+ lduw [s1_ptr + 0x04], tmp2
+ lduw [res_ptr + 0x04], tmp4
+ mulx tmp1, s2_limb, tmp64_1
+ add s1_ptr, 8, s1_ptr
+ mulx tmp2, s2_limb, tmp64_2
+ sub sz, 2, sz
+ add res_ptr, 8, res_ptr
+ add tmp3, tmp64_1, tmp64_1
+ add carry, tmp64_1, tmp64_1
+ stw tmp64_1, [res_ptr - 0x08]
+ srlx tmp64_1, 32, carry
+ add tmp4, tmp64_2, tmp64_2
+ add carry, tmp64_2, tmp64_2
+ stw tmp64_2, [res_ptr - 0x04]
+ brgz sz, .Lloop
+ srlx tmp64_2, 32, carry
-LOC(large):
- ld [%o1+%o2],%o5
- mov 0,%o0
- sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
- b LOC(1)
- add %o4,-4,%o4
-LOC(loop):
- addcc %o5,%g3,%g3
- ld [%o1+%o2],%o5
- addx %o0,%g0,%o0
- st %g3,[%o4+%o2]
-LOC(1): wr %g0,%o5,%y
- and %o5,%g4,%g2
- andcc %g0,%g0,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%g0,%g1
- rd %y,%g3
- addcc %g3,%o0,%g3
- addx %g2,%g1,%o0
- addcc %o2,4,%o2
- bne LOC(loop)
- ld [%o4+%o2],%o5
+ brlz,pt sz, .Lfinish
+ nop
- addcc %o5,%g3,%g3
- addx %o0,%g0,%o0
- retl
- st %g3,[%o4+%o2]
+.Lfinal_limb:
+ lduw [s1_ptr + 0x00], tmp1
+ lduw [res_ptr + 0x00], tmp3
+ mulx tmp1, s2_limb, tmp64_1
+ add tmp3, tmp64_1, tmp64_1
+ add carry, tmp64_1, tmp64_1
+ stw tmp64_1, [res_ptr + 0x00]
+ srlx tmp64_1, 32, carry
+.Lfinish:
+ jmpl %i7 + 0x8, %g0
+ restore carry, 0, %o0
END(__mpn_addmul_1)
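For reference, a rough C model of what __mpn_addmul_1 computes on 32-bit limbs; the sketch below is illustrative only (the function name and types are not glibc's), the authoritative version being the v9 assembly above:

/* Illustrative model of __mpn_addmul_1 on 32-bit limbs:
   rp[0..n-1] += s1p[0..n-1] * s2_limb, returning the final carry limb.
   The real routine does this two limbs per iteration using 64-bit mulx.  */
#include <stdint.h>

uint32_t
addmul_1_model (uint32_t *rp, const uint32_t *s1p, long n, uint32_t s2_limb)
{
  uint64_t t, carry = 0;
  for (long i = 0; i < n; i++)
    {
      t = (uint64_t) s1p[i] * s2_limb + rp[i] + carry;
      rp[i] = (uint32_t) t;	/* low 32 bits back into the result vector */
      carry = t >> 32;		/* high 32 bits feed the next iteration */
    }
  return (uint32_t) carry;
}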
@@ -1,4 +1,4 @@
-/* Atomic operations. sparc32 version.
+/* Atomic operations. sparcv9 version.
Copyright (C) 2003-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
@@ -17,9 +17,6 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#ifndef _ATOMIC_MACHINE_H
-#define _ATOMIC_MACHINE_H 1
-
#include <stdint.h>
typedef int8_t atomic8_t;
@@ -51,313 +48,61 @@ typedef uintmax_t uatomic_max_t;
#define USE_ATOMIC_COMPILER_BUILTINS 0
/* XXX Is this actually correct? */
-#define ATOMIC_EXCHANGE_USES_CAS 1
-
-
-/* We have no compare and swap, just test and set.
- The following implementation contends on 64 global locks
- per library and assumes no variable will be accessed using atomic.h
- macros from two different libraries. */
-
-__make_section_unallocated
- (".gnu.linkonce.b.__sparc32_atomic_locks, \"aw\", %nobits");
-
-volatile unsigned char __sparc32_atomic_locks[64]
- __attribute__ ((nocommon, section (".gnu.linkonce.b.__sparc32_atomic_locks"
- __sec_comment),
- visibility ("hidden")));
-
-#define __sparc32_atomic_do_lock(addr) \
- do \
- { \
- unsigned int __old_lock; \
- unsigned int __idx = (((long) addr >> 2) ^ ((long) addr >> 12)) \
- & 63; \
- do \
- __asm __volatile ("ldstub %1, %0" \
- : "=r" (__old_lock), \
- "=m" (__sparc32_atomic_locks[__idx]) \
- : "m" (__sparc32_atomic_locks[__idx]) \
- : "memory"); \
- while (__old_lock); \
- } \
- while (0)
-
-#define __sparc32_atomic_do_unlock(addr) \
- do \
- { \
- __sparc32_atomic_locks[(((long) addr >> 2) \
- ^ ((long) addr >> 12)) & 63] = 0; \
- __asm __volatile ("" ::: "memory"); \
- } \
- while (0)
-
-#define __sparc32_atomic_do_lock24(addr) \
- do \
- { \
- unsigned int __old_lock; \
- do \
- __asm __volatile ("ldstub %1, %0" \
- : "=r" (__old_lock), "=m" (*(addr)) \
- : "m" (*(addr)) \
- : "memory"); \
- while (__old_lock); \
- } \
- while (0)
-
-#define __sparc32_atomic_do_unlock24(addr) \
- do \
- { \
- __asm __volatile ("" ::: "memory"); \
- *(char *) (addr) = 0; \
- } \
- while (0)
-
-
-#ifndef SHARED
-# define __v9_compare_and_exchange_val_32_acq(mem, newval, oldval) \
-({union { __typeof (oldval) a; uint32_t v; } oldval_arg = { .a = (oldval) }; \
- union { __typeof (newval) a; uint32_t v; } newval_arg = { .a = (newval) }; \
- register uint32_t __acev_tmp __asm ("%g6"); \
- register __typeof (mem) __acev_mem __asm ("%g1") = (mem); \
- register uint32_t __acev_oldval __asm ("%g5"); \
- __acev_tmp = newval_arg.v; \
- __acev_oldval = oldval_arg.v; \
- /* .word 0xcde05005 is cas [%g1], %g5, %g6. Can't use cas here though, \
- because as will then mark the object file as V8+ arch. */ \
- __asm __volatile (".word 0xcde05005" \
- : "+r" (__acev_tmp), "=m" (*__acev_mem) \
- : "r" (__acev_oldval), "m" (*__acev_mem), \
- "r" (__acev_mem) : "memory"); \
- (__typeof (oldval)) __acev_tmp; })
-#endif
-
-/* The only basic operation needed is compare and exchange. */
-#define __v7_compare_and_exchange_val_acq(mem, newval, oldval) \
- ({ __typeof (mem) __acev_memp = (mem); \
- __typeof (*mem) __acev_ret; \
- __typeof (*mem) __acev_newval = (newval); \
- \
- __sparc32_atomic_do_lock (__acev_memp); \
- __acev_ret = *__acev_memp; \
- if (__acev_ret == (oldval)) \
- *__acev_memp = __acev_newval; \
- __sparc32_atomic_do_unlock (__acev_memp); \
- __acev_ret; })
-
-#define __v7_compare_and_exchange_bool_acq(mem, newval, oldval) \
- ({ __typeof (mem) __aceb_memp = (mem); \
- int __aceb_ret; \
- __typeof (*mem) __aceb_newval = (newval); \
- \
- __sparc32_atomic_do_lock (__aceb_memp); \
- __aceb_ret = 0; \
- if (*__aceb_memp == (oldval)) \
- *__aceb_memp = __aceb_newval; \
- else \
- __aceb_ret = 1; \
- __sparc32_atomic_do_unlock (__aceb_memp); \
- __aceb_ret; })
-
-#define __v7_exchange_acq(mem, newval) \
- ({ __typeof (mem) __acev_memp = (mem); \
- __typeof (*mem) __acev_ret; \
- __typeof (*mem) __acev_newval = (newval); \
- \
- __sparc32_atomic_do_lock (__acev_memp); \
- __acev_ret = *__acev_memp; \
- *__acev_memp = __acev_newval; \
- __sparc32_atomic_do_unlock (__acev_memp); \
- __acev_ret; })
-
-#define __v7_exchange_and_add(mem, value) \
- ({ __typeof (mem) __acev_memp = (mem); \
- __typeof (*mem) __acev_ret; \
- \
- __sparc32_atomic_do_lock (__acev_memp); \
- __acev_ret = *__acev_memp; \
- *__acev_memp = __acev_ret + (value); \
- __sparc32_atomic_do_unlock (__acev_memp); \
- __acev_ret; })
-
-/* Special versions, which guarantee that top 8 bits of all values
- are cleared and use those bits as the ldstub lock. */
-#define __v7_compare_and_exchange_val_24_acq(mem, newval, oldval) \
- ({ __typeof (mem) __acev_memp = (mem); \
- __typeof (*mem) __acev_ret; \
- __typeof (*mem) __acev_newval = (newval); \
- \
- __sparc32_atomic_do_lock24 (__acev_memp); \
- __acev_ret = *__acev_memp & 0xffffff; \
- if (__acev_ret == (oldval)) \
- *__acev_memp = __acev_newval; \
- else \
- __sparc32_atomic_do_unlock24 (__acev_memp); \
- __asm __volatile ("" ::: "memory"); \
- __acev_ret; })
-
-#define __v7_exchange_24_rel(mem, newval) \
- ({ __typeof (mem) __acev_memp = (mem); \
- __typeof (*mem) __acev_ret; \
- __typeof (*mem) __acev_newval = (newval); \
- \
- __sparc32_atomic_do_lock24 (__acev_memp); \
- __acev_ret = *__acev_memp & 0xffffff; \
- *__acev_memp = __acev_newval; \
- __asm __volatile ("" ::: "memory"); \
- __acev_ret; })
-
-#ifdef SHARED
-
-/* When dynamically linked, we assume pre-v9 libraries are only ever
- used on pre-v9 CPU. */
-# define __atomic_is_v9 0
-
-# define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
- __v7_compare_and_exchange_val_acq (mem, newval, oldval)
-
-# define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
- __v7_compare_and_exchange_bool_acq (mem, newval, oldval)
-
-# define atomic_exchange_acq(mem, newval) \
- __v7_exchange_acq (mem, newval)
-
-# define atomic_exchange_and_add(mem, value) \
- __v7_exchange_and_add (mem, value)
-
-# define atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \
- ({ \
- if (sizeof (*mem) != 4) \
- abort (); \
- __v7_compare_and_exchange_val_24_acq (mem, newval, oldval); })
-
-# define atomic_exchange_24_rel(mem, newval) \
- ({ \
- if (sizeof (*mem) != 4) \
- abort (); \
- __v7_exchange_24_rel (mem, newval); })
-
-# define atomic_full_barrier() __asm ("" ::: "memory")
-# define atomic_read_barrier() atomic_full_barrier ()
-# define atomic_write_barrier() atomic_full_barrier ()
-
-#else
-
-/* In libc.a/libpthread.a etc. we don't know if we'll be run on
- pre-v9 or v9 CPU. To be interoperable with dynamically linked
- apps on v9 CPUs e.g. with process shared primitives, use cas insn
- on v9 CPUs and ldstub on pre-v9. */
-
-extern uint64_t _dl_hwcap __attribute__((weak));
-# define __atomic_is_v9 \
- (__builtin_expect (&_dl_hwcap != 0, 1) \
- && __builtin_expect (_dl_hwcap & HWCAP_SPARC_V9, HWCAP_SPARC_V9))
-
-# define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
- ({ \
- __typeof (*mem) __acev_wret; \
- if (sizeof (*mem) != 4) \
- abort (); \
- if (__atomic_is_v9) \
- __acev_wret \
- = __v9_compare_and_exchange_val_32_acq (mem, newval, oldval);\
- else \
- __acev_wret \
- = __v7_compare_and_exchange_val_acq (mem, newval, oldval); \
- __acev_wret; })
-
-# define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
- ({ \
- int __acev_wret; \
- if (sizeof (*mem) != 4) \
- abort (); \
- if (__atomic_is_v9) \
- { \
- __typeof (oldval) __acev_woldval = (oldval); \
- __acev_wret \
- = __v9_compare_and_exchange_val_32_acq (mem, newval, \
- __acev_woldval) \
- != __acev_woldval; \
- } \
- else \
- __acev_wret \
- = __v7_compare_and_exchange_bool_acq (mem, newval, oldval); \
- __acev_wret; })
-
-# define atomic_exchange_rel(mem, newval) \
- ({ \
- __typeof (*mem) __acev_wret; \
- if (sizeof (*mem) != 4) \
- abort (); \
- if (__atomic_is_v9) \
- { \
- __typeof (mem) __acev_wmemp = (mem); \
- __typeof (*(mem)) __acev_wval = (newval); \
- do \
- __acev_wret = *__acev_wmemp; \
- while (__builtin_expect \
- (__v9_compare_and_exchange_val_32_acq (__acev_wmemp,\
- __acev_wval, \
- __acev_wret) \
- != __acev_wret, 0)); \
- } \
- else \
- __acev_wret = __v7_exchange_acq (mem, newval); \
- __acev_wret; })
-
-# define atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \
- ({ \
- __typeof (*mem) __acev_wret; \
- if (sizeof (*mem) != 4) \
- abort (); \
- if (__atomic_is_v9) \
- __acev_wret \
- = __v9_compare_and_exchange_val_32_acq (mem, newval, oldval);\
- else \
- __acev_wret \
- = __v7_compare_and_exchange_val_24_acq (mem, newval, oldval);\
- __acev_wret; })
-
-# define atomic_exchange_24_rel(mem, newval) \
- ({ \
- __typeof (*mem) __acev_w24ret; \
- if (sizeof (*mem) != 4) \
- abort (); \
- if (__atomic_is_v9) \
- __acev_w24ret = atomic_exchange_rel (mem, newval); \
- else \
- __acev_w24ret = __v7_exchange_24_rel (mem, newval); \
- __acev_w24ret; })
-
-#define atomic_full_barrier() \
- do { \
- if (__atomic_is_v9) \
- /* membar #LoadLoad | #LoadStore | #StoreLoad | #StoreStore */ \
- __asm __volatile (".word 0x8143e00f" : : : "memory"); \
- else \
- __asm __volatile ("" : : : "memory"); \
- } while (0)
-
-#define atomic_read_barrier() \
- do { \
- if (__atomic_is_v9) \
- /* membar #LoadLoad | #LoadStore */ \
- __asm __volatile (".word 0x8143e005" : : : "memory"); \
- else \
- __asm __volatile ("" : : : "memory"); \
- } while (0)
-
-#define atomic_write_barrier() \
- do { \
- if (__atomic_is_v9) \
- /* membar #LoadStore | #StoreStore */ \
- __asm __volatile (".word 0x8143e00c" : : : "memory"); \
- else \
- __asm __volatile ("" : : : "memory"); \
- } while (0)
-
-#endif
-
-#include <sysdep.h>
-
-#endif /* atomic-machine.h */
+#define ATOMIC_EXCHANGE_USES_CAS 0
+
+
+#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
+ (abort (), (__typeof (*mem)) 0)
+
+#define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval) \
+ (abort (), (__typeof (*mem)) 0)
+
+#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+({ \
+ __typeof (*(mem)) __acev_tmp; \
+ __typeof (mem) __acev_mem = (mem); \
+ if (__builtin_constant_p (oldval) && (oldval) == 0) \
+ __asm __volatile ("cas [%3], %%g0, %0" \
+ : "=r" (__acev_tmp), "=m" (*__acev_mem) \
+ : "m" (*__acev_mem), "r" (__acev_mem), \
+ "0" (newval) : "memory"); \
+ else \
+ __asm __volatile ("cas [%4], %2, %0" \
+ : "=r" (__acev_tmp), "=m" (*__acev_mem) \
+ : "r" (oldval), "m" (*__acev_mem), "r" (__acev_mem), \
+ "0" (newval) : "memory"); \
+ __acev_tmp; })
+
+/* This can be implemented if needed. */
+#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+ (abort (), (__typeof (*mem)) 0)
+
+#define atomic_exchange_acq(mem, newvalue) \
+ ({ __typeof (*(mem)) __oldval; \
+ __typeof (mem) __memp = (mem); \
+ __typeof (*(mem)) __value = (newvalue); \
+ \
+ if (sizeof (*(mem)) == 4) \
+ __asm ("swap %0, %1" \
+ : "=m" (*__memp), "=r" (__oldval) \
+ : "m" (*__memp), "1" (__value) : "memory"); \
+ else \
+ abort (); \
+ __oldval; })
+
+#define atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \
+ atomic_compare_and_exchange_val_acq (mem, newval, oldval)
+
+#define atomic_exchange_24_rel(mem, newval) \
+ atomic_exchange_rel (mem, newval)
+
+#define atomic_full_barrier() \
+ __asm __volatile ("membar #LoadLoad | #LoadStore" \
+ " | #StoreLoad | #StoreStore" : : : "memory")
+#define atomic_read_barrier() \
+ __asm __volatile ("membar #LoadLoad | #LoadStore" : : : "memory")
+#define atomic_write_barrier() \
+ __asm __volatile ("membar #LoadStore | #StoreStore" : : : "memory")
+
+extern void __cpu_relax (void);
+#define atomic_spin_nop() __cpu_relax ()
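A behavioral sketch of the 32-bit compare-and-swap that the macro above maps onto the v9 cas instruction; illustration only, since the hardware instruction performs the read-compare-write atomically:

/* Illustrative, non-atomic model of the v9 `cas' semantics used by
   __arch_compare_and_exchange_val_32_acq: if *mem equals oldval, store
   newval; in every case return the value previously held by *mem.  */
#include <stdint.h>

static uint32_t
cas32_model (volatile uint32_t *mem, uint32_t newval, uint32_t oldval)
{
  uint32_t prev = *mem;		/* value observed at the memory location */
  if (prev == oldval)
    *mem = newval;		/* the exchange happens only on a match */
  return prev;			/* caller checks prev against oldval */
}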
@@ -1,7 +1,7 @@
-/* Private macros for guiding the backtrace implementation, sparc32
+/* Private macros for guiding the backtrace implementation, sparc32 v9
version. */
#define backtrace_flush_register_windows() \
- asm volatile ("ta %0" : : "i" (ST_FLUSH_WINDOWS))
+ asm volatile ("flushw")
#define BACKTRACE_STACK_BIAS 0
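On v9 the register windows can be spilled with a single FLUSHW instruction instead of the ST_FLUSH_WINDOWS software trap; a minimal sketch of the macro above (hypothetical helper name, assuming a v9 target):

/* Sketch of backtrace_flush_register_windows on a v9 target: spill all
   register windows to the stack so the backtracer can walk saved frames.  */
static inline void
flush_register_windows_sketch (void)
{
  __asm__ __volatile__ ("flushw");
}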
new file mode 100644
@@ -0,0 +1,162 @@
+# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
+ # Local configure fragment for sysdeps/sparc/sparc32
+
+# Test if compiler defines __sparc_v9__.
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -z "$GREP"; then
+ ac_path_GREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+ # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_GREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_GREP"; then
+ as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+ then ac_cv_path_EGREP="$GREP -E"
+ else
+ if test -z "$EGREP"; then
+ ac_path_EGREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in egrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+ as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+ # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'EGREP' >> "conftest.nl"
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_EGREP="$ac_path_EGREP"
+ ac_path_EGREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_EGREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_EGREP"; then
+ as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_EGREP=$EGREP
+fi
+
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sparcv9" >&5
+$as_echo_n "checking for sparcv9... " >&6; }
+if ${libc_sparcv9+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifdef __sparc_v9__
+ yes
+ #endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "yes" >/dev/null 2>&1; then :
+ libc_sparcv9=yes
+else
+ libc_sparcv9=no
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_sparcv9" >&5
+$as_echo "$libc_sparcv9" >&6; }
+if test $libc_sparcv9 = no; then
+ as_fn_error $? "no support for pre-v9 sparc" "$LINENO" 5
+fi
new file mode 100644
@@ -0,0 +1,13 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/sparc/sparc32
+
+# Test if compiler defines __sparc_v9__.
+AC_CACHE_CHECK([for sparcv9],
+ [libc_sparcv9],
+ [AC_EGREP_CPP(yes,[#ifdef __sparc_v9__
+ yes
+ #endif
+ ], libc_sparcv9=yes, libc_sparcv9=no)])
+if test $libc_sparcv9 = no; then
+ AC_MSG_ERROR([no support for pre-v9 sparc])
+fi
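The probe above only checks a predefine: the build is rejected unless the compiler already defines __sparc_v9__ (as it does with the -mcpu=v9 option that build-many-glibcs.py now passes). A preprocessor sketch of what the test amounts to:

/* Sketch of the configure test: accept the configuration only when the
   compiler predefines __sparc_v9__, otherwise fail as configure does.  */
#ifdef __sparc_v9__
/* v9-capable 32-bit sparc: configuration accepted.  */
#else
# error "no support for pre-v9 sparc"
#endif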
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/cpu_relax.c
rename to sysdeps/sparc/sparc32/cpu_relax.c
deleted file mode 100644
@@ -1,234 +0,0 @@
-/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
- */
-
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- * NAME name of function to generate
- * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1
- * S S=true => signed; S=false => unsigned
- *
- * Algorithm parameters:
- * N how many bits per iteration we try to get (4)
- * WORDSIZE total number of bits (32)
- *
- * Derived constants:
- * TOPBITS number of bits in the top `decade' of a number
- *
- * Important variables:
- * Q the partial quotient under development (initially 0)
- * R the remainder so far, initially the dividend
- * ITER number of main division loop iterations required;
- * equal to ceil(log2(quotient) / N). Note that this
- * is the log base (2^N) of the quotient.
- * V the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- * Current estimate for non-large dividend is
- * ceil(log2(quotient) / N) * (10 + 7N/2) + C
- * A large dividend is one greater than 2^(31-TOPBITS) and takes a
- * different path, as the upper bits of the quotient must be developed
- * one bit at a time.
- */
-
-define(N, `4')dnl
-define(WORDSIZE, `32')dnl
-define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
-dnl
-define(dividend, `%o0')dnl
-define(divisor, `%o1')dnl
-define(Q, `%o2')dnl
-define(R, `%o3')dnl
-define(ITER, `%o4')dnl
-define(V, `%o5')dnl
-dnl
-dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
-define(T, `%g1')dnl
-define(SC, `%g2')dnl
-ifelse(S, `true', `define(SIGN, `%g3')')dnl
-
-dnl
-dnl This is the recursive definition for developing quotient digits.
-dnl
-dnl Parameters:
-dnl $1 the current depth, 1 <= $1 <= N
-dnl $2 the current accumulation of quotient bits
-dnl N max depth
-dnl
-dnl We add a new bit to $2 and either recurse or insert the bits in
-dnl the quotient. R, Q, and V are inputs and outputs as defined above;
-dnl the condition codes are expected to reflect the input R, and are
-dnl modified to reflect the output R.
-dnl
-define(DEVELOP_QUOTIENT_BITS,
-` ! depth $1, accumulated bits $2
- bl LOC($1.eval(2**N+$2))
- srl V,1,V
- ! remainder is positive
- subcc R,V,R
- ifelse($1, N,
- ` b 9f
- add Q, ($2*2+1), Q
-', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
-LOC($1.eval(2**N+$2)):
- ! remainder is negative
- addcc R,V,R
- ifelse($1, N,
- ` b 9f
- add Q, ($2*2-1), Q
-', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
-ifelse($1, 1, `9:')')dnl
-
-#include <sysdep.h>
-#include <sys/trap.h>
-
-ENTRY(NAME)
-ifelse(S, `true',
-` ! compute sign of result; if neither is negative, no problem
- orcc divisor, dividend, %g0 ! either negative?
- bge 2f ! no, go do the divide
-ifelse(OP, `div',
-` xor divisor, dividend, SIGN ! compute sign in any case',
-` mov dividend, SIGN ! sign of remainder matches dividend')
- tst divisor
- bge 1f
- tst dividend
- ! divisor is definitely negative; dividend might also be negative
- bge 2f ! if dividend not negative...
- sub %g0, divisor, divisor ! in any case, make divisor nonneg
-1: ! dividend is negative, divisor is nonnegative
- sub %g0, dividend, dividend ! make dividend nonnegative
-2:
-')
- ! Ready to divide. Compute size of quotient; scale comparand.
- orcc divisor, %g0, V
- bne 1f
- mov dividend, R
-
- ! Divide by zero trap. If it returns, return 0 (about as
- ! wrong as possible, but that is what SunOS does...).
- ta ST_DIV0
- retl
- clr %o0
-
-1:
- cmp R, V ! if divisor exceeds dividend, done
- blu LOC(got_result) ! (and algorithm fails otherwise)
- clr Q
- sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
- cmp R, T
- blu LOC(not_really_big)
- clr ITER
-
- ! `Here the dividend is >= 2**(31-N) or so. We must be careful here,
- ! as our usual N-at-a-shot divide step will cause overflow and havoc.
- ! The number of bits in the result here is N*ITER+SC, where SC <= N.
- ! Compute ITER in an unorthodox manner: know we need to shift V into
- ! the top decade: so do not even bother to compare to R.'
- 1:
- cmp V, T
- bgeu 3f
- mov 1, SC
- sll V, N, V
- b 1b
- add ITER, 1, ITER
-
- ! Now compute SC.
- 2: addcc V, V, V
- bcc LOC(not_too_big)
- add SC, 1, SC
-
- ! We get here if the divisor overflowed while shifting.
- ! This means that R has the high-order bit set.
- ! Restore V and subtract from R.
- sll T, TOPBITS, T ! high order bit
- srl V, 1, V ! rest of V
- add V, T, V
- b LOC(do_single_div)
- sub SC, 1, SC
-
- LOC(not_too_big):
- 3: cmp V, R
- blu 2b
- nop
- be LOC(do_single_div)
- nop
- /* NB: these are commented out in the V8-Sparc manual as well */
- /* (I do not understand this) */
- ! V > R: went too far: back up 1 step
- ! srl V, 1, V
- ! dec SC
- ! do single-bit divide steps
- !
- ! We have to be careful here. We know that R >= V, so we can do the
- ! first divide step without thinking. BUT, the others are conditional,
- ! and are only done if R >= 0. Because both R and V may have the high-
- ! order bit set in the first step, just falling into the regular
- ! division loop will mess up the first time around.
- ! So we unroll slightly...
- LOC(do_single_div):
- subcc SC, 1, SC
- bl LOC(end_regular_divide)
- nop
- sub R, V, R
- mov 1, Q
- b LOC(end_single_divloop)
- nop
- LOC(single_divloop):
- sll Q, 1, Q
- bl 1f
- srl V, 1, V
- ! R >= 0
- sub R, V, R
- b 2f
- add Q, 1, Q
- 1: ! R < 0
- add R, V, R
- sub Q, 1, Q
- 2:
- LOC(end_single_divloop):
- subcc SC, 1, SC
- bge LOC(single_divloop)
- tst R
- b,a LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
- sll V, N, V
- cmp V, R
- bleu 1b
- addcc ITER, 1, ITER
- be LOC(got_result)
- sub ITER, 1, ITER
-
- tst R ! set up for initial iteration
-LOC(divloop):
- sll Q, N, Q
- DEVELOP_QUOTIENT_BITS(1, 0)
-LOC(end_regular_divide):
- subcc ITER, 1, ITER
- bge LOC(divloop)
- tst R
- bl,a LOC(got_result)
- ! non-restoring fixup here (one instruction only!)
-ifelse(OP, `div',
-` sub Q, 1, Q
-', ` add R, divisor, R
-')
-
-LOC(got_result):
-ifelse(S, `true',
-` ! check to see if answer should be < 0
- tst SIGN
- bl,a 1f
- ifelse(OP, `div', `sub %g0, Q, Q', `sub %g0, R, R')
-1:')
- retl
- ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
-
-END(NAME)
-ifelse(OP, `div', ifelse(S, `false', `strong_alias (.udiv, __wrap_.udiv)
-'))dnl
@@ -147,7 +147,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
Elf32_Rela *relaend
= (Elf32_Rela *) ((char *) rela
+ l->l_info[DT_PLTRELSZ]->d_un.d_val);
-#if !defined RTLD_BOOTSTRAP && !defined __sparc_v9__
+#if !defined RTLD_BOOTSTRAP
/* Note that we don't mask the hwcap here, as the flush is
essential to functionality on those cpu's that implement it.
For sparcv9 we can assume flush is present. */
@@ -298,15 +298,8 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t,
const Elf32_Rela *reloc,
Elf32_Addr *reloc_addr, Elf32_Addr value)
{
-#ifdef __sparc_v9__
/* Sparc v9 can assume flush is always present. */
- const int do_flush = 1;
-#else
- /* Note that we don't mask the hwcap here, as the flush is essential to
- functionality on those cpu's that implement it. */
- const int do_flush = GLRO(dl_hwcap) & HWCAP_SPARC_FLUSH;
-#endif
- return sparc_fixup_plt (reloc, reloc_addr, value, 1, do_flush);
+ return sparc_fixup_plt (reloc, reloc_addr, value, 1, 1);
}
/* Return the final value of a plt relocation. */
@@ -433,7 +426,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
/* Fall thru */
case R_SPARC_JMP_SLOT:
{
-#if !defined RTLD_BOOTSTRAP && !defined __sparc_v9__
+#if !defined RTLD_BOOTSTRAP
/* Note that we don't mask the hwcap here, as the flush is
essential to functionality on those cpu's that implement
it. For sparcv9 we can assume flush is present. */
@@ -47,13 +47,11 @@ sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr,
{
unsigned int insn = OPCODE_BA | ((disp >> 2) & 0x3fffff);
-#ifdef __sparc_v9__
/* On V9 we can do even better by using a branch with
prediction if we fit into the even smaller 19-bit
displacement field. */
if (disp >= -0x100000 && disp < 0x100000)
insn = OPCODE_BA_PT | ((disp >> 2) & 0x07ffff);
-#endif
/* Even if we are writing just a single branch, we must not
ignore the 't' offset. Consider a case where we have some
@@ -1,127 +1,17 @@
/*
- * Signed multiply, from Appendix E of the Sparc Version 8
- * Architecture Manual.
- */
-
-/*
- * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
- * the 64-bit product).
- *
- * This code optimizes short (less than 13-bit) multiplies.
+ * Sparc v9 has multiply.
*/
#include <sysdep.h>
-
+ .text
+ .align 32
ENTRY(.mul)
- mov %o0, %y ! multiplier -> Y
- andncc %o0, 0xfff, %g0 ! test bits 12..31
- be LOC(mul_shortway) ! if zero, can do it the short way
- andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
-
- /*
- * Long multiply. 32 steps, followed by a final shift step.
- */
- mulscc %o4, %o1, %o4 ! 1
- mulscc %o4, %o1, %o4 ! 2
- mulscc %o4, %o1, %o4 ! 3
- mulscc %o4, %o1, %o4 ! 4
- mulscc %o4, %o1, %o4 ! 5
- mulscc %o4, %o1, %o4 ! 6
- mulscc %o4, %o1, %o4 ! 7
- mulscc %o4, %o1, %o4 ! 8
- mulscc %o4, %o1, %o4 ! 9
- mulscc %o4, %o1, %o4 ! 10
- mulscc %o4, %o1, %o4 ! 11
- mulscc %o4, %o1, %o4 ! 12
- mulscc %o4, %o1, %o4 ! 13
- mulscc %o4, %o1, %o4 ! 14
- mulscc %o4, %o1, %o4 ! 15
- mulscc %o4, %o1, %o4 ! 16
- mulscc %o4, %o1, %o4 ! 17
- mulscc %o4, %o1, %o4 ! 18
- mulscc %o4, %o1, %o4 ! 19
- mulscc %o4, %o1, %o4 ! 20
- mulscc %o4, %o1, %o4 ! 21
- mulscc %o4, %o1, %o4 ! 22
- mulscc %o4, %o1, %o4 ! 23
- mulscc %o4, %o1, %o4 ! 24
- mulscc %o4, %o1, %o4 ! 25
- mulscc %o4, %o1, %o4 ! 26
- mulscc %o4, %o1, %o4 ! 27
- mulscc %o4, %o1, %o4 ! 28
- mulscc %o4, %o1, %o4 ! 29
- mulscc %o4, %o1, %o4 ! 30
- mulscc %o4, %o1, %o4 ! 31
- mulscc %o4, %o1, %o4 ! 32
- mulscc %o4, %g0, %o4 ! final shift
-
- ! If %o0 was negative, the result is
- ! (%o0 * %o1) + (%o1 << 32))
- ! We fix that here.
-
-#if 0
- tst %o0
- bge 1f
- rd %y, %o0
-
- ! %o0 was indeed negative; fix upper 32 bits of result by subtracting
- ! %o1 (i.e., return %o4 - %o1 in %o1).
- retl
- sub %o4, %o1, %o1
-
-1:
- retl
- mov %o4, %o1
-#else
- /* Faster code adapted from tege@sics.se's code for umul.S. */
- sra %o0, 31, %o2 ! make mask from sign bit
- and %o1, %o2, %o2 ! %o2 = 0 or %o1, depending on sign of %o0
- rd %y, %o0 ! get lower half of product
- retl
- sub %o4, %o2, %o1 ! subtract compensation
- ! and put upper half in place
-#endif
-
-LOC(mul_shortway):
- /*
- * Short multiply. 12 steps, followed by a final shift step.
- * The resulting bits are off by 12 and (32-12) = 20 bit positions,
- * but there is no problem with %o0 being negative (unlike above).
- */
- mulscc %o4, %o1, %o4 ! 1
- mulscc %o4, %o1, %o4 ! 2
- mulscc %o4, %o1, %o4 ! 3
- mulscc %o4, %o1, %o4 ! 4
- mulscc %o4, %o1, %o4 ! 5
- mulscc %o4, %o1, %o4 ! 6
- mulscc %o4, %o1, %o4 ! 7
- mulscc %o4, %o1, %o4 ! 8
- mulscc %o4, %o1, %o4 ! 9
- mulscc %o4, %o1, %o4 ! 10
- mulscc %o4, %o1, %o4 ! 11
- mulscc %o4, %o1, %o4 ! 12
- mulscc %o4, %g0, %o4 ! final shift
-
- /*
- * %o4 has 20 of the bits that should be in the low part of the
- * result; %y has the bottom 12 (as %y's top 12). That is:
- *
- * %o4 %y
- * +----------------+----------------+
- * | -12- | -20- | -12- | -20- |
- * +------(---------+------)---------+
- * --hi-- ----low-part----
- *
- * The upper 12 bits of %o4 should be sign-extended to form the
- * high part of the product (i.e., highpart = %o4 >> 20).
- */
- rd %y, %o5
- sll %o4, 12, %o0 ! shift middle bits left 12
- srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left
- or %o5, %o0, %o0 ! construct low part of result
+ sra %o0, 0, %o0
+ sra %o1, 0, %o1
+ mulx %o0, %o1, %o0
retl
- sra %o4, 20, %o1 ! ... and extract high part of result
+ srax %o0, 32, %o1
END(.mul)
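The rewritten .mul is a full 32x32 -> 64-bit signed multiply done with a single mulx after sign-extending both operands; a behavioral C model (illustrative names, not part of the patch):

/* Illustrative model of .mul: return the low word of the 64-bit signed
   product in %o0 and the high word in %o1, as the old mulscc loop did.  */
#include <stdint.h>

static void
dot_mul_model (int32_t a, int32_t b, uint32_t *lo, int32_t *hi)
{
  int64_t prod = (int64_t) a * (int64_t) b;	/* what mulx computes */
  *lo = (uint32_t) prod;			/* low 32 bits  -> %o0 */
  *hi = (int32_t) (prod >> 32);			/* high 32 bits -> %o1 */
}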
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile
rename to sysdeps/sparc/sparc32/fpu/multiarch/Makefile
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_copysign-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_copysign-generic.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_copysign-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_copysign-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_copysign.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_copysign.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_copysignf-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_copysignf-generic.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_copysignf-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_copysignf-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_copysignf.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_copysignf.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fabs-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fabs-generic.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fabs-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fabs-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fabs.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fabs.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fabsf-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fabsf-generic.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fabsf-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fabsf-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fabsf.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fabsf.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim-generic.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fdim-generic.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim-vis3.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fdim-vis3.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fdim.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdimf-generic.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fdimf-generic.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdimf-vis3.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fdimf-vis3.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdimf.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fdimf.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fma-generic.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fma-generic.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fma-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fma-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fma.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fma.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fmaf-generic.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fmaf-generic.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fmaf-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fmaf-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fmaf.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_fmaf.c
similarity index 68%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_llrint-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_llrint-generic.S
@@ -2,4 +2,4 @@
#define __llrint __llrint_generic
#undef libm_alias_double
#define libm_alias_double(a,b)
-#include <sysdeps/sparc/sparc32/sparcv9/fpu/s_llrint.S>
+#include <sysdeps/sparc/sparc32/fpu/s_llrint.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_llrint-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_llrint-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_llrint.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_llrint.c
similarity index 67%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_llrintf-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_llrintf-generic.S
@@ -2,4 +2,4 @@
#define __llrintf __llrintf_generic
#undef libm_alias_float
#define libm_alias_float(a, b)
-#include <sysdeps/sparc/sparc32/sparcv9/fpu/s_llrintf.S>
+#include <sysdeps/sparc/sparc32/fpu/s_llrintf.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_llrintf-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_llrintf-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_llrintf.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_llrintf.c
similarity index 68%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyint-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_nearbyint-generic.S
@@ -2,4 +2,4 @@
#define __nearbyint __nearbyint_generic
#undef libm_alias_double
#define libm_alias_double(a, b)
-#include <sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyint.S>
+#include <sysdeps/sparc/sparc32/fpu/s_nearbyint.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyint-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_nearbyint-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyint.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_nearbyint.c
similarity index 67%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyintf-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_nearbyintf-generic.S
@@ -2,4 +2,4 @@
#define __nearbyintf __nearbyintf_generic
#undef libm_alias_float
#define libm_alias_float(a, b)
-#include <sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyintf.S>
+#include <sysdeps/sparc/sparc32/fpu/s_nearbyintf.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyintf-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_nearbyintf-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyintf.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_nearbyintf.c
similarity index 68%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_rint-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_rint-generic.S
@@ -2,4 +2,4 @@
#define __rint __rint_generic
#undef libm_alias_double
#define libm_alias_double(a, b)
-#include <sysdeps/sparc/sparc32/sparcv9/fpu/s_rint.S>
+#include <sysdeps/sparc/sparc32/fpu/s_rint.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_rint-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_rint-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_rint.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_rint.c
similarity index 68%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_rintf-generic.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_rintf-generic.S
@@ -2,4 +2,4 @@
#define __rintf __rintf_generic
#undef libm_alias_float
#define libm_alias_float(a, b)
-#include <sysdeps/sparc/sparc32/sparcv9/fpu/s_rintf.S>
+#include <sysdeps/sparc/sparc32/fpu/s_rintf.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_rintf-vis3.S
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_rintf-vis3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_rintf.c
rename to sysdeps/sparc/sparc32/fpu/multiarch/s_rintf.c
@@ -1,4 +1,4 @@
-/* Float absolute value, sparc32 version.
+/* Float absolute value, sparc32+v9 version.
Copyright (C) 2011-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -25,6 +25,6 @@ ENTRY (__fabs)
st %o1, [%sp+76]
ldd [%sp+72], %f0
retl
- fabss %f0, %f0
+ fabsd %f0, %f0
END (__fabs)
libm_alias_double (__fabs, fabs)
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/s_isnan.S
rename to sysdeps/sparc/sparc32/fpu/s_isnan.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/s_llrint.S
rename to sysdeps/sparc/sparc32/fpu/s_llrint.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/s_llrintf.S
rename to sysdeps/sparc/sparc32/fpu/s_llrintf.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/s_lrintf.S
rename to sysdeps/sparc/sparc32/fpu/s_lrintf.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyint.S
rename to sysdeps/sparc/sparc32/fpu/s_nearbyint.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyintf.S
rename to sysdeps/sparc/sparc32/fpu/s_nearbyintf.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/s_rint.S
rename to sysdeps/sparc/sparc32/fpu/s_rint.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/fpu/s_rintf.S
rename to sysdeps/sparc/sparc32/fpu/s_rintf.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/hp-timing.h
rename to sysdeps/sparc/sparc32/hp-timing.h
@@ -1,142 +1,4 @@
-/* memchr (str, ch, n) -- Return pointer to first occurrence of CH in STR less
- than N.
- For SPARC v7.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
- David S. Miller <davem@caip.rutgers.edu>.
- This version is developed using the same algorithm as the fast C
- version which carries the following introduction:
- Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
- with help from Dan Sahlin (dan@sics.se) and
- commentary by Jim Blandy (jimb@ai.mit.edu);
- adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
- and implemented by Roland McGrath (roland@ai.mit.edu).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
- .align 4
-ENTRY(__memchr)
- andcc %o1, 0xff, %o1
- sll %o1, 8, %g6
- andcc %o0, 3, %g0
- or %o1, %g6, %g6
- sll %g6, 16, %o3
- be 10f
- or %o3, %g6, %g2
- cmp %o2, 0
- be 9f
- sethi %hi(0x80808080), %o4
- ldub [%o0], %g4
- cmp %g4, %o1
- be 1f
- add %o0, 1, %o0
- subcc %o2, 1, %o2
- be 9f
- andcc %o0, 3, %g0
- be 4f
- or %o4, %lo(0x80808080), %o3
- ldub [%o0], %g4
- cmp %g4, %o1
- be 1f
- add %o0, 1, %o0
- subcc %o2, 1, %o2
- be 9f
- andcc %o0, 3, %g0
- be 5f
- sethi %hi(0x01010101), %o5
- ldub [%o0], %g4
- cmp %g4, %o1
- be 1f
- add %o0, 1, %o0
- subcc %o2, 1, %o2
- bne,a 7f
- and %o2, 3, %g1
- retl
- clr %o0
-1: retl
- sub %o0, 1, %o0
-10: sethi %hi(0x80808080), %o4
- or %o4, %lo(0x80808080), %o3
-4: sethi %hi(0x01010101), %o5
-5: and %o2, 3, %g1
-7: andcc %o2, 0xfffffffc, %o2
- be 0f
- or %o5, %lo(0x01010101), %g6
- ld [%o0], %g4
-6: xor %g4, %g2, %g5
- add %o0, 4, %o0
- sub %g5, %g6, %g5
- andcc %g5, %o3, %g0
- bne 8f
- subcc %o2, 4, %o2
- bne,a 6b
- ld [%o0], %g4
-0: cmp %g1, 0
-1: be 9f
- add %o0, 4, %o0
- ldub [%o0 - 4], %g4
- cmp %g4, %o1
- be 4f
- cmp %g1, 1
- be 9f
- ldub [%o0 - 3], %g4
- cmp %g4, %o1
- be 3f
- cmp %g1, 2
- be 9f
- ldub [%o0 - 2], %g4
- cmp %g4, %o1
- be 2f
- nop
-9: retl
- clr %o0
-
- /* Check every byte. */
-8: srl %g4, 24, %g5
- and %g5, 0xff, %g5
- cmp %g5, %o1
- be 4f
- srl %g4, 16, %g5
- and %g5, 0xff, %g5
- cmp %g5, %o1
- be 3f
- srl %g4, 8, %g5
- and %g5, 0xff, %g5
- cmp %g5, %o1
- be 2f
- and %g4, 0xff, %g5
- cmp %g5, %o1
- be 1f
- cmp %o2, 0
- bne,a 6b
- ld [%o0], %g4
- b 1b
- cmp %g1, 0
-1: retl
- sub %o0, 1, %o0
-2: retl
- sub %o0, 2, %o0
-3: retl
- sub %o0, 3, %o0
-4: retl
- sub %o0, 4, %o0
-END(__memchr)
-
-weak_alias (__memchr, memchr)
-libc_hidden_builtin_def (memchr)
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/memchr.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/memcmp.S
rename to sysdeps/sparc/sparc32/memcmp.S
@@ -1,653 +1,4 @@
-/* Copy SIZE bytes from SRC to DEST.
- For SPARC v7.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by David S. Miller <davem@caip.rutgers.edu>,
- Eddie C. Dost <ecd@skynet.be> and
- Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* Both these macros have to start with exactly the same insn */
-#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
- ldd [%src + offset + 0x00], %t0; \
- ldd [%src + offset + 0x08], %t2; \
- ldd [%src + offset + 0x10], %t4; \
- ldd [%src + offset + 0x18], %t6; \
- st %t0, [%dst + offset + 0x00]; \
- st %t1, [%dst + offset + 0x04]; \
- st %t2, [%dst + offset + 0x08]; \
- st %t3, [%dst + offset + 0x0c]; \
- st %t4, [%dst + offset + 0x10]; \
- st %t5, [%dst + offset + 0x14]; \
- st %t6, [%dst + offset + 0x18]; \
- st %t7, [%dst + offset + 0x1c];
-
-#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
- ldd [%src + offset + 0x00], %t0; \
- ldd [%src + offset + 0x08], %t2; \
- ldd [%src + offset + 0x10], %t4; \
- ldd [%src + offset + 0x18], %t6; \
- std %t0, [%dst + offset + 0x00]; \
- std %t2, [%dst + offset + 0x08]; \
- std %t4, [%dst + offset + 0x10]; \
- std %t6, [%dst + offset + 0x18];
-
-#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldd [%src - offset - 0x10], %t0; \
- ldd [%src - offset - 0x08], %t2; \
- st %t0, [%dst - offset - 0x10]; \
- st %t1, [%dst - offset - 0x0c]; \
- st %t2, [%dst - offset - 0x08]; \
- st %t3, [%dst - offset - 0x04];
-
-#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldd [%src - offset - 0x10], %t0; \
- ldd [%src - offset - 0x08], %t2; \
- std %t0, [%dst - offset - 0x10]; \
- std %t2, [%dst - offset - 0x08];
-
-#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
- ldub [%src - offset - 0x02], %t0; \
- ldub [%src - offset - 0x01], %t1; \
- stb %t0, [%dst - offset - 0x02]; \
- stb %t1, [%dst - offset - 0x01];
-
-#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
- ldd [%src + offset + 0x00], %t0; \
- ldd [%src + offset + 0x08], %t2; \
- srl %t0, shir, %t5; \
- srl %t1, shir, %t6; \
- sll %t0, shil, %t0; \
- or %t5, %prev, %t5; \
- sll %t1, shil, %prev; \
- or %t6, %t0, %t0; \
- srl %t2, shir, %t1; \
- srl %t3, shir, %t6; \
- sll %t2, shil, %t2; \
- or %t1, %prev, %t1; \
- std %t4, [%dst + offset + offset2 - 0x04]; \
- std %t0, [%dst + offset + offset2 + 0x04]; \
- sll %t3, shil, %prev; \
- or %t6, %t2, %t4;
-
-#define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
- ldd [%src + offset + 0x00], %t0; \
- ldd [%src + offset + 0x08], %t2; \
- srl %t0, shir, %t4; \
- srl %t1, shir, %t5; \
- sll %t0, shil, %t6; \
- or %t4, %prev, %t0; \
- sll %t1, shil, %prev; \
- or %t5, %t6, %t1; \
- srl %t2, shir, %t4; \
- srl %t3, shir, %t5; \
- sll %t2, shil, %t6; \
- or %t4, %prev, %t2; \
- sll %t3, shil, %prev; \
- or %t5, %t6, %t3; \
- std %t0, [%dst + offset + offset2 + 0x00]; \
- std %t2, [%dst + offset + offset2 + 0x08];
-
- .text
-ENTRY(__mempcpy)
- add %o0, %o2, %g1
- ba 101f
- st %g1, [%sp + 64]
-END(__mempcpy)
-
- .align 4
-ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */
- st %o0, [%sp + 64]
-101:
- sub %o0, %o1, %o4
-9: andcc %o4, 3, %o5
-0: bne 86f
- cmp %o2, 15
-
- bleu 90f
- andcc %o1, 3, %g0
-
- be 78f
- andcc %o1, 4, %g0
-
- andcc %o1, 1, %g0
- be 4f
- andcc %o1, 2, %g0
-
- ldub [%o1], %g2
- add %o1, 1, %o1
- stb %g2, [%o0]
- sub %o2, 1, %o2
- bne 77f
- add %o0, 1, %o0
-4: lduh [%o1], %g2
- add %o1, 2, %o1
- sth %g2, [%o0]
- sub %o2, 2, %o2
- add %o0, 2, %o0
-
-77: andcc %o1, 4, %g0
-78: be 2f
- mov %o2, %g1
-
- ld [%o1], %o4
- sub %g1, 4, %g1
- st %o4, [%o0]
- add %o1, 4, %o1
- add %o0, 4, %o0
-2: andcc %g1, 0xffffff80, %g6
- be 3f
- andcc %o0, 4, %g0
-
- be 82f + 4
-5: MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
- MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
- MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
- MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
- subcc %g6, 128, %g6
- add %o1, 128, %o1
- bne 5b
- add %o0, 128, %o0
-3: andcc %g1, 0x70, %g6
- be 80f
- andcc %g1, 8, %g0
-
- srl %g6, 1, %o4
- mov %o7, %g2
- add %g6, %o4, %o4
- add %o1, %g6, %o1
-104: call 100f
- add %o0, %g6, %o0
- jmpl %o5 + (80f - 104b), %g0
- mov %g2, %o7
-
-79: MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
- MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
- MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
- MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
- MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
- MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
- MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
-
-80: be 81f
- andcc %g1, 4, %g0
-
- ldd [%o1], %g2
- add %o0, 8, %o0
- st %g2, [%o0 - 0x08]
- add %o1, 8, %o1
- st %g3, [%o0 - 0x04]
-
-81: be 1f
- andcc %g1, 2, %g0
-
- ld [%o1], %g2
- add %o1, 4, %o1
- st %g2, [%o0]
- add %o0, 4, %o0
-1: be 1f
- andcc %g1, 1, %g0
-
- lduh [%o1], %g2
- add %o1, 2, %o1
- sth %g2, [%o0]
- add %o0, 2, %o0
-1: be 1f
- nop
-
- ldub [%o1], %g2
- stb %g2, [%o0]
-1: retl
- ld [%sp + 64], %o0
-
-82: /* ldd_std */
- MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
- MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
- MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
- MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
- subcc %g6, 128, %g6
- add %o1, 128, %o1
- bne 82b
- add %o0, 128, %o0
-
- andcc %g1, 0x70, %g6
- be 84f
- andcc %g1, 8, %g0
-
- mov %o7, %g2
-111: call 110f
- add %o1, %g6, %o1
- mov %g2, %o7
- jmpl %o5 + (84f - 111b), %g0
- add %o0, %g6, %o0
-
-83: MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
-
-84: be 85f
- andcc %g1, 4, %g0
-
- ldd [%o1], %g2
- add %o0, 8, %o0
- std %g2, [%o0 - 0x08]
- add %o1, 8, %o1
-85: be 1f
- andcc %g1, 2, %g0
-
- ld [%o1], %g2
- add %o1, 4, %o1
- st %g2, [%o0]
- add %o0, 4, %o0
-1: be 1f
- andcc %g1, 1, %g0
-
- lduh [%o1], %g2
- add %o1, 2, %o1
- sth %g2, [%o0]
- add %o0, 2, %o0
-1: be 1f
- nop
-
- ldub [%o1], %g2
- stb %g2, [%o0]
-1: retl
- ld [%sp + 64], %o0
-
-86: cmp %o2, 6
- bleu 88f
-
- cmp %o2, 256
- bcc 87f
-
- andcc %o0, 3, %g0
- be 61f
- andcc %o0, 1, %g0
- be 60f
- andcc %o0, 2, %g0
-
- ldub [%o1], %g5
- add %o1, 1, %o1
- stb %g5, [%o0]
- sub %o2, 1, %o2
- bne 61f
- add %o0, 1, %o0
-60: ldub [%o1], %g3
- add %o1, 2, %o1
- stb %g3, [%o0]
- sub %o2, 2, %o2
- ldub [%o1 - 1], %g3
- add %o0, 2, %o0
- stb %g3, [%o0 - 1]
-61: and %o1, 3, %g2
- and %o2, 0xc, %g3
- and %o1, -4, %o1
- cmp %g3, 4
- sll %g2, 3, %g4
- mov 32, %g2
- be 4f
- sub %g2, %g4, %g6
-
- blu 3f
- cmp %g3, 0x8
-
- be 2f
- srl %o2, 2, %g3
-
- ld [%o1], %o3
- add %o0, -8, %o0
- ld [%o1 + 4], %o4
- b 8f
- add %g3, 1, %g3
-2: ld [%o1], %o4
- add %o0, -12, %o0
- ld [%o1 + 4], %o5
- add %g3, 2, %g3
- b 9f
- add %o1, -4, %o1
-3: ld [%o1], %g1
- add %o0, -4, %o0
- ld [%o1 + 4], %o3
- srl %o2, 2, %g3
- b 7f
- add %o1, 4, %o1
-4: ld [%o1], %o5
- cmp %o2, 7
- ld [%o1 + 4], %g1
- srl %o2, 2, %g3
- bleu 10f
- add %o1, 8, %o1
-
- ld [%o1], %o3
- add %g3, -1, %g3
-5: sll %o5, %g4, %g2
- srl %g1, %g6, %g5
- or %g2, %g5, %g2
- st %g2, [%o0]
-7: ld [%o1 + 4], %o4
- sll %g1, %g4, %g2
- srl %o3, %g6, %g5
- or %g2, %g5, %g2
- st %g2, [%o0 + 4]
-8: ld [%o1 + 8], %o5
- sll %o3, %g4, %g2
- srl %o4, %g6, %g5
- or %g2, %g5, %g2
- st %g2, [%o0 + 8]
-9: ld [%o1 + 12], %g1
- sll %o4, %g4, %g2
- srl %o5, %g6, %g5
- addcc %g3, -4, %g3
- or %g2, %g5, %g2
- add %o1, 16, %o1
- st %g2, [%o0 + 12]
- add %o0, 16, %o0
- bne,a 5b
- ld [%o1], %o3
-10: sll %o5, %g4, %g2
- srl %g1, %g6, %g5
- srl %g6, 3, %g3
- or %g2, %g5, %g2
- sub %o1, %g3, %o1
- andcc %o2, 2, %g0
- st %g2, [%o0]
- be 1f
- andcc %o2, 1, %g0
-
- ldub [%o1], %g2
- add %o1, 2, %o1
- stb %g2, [%o0 + 4]
- add %o0, 2, %o0
- ldub [%o1 - 1], %g2
- stb %g2, [%o0 + 3]
-1: be 1f
- nop
- ldub [%o1], %g2
- stb %g2, [%o0 + 4]
-1: retl
- ld [%sp + 64], %o0
-
-87: andcc %o1, 3, %g0
- be 3f
- andcc %o1, 1, %g0
-
- be 4f
- andcc %o1, 2, %g0
-
- ldub [%o1], %g2
- add %o1, 1, %o1
- stb %g2, [%o0]
- sub %o2, 1, %o2
- bne 3f
- add %o0, 1, %o0
-4: lduh [%o1], %g2
- add %o1, 2, %o1
- srl %g2, 8, %g3
- sub %o2, 2, %o2
- stb %g3, [%o0]
- add %o0, 2, %o0
- stb %g2, [%o0 - 1]
-3: andcc %o1, 4, %g0
-
- bne 2f
- cmp %o5, 1
-
- ld [%o1], %o4
- srl %o4, 24, %g2
- stb %g2, [%o0]
- srl %o4, 16, %g3
- stb %g3, [%o0 + 1]
- srl %o4, 8, %g2
- stb %g2, [%o0 + 2]
- sub %o2, 4, %o2
- stb %o4, [%o0 + 3]
- add %o1, 4, %o1
- add %o0, 4, %o0
-2: be 33f
- cmp %o5, 2
- be 32f
- sub %o2, 4, %o2
-31: ld [%o1], %g2
- add %o1, 4, %o1
- srl %g2, 24, %g3
- and %o0, 7, %g5
- stb %g3, [%o0]
- cmp %g5, 7
- sll %g2, 8, %g1
- add %o0, 4, %o0
- be 41f
- and %o2, 0xffffffc0, %o3
- ld [%o0 - 7], %o4
-4: SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- subcc %o3, 64, %o3
- add %o1, 64, %o1
- bne 4b
- add %o0, 64, %o0
-
- andcc %o2, 0x30, %o3
- be,a 1f
- srl %g1, 16, %g2
-4: SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- subcc %o3, 16, %o3
- add %o1, 16, %o1
- bne 4b
- add %o0, 16, %o0
-
- srl %g1, 16, %g2
-1: st %o4, [%o0 - 7]
- sth %g2, [%o0 - 3]
- srl %g1, 8, %g4
- b 88f
- stb %g4, [%o0 - 1]
-32: ld [%o1], %g2
- add %o1, 4, %o1
- srl %g2, 16, %g3
- and %o0, 7, %g5
- sth %g3, [%o0]
- cmp %g5, 6
- sll %g2, 16, %g1
- add %o0, 4, %o0
- be 42f
- and %o2, 0xffffffc0, %o3
- ld [%o0 - 6], %o4
-4: SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- subcc %o3, 64, %o3
- add %o1, 64, %o1
- bne 4b
- add %o0, 64, %o0
-
- andcc %o2, 0x30, %o3
- be,a 1f
- srl %g1, 16, %g2
-4: SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- subcc %o3, 16, %o3
- add %o1, 16, %o1
- bne 4b
- add %o0, 16, %o0
-
- srl %g1, 16, %g2
-1: st %o4, [%o0 - 6]
- b 88f
- sth %g2, [%o0 - 2]
-33: ld [%o1], %g2
- sub %o2, 4, %o2
- srl %g2, 24, %g3
- and %o0, 7, %g5
- stb %g3, [%o0]
- cmp %g5, 5
- srl %g2, 8, %g4
- sll %g2, 24, %g1
- sth %g4, [%o0 + 1]
- add %o1, 4, %o1
- be 43f
- and %o2, 0xffffffc0, %o3
-
- ld [%o0 - 1], %o4
- add %o0, 4, %o0
-4: SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
- SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
- SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
- SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
- subcc %o3, 64, %o3
- add %o1, 64, %o1
- bne 4b
- add %o0, 64, %o0
-
- andcc %o2, 0x30, %o3
- be,a 1f
- srl %g1, 24, %g2
-4: SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
- subcc %o3, 16, %o3
- add %o1, 16, %o1
- bne 4b
- add %o0, 16, %o0
-
- srl %g1, 24, %g2
-1: st %o4, [%o0 - 5]
- b 88f
- stb %g2, [%o0 - 1]
-41: SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- subcc %o3, 64, %o3
- add %o1, 64, %o1
- bne 41b
- add %o0, 64, %o0
-
- andcc %o2, 0x30, %o3
- be,a 1f
- srl %g1, 16, %g2
-4: SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
- subcc %o3, 16, %o3
- add %o1, 16, %o1
- bne 4b
- add %o0, 16, %o0
-
- srl %g1, 16, %g2
-1: sth %g2, [%o0 - 3]
- srl %g1, 8, %g4
- b 88f
- stb %g4, [%o0 - 1]
-43: SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
- SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
- SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
- SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
- subcc %o3, 64, %o3
- add %o1, 64, %o1
- bne 43b
- add %o0, 64, %o0
-
- andcc %o2, 0x30, %o3
- be,a 1f
- srl %g1, 24, %g2
-4: SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
- subcc %o3, 16, %o3
- add %o1, 16, %o1
- bne 4b
- add %o0, 16, %o0
-
- srl %g1, 24, %g2
-1: stb %g2, [%o0 + 3]
- b 88f
- add %o0, 4, %o0
-42: SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- subcc %o3, 64, %o3
- add %o1, 64, %o1
- bne 42b
- add %o0, 64, %o0
-
- andcc %o2, 0x30, %o3
- be,a 1f
- srl %g1, 16, %g2
-4: SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
- subcc %o3, 16, %o3
- add %o1, 16, %o1
- bne 4b
- add %o0, 16, %o0
-
- srl %g1, 16, %g2
-1: sth %g2, [%o0 - 2]
-
- /* Fall through */
-
-88: and %o2, 0xe, %o3
- mov %o7, %g2
- sll %o3, 3, %o4
- add %o0, %o3, %o0
-106: call 100f
- add %o1, %o3, %o1
- mov %g2, %o7
- jmpl %o5 + (89f - 106b), %g0
- andcc %o2, 1, %g0
-
- MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
-
-89: be 1f
- nop
-
- ldub [%o1], %g2
- stb %g2, [%o0]
-1: retl
- ld [%sp + 64], %o0
-
-90: bne 88b
- andcc %o2, 8, %g0
-
- be 1f
- andcc %o2, 4, %g0
-
- ld [%o1 + 0x00], %g2
- ld [%o1 + 0x04], %g3
- add %o1, 8, %o1
- st %g2, [%o0 + 0x00]
- st %g3, [%o0 + 0x04]
- add %o0, 8, %o0
-1: b 81b
- mov %o2, %g1
-
-100: retl
- sub %o7, %o4, %o5
-110: retl
- sub %o7, %g6, %o5
-END(memcpy)
-
-libc_hidden_builtin_def (memcpy)
-
-libc_hidden_def (__mempcpy)
-weak_alias (__mempcpy, mempcpy)
-libc_hidden_builtin_def (mempcpy)
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/memcpy.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/memmove.S
rename to sysdeps/sparc/sparc32/memmove.S
@@ -1,154 +1,4 @@
-/* Set a block of memory to some byte value.
- For SPARC v7.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by David S. Miller <davem@caip.rutgers.edu> and
- Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- /* Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
-#define ZERO_BIG_BLOCK(base, offset, source) \
- std source, [base + offset + 0x00]; \
- std source, [base + offset + 0x08]; \
- std source, [base + offset + 0x10]; \
- std source, [base + offset + 0x18]; \
- std source, [base + offset + 0x20]; \
- std source, [base + offset + 0x28]; \
- std source, [base + offset + 0x30]; \
- std source, [base + offset + 0x38];
-
-#define ZERO_LAST_BLOCKS(base, offset, source) \
- std source, [base - offset - 0x38]; \
- std source, [base - offset - 0x30]; \
- std source, [base - offset - 0x28]; \
- std source, [base - offset - 0x20]; \
- std source, [base - offset - 0x18]; \
- std source, [base - offset - 0x10]; \
- std source, [base - offset - 0x08]; \
- std source, [base - offset - 0x00];
-
- .text
- .align 4
-ENTRY(__bzero)
- b 1f
- mov %g0, %g3
-
-3: cmp %o2, 3
- be 2f
- stb %g3, [%o0]
-
- cmp %o2, 2
- be 2f
- stb %g3, [%o0 + 0x01]
-
- stb %g3, [%o0 + 0x02]
-2: sub %o2, 4, %o2
- add %o1, %o2, %o1
- b 4f
- sub %o0, %o2, %o0
-END(__bzero)
-
-ENTRY(memset)
- and %o1, 0xff, %g3
- sll %g3, 8, %g2
- or %g3, %g2, %g3
- sll %g3, 16, %g2
- or %g3, %g2, %g3
- orcc %o2, %g0, %o1
-1: cmp %o1, 7
- bleu 7f
- mov %o0, %g1
-
- andcc %o0, 3, %o2
- bne 3b
-4: andcc %o0, 4, %g0
-
- be 2f
- mov %g3, %g2
-
- st %g3, [%o0]
- sub %o1, 4, %o1
- add %o0, 4, %o0
-2: andcc %o1, 0xffffff80, %o3
- be 9f
- andcc %o1, 0x78, %o2
-4: ZERO_BIG_BLOCK (%o0, 0x00, %g2)
- subcc %o3, 128, %o3
- ZERO_BIG_BLOCK (%o0, 0x40, %g2)
- bne 4b
- add %o0, 128, %o0
-
- orcc %o2, %g0, %g0
-9: be 6f
- andcc %o1, 7, %o1
-
- mov %o7, %g4
-101: call 100f
- srl %o2, 1, %o3
- mov %g4, %o7
- jmpl %o4 + (20f + 64 - 101b), %g0
- add %o0, %o2, %o0
-
-100: retl
- sub %o7, %o3, %o4
-
-20: ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
- ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
-
-6: be 8f
- andcc %o1, 4, %g0
- be 1f
- andcc %o1, 2, %g0
- st %g3, [%o0]
- add %o0, 4, %o0
-1: be 1f
- andcc %o1, 1, %g0
- sth %g3, [%o0]
- add %o0, 2, %o0
-1: bne,a 8f
- stb %g3, [%o0]
-8: retl
- mov %g1, %o0
-7: orcc %o1, 0, %g0
- be 0f
- subcc %o1, 1, %o1
- stb %g3, [%o0]
- be 0f
- subcc %o1, 1, %o1
- stb %g3, [%o0 + 1]
- be 0f
- subcc %o1, 1, %o1
- stb %g3, [%o0 + 2]
- be 0f
- subcc %o1, 1, %o1
- stb %g3, [%o0 + 3]
- be 0f
- subcc %o1, 1, %o1
- stb %g3, [%o0 + 4]
- be 0f
- subcc %o1, 1, %o1
- stb %g3, [%o0 + 5]
- be 0f
- subcc %o1, 1, %o1
- stb %g3, [%o0 + 6]
-0: retl
- nop
-END(memset)
-libc_hidden_builtin_def (memset)
-
-weak_alias (__bzero, bzero)
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/memset.S>
@@ -1,198 +1,70 @@
-! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
-! the result in a second limb vector.
+! SPARC v9 32-bit __mpn_mul_1 -- Multiply a limb vector with a single
+! limb and store the product in a second limb vector.
!
-! Copyright (C) 1992-2019 Free Software Foundation, Inc.
+! Copyright (C) 2013-2019 Free Software Foundation, Inc.
+! This file is part of the GNU C Library.
+! Contributed by David S. Miller <davem@davemloft.net>
!
-! This file is part of the GNU MP Library.
+! The GNU C Library is free software; you can redistribute it and/or
+! modify it under the terms of the GNU Lesser General Public
+! License as published by the Free Software Foundation; either
+! version 2.1 of the License, or (at your option) any later version.
!
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
+! The GNU C Library is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! Lesser General Public License for more details.
!
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-!
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not,
-! see <https://www.gnu.org/licenses/>.
-
-
-! INPUT PARAMETERS
-! RES_PTR o0
-! S1_PTR o1
-! SIZE o2
-! S2_LIMB o3
-
-! ADD CODE FOR SMALL MULTIPLIERS!
-!1: ld
-! st
-!
-!2: ld ,a
-! addxcc a,a,x
-! st x,
-!
-!3_unrolled:
-! ld ,a
-! addxcc a,a,x1 ! 2a + cy
-! addx %g0,%g0,x2
-! addcc a,x1,x ! 3a + c
-! st x,
-!
-! ld ,a
-! addxcc a,a,y1
-! addx %g0,%g0,y2
-! addcc a,y1,x
-! st x,
-!
-!4_unrolled:
-! ld ,a
-! srl a,2,x1 ! 4a
-! addxcc y2,x1,x
-! sll a,30,x2
-! st x,
-!
-! ld ,a
-! srl a,2,y1
-! addxcc x2,y1,y
-! sll a,30,y2
-! st x,
-!
-!5_unrolled:
-! ld ,a
-! srl a,2,x1 ! 4a
-! addxcc a,x1,x ! 5a + c
-! sll a,30,x2
-! addx %g0,x2,x2
-! st x,
-!
-! ld ,a
-! srl a,2,y1
-! addxcc a,y1,x
-! sll a,30,y2
-! addx %g0,y2,y2
-! st x,
-!
-!8_unrolled:
-! ld ,a
-! srl a,3,x1 ! 8a
-! addxcc y2,x1,x
-! sll a,29,x2
-! st x,
-!
-! ld ,a
-! srl a,3,y1
-! addxcc x2,y1,y
-! sll a,29,y2
-! st x,
+! You should have received a copy of the GNU Lesser General Public
+! License along with the GNU C Library; if not, see
+! <https://www.gnu.org/licenses/>.
#include <sysdep.h>
-ENTRY(__mpn_mul_1)
- ! Make S1_PTR and RES_PTR point at the end of their blocks
- ! and put (- 4 x SIZE) in index/loop counter.
- sll %o2,2,%o2
- add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
- add %o1,%o2,%o1
- sub %g0,%o2,%o2
-
- cmp %o3,0xfff
- bgu LOC(large)
- nop
+#define res_ptr %o0
+#define s1_ptr %o1
+#define sz %o2
+#define s2_limb %o3
+#define carry %o5
+#define tmp1 %g1
+#define tmp2 %g2
+#define tmp3 %g3
+#define tmp4 %o4
- ld [%o1+%o2],%o5
- mov 0,%o0
- b LOC(0)
- add %o4,-4,%o4
-LOC(loop0):
- st %g1,[%o4+%o2]
-LOC(0): wr %g0,%o3,%y
- sra %o5,31,%g2
- and %o3,%g2,%g2
- andcc %g1,0,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,0,%g1
- sra %g1,20,%g4
- sll %g1,12,%g1
- rd %y,%g3
- srl %g3,20,%g3
- or %g1,%g3,%g1
-
- addcc %g1,%o0,%g1
- addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
- addcc %o2,4,%o2 ! loop counter
- bne,a LOC(loop0)
- ld [%o1+%o2],%o5
+ENTRY(__mpn_mul_1)
+ srl sz, 0, sz
+ srl s2_limb, 0, s2_limb
+ subcc sz, 1, sz
+ be,pn %icc, .Lfinal_limb
+ clr carry
- retl
- st %g1,[%o4+%o2]
+.Lloop:
+ lduw [s1_ptr + 0x00], tmp1
+ lduw [s1_ptr + 0x04], tmp2
+ mulx tmp1, s2_limb, tmp3
+ add s1_ptr, 8, s1_ptr
+ mulx tmp2, s2_limb, tmp4
+ sub sz, 2, sz
+ add res_ptr, 8, res_ptr
+ add carry, tmp3, tmp3
+ stw tmp3, [res_ptr - 0x08]
+ srlx tmp3, 32, carry
+ add carry, tmp4, tmp4
+ stw tmp4, [res_ptr - 0x04]
+ brgz sz, .Lloop
+ srlx tmp4, 32, carry
+ brlz,pt sz, .Lfinish
+ nop
-LOC(large):
- ld [%o1+%o2],%o5
- mov 0,%o0
- sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
- b LOC(1)
- add %o4,-4,%o4
-LOC(loop):
- st %g3,[%o4+%o2]
-LOC(1): wr %g0,%o5,%y
- and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
- andcc %g0,%g0,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%g0,%g1
- rd %y,%g3
- addcc %g3,%o0,%g3
- addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb
- addcc %o2,4,%o2 ! loop counter
- bne,a LOC(loop)
- ld [%o1+%o2],%o5
+.Lfinal_limb:
+ lduw [s1_ptr + 0x00], tmp1
+ mulx tmp1, s2_limb, tmp3
+ add carry, tmp3, tmp3
+ stw tmp3, [res_ptr + 0x00]
+ srlx tmp3, 32, carry
+.Lfinish:
retl
- st %g3,[%o4+%o2]
-
+ mov carry, %o0
END(__mpn_mul_1)
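
For reference, the replacement loop above consumes two 32-bit limbs per iteration, uses mulx to form full 64-bit products, and takes the carry from the upper word of each product; odd counts fall through to .Lfinal_limb. A minimal C model of the same contract (illustrative only, not part of the patch; the function name is made up):

#include <stdint.h>
#include <stddef.h>

/* Multiply SIZE 32-bit limbs at S1_PTR by S2_LIMB, store the low words
   at RES_PTR and return the final carry -- the __mpn_mul_1 contract.  */
static uint32_t
mpn_mul_1_model (uint32_t *res_ptr, const uint32_t *s1_ptr,
                 size_t size, uint32_t s2_limb)
{
  uint32_t carry = 0;
  for (size_t i = 0; i < size; i++)
    {
      /* 32x32 -> 64 multiply plus incoming carry, like mulx + add.  */
      uint64_t prod = (uint64_t) s1_ptr[i] * s2_limb + carry;
      res_ptr[i] = (uint32_t) prod;       /* stw of the low word   */
      carry = (uint32_t) (prod >> 32);    /* srlx ..., 32, carry   */
    }
  return carry;                           /* returned in %o0       */
}
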
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile
rename to sysdeps/sparc/sparc32/multiarch/Makefile
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/bzero.c
rename to sysdeps/sparc/sparc32/multiarch/bzero.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/ifunc-impl-list.c
rename to sysdeps/sparc/sparc32/multiarch/ifunc-impl-list.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c
rename to sysdeps/sparc/sparc32/multiarch/md5-block.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S
rename to sysdeps/sparc/sparc32/multiarch/md5-crop.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-memmove-niagara7.S
rename to sysdeps/sparc/sparc32/multiarch/memcpy-memmove-niagara7.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S
rename to sysdeps/sparc/sparc32/multiarch/memcpy-niagara1.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S
rename to sysdeps/sparc/sparc32/multiarch/memcpy-niagara2.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara4.S
rename to sysdeps/sparc/sparc32/multiarch/memcpy-niagara4.S
similarity index 95%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra1.S
rename to sysdeps/sparc/sparc32/multiarch/memcpy-ultra1.S
@@ -29,5 +29,5 @@
# define memcpy __memcpy_ultra1
# define __memcpy_large __memcpy_large_ultra1
# define __mempcpy __mempcpy_ultra1
-# include <sysdeps/sparc/sparc32/sparcv9/memcpy.S>
+# include <sysdeps/sparc/sparc32/memcpy.S>
#endif
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S
rename to sysdeps/sparc/sparc32/multiarch/memcpy-ultra3.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.c
rename to sysdeps/sparc/sparc32/multiarch/memcpy.c
similarity index 66%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memmove-ultra1.S
rename to sysdeps/sparc/sparc32/multiarch/memmove-ultra1.S
@@ -1,4 +1,4 @@
#undef libc_hidden_builtin_def
#define libc_hidden_builtin_def(name)
#define memmove __memmove_ultra1
-#include <sysdeps/sparc/sparc32/sparcv9/memmove.S>
+#include <sysdeps/sparc/sparc32/memmove.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/mempcpy.c
rename to sysdeps/sparc/sparc32/multiarch/mempcpy.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S
rename to sysdeps/sparc/sparc32/multiarch/memset-niagara1.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara4.S
rename to sysdeps/sparc/sparc32/multiarch/memset-niagara4.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara7.S
rename to sysdeps/sparc/sparc32/multiarch/memset-niagara7.S
similarity index 95%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
rename to sysdeps/sparc/sparc32/multiarch/memset-ultra1.S
@@ -26,5 +26,5 @@
# define memset __memset_ultra1
# define __bzero __bzero_ultra1
-# include <sysdeps/sparc/sparc32/sparcv9/memset.S>
+# include <sysdeps/sparc/sparc32/memset.S>
#endif
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/memset.c
rename to sysdeps/sparc/sparc32/multiarch/memset.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/rtld-memcpy.c
rename to sysdeps/sparc/sparc32/multiarch/rtld-memcpy.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/rtld-memmove.c
rename to sysdeps/sparc/sparc32/multiarch/rtld-memmove.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/rtld-memset.c
rename to sysdeps/sparc/sparc32/multiarch/rtld-memset.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c
rename to sysdeps/sparc/sparc32/multiarch/sha256-block.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S
rename to sysdeps/sparc/sparc32/multiarch/sha256-crop.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c
rename to sysdeps/sparc/sparc32/multiarch/sha512-block.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S
rename to sysdeps/sparc/sparc32/multiarch/sha512-crop.S
deleted file mode 100644
@@ -1 +0,0 @@
-#error No support for pthread barriers on pre-v9 sparc.
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/pthread_spin_init.c
rename to sysdeps/sparc/sparc32/pthread_spin_init.c
@@ -1,32 +1 @@
-/* Copyright (C) 2012-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY(pthread_spin_lock)
-1: ldstub [%o0], %g1
- orcc %g1, 0x0, %g0
- bne,a 2f
- ldub [%o0], %g1
- retl
- mov 0, %o0
-2: orcc %g1, 0x0, %g0
- bne,a 2b
- ldub [%o0], %g1
- ba,a 1b
-END(pthread_spin_lock)
+#include <sysdeps/sparc/sparc64/pthread_spin_lock.S>
@@ -1,29 +1 @@
-/* Copyright (C) 2012-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <pthread-errnos.h>
-
- .text
-ENTRY(pthread_spin_trylock)
- ldstub [%o0], %o0
- cmp %o0, 0
- bne,a 1f
- mov EBUSY, %o0
-1: retl
- nop
-END(pthread_spin_trylock)
+#include <sysdeps/sparc/sparc64/pthread_spin_trylock.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/pthread_spin_unlock.S
rename to sysdeps/sparc/sparc32/pthread_spin_unlock.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/rawmemchr.S
rename to sysdeps/sparc/sparc32/rawmemchr.S
@@ -1,363 +1,22 @@
- /* This file is generated from divrem.m4; DO NOT EDIT! */
/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
+ * Sparc v9 has divide.
+ * As divx takes 68 cycles and sdivcc only 36,
+ * we use sdivcc even though it is deprecated.
*/
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- * .rem name of function to generate
- * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
- * true true=true => signed; true=false => unsigned
- *
- * Algorithm parameters:
- * N how many bits per iteration we try to get (4)
- * WORDSIZE total number of bits (32)
- *
- * Derived constants:
- * TOPBITS number of bits in the top decade of a number
- *
- * Important variables:
- * Q the partial quotient under development (initially 0)
- * R the remainder so far, initially the dividend
- * ITER number of main division loop iterations required;
- * equal to ceil(log2(quotient) / N). Note that this
- * is the log base (2^N) of the quotient.
- * V the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- * Current estimate for non-large dividend is
- * ceil(log2(quotient) / N) * (10 + 7N/2) + C
- * A large dividend is one greater than 2^(31-TOPBITS) and takes a
- * different path, as the upper bits of the quotient must be developed
- * one bit at a time.
- */
-
-
-
#include <sysdep.h>
-#include <sys/trap.h>
+ .text
+ .align 32
ENTRY(.rem)
- ! compute sign of result; if neither is negative, no problem
- orcc %o1, %o0, %g0 ! either negative?
- bge 2f ! no, go do the divide
- mov %o0, %g3 ! sign of remainder matches %o0
- tst %o1
- bge 1f
- tst %o0
- ! %o1 is definitely negative; %o0 might also be negative
- bge 2f ! if %o0 not negative...
- sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
-1: ! %o0 is negative, %o1 is nonnegative
- sub %g0, %o0, %o0 ! make %o0 nonnegative
-2:
-
- ! Ready to divide. Compute size of quotient; scale comparand.
- orcc %o1, %g0, %o5
- bne 1f
- mov %o0, %o3
-
- ! Divide by zero trap. If it returns, return 0 (about as
- ! wrong as possible, but that is what SunOS does...).
- ta ST_DIV0
- retl
- clr %o0
-
-1:
- cmp %o3, %o5 ! if %o1 exceeds %o0, done
- blu LOC(got_result) ! (and algorithm fails otherwise)
- clr %o2
- sethi %hi(1 << (32 - 4 - 1)), %g1
- cmp %o3, %g1
- blu LOC(not_really_big)
- clr %o4
-
- ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
- ! as our usual N-at-a-shot divide step will cause overflow and havoc.
- ! The number of bits in the result here is N*ITER+SC, where SC <= N.
- ! Compute ITER in an unorthodox manner: know we need to shift V into
- ! the top decade: so do not even bother to compare to R.
- 1:
- cmp %o5, %g1
- bgeu 3f
- mov 1, %g2
- sll %o5, 4, %o5
- b 1b
- add %o4, 1, %o4
-
- ! Now compute %g2.
- 2: addcc %o5, %o5, %o5
- bcc LOC(not_too_big)
- add %g2, 1, %g2
-
- ! We get here if the %o1 overflowed while shifting.
- ! This means that %o3 has the high-order bit set.
- ! Restore %o5 and subtract from %o3.
- sll %g1, 4, %g1 ! high order bit
- srl %o5, 1, %o5 ! rest of %o5
- add %o5, %g1, %o5
- b LOC(do_single_div)
- sub %g2, 1, %g2
-
- LOC(not_too_big):
- 3: cmp %o5, %o3
- blu 2b
- nop
- be LOC(do_single_div)
- nop
- /* NB: these are commented out in the V8-Sparc manual as well */
- /* (I do not understand this) */
- ! %o5 > %o3: went too far: back up 1 step
- ! srl %o5, 1, %o5
- ! dec %g2
- ! do single-bit divide steps
- !
- ! We have to be careful here. We know that %o3 >= %o5, so we can do the
- ! first divide step without thinking. BUT, the others are conditional,
- ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
- ! order bit set in the first step, just falling into the regular
- ! division loop will mess up the first time around.
- ! So we unroll slightly...
- LOC(do_single_div):
- subcc %g2, 1, %g2
- bl LOC(end_regular_divide)
- nop
- sub %o3, %o5, %o3
- mov 1, %o2
- b LOC(end_single_divloop)
- nop
- LOC(single_divloop):
- sll %o2, 1, %o2
- bl 1f
- srl %o5, 1, %o5
- ! %o3 >= 0
- sub %o3, %o5, %o3
- b 2f
- add %o2, 1, %o2
- 1: ! %o3 < 0
- add %o3, %o5, %o3
- sub %o2, 1, %o2
- 2:
- LOC(end_single_divloop):
- subcc %g2, 1, %g2
- bge LOC(single_divloop)
- tst %o3
- b,a LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
- sll %o5, 4, %o5
- cmp %o5, %o3
- bleu 1b
- addcc %o4, 1, %o4
- be LOC(got_result)
- sub %o4, 1, %o4
-
- tst %o3 ! set up for initial iteration
-LOC(divloop):
- sll %o2, 4, %o2
- ! depth 1, accumulated bits 0
- bl LOC(1.16)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 2, accumulated bits 1
- bl LOC(2.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits 3
- bl LOC(3.19)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 7
- bl LOC(4.23)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2+1), %o2
-
-LOC(4.23):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2-1), %o2
-
-
-LOC(3.19):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 5
- bl LOC(4.21)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2+1), %o2
-
-LOC(4.21):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2-1), %o2
-
-
-
-LOC(2.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits 1
- bl LOC(3.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 3
- bl LOC(4.19)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2+1), %o2
-
-LOC(4.19):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2-1), %o2
-
-
-LOC(3.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 1
- bl LOC(4.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2+1), %o2
-
-LOC(4.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2-1), %o2
-
-
-
-
-LOC(1.16):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 2, accumulated bits -1
- bl LOC(2.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits -1
- bl LOC(3.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -1
- bl LOC(4.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2+1), %o2
-
-LOC(4.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2-1), %o2
-
-
-LOC(3.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -3
- bl LOC(4.13)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2+1), %o2
-
-LOC(4.13):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2-1), %o2
-
-
-
-LOC(2.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits -3
- bl LOC(3.13)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -5
- bl LOC(4.11)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2+1), %o2
-
-LOC(4.11):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2-1), %o2
-
-
-LOC(3.13):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -7
- bl LOC(4.9)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2+1), %o2
-
-LOC(4.9):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2-1), %o2
-
-
-
-
-9:
-LOC(end_regular_divide):
- subcc %o4, 1, %o4
- bge LOC(divloop)
- tst %o3
- bl,a LOC(got_result)
- ! non-restoring fixup here (one instruction only!)
- add %o3, %o1, %o3
-
-LOC(got_result):
- ! check to see if answer should be < 0
- tst %g3
- bl,a 1f
- sub %g0, %o3, %o3
-1:
+ sra %o0, 31, %o2
+ wr %o2, 0, %y
+ sdivcc %o0, %o1, %o2
+ xnor %o2, %g0, %o3
+ movvs %icc, %o3, %o2
+ smul %o2, %o1, %o2
retl
- mov %o3, %o0
+ sub %o0, %o2, %o0
END(.rem)
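
The new .rem relies on the identity remainder = dividend - (dividend / divisor) * divisor, with the xnor/movvs pair patching the one case where sdivcc overflows (INT32_MIN / -1). A small C model of the intended result (illustrative only, not part of the patch; the function name is made up):

#include <stdint.h>

/* Truncating remainder as .rem returns it: the sign follows the
   dividend, and INT32_MIN % -1 comes out as 0.  Division by zero is
   left to trap, as the hardware divide does.  */
static int32_t
dot_rem_model (int32_t dividend, int32_t divisor)
{
  /* sdivcc divides the 64-bit value {%y, %o0}; the sra/wr %y pair
     sign-extends the dividend, so this is a plain 64-bit division.  */
  int64_t quot = (int64_t) dividend / divisor;
  /* smul + sub in the delay slot; kept in 64 bits here so the
     INT32_MIN / -1 case stays well defined in C.  */
  return (int32_t) ((int64_t) dividend - quot * divisor);
}
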
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/rtld-memcpy.c
rename to sysdeps/sparc/sparc32/rtld-memcpy.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/rtld-memmove.c
rename to sysdeps/sparc/sparc32/rtld-memmove.c
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/rtld-memset.c
rename to sysdeps/sparc/sparc32/rtld-memset.c
@@ -1,363 +1,20 @@
- /* This file is generated from divrem.m4; DO NOT EDIT! */
/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
+ * Sparc v9 has divide.
+ * As divx takes 68 cycles and sdivcc only 36,
+ * we use sdivcc even though it is deprecated.
*/
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- * .div name of function to generate
- * div div=div => %o0 / %o1; div=rem => %o0 % %o1
- * true true=true => signed; true=false => unsigned
- *
- * Algorithm parameters:
- * N how many bits per iteration we try to get (4)
- * WORDSIZE total number of bits (32)
- *
- * Derived constants:
- * TOPBITS number of bits in the top decade of a number
- *
- * Important variables:
- * Q the partial quotient under development (initially 0)
- * R the remainder so far, initially the dividend
- * ITER number of main division loop iterations required;
- * equal to ceil(log2(quotient) / N). Note that this
- * is the log base (2^N) of the quotient.
- * V the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- * Current estimate for non-large dividend is
- * ceil(log2(quotient) / N) * (10 + 7N/2) + C
- * A large dividend is one greater than 2^(31-TOPBITS) and takes a
- * different path, as the upper bits of the quotient must be developed
- * one bit at a time.
- */
-
-
-
#include <sysdep.h>
-#include <sys/trap.h>
+ .text
+ .align 32
ENTRY(.div)
- ! compute sign of result; if neither is negative, no problem
- orcc %o1, %o0, %g0 ! either negative?
- bge 2f ! no, go do the divide
- xor %o1, %o0, %g3 ! compute sign in any case
- tst %o1
- bge 1f
- tst %o0
- ! %o1 is definitely negative; %o0 might also be negative
- bge 2f ! if %o0 not negative...
- sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
-1: ! %o0 is negative, %o1 is nonnegative
- sub %g0, %o0, %o0 ! make %o0 nonnegative
-2:
-
- ! Ready to divide. Compute size of quotient; scale comparand.
- orcc %o1, %g0, %o5
- bne 1f
- mov %o0, %o3
-
- ! Divide by zero trap. If it returns, return 0 (about as
- ! wrong as possible, but that is what SunOS does...).
- ta ST_DIV0
- retl
- clr %o0
-
-1:
- cmp %o3, %o5 ! if %o1 exceeds %o0, done
- blu LOC(got_result) ! (and algorithm fails otherwise)
- clr %o2
- sethi %hi(1 << (32 - 4 - 1)), %g1
- cmp %o3, %g1
- blu LOC(not_really_big)
- clr %o4
-
- ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
- ! as our usual N-at-a-shot divide step will cause overflow and havoc.
- ! The number of bits in the result here is N*ITER+SC, where SC <= N.
- ! Compute ITER in an unorthodox manner: know we need to shift V into
- ! the top decade: so do not even bother to compare to R.
- 1:
- cmp %o5, %g1
- bgeu 3f
- mov 1, %g2
- sll %o5, 4, %o5
- b 1b
- add %o4, 1, %o4
-
- ! Now compute %g2.
- 2: addcc %o5, %o5, %o5
- bcc LOC(not_too_big)
- add %g2, 1, %g2
-
- ! We get here if the %o1 overflowed while shifting.
- ! This means that %o3 has the high-order bit set.
- ! Restore %o5 and subtract from %o3.
- sll %g1, 4, %g1 ! high order bit
- srl %o5, 1, %o5 ! rest of %o5
- add %o5, %g1, %o5
- b LOC(do_single_div)
- sub %g2, 1, %g2
-
- LOC(not_too_big):
- 3: cmp %o5, %o3
- blu 2b
- nop
- be LOC(do_single_div)
- nop
- /* NB: these are commented out in the V8-Sparc manual as well */
- /* (I do not understand this) */
- ! %o5 > %o3: went too far: back up 1 step
- ! srl %o5, 1, %o5
- ! dec %g2
- ! do single-bit divide steps
- !
- ! We have to be careful here. We know that %o3 >= %o5, so we can do the
- ! first divide step without thinking. BUT, the others are conditional,
- ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
- ! order bit set in the first step, just falling into the regular
- ! division loop will mess up the first time around.
- ! So we unroll slightly...
- LOC(do_single_div):
- subcc %g2, 1, %g2
- bl LOC(end_regular_divide)
- nop
- sub %o3, %o5, %o3
- mov 1, %o2
- b LOC(end_single_divloop)
- nop
- LOC(single_divloop):
- sll %o2, 1, %o2
- bl 1f
- srl %o5, 1, %o5
- ! %o3 >= 0
- sub %o3, %o5, %o3
- b 2f
- add %o2, 1, %o2
- 1: ! %o3 < 0
- add %o3, %o5, %o3
- sub %o2, 1, %o2
- 2:
- LOC(end_single_divloop):
- subcc %g2, 1, %g2
- bge LOC(single_divloop)
- tst %o3
- b,a LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
- sll %o5, 4, %o5
- cmp %o5, %o3
- bleu 1b
- addcc %o4, 1, %o4
- be LOC(got_result)
- sub %o4, 1, %o4
-
- tst %o3 ! set up for initial iteration
-LOC(divloop):
- sll %o2, 4, %o2
- ! depth 1, accumulated bits 0
- bl LOC(1.16)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 2, accumulated bits 1
- bl LOC(2.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits 3
- bl LOC(3.19)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 7
- bl LOC(4.23)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2+1), %o2
-
-LOC(4.23):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2-1), %o2
-
-
-LOC(3.19):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 5
- bl LOC(4.21)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2+1), %o2
-
-LOC(4.21):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2-1), %o2
-
-
-
-LOC(2.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits 1
- bl LOC(3.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 3
- bl LOC(4.19)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2+1), %o2
-
-LOC(4.19):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2-1), %o2
-
-
-LOC(3.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 1
- bl LOC(4.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2+1), %o2
-
-LOC(4.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2-1), %o2
-
-
-
-
-LOC(1.16):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 2, accumulated bits -1
- bl LOC(2.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits -1
- bl LOC(3.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -1
- bl LOC(4.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2+1), %o2
-
-LOC(4.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2-1), %o2
-
-
-LOC(3.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -3
- bl LOC(4.13)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2+1), %o2
-
-LOC(4.13):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2-1), %o2
-
-
-
-LOC(2.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits -3
- bl LOC(3.13)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -5
- bl LOC(4.11)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2+1), %o2
-
-LOC(4.11):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2-1), %o2
-
-
-LOC(3.13):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -7
- bl LOC(4.9)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2+1), %o2
-
-LOC(4.9):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2-1), %o2
-
-
-
-
-9:
-LOC(end_regular_divide):
- subcc %o4, 1, %o4
- bge LOC(divloop)
- tst %o3
- bl,a LOC(got_result)
- ! non-restoring fixup here (one instruction only!)
- sub %o2, 1, %o2
-
-LOC(got_result):
- ! check to see if answer should be < 0
- tst %g3
- bl,a 1f
- sub %g0, %o2, %o2
-1:
+ sra %o0, 31, %o2
+ wr %o2, 0, %y
+ sdivcc %o0, %o1, %o0
+ xnor %o0, %g0, %o2
retl
- mov %o2, %o0
+ movvs %icc, %o2, %o0
END(.div)
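
.div differs from .rem only in which value it hands back, so the interesting part is the overflow fix-up itself. For a sign-extended 32-bit dividend the only sdivcc overflow is INT32_MIN / -1: sdivcc saturates to 0x7fffffff and sets V, and xnor + movvs flip that back to 0x80000000, the usual two's-complement wrap-around. A short C model (again illustrative, not part of the patch; the function name is made up):

#include <stdint.h>

/* Truncating quotient as .div returns it.  */
static int32_t
dot_div_model (int32_t dividend, int32_t divisor)
{
  int64_t quot = (int64_t) dividend / divisor;   /* sdivcc on {%y, %o0} */
  if (quot == (int64_t) INT32_MAX + 1)           /* INT32_MIN / -1      */
    return INT32_MIN;                            /* xnor + movvs result */
  return (int32_t) quot;
}
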
deleted file mode 100644
@@ -1,82 +0,0 @@
-/* sem_post -- post to a POSIX semaphore. Generic futex-using version.
- Copyright (C) 2003-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <atomic.h>
-#include <errno.h>
-#include <sysdep.h>
-#include <lowlevellock.h>
-#include <internaltypes.h>
-#include <semaphore.h>
-#include <futex-internal.h>
-
-#include <shlib-compat.h>
-
-
-/* See sem_wait for an explanation of the algorithm. */
-int
-__new_sem_post (sem_t *sem)
-{
- struct new_sem *isem = (struct new_sem *) sem;
- int private = isem->private;
- unsigned int v;
-
- __sparc32_atomic_do_lock24 (&isem->pad);
-
- v = isem->value;
- if ((v >> SEM_VALUE_SHIFT) == SEM_VALUE_MAX)
- {
- __sparc32_atomic_do_unlock24 (&isem->pad);
-
- __set_errno (EOVERFLOW);
- return -1;
- }
- isem->value = v + (1 << SEM_VALUE_SHIFT);
-
- __sparc32_atomic_do_unlock24 (&isem->pad);
-
- if ((v & SEM_NWAITERS_MASK) != 0)
- futex_wake (&isem->value, 1, private);
-
- return 0;
-}
-versioned_symbol (libpthread, __new_sem_post, sem_post, GLIBC_2_1);
-
-
-#if SHLIB_COMPAT (libpthread, GLIBC_2_0, GLIBC_2_1)
-int
-attribute_compat_text_section
-__old_sem_post (sem_t *sem)
-{
- int *futex = (int *) sem;
-
- /* We must need to synchronize with consumers of this token, so the atomic
- increment must have release MO semantics. */
- atomic_write_barrier ();
- (void) atomic_increment_val (futex);
- /* We always have to assume it is a shared semaphore. */
- int err = lll_futex_wake (futex, 1, LLL_SHARED);
- if (__builtin_expect (err, 0) < 0)
- {
- __set_errno (-err);
- return -1;
- }
- return 0;
-}
-compat_symbol (libpthread, __old_sem_post, sem_post, GLIBC_2_0);
-#endif
deleted file mode 100644
@@ -1,146 +0,0 @@
-/* sem_waitcommon -- wait on a semaphore, shared code.
- Copyright (C) 2003-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <errno.h>
-#include <sysdep.h>
-#include <futex-internal.h>
-#include <internaltypes.h>
-#include <semaphore.h>
-#include <sys/time.h>
-
-#include <pthreadP.h>
-#include <shlib-compat.h>
-#include <atomic.h>
-
-
-static void
-__sem_wait_32_finish (struct new_sem *sem);
-
-static void
-__sem_wait_cleanup (void *arg)
-{
- struct new_sem *sem = (struct new_sem *) arg;
-
- __sem_wait_32_finish (sem);
-}
-
-/* Wait until at least one token is available, possibly with a timeout.
- This is in a separate function in order to make sure gcc
- puts the call site into an exception region, and thus the
- cleanups get properly run. TODO still necessary? Other futex_wait
- users don't seem to need it. */
-static int
-__attribute__ ((noinline))
-do_futex_wait (struct new_sem *sem, const struct timespec *abstime)
-{
- int err;
-
- err = futex_abstimed_wait_cancelable (&sem->value, SEM_NWAITERS_MASK,
- abstime, sem->private);
-
- return err;
-}
-
-/* Fast path: Try to grab a token without blocking. */
-static int
-__new_sem_wait_fast (struct new_sem *sem, int definitive_result)
-{
- unsigned int v;
- int ret = 0;
-
- __sparc32_atomic_do_lock24(&sem->pad);
-
- v = sem->value;
- if ((v >> SEM_VALUE_SHIFT) == 0)
- ret = -1;
- else
- sem->value = v - (1 << SEM_VALUE_SHIFT);
-
- __sparc32_atomic_do_unlock24(&sem->pad);
-
- return ret;
-}
-
-/* Slow path that blocks. */
-static int
-__attribute__ ((noinline))
-__new_sem_wait_slow (struct new_sem *sem, const struct timespec *abstime)
-{
- unsigned int v;
- int err = 0;
-
- __sparc32_atomic_do_lock24(&sem->pad);
-
- sem->nwaiters++;
-
- pthread_cleanup_push (__sem_wait_cleanup, sem);
-
- /* Wait for a token to be available. Retry until we can grab one. */
- v = sem->value;
- do
- {
- if (!(v & SEM_NWAITERS_MASK))
- sem->value = v | SEM_NWAITERS_MASK;
-
- /* If there is no token, wait. */
- if ((v >> SEM_VALUE_SHIFT) == 0)
- {
- __sparc32_atomic_do_unlock24(&sem->pad);
-
- err = do_futex_wait(sem, abstime);
- if (err == ETIMEDOUT || err == EINTR)
- {
- __set_errno (err);
- err = -1;
- goto error;
- }
- err = 0;
-
- __sparc32_atomic_do_lock24(&sem->pad);
-
- /* We blocked, so there might be a token now. */
- v = sem->value;
- }
- }
- /* If there is no token, we must not try to grab one. */
- while ((v >> SEM_VALUE_SHIFT) == 0);
-
- sem->value = v - (1 << SEM_VALUE_SHIFT);
-
- __sparc32_atomic_do_unlock24(&sem->pad);
-
-error:
- pthread_cleanup_pop (0);
-
- __sem_wait_32_finish (sem);
-
- return err;
-}
-
-/* Stop being a registered waiter (non-64b-atomics code only). */
-static void
-__sem_wait_32_finish (struct new_sem *sem)
-{
- __sparc32_atomic_do_lock24(&sem->pad);
-
- if (--sem->nwaiters == 0)
- sem->value &= ~SEM_NWAITERS_MASK;
-
- __sparc32_atomic_do_unlock24(&sem->pad);
-}
deleted file mode 100644
@@ -1 +0,0 @@
-sysdep-CFLAGS += -mcpu=v8
deleted file mode 100644
@@ -1,118 +0,0 @@
-! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
-! add the result to a second limb vector.
-
-! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not,
-! see <https://www.gnu.org/licenses/>.
-
-
-! INPUT PARAMETERS
-! res_ptr o0
-! s1_ptr o1
-! size o2
-! s2_limb o3
-
-#include <sysdep.h>
-
-ENTRY(__mpn_addmul_1)
- ld [%o1+0],%o4 ! 1
- sll %o2,4,%g1
- orcc %g0,%g0,%g2
- mov %o7,%g4 ! Save return address register
- and %g1,(4-1)<<4,%g1
-1: call 2f
- add %o7,3f-1b,%g3
-2: jmp %g3+%g1
- mov %g4,%o7 ! Restore return address register
-
- .align 4
-3:
-LOC(00):
- add %o0,-4,%o0
- b LOC(loop00) /* 4, 8, 12, ... */
- add %o1,-4,%o1
- nop
-LOC(01):
- b LOC(loop01) /* 1, 5, 9, ... */
- nop
- nop
- nop
-LOC(10):
- add %o0,-12,%o0 /* 2, 6, 10, ... */
- b LOC(loop10)
- add %o1,4,%o1
- nop
-LOC(11):
- add %o0,-8,%o0 /* 3, 7, 11, ... */
- b LOC(loop11)
- add %o1,-8,%o1
- nop
-
-LOC(loop):
- addcc %g3,%g2,%g3 ! 1
- ld [%o1+4],%o4 ! 2
- rd %y,%g2 ! 1
- addx %g0,%g2,%g2
- ld [%o0+0],%g1 ! 2
- addcc %g1,%g3,%g3
- st %g3,[%o0+0] ! 1
-LOC(loop00):
- umul %o4,%o3,%g3 ! 2
- ld [%o0+4],%g1 ! 2
- addxcc %g3,%g2,%g3 ! 2
- ld [%o1+8],%o4 ! 3
- rd %y,%g2 ! 2
- addx %g0,%g2,%g2
- nop
- addcc %g1,%g3,%g3
- st %g3,[%o0+4] ! 2
-LOC(loop11):
- umul %o4,%o3,%g3 ! 3
- addxcc %g3,%g2,%g3 ! 3
- ld [%o1+12],%o4 ! 4
- rd %y,%g2 ! 3
- add %o1,16,%o1
- addx %g0,%g2,%g2
- ld [%o0+8],%g1 ! 2
- addcc %g1,%g3,%g3
- st %g3,[%o0+8] ! 3
-LOC(loop10):
- umul %o4,%o3,%g3 ! 4
- addxcc %g3,%g2,%g3 ! 4
- ld [%o1+0],%o4 ! 1
- rd %y,%g2 ! 4
- addx %g0,%g2,%g2
- ld [%o0+12],%g1 ! 2
- addcc %g1,%g3,%g3
- st %g3,[%o0+12] ! 4
- add %o0,16,%o0
- addx %g0,%g2,%g2
-LOC(loop01):
- addcc %o2,-4,%o2
- bg LOC(loop)
- umul %o4,%o3,%g3 ! 1
-
- addcc %g3,%g2,%g3 ! 4
- rd %y,%g2 ! 4
- addx %g0,%g2,%g2
- ld [%o0+0],%g1 ! 2
- addcc %g1,%g3,%g3
- st %g3,[%o0+0] ! 4
- retl
- addx %g0,%g2,%o0
-
-END(__mpn_addmul_1)
deleted file mode 100644
@@ -1,13 +0,0 @@
-/*
- * Sparc v8 has multiply.
- */
-
-#include <sysdep.h>
-
-ENTRY(.mul)
-
- smul %o0, %o1, %o0
- retl
- rd %y, %o1
-
-END(.mul)
deleted file mode 100644
@@ -1,102 +0,0 @@
-! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
-! store the product in a second limb vector.
-
-! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not,
-! see <https://www.gnu.org/licenses/>.
-
-
-! INPUT PARAMETERS
-! res_ptr o0
-! s1_ptr o1
-! size o2
-! s2_limb o3
-
-#include <sysdep.h>
-
-ENTRY(__mpn_mul_1)
- sll %o2,4,%g1
- mov %o7,%g4 ! Save return address register
- and %g1,(4-1)<<4,%g1
-1: call 2f
- add %o7,3f-1b,%g3
-2: mov %g4,%o7 ! Restore return address register
- jmp %g3+%g1
- ld [%o1+0],%o4 ! 1
-
- .align 4
-3:
-LOC(00):
- add %o0,-4,%o0
- add %o1,-4,%o1
- b LOC(loop00) /* 4, 8, 12, ... */
- orcc %g0,%g0,%g2
-LOC(01):
- b LOC(loop01) /* 1, 5, 9, ... */
- orcc %g0,%g0,%g2
- nop
- nop
-LOC(10):
- add %o0,-12,%o0 /* 2, 6, 10, ... */
- add %o1,4,%o1
- b LOC(loop10)
- orcc %g0,%g0,%g2
- nop
-LOC(11):
- add %o0,-8,%o0 /* 3, 7, 11, ... */
- add %o1,-8,%o1
- b LOC(loop11)
- orcc %g0,%g0,%g2
-
-LOC(loop):
- addcc %g3,%g2,%g3 ! 1
- ld [%o1+4],%o4 ! 2
- st %g3,[%o0+0] ! 1
- rd %y,%g2 ! 1
-LOC(loop00):
- umul %o4,%o3,%g3 ! 2
- addxcc %g3,%g2,%g3 ! 2
- ld [%o1+8],%o4 ! 3
- st %g3,[%o0+4] ! 2
- rd %y,%g2 ! 2
-LOC(loop11):
- umul %o4,%o3,%g3 ! 3
- addxcc %g3,%g2,%g3 ! 3
- ld [%o1+12],%o4 ! 4
- add %o1,16,%o1
- st %g3,[%o0+8] ! 3
- rd %y,%g2 ! 3
-LOC(loop10):
- umul %o4,%o3,%g3 ! 4
- addxcc %g3,%g2,%g3 ! 4
- ld [%o1+0],%o4 ! 1
- st %g3,[%o0+12] ! 4
- add %o0,16,%o0
- rd %y,%g2 ! 4
- addx %g0,%g2,%g2
-LOC(loop01):
- addcc %o2,-4,%o2
- bg LOC(loop)
- umul %o4,%o3,%g3 ! 1
-
- addcc %g3,%g2,%g3 ! 4
- st %g3,[%o0+0] ! 4
- rd %y,%g2 ! 4
- retl
- addx %g0,%g2,%o0
-
-END(__mpn_mul_1)
deleted file mode 100644
@@ -1,21 +0,0 @@
-/*
- * Sparc v8 has divide.
- */
-
-#include <sysdep.h>
-
-ENTRY(.rem)
-
- sra %o0, 31, %o2
- wr %o2, 0, %y
- nop
- nop
- nop
- sdivcc %o0, %o1, %o2
- bvs,a 1f
- xnor %o2, %g0, %o2
-1: smul %o2, %o1, %o2
- retl
- sub %o0, %o2, %o0
-
-END(.rem)
deleted file mode 100644
@@ -1,20 +0,0 @@
-/*
- * Sparc v8 has divide.
- */
-
-#include <sysdep.h>
-
-ENTRY(.div)
-
- sra %o0, 31, %o2
- wr %o2, 0, %y
- nop
- nop
- nop
- sdivcc %o0, %o1, %o0
- bvs,a 1f
- xnor %o0, %g0, %o0
-1: retl
- nop
-
-END(.div)
deleted file mode 100644
@@ -1,57 +0,0 @@
-! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
-! subtract the result from a second limb vector.
-
-! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not,
-! see <https://www.gnu.org/licenses/>.
-
-
-! INPUT PARAMETERS
-! res_ptr o0
-! s1_ptr o1
-! size o2
-! s2_limb o3
-
-#include <sysdep.h>
-
-ENTRY(__mpn_submul_1)
- sub %g0,%o2,%o2 ! negate ...
- sll %o2,2,%o2 ! ... and scale size
- sub %o1,%o2,%o1 ! o1 is offset s1_ptr
- sub %o0,%o2,%g1 ! g1 is offset res_ptr
-
- mov 0,%o0 ! clear cy_limb
-
-LOC(loop):
- ld [%o1+%o2],%o4
- ld [%g1+%o2],%g2
- umul %o4,%o3,%o5
- rd %y,%g3
- addcc %o5,%o0,%o5
- addx %g3,0,%o0
- subcc %g2,%o5,%g2
- addx %o0,0,%o0
- st %g2,[%g1+%o2]
-
- addcc %o2,4,%o2
- bne LOC(loop)
- nop
-
- retl
- nop
-
-END(__mpn_submul_1)
deleted file mode 100644
@@ -1,16 +0,0 @@
-/*
- * Sparc v8 has divide.
- */
-
-#include <sysdep.h>
-
-ENTRY(.udiv)
-
- wr %g0, 0, %y
- nop
- nop
- retl
- udiv %o0, %o1, %o0
-
-END(.udiv)
-strong_alias (.udiv, __wrap_.udiv)
deleted file mode 100644
@@ -1,13 +0,0 @@
-/*
- * Sparc v8 has multiply.
- */
-
-#include <sysdep.h>
-
-ENTRY(.umul)
-
- umul %o0, %o1, %o0
- retl
- rd %y, %o1
-
-END(.umul)
deleted file mode 100644
@@ -1,18 +0,0 @@
-/*
- * Sparc v8 has divide.
- */
-
-#include <sysdep.h>
-
-ENTRY(.urem)
-
- wr %g0, 0, %y
- nop
- nop
- nop
- udiv %o0, %o1, %o2
- umul %o2, %o1, %o2
- retl
- sub %o0, %o2, %o0
-
-END(.urem)
deleted file mode 100644
@@ -1,81 +0,0 @@
-! SPARC v9 32-bit __mpn_addmul_1 -- Multiply a limb vector with a limb
-! and add the result to a second limb vector.
-!
-! Copyright (C) 2013-2019 Free Software Foundation, Inc.
-! This file is part of the GNU C Library.
-! Contributed by David S. Miller <davem@davemloft.net>
-!
-! The GNU C Library is free software; you can redistribute it and/or
-! modify it under the terms of the GNU Lesser General Public
-! License as published by the Free Software Foundation; either
-! version 2.1 of the License, or (at your option) any later version.
-!
-! The GNU C Library is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-! Lesser General Public License for more details.
-!
-! You should have received a copy of the GNU Lesser General Public
-! License along with the GNU C Library; if not, see
-! <https://www.gnu.org/licenses/>.
-
-#include <sysdep.h>
-
-#define res_ptr %i0
-#define s1_ptr %i1
-#define sz_arg %i2
-#define s2l_arg %i3
-#define sz %o4
-#define carry %o5
-#define s2_limb %g1
-#define tmp1 %l0
-#define tmp2 %l1
-#define tmp3 %l2
-#define tmp4 %l3
-#define tmp64_1 %g3
-#define tmp64_2 %o3
-
-ENTRY(__mpn_addmul_1)
- save %sp, -96, %sp
- srl sz_arg, 0, sz
- srl s2l_arg, 0, s2_limb
- subcc sz, 1, sz
- be,pn %icc, .Lfinal_limb
- clr carry
-
-.Lloop:
- lduw [s1_ptr + 0x00], tmp1
- lduw [res_ptr + 0x00], tmp3
- lduw [s1_ptr + 0x04], tmp2
- lduw [res_ptr + 0x04], tmp4
- mulx tmp1, s2_limb, tmp64_1
- add s1_ptr, 8, s1_ptr
- mulx tmp2, s2_limb, tmp64_2
- sub sz, 2, sz
- add res_ptr, 8, res_ptr
- add tmp3, tmp64_1, tmp64_1
- add carry, tmp64_1, tmp64_1
- stw tmp64_1, [res_ptr - 0x08]
- srlx tmp64_1, 32, carry
- add tmp4, tmp64_2, tmp64_2
- add carry, tmp64_2, tmp64_2
- stw tmp64_2, [res_ptr - 0x04]
- brgz sz, .Lloop
- srlx tmp64_2, 32, carry
-
- brlz,pt sz, .Lfinish
- nop
-
-.Lfinal_limb:
- lduw [s1_ptr + 0x00], tmp1
- lduw [res_ptr + 0x00], tmp3
- mulx tmp1, s2_limb, tmp64_1
- add tmp3, tmp64_1, tmp64_1
- add carry, tmp64_1, tmp64_1
- stw tmp64_1, [res_ptr + 0x00]
- srlx tmp64_1, 32, carry
-
-.Lfinish:
- jmpl %i7 + 0x8, %g0
- restore carry, 0, %o0
-END(__mpn_addmul_1)
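The removed v9 32-bit __mpn_addmul_1 forms full 64-bit products with mulx and
handles two limbs per iteration; the carry is simply the high half of each
64-bit sum.  A one-limb-at-a-time C sketch of the same contract:

    #include <stdint.h>
    #include <stddef.h>

    /* res[i] += s1[i] * limb with carry propagation; returns the final
       carry.  The 64-bit sum below cannot overflow.  Sketch only.  */
    static uint32_t
    ref_addmul_1 (uint32_t *res, const uint32_t *s1, size_t n, uint32_t limb)
    {
      uint32_t carry = 0;
      for (size_t i = 0; i < n; i++)
        {
          uint64_t t = (uint64_t) s1[i] * limb + res[i] + carry;
          res[i] = (uint32_t) t;        /* stw of the low word */
          carry = (uint32_t) (t >> 32); /* srlx ..., 32, carry */
        }
      return carry;
    }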
deleted file mode 100644
@@ -1,108 +0,0 @@
-/* Atomic operations. sparcv9 version.
- Copyright (C) 2003-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <stdint.h>
-
-typedef int8_t atomic8_t;
-typedef uint8_t uatomic8_t;
-typedef int_fast8_t atomic_fast8_t;
-typedef uint_fast8_t uatomic_fast8_t;
-
-typedef int16_t atomic16_t;
-typedef uint16_t uatomic16_t;
-typedef int_fast16_t atomic_fast16_t;
-typedef uint_fast16_t uatomic_fast16_t;
-
-typedef int32_t atomic32_t;
-typedef uint32_t uatomic32_t;
-typedef int_fast32_t atomic_fast32_t;
-typedef uint_fast32_t uatomic_fast32_t;
-
-typedef int64_t atomic64_t;
-typedef uint64_t uatomic64_t;
-typedef int_fast64_t atomic_fast64_t;
-typedef uint_fast64_t uatomic_fast64_t;
-
-typedef intptr_t atomicptr_t;
-typedef uintptr_t uatomicptr_t;
-typedef intmax_t atomic_max_t;
-typedef uintmax_t uatomic_max_t;
-
-#define __HAVE_64B_ATOMICS 0
-#define USE_ATOMIC_COMPILER_BUILTINS 0
-
-/* XXX Is this actually correct? */
-#define ATOMIC_EXCHANGE_USES_CAS 0
-
-
-#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
- (abort (), (__typeof (*mem)) 0)
-
-#define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval) \
- (abort (), (__typeof (*mem)) 0)
-
-#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
-({ \
- __typeof (*(mem)) __acev_tmp; \
- __typeof (mem) __acev_mem = (mem); \
- if (__builtin_constant_p (oldval) && (oldval) == 0) \
- __asm __volatile ("cas [%3], %%g0, %0" \
- : "=r" (__acev_tmp), "=m" (*__acev_mem) \
- : "m" (*__acev_mem), "r" (__acev_mem), \
- "0" (newval) : "memory"); \
- else \
- __asm __volatile ("cas [%4], %2, %0" \
- : "=r" (__acev_tmp), "=m" (*__acev_mem) \
- : "r" (oldval), "m" (*__acev_mem), "r" (__acev_mem), \
- "0" (newval) : "memory"); \
- __acev_tmp; })
-
-/* This can be implemented if needed. */
-#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
- (abort (), (__typeof (*mem)) 0)
-
-#define atomic_exchange_acq(mem, newvalue) \
- ({ __typeof (*(mem)) __oldval; \
- __typeof (mem) __memp = (mem); \
- __typeof (*(mem)) __value = (newvalue); \
- \
- if (sizeof (*(mem)) == 4) \
- __asm ("swap %0, %1" \
- : "=m" (*__memp), "=r" (__oldval) \
- : "m" (*__memp), "1" (__value) : "memory"); \
- else \
- abort (); \
- __oldval; })
-
-#define atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \
- atomic_compare_and_exchange_val_acq (mem, newval, oldval)
-
-#define atomic_exchange_24_rel(mem, newval) \
- atomic_exchange_rel (mem, newval)
-
-#define atomic_full_barrier() \
- __asm __volatile ("membar #LoadLoad | #LoadStore" \
- " | #StoreLoad | #StoreStore" : : : "memory")
-#define atomic_read_barrier() \
- __asm __volatile ("membar #LoadLoad | #LoadStore" : : : "memory")
-#define atomic_write_barrier() \
- __asm __volatile ("membar #LoadStore | #StoreStore" : : : "memory")
-
-extern void __cpu_relax (void);
-#define atomic_spin_nop() __cpu_relax ()
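The removed sparcv9 atomic-machine.h builds everything on the 32-bit cas
instruction (plus swap for exchange); 64-bit atomics are unavailable.  A
typical consumer spins on compare-and-exchange, shown here with C11 atomics
rather than the glibc-internal macro names:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Atomically add delta to *mem using only a 32-bit CAS, which is
       all the removed header provides.  Returns the previous value.  */
    static uint32_t
    fetch_add_via_cas (_Atomic uint32_t *mem, uint32_t delta)
    {
      uint32_t old = atomic_load_explicit (mem, memory_order_relaxed);
      while (!atomic_compare_exchange_weak_explicit (mem, &old, old + delta,
                                                     memory_order_acquire,
                                                     memory_order_relaxed))
        ;   /* on failure, 'old' is reloaded with the current value */
      return old;
    }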
deleted file mode 100644
@@ -1,7 +0,0 @@
-/* Private macros for guiding the backtrace implementation, sparc32 v9
- version. */
-
-#define backtrace_flush_register_windows() \
- asm volatile ("flushw")
-
-#define BACKTRACE_STACK_BIAS 0
deleted file mode 100644
@@ -1 +0,0 @@
-/* bzero is in memset.S */
deleted file mode 100644
@@ -1,17 +0,0 @@
-/*
- * Sparc v9 has multiply.
- */
-
-#include <sysdep.h>
-
- .text
- .align 32
-ENTRY(.mul)
-
- sra %o0, 0, %o0
- sra %o1, 0, %o1
- mulx %o0, %o1, %o0
- retl
- srax %o0, 32, %o1
-
-END(.mul)
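The v9 .mul stub above sign-extends both 32-bit operands, multiplies with
mulx, and returns the low word in %o0 and the high word in %o1.  In C terms
(sketch of the semantics only):

    #include <stdint.h>

    /* Split 64-bit signed product, matching the %o0/%o1 return pair.  */
    static void
    ref_smul (int32_t a, int32_t b, uint32_t *lo, int32_t *hi)
    {
      int64_t p = (int64_t) a * b;  /* sra, sra, mulx         */
      *lo = (uint32_t) p;           /* low 32 bits in %o0     */
      *hi = (int32_t) (p >> 32);    /* srax %o0, 32 gives %o1 */
    }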
deleted file mode 100644
@@ -1,30 +0,0 @@
-/* Float absolute value, sparc32+v9 version.
- Copyright (C) 2011-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-#include <libm-alias-double.h>
-
-ENTRY (__fabs)
- st %o0, [%sp+72]
- st %o1, [%sp+76]
- ldd [%sp+72], %f0
- retl
- fabsd %f0, %f0
-END (__fabs)
-libm_alias_double (__fabs, fabs)
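The removed __fabs bounces its argument through the stack because the 32-bit
SPARC ABI passes a double in the integer registers %o0/%o1, and the value
must reach an FP register before fabsd can run.  The same word reassembly,
sketched in C (big-endian word order assumed, as on SPARC):

    #include <stdint.h>
    #include <string.h>

    /* Rebuild a double from the two argument words, the C analogue of
       the st/st followed by ldd in the deleted assembly.  */
    static double
    double_from_words (uint32_t hi, uint32_t lo)
    {
      uint64_t bits = ((uint64_t) hi << 32) | lo;
      double d;
      memcpy (&d, &bits, sizeof d);
      return d;
    }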
deleted file mode 100644
@@ -1,4 +0,0 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
-#include <sparc64/memchr.S>
deleted file mode 100644
@@ -1,4 +0,0 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
-#include <sparc64/memcpy.S>
deleted file mode 100644
@@ -1,4 +0,0 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
-#include <sparc64/memset.S>
deleted file mode 100644
@@ -1,70 +0,0 @@
-! SPARC v9 32-bit __mpn_mul_1 -- Multiply a limb vector with a single
-! limb and store the product in a second limb vector.
-!
-! Copyright (C) 2013-2019 Free Software Foundation, Inc.
-! This file is part of the GNU C Library.
-! Contributed by David S. Miller <davem@davemloft.net>
-!
-! The GNU C Library is free software; you can redistribute it and/or
-! modify it under the terms of the GNU Lesser General Public
-! License as published by the Free Software Foundation; either
-! version 2.1 of the License, or (at your option) any later version.
-!
-! The GNU C Library is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-! Lesser General Public License for more details.
-!
-! You should have received a copy of the GNU Lesser General Public
-! License along with the GNU C Library; if not, see
-! <https://www.gnu.org/licenses/>.
-
-#include <sysdep.h>
-
-#define res_ptr %o0
-#define s1_ptr %o1
-#define sz %o2
-#define s2_limb %o3
-#define carry %o5
-#define tmp1 %g1
-#define tmp2 %g2
-#define tmp3 %g3
-#define tmp4 %o4
-
-ENTRY(__mpn_mul_1)
- srl sz, 0, sz
- srl s2_limb, 0, s2_limb
- subcc sz, 1, sz
- be,pn %icc, .Lfinal_limb
- clr carry
-
-.Lloop:
- lduw [s1_ptr + 0x00], tmp1
- lduw [s1_ptr + 0x04], tmp2
- mulx tmp1, s2_limb, tmp3
- add s1_ptr, 8, s1_ptr
- mulx tmp2, s2_limb, tmp4
- sub sz, 2, sz
- add res_ptr, 8, res_ptr
- add carry, tmp3, tmp3
- stw tmp3, [res_ptr - 0x08]
- srlx tmp3, 32, carry
- add carry, tmp4, tmp4
- stw tmp4, [res_ptr - 0x04]
- brgz sz, .Lloop
- srlx tmp4, 32, carry
-
- brlz,pt sz, .Lfinish
- nop
-
-.Lfinal_limb:
- lduw [s1_ptr + 0x00], tmp1
- mulx tmp1, s2_limb, tmp3
- add carry, tmp3, tmp3
- stw tmp3, [res_ptr + 0x00]
- srlx tmp3, 32, carry
-
-.Lfinish:
- retl
- mov carry, %o0
-END(__mpn_mul_1)
deleted file mode 100644
@@ -1 +0,0 @@
-#include <nptl/pthread_barrier_wait.c>
deleted file mode 100644
@@ -1 +0,0 @@
-#include <sysdeps/sparc/sparc64/pthread_spin_lock.S>
deleted file mode 100644
@@ -1 +0,0 @@
-#include <sysdeps/sparc/sparc64/pthread_spin_trylock.S>
deleted file mode 100644
@@ -1,22 +0,0 @@
-/*
- * Sparc v9 has divide.
- * As divx takes 68 cycles and sdivcc only 36,
- * we use sdivcc eventhough it is deprecated.
- */
-
-#include <sysdep.h>
-
- .text
- .align 32
-ENTRY(.rem)
-
- sra %o0, 31, %o2
- wr %o2, 0, %y
- sdivcc %o0, %o1, %o2
- xnor %o2, %g0, %o3
- movvs %icc, %o3, %o2
- smul %o2, %o1, %o2
- retl
- sub %o0, %o2, %o0
-
-END(.rem)
deleted file mode 100644
@@ -1,20 +0,0 @@
-/*
- * Sparc v9 has divide.
- * As divx takes 68 cycles and sdivcc only 36,
- * we use sdivcc eventhough it is deprecated.
- */
-
-#include <sysdep.h>
-
- .text
- .align 32
-ENTRY(.div)
-
- sra %o0, 31, %o2
- wr %o2, 0, %y
- sdivcc %o0, %o1, %o0
- xnor %o0, %g0, %o2
- retl
- movvs %icc, %o2, %o0
-
-END(.div)
deleted file mode 100644
@@ -1 +0,0 @@
-#include <nptl/sem_post.c>
deleted file mode 100644
@@ -1 +0,0 @@
-#include <nptl/sem_waitcommon.c>
deleted file mode 100644
@@ -1,4 +0,0 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
-#include <sparc64/stpcpy.S>
deleted file mode 100644
@@ -1,4 +0,0 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
-#include <sparc64/strcat.S>
deleted file mode 100644
@@ -1,4 +0,0 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
-#include <sparc64/strchr.S>
deleted file mode 100644
@@ -1,4 +0,0 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
-#include <sparc64/strcmp.S>
deleted file mode 100644
@@ -1,4 +0,0 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
-#include <sparc64/strcpy.S>
deleted file mode 100644
@@ -1 +0,0 @@
-#include <sparc64/strlen.S>
deleted file mode 100644
@@ -1 +0,0 @@
-/* strrchr is in strchr.S */
deleted file mode 100644
@@ -1,82 +0,0 @@
-! SPARC v9 32-bit __mpn_submul_1 -- Multiply a limb vector with a limb
-! and subtract the result from a second limb vector.
-!
-! Copyright (C) 2013-2019 Free Software Foundation, Inc.
-! This file is part of the GNU C Library.
-! Contributed by David S. Miller <davem@davemloft.net>
-!
-! The GNU C Library is free software; you can redistribute it and/or
-! modify it under the terms of the GNU Lesser General Public
-! License as published by the Free Software Foundation; either
-! version 2.1 of the License, or (at your option) any later version.
-!
-! The GNU C Library is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-! Lesser General Public License for more details.
-!
-! You should have received a copy of the GNU Lesser General Public
-! License along with the GNU C Library; if not, see
-! <https://www.gnu.org/licenses/>.
-
-#include <sysdep.h>
-
-#define res_ptr %i0
-#define s1_ptr %i1
-#define sz_arg %i2
-#define s2l_arg %i3
-#define sz %o4
-#define carry %o5
-#define s2_limb %g1
-#define tmp1 %l0
-#define tmp2 %l1
-#define tmp3 %l2
-#define tmp4 %l3
-#define tmp64_1 %g3
-#define tmp64_2 %o3
-
-ENTRY(__mpn_submul_1)
- save %sp, -96, %sp
- srl sz_arg, 0, sz
- srl s2l_arg, 0, s2_limb
- subcc sz, 1, sz
- be,pn %icc, .Lfinal_limb
- subcc %g0, 0, carry
-
-.Lloop:
- lduw [s1_ptr + 0x00], tmp1
- lduw [res_ptr + 0x00], tmp3
- lduw [s1_ptr + 0x04], tmp2
- lduw [res_ptr + 0x04], tmp4
- mulx tmp1, s2_limb, tmp64_1
- add s1_ptr, 8, s1_ptr
- mulx tmp2, s2_limb, tmp64_2
- sub sz, 2, sz
- add res_ptr, 8, res_ptr
- addx carry, tmp64_1, tmp64_1
- srlx tmp64_1, 32, carry
- subcc tmp3, tmp64_1, tmp64_1
- stw tmp64_1, [res_ptr - 0x08]
- addx carry, tmp64_2, tmp64_2
- srlx tmp64_2, 32, carry
- subcc tmp4, tmp64_2, tmp64_2
- brgz sz, .Lloop
- stw tmp64_2, [res_ptr - 0x04]
-
- brlz,pt sz, .Lfinish
- nop
-
-.Lfinal_limb:
- lduw [s1_ptr + 0x00], tmp1
- lduw [res_ptr + 0x00], tmp3
- mulx tmp1, s2_limb, tmp64_1
- addx carry, tmp64_1, tmp64_1
- srlx tmp64_1, 32, carry
- subcc tmp3, tmp64_1, tmp64_1
- stw tmp64_1, [res_ptr + 0x00]
-
-.Lfinish:
- addx carry, 0, carry
- jmpl %i7 + 0x8, %g0
- restore carry, 0, %o0
-END(__mpn_submul_1)
deleted file mode 100644
@@ -1,18 +0,0 @@
-/*
- * Sparc v9 has divide.
- * As divx takes 68 cycles and udiv only 37,
- * we use udiv eventhough it is deprecated.
- */
-
-#include <sysdep.h>
-
- .text
- .align 32
-ENTRY(.udiv)
-
- wr %g0, 0, %y
- retl
- udiv %o0, %o1, %o0
-
-END(.udiv)
-strong_alias (.udiv, __wrap_.udiv)
deleted file mode 100644
@@ -1,17 +0,0 @@
-/*
- * Sparc v9 has multiply.
- */
-
-#include <sysdep.h>
-
- .text
- .align 32
-ENTRY(.umul)
-
- srl %o0, 0, %o0
- srl %o1, 0, %o1
- mulx %o0, %o1, %o0
- retl
- srlx %o0, 32, %o1
-
-END(.umul)
deleted file mode 100644
@@ -1,19 +0,0 @@
-/*
- * Sparc v9 has divide.
- * As divx takes 68 cycles and udiv only 37,
- * we use udiv eventhough it is deprecated.
- */
-
-#include <sysdep.h>
-
- .text
- .align 32
-ENTRY(.urem)
-
- wr %g0, 0, %y
- udiv %o0, %o1, %o2
- umul %o2, %o1, %o2
- retl
- sub %o0, %o2, %o0
-
-END(.urem)
@@ -1,166 +1,4 @@
-/* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
- For SPARC v7.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x01010101) & (~xword) & 0x80808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 4
-
-ENTRY(__stpcpy)
- andcc %o1, 3, %g0
- be 20f
- sethi %hi(0x80808080), %o4
-
- ldub [%o1], %o5
- stb %o5, [%o0]
- cmp %o5, 0
- add %o0, 1, %o0
- be 1f
- add %o1, 1, %o1
- andcc %o1, 3, %g0
- be 4f
- or %o4, %lo(0x80808080), %o3
- ldub [%o1], %o5
- stb %o5, [%o0]
- cmp %o5, 0
- add %o0, 1, %o0
- be 1f
- add %o1, 1, %o1
- andcc %o1, 3, %g0
- be 5f
- sethi %hi(0x01010101), %o4
- ldub [%o1], %o5
- stb %o5, [%o0]
- cmp %o5, 0
- add %o0, 1, %o0
- be 1f
- add %o1, 1, %o1
- b 6f
- or %o4, %lo(0x01010101), %o2
-1: retl
- add %o0, -1, %o0
-
-20: or %o4, %lo(0x80808080), %o3
-4: sethi %hi(0x01010101), %o4
-5: or %o4, %lo(0x01010101), %o2
-6: andcc %o0, 3, %g0
- bne 16f
- sub %g0, 4, %g1
-
-11: add %g1, 4, %g1
- ld [%o1 + %g1], %o5
- sub %o5, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o5, %o4
-#endif
- andcc %o4, %o3, %g0
- be,a 11b
- st %o5, [%o0 + %g1]
-
- /* Check every byte. */
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be 14f
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be 13f
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- be 12f
- andcc %o5, 0xff, %g0
- bne 11b
- st %o5, [%o0 + %g1]
- add %o0, %g1, %o0
- retl
- add %o0, 3, %o0
-12: srl %o5, 16, %o5
- sth %o5, [%o0 + %g1]
- add %g1, 2, %g1
- stb %g0, [%o0 + %g1]
- retl
- add %o0, %g1, %o0
-13: srl %o5, 16, %o5
- sth %o5, [%o0 + %g1]
- add %g1, 1, %g1
- retl
- add %o0, %g1, %o0
-14: stb %g0, [%o0 + %g1]
- retl
- add %o0, %g1, %o0
-
-15: srl %o5, 24, %o4
- srl %o5, 16, %g1
- stb %o4, [%o0]
- srl %o5, 8, %g4
- stb %g1, [%o0 + 1]
- stb %g4, [%o0 + 2]
- stb %o5, [%o0 + 3]
- add %o0, 4, %o0
-16: ld [%o1], %o5
- sub %o5, %o2, %o4
- andcc %o4, %o3, %g0
- be 15b
- add %o1, 4, %o1
-
- /* Check every byte. */
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g4
- be 19f
- stb %g4, [%o0]
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g4
- be 18f
- stb %g4, [%o0 + 1]
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g4
- be 17f
- stb %g4, [%o0 + 2]
- andcc %o5, 0xff, %g4
- stb %g4, [%o0 + 3]
- bne 16b
- add %o0, 4, %o0
- retl
- sub %o0, 1, %o0
-17: retl
- add %o0, 2, %o0
-18: retl
- add %o0, 1, %o0
-19: retl
- nop
-END(__stpcpy)
-
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/stpcpy.S>
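The deleted v7 stpcpy (and the other v7 string routines removed below) leans
on the word-at-a-time zero-byte test described in its header comment.  Both
variants, as a C sketch of the technique:

    #include <stdint.h>

    /* Fast test: nonzero if any byte of x may be zero; bytes in
       0x81-0xff can trigger false positives.  */
    static int
    may_have_zero_byte (uint32_t x)
    {
      return ((x - 0x01010101u) & 0x80808080u) != 0;
    }

    /* Precise test (the EIGHTBIT_NOT_RARE variant): one extra
       operation, no false positives.  */
    static int
    has_zero_byte (uint32_t x)
    {
      return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
    }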
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/stpncpy.S
rename to sysdeps/sparc/sparc32/stpncpy.S
@@ -1,352 +1,4 @@
-/* strcat (dest, src) -- Append SRC on the end of DEST.
- For SPARC v7.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x01010101) & (~xword) & 0x80808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 4
-
-ENTRY(strcat)
- mov %o0, %g2
- andcc %o0, 3, %g0
- be 30f
- sethi %hi(0x80808080), %o4
-
- ldub [%o0], %o5
- cmp %o5, 0
- be 1f
- add %o0, 1, %o0
- andcc %o0, 3, %g0
- be 7f
- or %o4, %lo(0x80808080), %o3
- ldub [%o0], %o5
- cmp %o5, 0
- be 2f
- add %o0, 1, %o0
- andcc %o0, 3, %g0
- be 8f
- sethi %hi(0x01010101), %o4
- ldub [%o0], %o5
- cmp %o5, 0
- be 3f
- add %o0, 1, %o0
- b 9f
- or %o4, %lo(0x01010101), %o2
-1: or %o4, %lo(0x80808080), %o3
-2: sethi %hi(0x01010101), %o4
-3: or %o4, %lo(0x01010101), %o2
- b 3f
- sub %o0, 1, %o0
-
-30: or %o4, %lo(0x80808080), %o3
-7: sethi %hi(0x01010101), %o4
-8: or %o4, %lo(0x01010101), %o2
-9: ld [%o0], %o5
-7: sub %o5, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o5, %o4
-#endif
- andcc %o4, %o3, %g0
- be 9b
- add %o0, 4, %o0
-
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be 3f
- add %o0, -4, %o0
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be 3f
- add %o0, 1, %o0
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- be 3f
- add %o0, 1, %o0
- andcc %o5, 0xff, %g0
- add %o0, 2, %o0
- bne,a 7b
- ld [%o0], %o5
- sub %o0, 1, %o0
-3: andcc %o1, 3, %o4
- be 4f
- nop
-
- cmp %o4, 2
- be 11f
- cmp %o4, 3
- ldub [%o1], %o5
- add %o1, 1, %o1
- stb %o5, [%o0]
- be 13f
- cmp %o5, 0
- be 0f
- add %o0, 1, %o0
-11: lduh [%o1], %o5
- add %o1, 2, %o1
- srl %o5, 8, %o4
- cmp %o4, 0
- stb %o4, [%o0]
- bne,a 12f
- stb %o5, [%o0 + 1]
- retl
- mov %g2, %o0
-12: andcc %o5, 0xff, %o5
- bne 4f
- add %o0, 2, %o0
- retl
- mov %g2, %o0
-13: bne 4f
- add %o0, 1, %o0
- retl
- mov %g2, %o0
-
-4: andcc %o0, 3, %g3
- bne 12f
-1: ld [%o1], %o5
- add %o1, 4, %o1
- sub %o5, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o5, %o4
-#endif
- add %o0, 4, %o0
- andcc %o4, %o3, %g0
- be,a 1b
- st %o5, [%o0 - 4]
-
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be 1f
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be 2f
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- be 3f
- andcc %o5, 0xff, %g0
- bne 1b
- st %o5, [%o0 - 4]
- retl
- mov %g2, %o0
-3: srl %o5, 16, %o5
- sth %o5, [%o0 - 4]
- stb %g0, [%o0 - 2]
- retl
- mov %g2, %o0
-2: srl %o5, 16, %o5
- sth %o5, [%o0 - 4]
- retl
- mov %g2, %o0
-1: stb %g0, [%o0 - 4]
- retl
- mov %g2, %o0
-
-12: add %o1, 4, %o1
- sub %o5, %o2, %o4
- cmp %g3, 2
- be 2f
- cmp %g3, 3
- be 3f
- andcc %o4, %o3, %g0
- bne 5f
- srl %o5, 24, %g5
- stb %g5, [%o0]
- sub %o0, 1, %o0
- srl %o5, 8, %g5
- sth %g5, [%o0 + 2]
-1: add %o0, 4, %o0
-4: sll %o5, 24, %g6
- ld [%o1], %o5
- add %o1, 4, %o1
- srl %o5, 8, %g5
- sub %o5, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o5, %o4
-#endif
- or %g5, %g6, %g5
- andcc %o4, %o3, %g0
- be,a 1b
- st %g5, [%o0]
- srl %o5, 24, %o4
- andcc %o4, 0xff, %g0
- be 6f
- srl %o5, 16, %o4
- andcc %o4, 0xff, %g0
- be 7f
- srl %o5, 8, %o4
- st %g5, [%o0]
- andcc %o4, 0xff, %g0
- be 0f
- andcc %o5, 0xff, %g0
-1: bne 4b
- add %o0, 4, %o0
-9: stb %g0, [%o0]
-0: retl
- mov %g2, %o0
-
-6: srl %g5, 16, %g5
- sth %g5, [%o0]
- retl
- mov %g2, %o0
-
-7: srl %g5, 16, %g5
- sth %g5, [%o0]
- stb %g0, [%o0 + 2]
- retl
- mov %g2, %o0
-
-5: andcc %g5, 0xff, %g4
- be 9b
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be 7f
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- stb %g4, [%o0]
- sth %g5, [%o0 + 1]
- sub %o0, 1, %o0
- bne 1b
- andcc %o5, 0xff, %g0
- retl
- mov %g2, %o0
-
-7: stb %g4, [%o0]
- stb %g0, [%o0 + 1]
- retl
- mov %g2, %o0
-
-2: andcc %o4, %o3, %g0
- bne 5f
- srl %o5, 16, %g5
- sth %g5, [%o0]
- sub %o0, 2, %o0
-1: add %o0, 4, %o0
-4: sll %o5, 16, %g6
- ld [%o1], %o5
- add %o1, 4, %o1
- srl %o5, 16, %g5
- sub %o5, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o5, %o4
-#endif
- or %g5, %g6, %g5
- andcc %o4, %o3, %g0
- be,a 1b
- st %g5, [%o0]
- srl %o5, 24, %o4
- andcc %o4, 0xff, %g0
- be 7f
- srl %o5, 16, %o4
- st %g5, [%o0]
- andcc %o4, 0xff, %g0
- be 0b
- srl %o5, 8, %o4
-1: andcc %o4, 0xff, %g0
- be 8f
- andcc %o5, 0xff, %g0
- bne 4b
- add %o0, 4, %o0
- sth %o5, [%o0]
- retl
- mov %g2, %o0
-
-7: srl %g5, 16, %g5
- sth %g5, [%o0]
- stb %g0, [%o0 + 2]
- retl
- mov %g2, %o0
-
-8: stb %g0, [%o0 + 4]
- retl
- mov %g2, %o0
-
-5: srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be 9b
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- sth %g5, [%o0]
- sub %o0, 2, %o0
- bne 1b
- srl %o5, 8, %o4
- retl
- mov %g2, %o0
-
-3: bne 5f
- srl %o5, 24, %g5
- stb %g5, [%o0]
- sub %o0, 3, %o0
-1: add %o0, 4, %o0
-4: sll %o5, 8, %g6
- ld [%o1], %o5
- add %o1, 4, %o1
- srl %o5, 24, %g5
- sub %o5, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o5, %o4
-#endif
- or %g5, %g6, %g5
- andcc %o4, %o3, %g0
- be 1b
- st %g5, [%o0]
- srl %o5, 24, %o4
- andcc %o4, 0xff, %g0
- be 0b
- srl %o5, 16, %o4
-1: andcc %o4, 0xff, %g0
- be 8b
- srl %o5, 8, %o4
- andcc %o4, 0xff, %g0
- be 9f
- andcc %o5, 0xff, %g0
- bne 4b
- add %o0, 4, %o0
- srl %o5, 8, %o5
- sth %o5, [%o0]
- stb %g0, [%o0 + 2]
- retl
- mov %g2, %o0
-9: srl %o5, 8, %o5
- sth %o5, [%o0 + 4]
- retl
- mov %g2, %o0
-5: andcc %g5, 0xff, %g0
- stb %g5, [%o0]
- sub %o0, 3, %o0
- bne 1b
- srl %o5, 16, %o4
- retl
- mov %g2, %o0
-END(strcat)
-libc_hidden_builtin_def (strcat)
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/strcat.S>
@@ -1,284 +1,4 @@
-/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
- For SPARC v7.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
- David S. Miller <davem@caip.rutgers.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x01010101) & (~xword) & 0x80808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 4
-ENTRY(strchr)
- andcc %o1, 0xff, %o1
- be 12f
- sll %o1, 8, %o2
- andcc %o0, 3, %g0
- or %o1, %o2, %o2
- sethi %hi(0x80808080), %o4
- sll %o2, 16, %o3
- be 13f
- or %o3, %o2, %g2
-
- ldub [%o0], %g4
- cmp %g4, %o1
- be 11f
- add %o0, 1, %o0
- cmp %g4, 0
- be 9f
- andcc %o0, 3, %g0
- be 4f
- or %o4, %lo(0x80808080), %o3
- ldub [%o0], %g4
- cmp %g4, %o1
- be 11f
- add %o0, 1, %o0
- cmp %g4, 0
- be 9f
- andcc %o0, 3, %g0
- be 5f
- sethi %hi(0x01010101), %o5
- ldub [%o0], %g4
- cmp %g4, %o1
- be 11f
- add %o0, 1, %o0
- cmp %g4, 0
- be 9f
- or %o5, %lo(0x01010101), %o2
- b 6f
- ld [%o0], %g4
-11: retl
- sub %o0, 1, %o0
-
-13: or %o4, %lo(0x80808080), %o3
-4: sethi %hi(0x01010101), %o5
-5: or %o5, %lo(0x01010101), %o2
-7: ld [%o0], %g4
-6: xor %g4, %g2, %g5
- sub %g4, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- sub %g5, %o2, %g6
- andn %o4, %g4, %o4
- andn %g6, %g5, %g5
-#else
- sub %g5, %o2, %g5
-#endif
- or %g5, %o4, %o4
- andcc %o4, %o3, %g0
- be 7b
- add %o0, 4, %o0
-
- /* Check every byte. */
-8: srl %g4, 24, %g5
-7: andcc %g5, 0xff, %g5
- be 9f
- cmp %g5, %o1
- be 4f
- srl %g4, 16, %g5
- andcc %g5, 0xff, %g5
- be 9f
- cmp %g5, %o1
- be 3f
- srl %g4, 8, %g5
- andcc %g5, 0xff, %g5
- be 9f
- cmp %g5, %o1
- be 2f
- andcc %g4, 0xff, %g5
- be 9f
- cmp %g5, %o1
- bne,a 6b
- ld [%o0], %g4
- retl
- sub %o0, 1, %o0
-2: retl
- sub %o0, 2, %o0
-3: retl
- sub %o0, 3, %o0
-4: retl
- sub %o0, 4, %o0
-9: retl
- clr %o0
-
-11: ldub [%o0], %o5
- cmp %o5, 0
- be 1f
- add %o0, 1, %o0
- andcc %o0, 3, %g0
- be 4f
- or %o4, %lo(0x80808080), %o3
- ldub [%o0], %o5
- cmp %o5, 0
- be 1f
- add %o0, 1, %o0
- andcc %o0, 3, %g0
- be 5f
- sethi %hi(0x01010101), %o4
- ldub [%o0], %o5
- cmp %o5, 0
- be 1f
- add %o0, 1, %o0
- b 6f
- or %o4, %lo(0x01010101), %o2
-1: retl
- sub %o0, 1, %o0
-
-12: andcc %o0, 3, %g0
- bne 11b
- sethi %hi(0x80808080), %o4
- or %o4, %lo(0x80808080), %o3
-4: sethi %hi(0x01010101), %o4
-5: or %o4, %lo(0x01010101), %o2
-6: ld [%o0], %o5
-7: sub %o5, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o5, %o4
-#endif
- andcc %o4, %o3, %g0
- be 6b
- add %o0, 4, %o0
-
- /* Check every byte. */
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be 8f
- add %o0, -4, %o4
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be 8f
- add %o4, 1, %o4
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- be 8f
- add %o4, 1, %o4
- andcc %o5, 0xff, %g0
- bne,a 7b
- ld [%o0], %o5
- add %o4, 1, %o4
-8: retl
- mov %o4, %o0
-
-13: ldub [%o0], %g4
- cmp %g4, %o1
- add %o0, 1, %o0
- be,a 1f
- sub %o0, 1, %o5
- cmp %g4, 0
- be 9f
-1: andcc %o0, 3, %g0
- be 4f
- or %o4, %lo(0x80808080), %o3
- ldub [%o0], %g4
- cmp %g4, %o1
- add %o0, 1, %o0
- be,a 1f
- sub %o0, 1, %o5
- cmp %g4, 0
- be 9f
-1: andcc %o0, 3, %g0
- be 5f
- sethi %hi(0x01010101), %o4
- ldub [%o0], %g4
- cmp %g4, %o1
- add %o0, 1, %o0
- be,a 1f
- sub %o0, 1, %o5
- cmp %g4, 0
- be 9f
-1: or %o4, %lo(0x01010101), %o2
- b 7f
- ld [%o0], %g4
-END(strchr)
-
-ENTRY(strrchr)
- andcc %o1, 0xff, %o1
- clr %o5
- be 12b
- sll %o1, 8, %o2
- andcc %o0, 3, %g0
- or %o1, %o2, %o2
- sethi %hi(0x80808080), %o4
- sll %o2, 16, %o3
- bne 13b
- or %o3, %o2, %g2
- or %o4, %lo(0x80808080), %o3
-4: sethi %hi(0x01010101), %o4
-5: or %o4, %lo(0x01010101), %o2
-6: ld [%o0], %g4
-7: xor %g4, %g2, %g5
- sub %g4, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- sub %g5, %o2, %g6
- andn %o4, %g4, %o4
- andn %g6, %g5, %g5
-#else
- sub %g5, %o2, %g5
-#endif
- or %g5, %o4, %o4
- andcc %o4, %o3, %g0
- be 6b
- add %o0, 4, %o0
-
- /* Check every byte. */
-3: srl %g4, 24, %g5
-8: andcc %g5, 0xff, %g5
- be 9f
- cmp %g5, %o1
- be,a 1f
- sub %o0, 4, %o5
-1: srl %g4, 16, %g5
- andcc %g5, 0xff, %g5
- be 9f
- cmp %g5, %o1
- be,a 1f
- sub %o0, 3, %o5
-1: srl %g4, 8, %g5
- andcc %g5, 0xff, %g5
- be 9f
- cmp %g5, %o1
- be,a 1f
- sub %o0, 2, %o5
-1: andcc %g4, 0xff, %g5
- be 9f
- cmp %g5, %o1
- be,a 1f
- sub %o0, 1, %o5
-1: b 7b
- ld [%o0], %g4
-9: retl
- mov %o5, %o0
-END(strrchr)
-
-weak_alias (strchr, index)
-weak_alias (strrchr, rindex)
-libc_hidden_builtin_def (strchr)
-libc_hidden_builtin_def (strrchr)
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/strchr.S>
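The removed v7 strchr extends that zero-byte test to an arbitrary character:
replicate the character into every byte of a word and XOR it with the data,
so a matching byte becomes zero and the same test applies.  Sketch:

    #include <stdint.h>

    /* Nonzero if the word may contain byte c (same false-positive
       caveat as the fast zero-byte test).  */
    static int
    word_may_contain (uint32_t x, unsigned char c)
    {
      uint32_t pat = c;
      pat |= pat << 8;        /* the sll/or sequence building %g2 */
      pat |= pat << 16;
      uint32_t y = x ^ pat;   /* matching bytes become zero */
      return ((y - 0x01010101u) & 0x80808080u) != 0;
    }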
@@ -1,259 +1,4 @@
-/* Compare two strings for differences.
- For SPARC v7.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x01010101) & (~xword) & 0x80808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 4
-
-ENTRY(strcmp)
- andcc %o0, 3, %g0
- be 13f
- sethi %hi(0x80808080), %g1
-
- ldub [%o0], %o4
- add %o0, 1, %o0
- ldub [%o1], %o5
- cmp %o4, 0
- add %o1, 1, %o1
- be 2f
- subcc %o4, %o5, %o4
- bne 2f
- andcc %o0, 3, %g0
- be 4f
- or %g1, %lo(0x80808080), %o3
- ldub [%o0], %o4
- add %o0, 1, %o0
- ldub [%o1], %o5
- cmp %o4, 0
- add %o1, 1, %o1
- be 2f
- subcc %o4, %o5, %o4
- bne 2f
- andcc %o0, 3, %g0
- be 5f
- sethi %hi(0x01010101), %g1
- ldub [%o0], %o4
- add %o0, 1, %o0
- ldub [%o1], %o5
- cmp %o4, 0
- add %o1, 1, %o1
- be 2f
- subcc %o4, %o5, %o4
- bne 2f
- andcc %o1, 3, %g2
- bne 12f
- or %g1, %lo(0x01010101), %o2
- b 1f
- ld [%o0], %o4
-2: retl
- mov %o4, %o0
-
-13: or %g1, %lo(0x80808080), %o3
-4: sethi %hi(0x01010101), %g1
-5: andcc %o1, 3, %g2
- bne 12f
- or %g1, %lo(0x01010101), %o2
-
-0: ld [%o0], %o4
-1: ld [%o1], %o5
- sub %o4, %o2, %g1
- add %o0, 4, %o0
- cmp %o4, %o5
-#ifdef EIGHTBIT_NOT_RARE
- andn %g1, %o4, %g1
-#endif
- bne 11f
- andcc %g1, %o3, %g0
- be 0b
- add %o1, 4, %o1
-
- srl %o4, 24, %g4
- andcc %g4, 0xff, %g0
- be 2f
- srl %o4, 16, %g4
- andcc %g4, 0xff, %g0
- be 2f
- srl %o4, 8, %g4
- andcc %g4, 0xff, %g0
- be 2f
- andcc %o4, 0xff, %g0
- bne,a 1b
- ld [%o0], %o4
-2: retl
- clr %o0
-
-11: srl %o4, 24, %g4
- srl %o5, 24, %g5
- andcc %g4, 0xff, %g0
- be 3f
- subcc %g4, %g5, %g4
- bne 3f
- srl %o5, 16, %g5
- srl %o4, 16, %g4
- andcc %g4, 0xff, %g0
- be 3f
- subcc %g4, %g5, %g4
- bne 3f
- srl %o5, 8, %g5
- srl %o4, 8, %g4
- andcc %g4, 0xff, %g0
- be 3f
- subcc %g4, %g5, %g4
- bne 3f
- subcc %o4, %o5, %o4
- retl
- mov %o4, %o0
-3: retl
- mov %g4, %o0
-
-12: save %sp, -64, %sp
- ld [%i0], %i4
- sll %g2, 3, %g3
- andn %i1, 3, %i1
- mov 32, %l1
- ld [%i1], %l2
- mov -1, %g6
- add %i1, 4, %i1
- sub %l1, %g3, %l1
- sll %g6, %g3, %g6
-
-1: sll %l2, %g3, %g5
- and %i4, %g6, %l3
- sub %i4, %i2, %g1
-#ifdef EIGHTBIT_NOT_RARE
- andn %g1, %i4, %g1
-#endif
- andcc %g1, %i3, %g1
- bne 3f
- cmp %g5, %l3
- bne 2f
- add %i0, 4, %i0
- ld [%i1], %l2
- add %i1, 4, %i1
- srl %l2, %l1, %l4
- or %l4, %g5, %l4
- cmp %l4, %i4
- be,a 1b
- ld [%i0], %i4
- restore %l4, %g0, %o3
- retl
- sub %o4, %o3, %o0
-
-2: sll %l2, %g3, %i2
- srl %i4, %g3, %i3
- srl %i2, %g3, %i2
- restore
- retl
- sub %o3, %o2, %o0
-
-3: srl %i4, 24, %g4
- srl %g5, 24, %l6
- andcc %g4, 0xff, %g0
- be 4f
- subcc %g4, %l6, %g4
- bne 4f
- cmp %g2, 3
- be 6f
- srl %i4, 16, %g4
- srl %g5, 16, %l6
- andcc %g4, 0xff, %g0
- be 4f
- subcc %g4, %l6, %g4
- bne 4f
- cmp %g2, 2
- be 5f
- srl %i4, 8, %g4
- srl %g5, 8, %l6
- andcc %g4, 0xff, %g0
- be 4f
- subcc %g4, %l6, %g4
- bne 4f
- add %i0, 4, %i0
- ld [%i1], %l2
- add %i1, 4, %i1
- srl %l2, 24, %g5
- andcc %i4, 0xff, %g4
- be 4f
- subcc %g4, %g5, %g4
- be,a 1b
- ld [%i0], %i4
-4: jmpl %i7 + 8, %g0
- restore %g4, %g0, %o0
-
-5: ld [%i1], %l2
- add %i1, 4, %i1
- add %i0, 4, %i0
- srl %l2, 24, %l6
- andcc %g4, 0xff, %g4
- be 4b
- subcc %g4, %l6, %g4
- bne 4b
- srl %l2, 16, %l6
- andcc %i4, 0xff, %g4
- and %l6, 0xff, %l6
- be 4b
- subcc %g4, %l6, %g4
- be,a 1b
- ld [%i0], %i4
- jmpl %i7 + 8, %g0
- restore %g4, %g0, %o0
-
-6: ld [%i1], %l2
- add %i1, 4, %i1
- add %i0, 4, %i0
- srl %l2, 24, %l6
- andcc %g4, 0xff, %g4
- be 4b
- subcc %g4, %l6, %g4
- bne 4b
- srl %l2, 16, %l6
- srl %i4, 8, %g4
- and %l6, 0xff, %l6
- andcc %g4, 0xff, %g4
- be 4b
- subcc %g4, %l6, %g4
- bne 4b
- srl %l2, 8, %l6
- andcc %i4, 0xff, %g4
- and %l6, 0xff, %l6
- be 4b
- subcc %g4, %l6, %g4
- be,a 1b
- ld [%i0], %i4
- jmpl %i7 + 8, %g0
- restore %g4, %g0, %o0
-END(strcmp)
-libc_hidden_builtin_def (strcmp)
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/strcmp.S>
@@ -1,276 +1,4 @@
-/* Copy SRC to DEST returning DEST.
- For SPARC v7.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x01010101) & (~xword) & 0x80808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 4
-
-ENTRY(strcpy)
- mov %o0, %g2
- andcc %o1, 3, %g0
- be 10f
- sethi %hi(0x80808080), %o4
-
- ldub [%o1], %o5
- stb %o5, [%o0]
- cmp %o5, 0
- add %o0, 1, %o0
- be 0f
- add %o1, 1, %o1
- andcc %o1, 3, %g0
- be 4f
- or %o4, %lo(0x80808080), %o3
- ldub [%o1], %o5
- stb %o5, [%o0]
- cmp %o5, 0
- add %o0, 1, %o0
- be 0f
- add %o1, 1, %o1
- andcc %o1, 3, %g0
- be 5f
- sethi %hi(0x01010101), %o4
- ldub [%o1], %o5
- stb %o5, [%o0]
- cmp %o5, 0
- add %o0, 1, %o0
- be 0f
- add %o1, 1, %o1
- b 6f
- andcc %o0, 3, %g3
-
-10: or %o4, %lo(0x80808080), %o3
-4: sethi %hi(0x01010101), %o4
-5: andcc %o0, 3, %g3
-6: bne 10f
- or %o4, %lo(0x01010101), %o2
-1: ld [%o1], %o5
- add %o1, 4, %o1
- sub %o5, %o2, %o4
- add %o0, 4, %o0
- andcc %o4, %o3, %g0
- be,a 1b
- st %o5, [%o0 - 4]
-
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be 1f
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be 2f
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- be 3f
- andcc %o5, 0xff, %g0
- bne 1b
- st %o5, [%o0 - 4]
- retl
- mov %g2, %o0
-3: srl %o5, 16, %o5
- sth %o5, [%o0 - 4]
- stb %g0, [%o0 - 2]
- retl
- mov %g2, %o0
-2: srl %o5, 16, %o5
- sth %o5, [%o0 - 4]
- retl
- mov %g2, %o0
-1: stb %g0, [%o0 - 4]
- retl
- mov %g2, %o0
-
-10: ld [%o1], %o5
- add %o1, 4, %o1
- sub %o5, %o2, %o4
- cmp %g3, 2
- be 2f
- cmp %g3, 3
- be 3f
- andcc %o4, %o3, %g0
- bne 5f
- srl %o5, 24, %g5
- stb %g5, [%o0]
- sub %o0, 1, %o0
- srl %o5, 8, %g5
- sth %g5, [%o0 + 2]
-1: add %o0, 4, %o0
-4: sll %o5, 24, %g6
- ld [%o1], %o5
- add %o1, 4, %o1
- srl %o5, 8, %g5
- sub %o5, %o2, %o4
- or %g5, %g6, %g5
- andcc %o4, %o3, %g0
- be,a 1b
- st %g5, [%o0]
- srl %o5, 24, %o4
- andcc %o4, 0xff, %g0
- be 6f
- srl %o5, 16, %o4
- andcc %o4, 0xff, %g0
- be 7f
- srl %o5, 8, %o4
- st %g5, [%o0]
- andcc %o4, 0xff, %g0
- be 0f
- andcc %o5, 0xff, %g0
-1: bne 4b
- add %o0, 4, %o0
-9: stb %g0, [%o0]
-0: retl
- mov %g2, %o0
-6: srl %g5, 16, %g5
- sth %g5, [%o0]
- retl
- mov %g2, %o0
-7: srl %g5, 16, %g5
- sth %g5, [%o0]
- stb %g0, [%o0 + 2]
- retl
- mov %g2, %o0
-5: andcc %g5, 0xff, %g4
- be 9b
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be 7f
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- stb %g4, [%o0]
- sth %g5, [%o0 + 1]
- sub %o0, 1, %o0
- bne 1b
- andcc %o5, 0xff, %g0
- retl
- mov %g2, %o0
-7: stb %g4, [%o0]
- stb %g0, [%o0 + 1]
- retl
- mov %g2, %o0
-
-2: andcc %o4, %o3, %g0
- bne 5f
- srl %o5, 16, %g5
- sth %g5, [%o0]
- sub %o0, 2, %o0
-1: add %o0, 4, %o0
-4: sll %o5, 16, %g6
- ld [%o1], %o5
- add %o1, 4, %o1
- srl %o5, 16, %g5
- sub %o5, %o2, %o4
- or %g5, %g6, %g5
- andcc %o4, %o3, %g0
- be,a 1b
- st %g5, [%o0]
- srl %o5, 24, %o4
- andcc %o4, 0xff, %g0
- be 7f
- srl %o5, 16, %o4
- st %g5, [%o0]
- andcc %o4, 0xff, %g0
- be 0b
- srl %o5, 8, %o4
-1: andcc %o4, 0xff, %g0
- be 8f
- andcc %o5, 0xff, %g0
- bne 4b
- add %o0, 4, %o0
- sth %o5, [%o0]
- retl
- mov %g2, %o0
-7: srl %g5, 16, %g5
- sth %g5, [%o0]
- stb %g0, [%o0 + 2]
- retl
- mov %g2, %o0
-8: stb %g0, [%o0 + 4]
- retl
- mov %g2, %o0
-5: srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be 9b
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- sth %g5, [%o0]
- sub %o0, 2, %o0
- bne 1b
- srl %o5, 8, %o4
- retl
- mov %g2, %o0
-
-3: bne 5f
- srl %o5, 24, %g5
- stb %g5, [%o0]
- sub %o0, 3, %o0
-1: add %o0, 4, %o0
-4: sll %o5, 8, %g6
- ld [%o1], %o5
- add %o1, 4, %o1
- srl %o5, 24, %g5
- sub %o5, %o2, %o4
- or %g5, %g6, %g5
- andcc %o4, %o3, %g0
- be 1b
- st %g5, [%o0]
- srl %o5, 24, %o4
- andcc %o4, 0xff, %g0
- be 0b
- srl %o5, 16, %o4
-1: andcc %o4, 0xff, %g0
- be 8b
- srl %o5, 8, %o4
- andcc %o4, 0xff, %g0
- be 9f
- andcc %o5, 0xff, %g0
- bne 4b
- add %o0, 4, %o0
- srl %o5, 8, %o5
- sth %o5, [%o0]
- stb %g0, [%o0 + 2]
- retl
- mov %g2, %o0
-9: srl %o5, 8, %o5
- sth %o5, [%o0 + 4]
- retl
- mov %g2, %o0
-5: andcc %g5, 0xff, %g0
- stb %g5, [%o0]
- sub %o0, 3, %o0
- bne 1b
- srl %o5, 16, %o4
- retl
- mov %g2, %o0
-END(strcpy)
-libc_hidden_builtin_def (strcpy)
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/strcpy.S>
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/strcspn.S
rename to sysdeps/sparc/sparc32/strcspn.S
@@ -1,75 +1 @@
-/* Determine the length of a string.
- For SPARC v7.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
- David S. Miller <davem@davemloft.net>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
- .align 4
-
-ENTRY(strlen)
- mov %o0, %o1
- andn %o0, 0x3, %o0
-
- ld [%o0], %o5
- and %o1, 0x3, %g1
- mov -1, %g5
-
- sethi %hi(0x01010101), %o2
- sll %g1, 3, %g1
-
- or %o2, %lo(0x01010101), %o2
- srl %g5, %g1, %g2
-
- orn %o5, %g2, %o5
- sll %o2, 7, %o3
-10: add %o0, 4, %o0
-
- andn %o3, %o5, %g1
- sub %o5, %o2, %g2
-
- andcc %g1, %g2, %g0
- be,a 10b
- ld [%o0], %o5
-
- srl %o5, 24, %g1
-
- andcc %g1, 0xff, %g0
- be 90f
- sub %o0, 4, %o0
-
- srl %o5, 16, %g2
-
- andcc %g2, 0xff, %g0
- be 90f
- add %o0, 1, %o0
-
- srl %o5, 8, %g1
-
- andcc %g1, 0xff, %g0
- be 90f
- add %o0, 1, %o0
-
- add %o0, 1, %o0
-
-90: retl
- sub %o0, %o1, %o0
-END(strlen)
-libc_hidden_builtin_def (strlen)
+#include <sparc64/strlen.S>
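One detail worth noting from the deleted v7 strlen: its prologue reads the
aligned word containing the start of the string and forces the bytes before
the start to 0xff (the orn %o5, %g2), so they can never satisfy the zero-byte
test.  A C sketch of that masking, assuming SPARC's big-endian byte order:

    #include <stdint.h>

    /* Return the first aligned word of the string with the bytes that
       precede the string forced to 0xff.  Illustrative only.  */
    static uint32_t
    first_word_masked (const char *s)
    {
      uintptr_t addr = (uintptr_t) s;
      const uint32_t *w = (const uint32_t *) (addr & ~(uintptr_t) 3);
      unsigned lead = (unsigned) (addr & 3) * 8;   /* bits before the string */
      uint32_t mask = ~(0xffffffffu >> lead);      /* 0 when already aligned */
      return *w | mask;                            /* orn %o5, %g2, %o5 */
    }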
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/strncmp.S
rename to sysdeps/sparc/sparc32/strncmp.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/strncpy.S
rename to sysdeps/sparc/sparc32/strncpy.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/strpbrk.S
rename to sysdeps/sparc/sparc32/strpbrk.S
similarity index 100%
rename from sysdeps/sparc/sparc32/sparcv9/strspn.S
rename to sysdeps/sparc/sparc32/strspn.S
@@ -1,146 +1,82 @@
-! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
-! the result from a second limb vector.
+! SPARC v9 32-bit __mpn_submul_1 -- Multiply a limb vector with a limb
+! and subtract the result from a second limb vector.
!
-! Copyright (C) 1992-2019 Free Software Foundation, Inc.
+! Copyright (C) 2013-2019 Free Software Foundation, Inc.
+! This file is part of the GNU C Library.
+! Contributed by David S. Miller <davem@davemloft.net>
!
-! This file is part of the GNU MP Library.
+! The GNU C Library is free software; you can redistribute it and/or
+! modify it under the terms of the GNU Lesser General Public
+! License as published by the Free Software Foundation; either
+! version 2.1 of the License, or (at your option) any later version.
!
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
+! The GNU C Library is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! Lesser General Public License for more details.
!
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-!
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not,
-! see <https://www.gnu.org/licenses/>.
-
-
-! INPUT PARAMETERS
-! RES_PTR o0
-! S1_PTR o1
-! SIZE o2
-! S2_LIMB o3
+! You should have received a copy of the GNU Lesser General Public
+! License along with the GNU C Library; if not, see
+! <https://www.gnu.org/licenses/>.
#include <sysdep.h>
-ENTRY(__mpn_submul_1)
- ! Make S1_PTR and RES_PTR point at the end of their blocks
- ! and put (- 4 x SIZE) in index/loop counter.
- sll %o2,2,%o2
- add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
- add %o1,%o2,%o1
- sub %g0,%o2,%o2
-
- cmp %o3,0xfff
- bgu LOC(large)
- nop
-
- ld [%o1+%o2],%o5
- mov 0,%o0
- b LOC(0)
- add %o4,-4,%o4
-LOC(loop0):
- subcc %o5,%g1,%g1
- ld [%o1+%o2],%o5
- addx %o0,%g0,%o0
- st %g1,[%o4+%o2]
-LOC(0): wr %g0,%o3,%y
- sra %o5,31,%g2
- and %o3,%g2,%g2
- andcc %g1,0,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,0,%g1
- sra %g1,20,%g4
- sll %g1,12,%g1
- rd %y,%g3
- srl %g3,20,%g3
- or %g1,%g3,%g1
+#define res_ptr %i0
+#define s1_ptr %i1
+#define sz_arg %i2
+#define s2l_arg %i3
+#define sz %o4
+#define carry %o5
+#define s2_limb %g1
+#define tmp1 %l0
+#define tmp2 %l1
+#define tmp3 %l2
+#define tmp4 %l3
+#define tmp64_1 %g3
+#define tmp64_2 %o3
- addcc %g1,%o0,%g1
- addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
- addcc %o2,4,%o2 ! loop counter
- bne LOC(loop0)
- ld [%o4+%o2],%o5
-
- subcc %o5,%g1,%g1
- addx %o0,%g0,%o0
- retl
- st %g1,[%o4+%o2]
+ENTRY(__mpn_submul_1)
+ save %sp, -96, %sp
+ srl sz_arg, 0, sz
+ srl s2l_arg, 0, s2_limb
+ subcc sz, 1, sz
+ be,pn %icc, .Lfinal_limb
+ subcc %g0, 0, carry
+.Lloop:
+ lduw [s1_ptr + 0x00], tmp1
+ lduw [res_ptr + 0x00], tmp3
+ lduw [s1_ptr + 0x04], tmp2
+ lduw [res_ptr + 0x04], tmp4
+ mulx tmp1, s2_limb, tmp64_1
+ add s1_ptr, 8, s1_ptr
+ mulx tmp2, s2_limb, tmp64_2
+ sub sz, 2, sz
+ add res_ptr, 8, res_ptr
+ addx carry, tmp64_1, tmp64_1
+ srlx tmp64_1, 32, carry
+ subcc tmp3, tmp64_1, tmp64_1
+ stw tmp64_1, [res_ptr - 0x08]
+ addx carry, tmp64_2, tmp64_2
+ srlx tmp64_2, 32, carry
+ subcc tmp4, tmp64_2, tmp64_2
+ brgz sz, .Lloop
+ stw tmp64_2, [res_ptr - 0x04]
-LOC(large):
- ld [%o1+%o2],%o5
- mov 0,%o0
- sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
- b LOC(1)
- add %o4,-4,%o4
-LOC(loop):
- subcc %o5,%g3,%g3
- ld [%o1+%o2],%o5
- addx %o0,%g0,%o0
- st %g3,[%o4+%o2]
-LOC(1): wr %g0,%o5,%y
- and %o5,%g4,%g2
- andcc %g0,%g0,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%g0,%g1
- rd %y,%g3
- addcc %g3,%o0,%g3
- addx %g2,%g1,%o0
- addcc %o2,4,%o2
- bne LOC(loop)
- ld [%o4+%o2],%o5
+ brlz,pt sz, .Lfinish
+ nop
- subcc %o5,%g3,%g3
- addx %o0,%g0,%o0
- retl
- st %g3,[%o4+%o2]
+.Lfinal_limb:
+ lduw [s1_ptr + 0x00], tmp1
+ lduw [res_ptr + 0x00], tmp3
+ mulx tmp1, s2_limb, tmp64_1
+ addx carry, tmp64_1, tmp64_1
+ srlx tmp64_1, 32, carry
+ subcc tmp3, tmp64_1, tmp64_1
+ stw tmp64_1, [res_ptr + 0x00]
+.Lfinish:
+ addx carry, 0, carry
+ jmpl %i7 + 0x8, %g0
+ restore carry, 0, %o0
END(__mpn_submul_1)
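The mulscc-based __mpn_submul_1 replaced above develops the product one bit
per mulscc step and then adds a sign-compensation word, because the stepped
multiply treats the multiplicand as signed.  A guess at that correction in C,
not a drop-in reference for the removed code:

    #include <stdint.h>

    /* Recover the unsigned 32x32->64 product from a product in which
       the multiplicand was interpreted as signed: when its top bit is
       set, add the other operand to the high word (mod 2^64).  */
    static uint64_t
    umul_via_signed_multiplicand (uint32_t y_op, uint32_t multiplicand)
    {
      int64_t sp = (int64_t) y_op * (int32_t) multiplicand;
      uint64_t up = (uint64_t) sp;
      if ((int32_t) multiplicand < 0)
        up += (uint64_t) y_op << 32;   /* the addx of the masked operand */
      return up;                       /* equals (uint64_t) y_op * multiplicand */
    }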
@@ -1,347 +1,18 @@
- /* This file is generated from divrem.m4; DO NOT EDIT! */
/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
+ * Sparc v9 has divide.
+ * As divx takes 68 cycles and udiv only 37,
+ * we use udiv even though it is deprecated.
*/
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- * .udiv name of function to generate
- * div div=div => %o0 / %o1; div=rem => %o0 % %o1
- * false false=true => signed; false=false => unsigned
- *
- * Algorithm parameters:
- * N how many bits per iteration we try to get (4)
- * WORDSIZE total number of bits (32)
- *
- * Derived constants:
- * TOPBITS number of bits in the top decade of a number
- *
- * Important variables:
- * Q the partial quotient under development (initially 0)
- * R the remainder so far, initially the dividend
- * ITER number of main division loop iterations required;
- * equal to ceil(log2(quotient) / N). Note that this
- * is the log base (2^N) of the quotient.
- * V the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- * Current estimate for non-large dividend is
- * ceil(log2(quotient) / N) * (10 + 7N/2) + C
- * A large dividend is one greater than 2^(31-TOPBITS) and takes a
- * different path, as the upper bits of the quotient must be developed
- * one bit at a time.
- */
-
-
-
#include <sysdep.h>
-#include <sys/trap.h>
+ .text
+ .align 32
ENTRY(.udiv)
- ! Ready to divide. Compute size of quotient; scale comparand.
- orcc %o1, %g0, %o5
- bne 1f
- mov %o0, %o3
-
- ! Divide by zero trap. If it returns, return 0 (about as
- ! wrong as possible, but that is what SunOS does...).
- ta ST_DIV0
- retl
- clr %o0
-
-1:
- cmp %o3, %o5 ! if %o1 exceeds %o0, done
- blu LOC(got_result) ! (and algorithm fails otherwise)
- clr %o2
- sethi %hi(1 << (32 - 4 - 1)), %g1
- cmp %o3, %g1
- blu LOC(not_really_big)
- clr %o4
-
- ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
- ! as our usual N-at-a-shot divide step will cause overflow and havoc.
- ! The number of bits in the result here is N*ITER+SC, where SC <= N.
- ! Compute ITER in an unorthodox manner: know we need to shift V into
- ! the top decade: so do not even bother to compare to R.
- 1:
- cmp %o5, %g1
- bgeu 3f
- mov 1, %g2
- sll %o5, 4, %o5
- b 1b
- add %o4, 1, %o4
-
- ! Now compute %g2.
- 2: addcc %o5, %o5, %o5
- bcc LOC(not_too_big)
- add %g2, 1, %g2
-
- ! We get here if the %o1 overflowed while shifting.
- ! This means that %o3 has the high-order bit set.
- ! Restore %o5 and subtract from %o3.
- sll %g1, 4, %g1 ! high order bit
- srl %o5, 1, %o5 ! rest of %o5
- add %o5, %g1, %o5
- b LOC(do_single_div)
- sub %g2, 1, %g2
-
- LOC(not_too_big):
- 3: cmp %o5, %o3
- blu 2b
- nop
- be LOC(do_single_div)
- nop
- /* NB: these are commented out in the V8-Sparc manual as well */
- /* (I do not understand this) */
- ! %o5 > %o3: went too far: back up 1 step
- ! srl %o5, 1, %o5
- ! dec %g2
- ! do single-bit divide steps
- !
- ! We have to be careful here. We know that %o3 >= %o5, so we can do the
- ! first divide step without thinking. BUT, the others are conditional,
- ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
- ! order bit set in the first step, just falling into the regular
- ! division loop will mess up the first time around.
- ! So we unroll slightly...
- LOC(do_single_div):
- subcc %g2, 1, %g2
- bl LOC(end_regular_divide)
- nop
- sub %o3, %o5, %o3
- mov 1, %o2
- b LOC(end_single_divloop)
- nop
- LOC(single_divloop):
- sll %o2, 1, %o2
- bl 1f
- srl %o5, 1, %o5
- ! %o3 >= 0
- sub %o3, %o5, %o3
- b 2f
- add %o2, 1, %o2
- 1: ! %o3 < 0
- add %o3, %o5, %o3
- sub %o2, 1, %o2
- 2:
- LOC(end_single_divloop):
- subcc %g2, 1, %g2
- bge LOC(single_divloop)
- tst %o3
- b,a LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
- sll %o5, 4, %o5
- cmp %o5, %o3
- bleu 1b
- addcc %o4, 1, %o4
- be LOC(got_result)
- sub %o4, 1, %o4
-
- tst %o3 ! set up for initial iteration
-LOC(divloop):
- sll %o2, 4, %o2
- ! depth 1, accumulated bits 0
- bl LOC(1.16)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 2, accumulated bits 1
- bl LOC(2.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits 3
- bl LOC(3.19)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 7
- bl LOC(4.23)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2+1), %o2
-
-LOC(4.23):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2-1), %o2
-
-
-LOC(3.19):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 5
- bl LOC(4.21)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2+1), %o2
-
-LOC(4.21):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2-1), %o2
-
-
-
-LOC(2.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits 1
- bl LOC(3.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 3
- bl LOC(4.19)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2+1), %o2
-
-LOC(4.19):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2-1), %o2
-
-
-LOC(3.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 1
- bl LOC(4.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2+1), %o2
-
-LOC(4.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2-1), %o2
-
-
-
-
-LOC(1.16):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 2, accumulated bits -1
- bl LOC(2.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits -1
- bl LOC(3.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -1
- bl LOC(4.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2+1), %o2
-
-LOC(4.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2-1), %o2
-
-
-LOC(3.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -3
- bl LOC(4.13)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2+1), %o2
-
-LOC(4.13):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2-1), %o2
-
-
-
-LOC(2.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits -3
- bl LOC(3.13)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -5
- bl LOC(4.11)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2+1), %o2
-
-LOC(4.11):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2-1), %o2
-
-
-LOC(3.13):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -7
- bl LOC(4.9)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2+1), %o2
-
-LOC(4.9):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2-1), %o2
-
-
-
-
-9:
-LOC(end_regular_divide):
- subcc %o4, 1, %o4
- bge LOC(divloop)
- tst %o3
- bl,a LOC(got_result)
- ! non-restoring fixup here (one instruction only!)
- sub %o2, 1, %o2
-
-
-LOC(got_result):
-
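+	! udiv divides the 64-bit value {%y, %o0} by %o1, so clear %y
+	! first to get a plain 32-bit unsigned divide.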
+ wr %g0, 0, %y
retl
- mov %o2, %o0
+ udiv %o0, %o1, %o0
END(.udiv)
strong_alias (.udiv, __wrap_.udiv)
@@ -1,155 +1,17 @@
/*
- * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
- * upper 32 bits of the 64-bit product).
- *
- * This code optimizes short (less than 13-bit) multiplies. Short
- * multiplies require 25 instruction cycles, and long ones require
- * 45 instruction cycles.
- *
- * On return, overflow has occurred (%o1 is not zero) if and only if
- * the Z condition code is clear, allowing, e.g., the following:
- *
- * call .umul
- * nop
- * bnz overflow (or tnz)
+ * Sparc v9 has multiply.
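+ * .umul still returns the product with the low 32 bits in %o0 and
+ * the high 32 bits in %o1.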
*/
#include <sysdep.h>
+ .text
+ .align 32
ENTRY(.umul)
- or %o0, %o1, %o4
- mov %o0, %y ! multiplier -> Y
- andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
- be LOC(mul_shortway) ! if zero, can do it the short way
- andcc %g0, %g0, %o4 ! zero the partial product; clear N & V
- /*
- * Long multiply. 32 steps, followed by a final shift step.
- */
- mulscc %o4, %o1, %o4 ! 1
- mulscc %o4, %o1, %o4 ! 2
- mulscc %o4, %o1, %o4 ! 3
- mulscc %o4, %o1, %o4 ! 4
- mulscc %o4, %o1, %o4 ! 5
- mulscc %o4, %o1, %o4 ! 6
- mulscc %o4, %o1, %o4 ! 7
- mulscc %o4, %o1, %o4 ! 8
- mulscc %o4, %o1, %o4 ! 9
- mulscc %o4, %o1, %o4 ! 10
- mulscc %o4, %o1, %o4 ! 11
- mulscc %o4, %o1, %o4 ! 12
- mulscc %o4, %o1, %o4 ! 13
- mulscc %o4, %o1, %o4 ! 14
- mulscc %o4, %o1, %o4 ! 15
- mulscc %o4, %o1, %o4 ! 16
- mulscc %o4, %o1, %o4 ! 17
- mulscc %o4, %o1, %o4 ! 18
- mulscc %o4, %o1, %o4 ! 19
- mulscc %o4, %o1, %o4 ! 20
- mulscc %o4, %o1, %o4 ! 21
- mulscc %o4, %o1, %o4 ! 22
- mulscc %o4, %o1, %o4 ! 23
- mulscc %o4, %o1, %o4 ! 24
- mulscc %o4, %o1, %o4 ! 25
- mulscc %o4, %o1, %o4 ! 26
- mulscc %o4, %o1, %o4 ! 27
- mulscc %o4, %o1, %o4 ! 28
- mulscc %o4, %o1, %o4 ! 29
- mulscc %o4, %o1, %o4 ! 30
- mulscc %o4, %o1, %o4 ! 31
- mulscc %o4, %o1, %o4 ! 32
- mulscc %o4, %g0, %o4 ! final shift
-
- /*
- * Normally, with the shift-and-add approach, if both numbers are
- * positive you get the correct result. With 32-bit two's-complement
- * numbers, -x is represented as
- *
- * x 32
- * ( 2 - ------ ) mod 2 * 2
- * 32
- * 2
- *
- * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s,
- * we can treat this as if the radix point were just to the left
- * of the sign bit (multiply by 2^32), and get
- *
- * -x = (2 - x) mod 2
- *
- * Then, ignoring the `mod 2's for convenience:
- *
- * x * y = xy
- * -x * y = 2y - xy
- * x * -y = 2x - xy
- * -x * -y = 4 - 2x - 2y + xy
- *
- * For signed multiplies, we subtract (x << 32) from the partial
- * product to fix this problem for negative multipliers (see mul.s).
- * Because of the way the shift into the partial product is calculated
- * (N xor V), this term is automatically removed for the multiplicand,
- * so we don't have to adjust.
- *
- * But for unsigned multiplies, the high order bit wasn't a sign bit,
- * and the correction is wrong. So for unsigned multiplies where the
- * high order bit is one, we end up with xy - (y << 32). To fix it
- * we add y << 32.
- */
-#if 0
- tst %o1
- bl,a 1f ! if %o1 < 0 (high order bit = 1),
- add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
-1: rd %y, %o0 ! get lower half of product
- retl
- addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
-#else
- /* Faster code from tege@sics.se. */
- sra %o1, 31, %o2 ! make mask from sign bit
- and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
- rd %y, %o0 ! get lower half of product
- retl
- addcc %o4, %o2, %o1 ! add compensation and put upper half in place
-#endif
-
-LOC(mul_shortway):
- /*
- * Short multiply. 12 steps, followed by a final shift step.
- * The resulting bits are off by 12 and (32-12) = 20 bit positions,
- * but there is no problem with %o0 being negative (unlike above),
- * and overflow is impossible (the answer is at most 24 bits long).
- */
- mulscc %o4, %o1, %o4 ! 1
- mulscc %o4, %o1, %o4 ! 2
- mulscc %o4, %o1, %o4 ! 3
- mulscc %o4, %o1, %o4 ! 4
- mulscc %o4, %o1, %o4 ! 5
- mulscc %o4, %o1, %o4 ! 6
- mulscc %o4, %o1, %o4 ! 7
- mulscc %o4, %o1, %o4 ! 8
- mulscc %o4, %o1, %o4 ! 9
- mulscc %o4, %o1, %o4 ! 10
- mulscc %o4, %o1, %o4 ! 11
- mulscc %o4, %o1, %o4 ! 12
- mulscc %o4, %g0, %o4 ! final shift
-
- /*
- * %o4 has 20 of the bits that should be in the result; %y has
- * the bottom 12 (as %y's top 12). That is:
- *
- * %o4 %y
- * +----------------+----------------+
- * | -12- | -20- | -12- | -20- |
- * +------(---------+------)---------+
- * -----result-----
- *
- * The 12 bits of %o4 left of the `result' area are all zero;
- * in fact, all top 20 bits of %o4 are zero.
- */
-
- rd %y, %o5
- sll %o4, 12, %o0 ! shift middle bits left 12
- srl %o5, 20, %o5 ! shift low bits right 20
- or %o5, %o0, %o0
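+	! srl by 0 zero-extends each 32-bit argument, so mulx computes the
+	! exact 64-bit product; the low word is returned in %o0 and the
+	! high word in %o1.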
+ srl %o0, 0, %o0
+ srl %o1, 0, %o1
+ mulx %o0, %o1, %o0
retl
- addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
+ srlx %o0, 32, %o1
END(.umul)
@@ -1,346 +1,19 @@
- /* This file is generated from divrem.m4; DO NOT EDIT! */
/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
+ * Sparc v9 has divide.
+ * As the 64-bit udivx takes 68 cycles and udiv only 37,
+ * we use udiv even though it is deprecated.
*/
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- * .urem name of function to generate
- * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
- * false false=true => signed; false=false => unsigned
- *
- * Algorithm parameters:
- * N how many bits per iteration we try to get (4)
- * WORDSIZE total number of bits (32)
- *
- * Derived constants:
- * TOPBITS number of bits in the top decade of a number
- *
- * Important variables:
- * Q the partial quotient under development (initially 0)
- * R the remainder so far, initially the dividend
- * ITER number of main division loop iterations required;
- * equal to ceil(log2(quotient) / N). Note that this
- * is the log base (2^N) of the quotient.
- * V the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- * Current estimate for non-large dividend is
- * ceil(log2(quotient) / N) * (10 + 7N/2) + C
- * A large dividend is one greater than 2^(31-TOPBITS) and takes a
- * different path, as the upper bits of the quotient must be developed
- * one bit at a time.
- */
-
-
-
#include <sysdep.h>
-#include <sys/trap.h>
+ .text
+ .align 32
ENTRY(.urem)
- ! Ready to divide. Compute size of quotient; scale comparand.
- orcc %o1, %g0, %o5
- bne 1f
- mov %o0, %o3
-
- ! Divide by zero trap. If it returns, return 0 (about as
- ! wrong as possible, but that is what SunOS does...).
- ta ST_DIV0
- retl
- clr %o0
-
-1:
- cmp %o3, %o5 ! if %o1 exceeds %o0, done
- blu LOC(got_result) ! (and algorithm fails otherwise)
- clr %o2
- sethi %hi(1 << (32 - 4 - 1)), %g1
- cmp %o3, %g1
- blu LOC(not_really_big)
- clr %o4
-
- ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
- ! as our usual N-at-a-shot divide step will cause overflow and havoc.
- ! The number of bits in the result here is N*ITER+SC, where SC <= N.
- ! Compute ITER in an unorthodox manner: know we need to shift V into
- ! the top decade: so do not even bother to compare to R.
- 1:
- cmp %o5, %g1
- bgeu 3f
- mov 1, %g2
- sll %o5, 4, %o5
- b 1b
- add %o4, 1, %o4
-
- ! Now compute %g2.
- 2: addcc %o5, %o5, %o5
- bcc LOC(not_too_big)
- add %g2, 1, %g2
-
- ! We get here if the %o1 overflowed while shifting.
- ! This means that %o3 has the high-order bit set.
- ! Restore %o5 and subtract from %o3.
- sll %g1, 4, %g1 ! high order bit
- srl %o5, 1, %o5 ! rest of %o5
- add %o5, %g1, %o5
- b LOC(do_single_div)
- sub %g2, 1, %g2
-
- LOC(not_too_big):
- 3: cmp %o5, %o3
- blu 2b
- nop
- be LOC(do_single_div)
- nop
- /* NB: these are commented out in the V8-Sparc manual as well */
- /* (I do not understand this) */
- ! %o5 > %o3: went too far: back up 1 step
- ! srl %o5, 1, %o5
- ! dec %g2
- ! do single-bit divide steps
- !
- ! We have to be careful here. We know that %o3 >= %o5, so we can do the
- ! first divide step without thinking. BUT, the others are conditional,
- ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
- ! order bit set in the first step, just falling into the regular
- ! division loop will mess up the first time around.
- ! So we unroll slightly...
- LOC(do_single_div):
- subcc %g2, 1, %g2
- bl LOC(end_regular_divide)
- nop
- sub %o3, %o5, %o3
- mov 1, %o2
- b LOC(end_single_divloop)
- nop
- LOC(single_divloop):
- sll %o2, 1, %o2
- bl 1f
- srl %o5, 1, %o5
- ! %o3 >= 0
- sub %o3, %o5, %o3
- b 2f
- add %o2, 1, %o2
- 1: ! %o3 < 0
- add %o3, %o5, %o3
- sub %o2, 1, %o2
- 2:
- LOC(end_single_divloop):
- subcc %g2, 1, %g2
- bge LOC(single_divloop)
- tst %o3
- b,a LOC(end_regular_divide)
-
-LOC(not_really_big):
-1:
- sll %o5, 4, %o5
- cmp %o5, %o3
- bleu 1b
- addcc %o4, 1, %o4
- be LOC(got_result)
- sub %o4, 1, %o4
-
- tst %o3 ! set up for initial iteration
-LOC(divloop):
- sll %o2, 4, %o2
- ! depth 1, accumulated bits 0
- bl LOC(1.16)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 2, accumulated bits 1
- bl LOC(2.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits 3
- bl LOC(3.19)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 7
- bl LOC(4.23)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2+1), %o2
-
-LOC(4.23):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2-1), %o2
-
-
-LOC(3.19):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 5
- bl LOC(4.21)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2+1), %o2
-
-LOC(4.21):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2-1), %o2
-
-
-
-LOC(2.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits 1
- bl LOC(3.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 3
- bl LOC(4.19)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2+1), %o2
-
-LOC(4.19):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2-1), %o2
-
-
-LOC(3.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 1
- bl LOC(4.17)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2+1), %o2
-
-LOC(4.17):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2-1), %o2
-
-
-
-
-LOC(1.16):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 2, accumulated bits -1
- bl LOC(2.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits -1
- bl LOC(3.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -1
- bl LOC(4.15)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2+1), %o2
-
-LOC(4.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2-1), %o2
-
-
-LOC(3.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -3
- bl LOC(4.13)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2+1), %o2
-
-LOC(4.13):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2-1), %o2
-
-
-
-LOC(2.15):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits -3
- bl LOC(3.13)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -5
- bl LOC(4.11)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2+1), %o2
-
-LOC(4.11):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2-1), %o2
-
-
-LOC(3.13):
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -7
- bl LOC(4.9)
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2+1), %o2
-
-LOC(4.9):
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2-1), %o2
-
-
-
-
-9:
-LOC(end_regular_divide):
- subcc %o4, 1, %o4
- bge LOC(divloop)
- tst %o3
- bl,a LOC(got_result)
- ! non-restoring fixup here (one instruction only!)
- add %o3, %o1, %o3
-
-
-LOC(got_result):
-
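+	! Remainder is dividend - (dividend / divisor) * divisor, computed
+	! with the hardware udiv (which divides {%y, %o0} by %o1) and umul.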
+ wr %g0, 0, %y
+ udiv %o0, %o1, %o2
+ umul %o2, %o1, %o2
retl
- mov %o3, %o0
+ sub %o0, %o2, %o0
END(.urem)
@@ -19,12 +19,6 @@
#include_next <kernel-features.h>
-/* 32-bit SPARC kernels do not support
- futex_atomic_cmpxchg_inatomic. */
-#if !defined __arch64__ && !defined __sparc_v9__
-# undef __ASSUME_SET_ROBUST_LIST
-#endif
-
/* These syscalls were added for 32-bit in 4.4 (but present for 64-bit
in all supported kernel versions); the architecture-independent
kernel-features.h assumes some of them to be present by default.
deleted file mode 100644
@@ -1,3 +0,0 @@
-# We must list this here to move it ahead of the ldbl-opt code.
-sparc/sparc32/sparcv9/fpu
-sparc/sparc32/fpu
deleted file mode 100644
@@ -1,4 +0,0 @@
-# We must list this here to move it ahead of the ldbl-opt code.
-sparc/sparc32/sparcv9/fpu/multiarch
-sparc/sparc32/sparcv9/fpu
-sparc/sparc32/fpu