diff mbox

Partial ILP32 support for aarch64

Message ID 1479515990.908.96.camel@caviumnetworks.com
State Superseded
Headers show

Commit Message

Steve Ellcey Nov. 19, 2016, 12:39 a.m. UTC
This is the first of the aarch64 ILP32 patches.  I don't know if this
can be approved and checked in before the rest of the ILP32 patches but
I was hoping it could be so that we can get everything checked in before
the glibc code freeze.  

This part of the patch does not include any of the structural changes
to support ILP32 builds but it contains the majority of the assembly
language changes and macro changes.  This patch has been tested to make
sure it does not break the aarch64 LP64 build or cause any regressions.
Almost all LP64 objects are identical before and after this patch;
the couple of exceptions are due to changes in the add_system_dir
macro in sysdeps/unix/sysv/linux/aarch64/dl-cache.h.

OK to checkin?

Steve Ellcey
sellcey@caviumnetworks.com


2016-11-18  Andrew Pinski  <andrew.pinski@caviumnetworks.com>
	    Yury Norov  <ynorov@caviumnetworks.com>
	    Steve Ellcey  <sellcey@caviumnetworks.com>

	* sysdeps/aarch64/crti.S: Add include of sysdep.h.
	(call_weak_fn): Use PTR_REG to get correct reg name in ILP32.
	* sysdeps/aarch64/dl-irel.h: Add include of sysdep.h.
	(elf_irela): Use AARCH64_R macro to get correct relocation in ILP32.
	* sysdeps/aarch64/dl-machine.h: Add include of sysdep.h.
	(elf_machine_load_address, RTLD_START, RTLD_START_1,
	elf_machine_type_class, ELF_MACHINE_JMP_SLOT, elf_machine_rela,
	elf_machine_lazy_rel): Add ifdef's for ILP32 support.
	* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return,
	_dl_tlsdesc_return_lazy, _dl_tlsdesc_dynamic,
	_dl_tlsdesc_resolve_hold): Extend pointers in ILP32, use PTR_REG
	to get correct reg name for ILP32.
	* sysdeps/aarch64/dl-trampoline.S (ip0l): New Macro.
	(RELA_SIZE): New Macro.
	(_dl_runtime_resolve, _dl_runtime_profile): Use new macros and PTR_REG
	to support ILP32.
	* sysdeps/aarch64/jmpbuf-unwind.h (_JMPBUF_CFA_UNWINDS_ADJ): Add
	cast for ILP32 mode.
	* sysdeps/aarch64/memcmp.S (memcmp): Extend arg pointers for ILP32 mode.
	* sysdeps/aarch64/memcpy.S (memmove, memcpy): Ditto.
	* sysdeps/aarch64/memset.S (__memset): Ditto.
	* sysdeps/aarch64/strchr.S (strchr): Ditto.
	* sysdeps/aarch64/strchrnul.S (__strchrnul): Ditto.
	* sysdeps/aarch64/strcmp.S (strcmp): Ditto.
	* sysdeps/aarch64/strcpy.S (strcpy): Ditto.
	* sysdeps/aarch64/strlen.S (__strlen): Ditto.
	* sysdeps/aarch64/strncmp.S (strncmp): Ditto.
	* sysdeps/aarch64/strnlen.S (__strnlen): Ditto.
	* sysdeps/aarch64/strrchr.S (strrchr): Ditto.
	* sysdeps/unix/sysv/linux/aarch64/clone.S: Ditto.
	* sysdeps/unix/sysv/linux/aarch64/setcontext.S (__setcontext): Ditto.
	* sysdeps/unix/sysv/linux/aarch64/swapcontext.S (__swapcontext): Ditto.
	* sysdeps/aarch64/__longjmp.S (__longjmp): Extend pointers in ILP32,
	change PTR_MANGLE call to use register numbers instead of names.
	* sysdeps/unix/sysv/linux/aarch64/getcontext.S (__getcontext): Ditto.
	* sysdeps/aarch64/setjmp.S (__sigsetjmp): Extend arg pointers for
	ILP32 mode, change PTR_MANGLE calls to use register numbers.
	* sysdeps/aarch64/start.S (_start): Ditto.
	* sysdeps/aarch64/nptl/bits/pthreadtypes.h
	(__PTHREAD_RWLOCK_INT_FLAGS_SHARED): New define.
	* sysdeps/aarch64/nptl/bits/semaphore.h (__SIZEOF_SEM_T): Change define.
	* sysdeps/aarch64/sysdep.h (AARCH64_R, PTR_REG, PTR_LOG_SIZE, DELOUSE,
	PTR_SIZE): New Macros.
	(LDST_PCREL, LDST_GLOBAL): Update to use PTR_REG.
	* sysdeps/unix/sysv/linux/aarch64/bits/fcntl.h (O_LARGEFILE):
	Set when in ILP32 mode.
	(F_GETLK64, F_SETLK64, F_SETLKW64): Only set in LP64 mode.
	* sysdeps/unix/sysv/linux/aarch64/dl-cache.h (_DL_CACHE_DEFAULT_ID):
	Set elf flags for ILP32.
	(add_system_dir): Set ILP32 library directories.
	* sysdeps/unix/sysv/linux/aarch64/init-first.c
	(_libc_vdso_platform_setup): Set minimum kernel version for ILP32.
	* sysdeps/unix/sysv/linux/aarch64/ldconfig.h
	(SYSDEP_KNOWN_INTERPRETER_NAMES): Add ILP32 names.
	* sysdeps/unix/sysv/linux/aarch64/sigcontextinfo.h (GET_PC, SET_PC):
	New Macros.
	* sysdeps/unix/sysv/linux/aarch64/sysdep.h: Handle ILP32 pointers.

Comments

Joseph Myers Nov. 21, 2016, 4:51 p.m. UTC | #1
On Fri, 18 Nov 2016, Steve Ellcey wrote:

> diff --git a/sysdeps/aarch64/nptl/bits/semaphore.h b/sysdeps/aarch64/nptl/bits/semaphore.h

> index 3cc5b37..1d1389c 100644

> --- a/sysdeps/aarch64/nptl/bits/semaphore.h

> +++ b/sysdeps/aarch64/nptl/bits/semaphore.h

> @@ -21,7 +21,11 @@

>  #endif

>  

>  

> +#ifdef __ILP32__

> +#define __SIZEOF_SEM_T	16

> +#else

>  #define __SIZEOF_SEM_T	32

> +#endif


Missing preprocessor indentation inside #ifdef, should be "# define" in 
both branches of the #ifdef.

> diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-cache.h b/sysdeps/unix/sysv/linux/aarch64/dl-cache.h

> index 9c7b271..044fdba 100644

> --- a/sysdeps/unix/sysv/linux/aarch64/dl-cache.h

> +++ b/sysdeps/unix/sysv/linux/aarch64/dl-cache.h

> @@ -18,7 +18,11 @@

>  

>  #include <ldconfig.h>

>  

> +#ifdef __LP64__

>  #define _DL_CACHE_DEFAULT_ID    (FLAG_AARCH64_LIB64 | FLAG_ELF_LIBC6)

> +#else

> +#define _DL_CACHE_DEFAULT_ID    (FLAG_AARCH64_LIB32 | FLAG_ELF_LIBC6)

> +#endif


Likewise.

-- 
Joseph S. Myers
joseph@codesourcery.com
diff mbox

Patch

diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index 65116be..4d411fe 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -46,6 +46,8 @@  ENTRY (__longjmp)
 	cfi_offset(d14, JB_D14<<3)
 	cfi_offset(d15, JB_D15<<3)
 
+	DELOUSE (0)
+
 	ldp	x19, x20, [x0, #JB_X19<<3]
 	ldp	x21, x22, [x0, #JB_X21<<3]
 	ldp	x23, x24, [x0, #JB_X23<<3]
@@ -53,7 +55,7 @@  ENTRY (__longjmp)
 	ldp	x27, x28, [x0, #JB_X27<<3]
 #ifdef PTR_DEMANGLE
 	ldp	x29,  x4, [x0, #JB_X29<<3]
-	PTR_DEMANGLE (x30, x4, x3, x2)
+	PTR_DEMANGLE (30, 4, 3, 2)
 #else
 	ldp	x29, x30, [x0, #JB_X29<<3]
 #endif
@@ -98,7 +100,7 @@  ENTRY (__longjmp)
 	cfi_same_value(d15)
 #ifdef PTR_DEMANGLE
 	ldr	x4, [x0, #JB_SP<<3]
-	PTR_DEMANGLE (x5, x4, x3, x2)
+	PTR_DEMANGLE (5, 4, 3, 2)
 #else
 	ldr	x5, [x0, #JB_SP<<3]
 #endif
diff --git a/sysdeps/aarch64/crti.S b/sysdeps/aarch64/crti.S
index 53ccb42..5c42fd5 100644
--- a/sysdeps/aarch64/crti.S
+++ b/sysdeps/aarch64/crti.S
@@ -39,6 +39,7 @@ 
    they can be called as functions.  The symbols _init and _fini are
    magic and cause the linker to emit DT_INIT and DT_FINI.  */
 
+#include <sysdep.h>
 #include <libc-symbols.h>
 
 #ifndef PREINIT_FUNCTION
@@ -60,7 +61,7 @@ 
 	.type	call_weak_fn, %function
 call_weak_fn:
 	adrp	x0, :got:PREINIT_FUNCTION
-	ldr	x0, [x0, #:got_lo12:PREINIT_FUNCTION]
+	ldr	PTR_REG (0), [x0, #:got_lo12:PREINIT_FUNCTION]
 	cbz	x0, 1f
 	b	PREINIT_FUNCTION
 1:
diff --git a/sysdeps/aarch64/dl-irel.h b/sysdeps/aarch64/dl-irel.h
index 63a8e50..2effca4 100644
--- a/sysdeps/aarch64/dl-irel.h
+++ b/sysdeps/aarch64/dl-irel.h
@@ -23,6 +23,7 @@ 
 #include <stdio.h>
 #include <unistd.h>
 #include <ldsodefs.h>
+#include <sysdep.h>
 
 #define ELF_MACHINE_IRELA	1
 
@@ -40,7 +41,7 @@  elf_irela (const ElfW(Rela) *reloc)
   ElfW(Addr) *const reloc_addr = (void *) reloc->r_offset;
   const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info);
 
-  if (__glibc_likely (r_type == R_AARCH64_IRELATIVE))
+  if (__glibc_likely (r_type == AARCH64_R (IRELATIVE)))
     {
       ElfW(Addr) value = elf_ifunc_invoke (reloc->r_addend);
       *reloc_addr = value;
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index 282805e..b5ea7a8 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -21,6 +21,7 @@ 
 
 #define ELF_MACHINE_NAME "aarch64"
 
+#include <sysdep.h>
 #include <tls.h>
 #include <dl-tlsdesc.h>
 #include <dl-irel.h>
@@ -53,19 +54,33 @@  elf_machine_load_address (void)
      by constructing a non GOT reference to the symbol, the dynamic
      address of the symbol we compute using adrp/add to compute the
      symbol's address relative to the PC.
-     This depends on 32bit relocations being resolved at link time
-     and that the static address fits in the 32bits.  */
+     This depends on 32/16bit relocations being resolved at link time
+     and that the static address fits in the 32/16 bits.  */
 
   ElfW(Addr) static_addr;
   ElfW(Addr) dynamic_addr;
 
   asm ("					\n"
 "	adrp	%1, _dl_start;			\n"
+#ifdef __LP64__
 "	add	%1, %1, #:lo12:_dl_start	\n"
+#else
+"	add	%w1, %w1, #:lo12:_dl_start	\n"
+#endif
 "	ldr	%w0, 1f				\n"
 "	b	2f				\n"
 "1:						\n"
+#ifdef __LP64__
 "	.word	_dl_start			\n"
+#else
+# ifdef __AARCH64EB__
+"	.short  0                               \n"
+# endif
+"	.short  _dl_start                       \n"
+# ifndef __AARCH64EB__
+"	.short  0                               \n"
+# endif
+#endif
 "2:						\n"
     : "=r" (static_addr),  "=r" (dynamic_addr));
   return dynamic_addr - static_addr;
@@ -125,80 +140,86 @@  elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 /* Initial entry point for the dynamic linker. The C function
    _dl_start is the real entry point, its return value is the user
    program's entry point */
+#ifdef __LP64__
+# define RTLD_START RTLD_START_1 ("x", "3", "sp")
+#else
+# define RTLD_START RTLD_START_1 ("w", "2", "wsp")
+#endif
 
-#define RTLD_START asm ("\
-.text								\n\
-.globl _start							\n\
-.type _start, %function						\n\
-.globl _dl_start_user						\n\
-.type _dl_start_user, %function					\n\
-_start:								\n\
-	mov	x0,	sp					\n\
-	bl	_dl_start					\n\
-	// returns user entry point in x0			\n\
-	mov	x21, x0						\n\
-_dl_start_user:							\n\
-	// get the original arg count				\n\
-	ldr	x1, [sp]					\n\
-	// get the argv address					\n\
-	add	x2, sp, #8					\n\
-	// get _dl_skip_args to see if we were			\n\
-	// invoked as an executable				\n\
-	adrp	x4, _dl_skip_args				\n\
-        ldr	w4, [x4, #:lo12:_dl_skip_args]			\n\
-	// do we need to adjust argc/argv			\n\
-        cmp	w4, 0						\n\
-	beq	.L_done_stack_adjust				\n\
-	// subtract _dl_skip_args from original arg count	\n\
-	sub	x1, x1, x4					\n\
-	// store adjusted argc back to stack			\n\
-	str	x1, [sp]					\n\
-	// find the first unskipped argument			\n\
-	mov	x3, x2						\n\
-	add	x4, x2, x4, lsl #3				\n\
-	// shuffle argv down					\n\
-1:	ldr	x5, [x4], #8					\n\
-	str	x5, [x3], #8					\n\
-	cmp	x5, #0						\n\
-	bne	1b						\n\
-	// shuffle envp down					\n\
-1:	ldr	x5, [x4], #8					\n\
-	str	x5, [x3], #8					\n\
-	cmp	x5, #0						\n\
-	bne	1b						\n\
-	// shuffle auxv down					\n\
-1:	ldp	x0, x5, [x4, #16]!				\n\
-	stp	x0, x5, [x3], #16				\n\
-	cmp	x0, #0						\n\
-	bne	1b						\n\
-	// Update _dl_argv					\n\
-	adrp	x3, _dl_argv					\n\
-	str	x2, [x3, #:lo12:_dl_argv]			\n\
-.L_done_stack_adjust:						\n\
-	// compute envp						\n\
-	add	x3, x2, x1, lsl #3				\n\
-	add	x3, x3, #8					\n\
-	adrp	x16, _rtld_local				\n\
-        add	x16, x16, #:lo12:_rtld_local			\n\
-        ldr	x0, [x16]					\n\
-	bl	_dl_init					\n\
-	// load the finalizer function				\n\
-	adrp	x0, _dl_fini					\n\
-	add	x0, x0, #:lo12:_dl_fini				\n\
-	// jump to the user_s entry point			\n\
-	br      x21						\n\
+
+#define RTLD_START_1(PTR, PTR_SIZE_LOG, PTR_SP) asm ("\
+.text									\n\
+.globl _start								\n\
+.type _start, %function							\n\
+.globl _dl_start_user							\n\
+.type _dl_start_user, %function						\n\
+_start:									\n\
+	mov	" PTR "0, " PTR_SP "					\n\
+	bl	_dl_start						\n\
+	// returns user entry point in x0				\n\
+	mov	x21, x0							\n\
+_dl_start_user:								\n\
+	// get the original arg count					\n\
+	ldr	" PTR "1, [sp]						\n\
+	// get the argv address						\n\
+	add	" PTR "2, " PTR_SP ", #(1<<"  PTR_SIZE_LOG ")		\n\
+	// get _dl_skip_args to see if we were				\n\
+	// invoked as an executable					\n\
+	adrp	x4, _dl_skip_args					\n\
+        ldr	w4, [x4, #:lo12:_dl_skip_args]				\n\
+	// do we need to adjust argc/argv				\n\
+        cmp	w4, 0							\n\
+	beq	.L_done_stack_adjust					\n\
+	// subtract _dl_skip_args from original arg count		\n\
+	sub	" PTR "1, " PTR "1, " PTR "4				\n\
+	// store adjusted argc back to stack				\n\
+	str	" PTR "1, [sp]						\n\
+	// find the first unskipped argument				\n\
+	mov	" PTR "3, " PTR "2					\n\
+	add	" PTR "4, " PTR "2, " PTR "4, lsl #" PTR_SIZE_LOG "	\n\
+	// shuffle argv down						\n\
+1:	ldr	" PTR "5, [x4], #(1<<"  PTR_SIZE_LOG ")			\n\
+	str	" PTR "5, [x3], #(1<<"  PTR_SIZE_LOG ")			\n\
+	cmp	" PTR "5, #0						\n\
+	bne	1b							\n\
+	// shuffle envp down						\n\
+1:	ldr	" PTR "5, [x4], #(1<<"  PTR_SIZE_LOG ")			\n\
+	str	" PTR "5, [x3], #(1<<"  PTR_SIZE_LOG ")			\n\
+	cmp	" PTR "5, #0						\n\
+	bne	1b							\n\
+	// shuffle auxv down						\n\
+1:	ldp	" PTR "0, " PTR "5, [x4, #(2<<"  PTR_SIZE_LOG ")]!	\n\
+	stp	" PTR "0, " PTR "5, [x3], #(2<<"  PTR_SIZE_LOG ")	\n\
+	cmp	" PTR "0, #0						\n\
+	bne	1b							\n\
+	// Update _dl_argv						\n\
+	adrp	x3, _dl_argv						\n\
+	str	" PTR "2, [x3, #:lo12:_dl_argv]				\n\
+.L_done_stack_adjust:							\n\
+	// compute envp							\n\
+	add	" PTR "3, " PTR "2, " PTR "1, lsl #" PTR_SIZE_LOG "	\n\
+	add	" PTR "3, " PTR "3, #(1<<"  PTR_SIZE_LOG ")		\n\
+	adrp	x16, _rtld_local					\n\
+        add	" PTR "16, " PTR "16, #:lo12:_rtld_local		\n\
+        ldr	" PTR "0, [x16]						\n\
+	bl	_dl_init						\n\
+	// load the finalizer function					\n\
+	adrp	x0, _dl_fini						\n\
+	add	" PTR "0, " PTR "0, #:lo12:_dl_fini			\n\
+	// jump to the user_s entry point				\n\
+	br      x21							\n\
 ");
 
 #define elf_machine_type_class(type)					\
-  ((((type) == R_AARCH64_JUMP_SLOT ||					\
-     (type) == R_AARCH64_TLS_DTPMOD ||					\
-     (type) == R_AARCH64_TLS_DTPREL ||					\
-     (type) == R_AARCH64_TLS_TPREL ||					\
-     (type) == R_AARCH64_TLSDESC) * ELF_RTYPE_CLASS_PLT)		\
-   | (((type) == R_AARCH64_COPY) * ELF_RTYPE_CLASS_COPY)		\
-   | (((type) == R_AARCH64_GLOB_DAT) * ELF_RTYPE_CLASS_EXTERN_PROTECTED_DATA))
+  ((((type) == AARCH64_R (JUMP_SLOT)					\
+  || (type) == AARCH64_R (TLS_DTPMOD)					\
+  || (type) == AARCH64_R (TLS_DTPREL)					\
+  || (type) == AARCH64_R (TLS_TPREL)					\
+  || (type) == AARCH64_R (TLSDESC)) * ELF_RTYPE_CLASS_PLT)		\
+   | (((type) == AARCH64_R (COPY)) * ELF_RTYPE_CLASS_COPY)		\
+   | (((type) == AARCH64_R (GLOB_DAT)) * ELF_RTYPE_CLASS_EXTERN_PROTECTED_DATA))
 
-#define ELF_MACHINE_JMP_SLOT	R_AARCH64_JUMP_SLOT
+#define ELF_MACHINE_JMP_SLOT	AARCH64_R (JUMP_SLOT)
 
 /* AArch64 uses RELA not REL */
 #define ELF_MACHINE_NO_REL 1
@@ -237,9 +258,9 @@  elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 		  void *const reloc_addr_arg, int skip_ifunc)
 {
   ElfW(Addr) *const reloc_addr = reloc_addr_arg;
-  const unsigned int r_type = ELF64_R_TYPE (reloc->r_info);
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
 
-  if (__builtin_expect (r_type == R_AARCH64_RELATIVE, 0))
+  if (__builtin_expect (r_type == AARCH64_R (RELATIVE), 0))
       *reloc_addr = map->l_addr + reloc->r_addend;
   else if (__builtin_expect (r_type == R_AARCH64_NONE, 0))
       return;
@@ -257,7 +278,7 @@  elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 
       switch (r_type)
 	{
-	case R_AARCH64_COPY:
+	case AARCH64_R (COPY):
 	  if (sym == NULL)
 	      break;
 
@@ -275,15 +296,17 @@  elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 		  MIN (sym->st_size, refsym->st_size));
 	  break;
 
-	case R_AARCH64_RELATIVE:
-	case R_AARCH64_GLOB_DAT:
-	case R_AARCH64_JUMP_SLOT:
-	case R_AARCH64_ABS32:
-	case R_AARCH64_ABS64:
+	case AARCH64_R (RELATIVE):
+	case AARCH64_R (GLOB_DAT):
+	case AARCH64_R (JUMP_SLOT):
+	case AARCH64_R (ABS32):
+#ifdef __LP64__
+	case AARCH64_R (ABS64):
+#endif
 	  *reloc_addr = value + reloc->r_addend;
 	  break;
 
-	case R_AARCH64_TLSDESC:
+	case AARCH64_R (TLSDESC):
 	  {
 	    struct tlsdesc volatile *td =
 	      (struct tlsdesc volatile *)reloc_addr;
@@ -318,7 +341,7 @@  elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 	    break;
 	  }
 
-	case R_AARCH64_TLS_DTPMOD:
+	case AARCH64_R (TLS_DTPMOD):
 #ifdef RTLD_BOOTSTRAP
 	  *reloc_addr = 1;
 #else
@@ -329,12 +352,12 @@  elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 #endif
 	  break;
 
-	case R_AARCH64_TLS_DTPREL:
+	case AARCH64_R (TLS_DTPREL):
 	  if (sym)
 	    *reloc_addr = sym->st_value + reloc->r_addend;
 	  break;
 
-	case R_AARCH64_TLS_TPREL:
+	case AARCH64_R (TLS_TPREL):
 	  if (sym)
 	    {
 	      CHECK_STATIC_TLS (map, sym_map);
@@ -343,7 +366,7 @@  elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 	    }
 	  break;
 
-	case R_AARCH64_IRELATIVE:
+	case AARCH64_R (IRELATIVE):
 	  value = map->l_addr + reloc->r_addend;
 	  value = elf_ifunc_invoke (value);
 	  *reloc_addr = value;
@@ -374,16 +397,16 @@  elf_machine_lazy_rel (struct link_map *map,
 		      int skip_ifunc)
 {
   ElfW(Addr) *const reloc_addr = (void *) (l_addr + reloc->r_offset);
-  const unsigned int r_type = ELF64_R_TYPE (reloc->r_info);
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
   /* Check for unexpected PLT reloc type.  */
-  if (__builtin_expect (r_type == R_AARCH64_JUMP_SLOT, 1))
+  if (__builtin_expect (r_type == AARCH64_R (JUMP_SLOT), 1))
     {
       if (__builtin_expect (map->l_mach.plt, 0) == 0)
 	*reloc_addr += l_addr;
       else
 	*reloc_addr = map->l_mach.plt;
     }
-  else if (__builtin_expect (r_type == R_AARCH64_TLSDESC, 1))
+  else if (__builtin_expect (r_type == AARCH64_R (TLSDESC), 1))
     {
       struct tlsdesc volatile *td =
 	(struct tlsdesc volatile *)reloc_addr;
@@ -392,7 +415,7 @@  elf_machine_lazy_rel (struct link_map *map,
       td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
 			  + map->l_addr);
     }
-  else if (__glibc_unlikely (r_type == R_AARCH64_IRELATIVE))
+  else if (__glibc_unlikely (r_type == AARCH64_R (IRELATIVE)))
     {
       ElfW(Addr) value = map->l_addr + reloc->r_addend;
       if (__glibc_likely (!skip_ifunc))
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 05be370..42fa943 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -74,7 +74,8 @@ 
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_return:
-	ldr	x0, [x0, #8]
+	DELOUSE (0)
+	ldr	PTR_REG (0), [x0, #PTR_SIZE]
 	RET
 	cfi_endproc
 	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
@@ -95,9 +96,10 @@  _dl_tlsdesc_return_lazy:
 	   so it reads the same value (this function is the final value of
 	   td->entry) and thus it synchronizes with the release store to
 	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
-	   from [x0,#8] here happens after the initialization of td->arg.  */
-	ldar	xzr, [x0]
-	ldr	x0, [x0, #8]
+	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
+	DELOUSE (0)
+	ldar	PTR_REG (zr), [x0]
+	ldr	PTR_REG (0), [x0, #PTR_SIZE]
 	RET
 	cfi_endproc
 	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
@@ -125,10 +127,11 @@  _dl_tlsdesc_undefweak:
 	   td->entry) and thus it synchronizes with the release store to
 	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
 	   from [x0,#8] here happens after the initialization of td->arg.  */
-	ldar	xzr, [x0]
-	ldr	x0, [x0, #8]
+	DELOUSE (0)
+	ldar	PTR_REG (zr), [x0]
+	ldr	PTR_REG (0), [x0, #PTR_SIZE]
 	mrs	x1, tpidr_el0
-	sub	x0, x0, x1
+	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
 	ldr	x1, [sp], #16
 	cfi_adjust_cfa_offset (-16)
 	RET
@@ -174,6 +177,7 @@  _dl_tlsdesc_dynamic:
 	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
 	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
 	mov	x29, sp
+	DELOUSE (0)
 
 	/* Save just enough registers to support fast path, if we fall
 	   into slow path we will save additional registers.  */
@@ -187,22 +191,22 @@  _dl_tlsdesc_dynamic:
 	   so it reads the same value (this function is the final value of
 	   td->entry) and thus it synchronizes with the release store to
 	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
-	   from [x0,#8] here happens after the initialization of td->arg.  */
-	ldar	xzr, [x0]
-	ldr	x1, [x0,#8]
-	ldr	x0, [x4]
-	ldr	x3, [x1,#16]
-	ldr	x2, [x0]
-	cmp	x3, x2
+	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg.  */
+	ldar	PTR_REG (zr), [x0]
+	ldr	PTR_REG (1), [x0,#PTR_SIZE]
+	ldr	PTR_REG (0), [x4]
+	ldr	PTR_REG (3), [x1,#(PTR_SIZE * 2)]
+	ldr	PTR_REG (2), [x0]
+	cmp	PTR_REG (3), PTR_REG (2)
 	b.hi	2f
-	ldr	x2, [x1]
-	add	x0, x0, x2, lsl #4
-	ldr	x0, [x0]
+	ldr	PTR_REG (2), [x1]
+	add	PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
+	ldr	PTR_REG (0), [x0]
 	cmn	x0, #0x1
 	b.eq	2f
-	ldr	x1, [x1,#8]
-	add	x0, x0, x1
-	sub	x0, x0, x4
+	ldr	PTR_REG (1), [x1,#PTR_SIZE]
+	add	PTR_REG (0), PTR_REG (0), PTR_REG (1)
+	sub	PTR_REG (0), PTR_REG (0), PTR_REG (4)
 1:
 	ldp	 x1,  x2, [sp, #32+16*0]
 	ldp	 x3,  x4, [sp, #32+16*1]
@@ -233,7 +237,7 @@  _dl_tlsdesc_dynamic:
 	bl	__tls_get_addr
 
 	mrs	x1, tpidr_el0
-	sub	x0, x0, x1
+	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
 
 	RESTORE_Q_REGISTERS
 
@@ -279,13 +283,15 @@  _dl_tlsdesc_resolve_rela:
 
 	SAVE_Q_REGISTERS
 
-	ldr	x1, [x3, #8]
+	DELOUSE (3)
+	ldr	PTR_REG (1), [x3, #PTR_SIZE]
 	bl	_dl_tlsdesc_resolve_rela_fixup
 
 	RESTORE_Q_REGISTERS
 
 	ldr	x0, [sp, #32+16*8]
-	ldr	x1, [x0]
+	DELOUSE (0)
+	ldr	PTR_REG (1), [x0]
 	blr	x1
 
 	ldp	 x1,  x4, [sp, #32+16*0]
@@ -346,7 +352,8 @@  _dl_tlsdesc_resolve_hold:
 	RESTORE_Q_REGISTERS
 
 	ldr	x0, [sp, #32+16*9]
-	ldr	x1, [x0]
+	DELOUSE (0)
+	ldr	PTR_REG (1), [x0]
 	blr	x1
 
 	ldp	 x1,  x2, [sp, #32+16*0]
diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
index 947a515..63ef6f7 100644
--- a/sysdeps/aarch64/dl-trampoline.S
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -22,9 +22,13 @@ 
 #include "dl-link.h"
 
 #define ip0 x16
+#define ip0l PTR_REG (16)
 #define ip1 x17
 #define lr  x30
 
+/* RELA relocations are 3 pointers.  */
+#define RELA_SIZE (PTR_SIZE * 3)
+
 	.text
 	.globl _dl_runtime_resolve
 	.type _dl_runtime_resolve, #function
@@ -79,7 +83,7 @@  _dl_runtime_resolve:
 	cfi_rel_offset (q1, 80+7*16)
 
 	/* Get pointer to linker struct.  */
-	ldr	x0, [ip0, #-8]
+	ldr	PTR_REG (0), [ip0, #-PTR_SIZE]
 
 	/* Prepare to call _dl_fixup().  */
 	ldr	x1, [sp, 80+8*16]	/* Recover &PLTGOT[n] */
@@ -87,7 +91,7 @@  _dl_runtime_resolve:
 	sub     x1, x1, ip0
 	add     x1, x1, x1, lsl #1
 	lsl     x1, x1, #3
-	sub     x1, x1, #192
+	sub     x1, x1, #(RELA_SIZE<<3)
 	lsr     x1, x1, #3
 
 	/* Call fixup routine.  */
@@ -191,7 +195,7 @@  _dl_runtime_profile:
 	stp	x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_SP]
 
 	/* Get pointer to linker struct.  */
-	ldr	x0, [ip0, #-8]
+	ldr	PTR_REG (0), [ip0, #-PTR_SIZE]
 
 	/* Prepare to call _dl_profile_fixup().  */
 	ldr	x1, [x29, OFFSET_PLTGOTN]	/* Recover &PLTGOT[n] */
@@ -199,7 +203,7 @@  _dl_runtime_profile:
 	sub     x1, x1, ip0
 	add     x1, x1, x1, lsl #1
 	lsl     x1, x1, #3
-	sub     x1, x1, #192
+	sub     x1, x1, #(RELA_SIZE<<3)
 	lsr     x1, x1, #3
 
 	stp	x0, x1, [x29, #OFFSET_SAVED_CALL_X0]
@@ -210,8 +214,8 @@  _dl_runtime_profile:
 	add	x4, x29, #OFFSET_FS		/* address of framesize */
 	bl	_dl_profile_fixup
 
-	ldr	ip0, [x29, #OFFSET_FS]		/* framesize == 0 */
-	cmp	ip0, #0
+	ldr	ip0l, [x29, #OFFSET_FS]		/* framesize == 0 */
+	cmp	ip0l, #0
 	bge	1f
 	cfi_remember_state
 
@@ -243,7 +247,7 @@  _dl_runtime_profile:
 1:
 	/* The new frame size is in ip0.  */
 
-	sub	x1, x29, ip0
+	sub	PTR_REG (1), PTR_REG (29), ip0l
 	and	sp, x1, #0xfffffffffffffff0
 
 	str	x0, [x29, #OFFSET_T1]
diff --git a/sysdeps/aarch64/jmpbuf-unwind.h b/sysdeps/aarch64/jmpbuf-unwind.h
index 3e0a37d..11ace17 100644
--- a/sysdeps/aarch64/jmpbuf-unwind.h
+++ b/sysdeps/aarch64/jmpbuf-unwind.h
@@ -27,7 +27,7 @@ 
   ((void *) (address) < (void *) demangle (jmpbuf[JB_SP]))
 
 #define _JMPBUF_CFA_UNWINDS_ADJ(jmpbuf, context, adj) \
-  _JMPBUF_UNWINDS_ADJ (jmpbuf, (void *) _Unwind_GetCFA (context), adj)
+  _JMPBUF_UNWINDS_ADJ (jmpbuf, (void *) (size_t) _Unwind_GetCFA (context), adj)
 
 #define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \
   ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj))
diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S
index ae2d997..8b87e9b 100644
--- a/sysdeps/aarch64/memcmp.S
+++ b/sysdeps/aarch64/memcmp.S
@@ -47,6 +47,9 @@ 
 #define mask		x13
 
 ENTRY_ALIGN (memcmp, 6)
+	DELOUSE (0)
+	DELOUSE (1)
+	DELOUSE (2)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	tst	tmp1, #7
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
index de73f0f..b269316 100644
--- a/sysdeps/aarch64/memcpy.S
+++ b/sysdeps/aarch64/memcpy.S
@@ -61,6 +61,10 @@ 
 
 ENTRY_ALIGN (memmove, 6)
 
+	DELOUSE (0)
+	DELOUSE (1)
+	DELOUSE (2)
+
 	sub	tmp1, dstin, src
 	cmp	count, 96
 	ccmp	tmp1, count, 2, hi
@@ -71,6 +75,10 @@  END (memmove)
 libc_hidden_builtin_def (memmove)
 ENTRY (memcpy)
 
+	DELOUSE (0)
+	DELOUSE (1)
+	DELOUSE (2)
+
 	prfm	PLDL1KEEP, [src]
 	add	srcend, src, count
 	add	dstend, dstin, count
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 4d222c5..7bad29a 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -39,6 +39,9 @@ 
 
 ENTRY_ALIGN (__memset, 6)
 
+	DELOUSE (0)
+	DELOUSE (2)
+
 	dup	v0.16B, valw
 	add	dstend, dstin, count
 
diff --git a/sysdeps/aarch64/nptl/bits/pthreadtypes.h b/sysdeps/aarch64/nptl/bits/pthreadtypes.h
index c376e64..9dcf8d9 100644
--- a/sysdeps/aarch64/nptl/bits/pthreadtypes.h
+++ b/sysdeps/aarch64/nptl/bits/pthreadtypes.h
@@ -32,6 +32,8 @@ 
 #define __SIZEOF_PTHREAD_BARRIER_T     32
 #define __SIZEOF_PTHREAD_BARRIERATTR_T  8
 
+#define __PTHREAD_RWLOCK_INT_FLAGS_SHARED 1
+
 
 /* Thread identifiers.  The structure of the attribute type is not
    exposed on purpose.  */
diff --git a/sysdeps/aarch64/nptl/bits/semaphore.h b/sysdeps/aarch64/nptl/bits/semaphore.h
index 3cc5b37..1d1389c 100644
--- a/sysdeps/aarch64/nptl/bits/semaphore.h
+++ b/sysdeps/aarch64/nptl/bits/semaphore.h
@@ -21,7 +21,11 @@ 
 #endif
 
 
+#ifdef __ILP32__
+#define __SIZEOF_SEM_T	16
+#else
 #define __SIZEOF_SEM_T	32
+#endif
 
 
 /* Value returned if `sem_open' failed.  */
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index 22f4368..e03b3b5 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -33,6 +33,7 @@  END (_setjmp)
 libc_hidden_def (_setjmp)
 
 ENTRY (__sigsetjmp)
+	DELOUSE (0)
 
 1:
 	stp	x19, x20, [x0, #JB_X19<<3]
@@ -42,7 +43,7 @@  ENTRY (__sigsetjmp)
 	stp	x27, x28, [x0, #JB_X27<<3]
 
 #ifdef PTR_MANGLE
-	PTR_MANGLE (x4, x30, x3, x2)
+	PTR_MANGLE (4, 30, 3, 2)
 	stp	x29,  x4, [x0, #JB_X29<<3]
 #else
 	stp	x29, x30, [x0, #JB_X29<<3]
@@ -57,7 +58,7 @@  ENTRY (__sigsetjmp)
 	stp	d14, d15, [x0, #JB_D14<<3]
 #ifdef PTR_MANGLE
 	mov	x4, sp
-	PTR_MANGLE (x5, x4, x3, x2)
+	PTR_MANGLE (5, 4, 3, 2)
 	str	x5, [x0, #JB_SP<<3]
 #else
 	mov	x2,  sp
diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S
index efe2474..9198c57 100644
--- a/sysdeps/aarch64/start.S
+++ b/sysdeps/aarch64/start.S
@@ -16,6 +16,8 @@ 
    License along with the GNU C Library.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <sysdep.h>
+
 /* This is the canonical entry point, usually the first thing in the text
    segment.
 
@@ -25,7 +27,7 @@ 
 
    At this entry point, most registers' values are unspecified, except:
 
-   x0		Contains a function pointer to be registered with `atexit'.
+   x0/w0	Contains a function pointer to be registered with `atexit'.
 		This is how the dynamic linker arranges to have DT_FINI
 		functions called for shared libraries that have been loaded
 		before this code runs.
@@ -52,26 +54,26 @@  _start:
 	mov	x5, x0
 
 	/* Load argc and a pointer to argv */
-	ldr	x1, [sp, #0]
-	add	x2, sp, #8
+	ldr	PTR_REG (1), [sp, #0]
+	add	x2, sp, #PTR_SIZE
 
 	/* Setup stack limit in argument register */
 	mov	x6, sp
 
 #ifdef SHARED
         adrp    x0, :got:main
-	ldr     x0, [x0, #:got_lo12:main]
+	ldr     PTR_REG (0), [x0, #:got_lo12:main]
 
         adrp    x3, :got:__libc_csu_init
-	ldr     x3, [x3, #:got_lo12:__libc_csu_init]
+	ldr     PTR_REG (3), [x3, #:got_lo12:__libc_csu_init]
 
         adrp    x4, :got:__libc_csu_fini
-	ldr     x4, [x4, #:got_lo12:__libc_csu_fini]
+	ldr     PTR_REG (4), [x4, #:got_lo12:__libc_csu_fini]
 #else
 	/* Set up the other arguments in registers */
-	ldr	x0, =main
-	ldr	x3, =__libc_csu_init
-	ldr	x4, =__libc_csu_fini
+	ldr	PTR_REG (0), =main
+	ldr	PTR_REG (3), =__libc_csu_init
+	ldr	PTR_REG (4), =__libc_csu_fini
 #endif
 
 	/* __libc_start_main (main, argc, argv, init, fini, rtld_fini,
diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S
index 5e3aecf..c66fea3 100644
--- a/sysdeps/aarch64/strchr.S
+++ b/sysdeps/aarch64/strchr.S
@@ -62,6 +62,7 @@ 
 /* Locals and temporaries.  */
 
 ENTRY (strchr)
+	DELOUSE (0)
 	mov	wtmp2, #0x0401
 	movk	wtmp2, #0x4010, lsl #16
 	dup	vrepchr.16b, chrin
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
index a624c8d..c2cc47e 100644
--- a/sysdeps/aarch64/strchrnul.S
+++ b/sysdeps/aarch64/strchrnul.S
@@ -60,6 +60,7 @@ 
    identify exactly which byte is causing the termination.  */
 
 ENTRY (__strchrnul)
+	DELOUSE (0)
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the termination condition.  */
 	mov	wtmp2, #0x0401
diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S
index ba0ccb4..49e528b 100644
--- a/sysdeps/aarch64/strcmp.S
+++ b/sysdeps/aarch64/strcmp.S
@@ -49,6 +49,8 @@ 
 	/* Start of performance-critical section  -- one 64B cache line.  */
 ENTRY_ALIGN(strcmp, 6)
 
+	DELOUSE (0)
+	DELOUSE (1)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	tst	tmp1, #7
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 0694199..45809e8 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -91,6 +91,8 @@ 
 #define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
 
 ENTRY_ALIGN (STRCPY, 6)
+	DELOUSE (0)
+	DELOUSE (1)
 	/* For moderately short strings, the fastest way to do the copy is to
 	   calculate the length of the string in the same way as strlen, then
 	   essentially do a memcpy of the result.  This avoids the need for
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
index a07834b..5fb653a 100644
--- a/sysdeps/aarch64/strlen.S
+++ b/sysdeps/aarch64/strlen.S
@@ -85,6 +85,8 @@ 
 	   boundary.  */
 
 ENTRY_ALIGN (__strlen, 6)
+	DELOUSE (0)
+	DELOUSE (1)
 	and	tmp1, srcin, MIN_PAGE_SIZE - 1
 	mov	zeroones, REP8_01
 	cmp	tmp1, MIN_PAGE_SIZE - 16
diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S
index f6a17fd..02de93c 100644
--- a/sysdeps/aarch64/strncmp.S
+++ b/sysdeps/aarch64/strncmp.S
@@ -51,6 +51,9 @@ 
 #define endloop		x15
 
 ENTRY_ALIGN_AND_PAD (strncmp, 6, 7)
+	DELOUSE (0)
+	DELOUSE (1)
+	DELOUSE (2)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index 4cce45f..af765f1 100644
--- a/sysdeps/aarch64/strnlen.S
+++ b/sysdeps/aarch64/strnlen.S
@@ -50,6 +50,9 @@ 
 #define REP8_80 0x8080808080808080
 
 ENTRY_ALIGN_AND_PAD (__strnlen, 6, 9)
+	DELOUSE (0)
+	DELOUSE (1)
+	DELOUSE (2)
 	cbz	limit, L(hit_limit)
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S
index 44c1917..ea37968 100644
--- a/sysdeps/aarch64/strrchr.S
+++ b/sysdeps/aarch64/strrchr.S
@@ -68,6 +68,7 @@ 
    identify exactly which byte is causing the termination, and why.  */
 
 ENTRY(strrchr)
+	DELOUSE (0)
 	cbz	x1, L(null_search)
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the requested byte.  Magic constant 0x80200802 used
diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
index e045759..0a7dccb 100644
--- a/sysdeps/aarch64/sysdep.h
+++ b/sysdeps/aarch64/sysdep.h
@@ -16,8 +16,25 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#ifndef _AARCH64_SYSDEP_H
+#define _AARCH64_SYSDEP_H
+
 #include <sysdeps/generic/sysdep.h>
 
+#ifdef __LP64__
+# define AARCH64_R(NAME)	R_AARCH64_ ## NAME
+# define PTR_REG(n)		x##n
+# define PTR_LOG_SIZE		3
+# define DELOUSE(n)		/* Expands to nothing.  */
+#else /* ILP32 */
+# define AARCH64_R(NAME)	R_AARCH64_P32_ ## NAME
+# define PTR_REG(n)		w##n
+# define PTR_LOG_SIZE		2
+# define DELOUSE(n)		mov     w##n, w##n /* Zero upper half of xN.  */
+#endif /* __LP64__ */
+
+#define PTR_SIZE	(1<<PTR_LOG_SIZE)
+
 #ifdef	__ASSEMBLER__
 
 /* Syntactic details of assembler.  */
@@ -107,16 +124,18 @@ 
 # define L(name)         .L##name
 #endif
 
-/* Load or store to/from a pc-relative EXPR into/from R, using T.  */
-#define LDST_PCREL(OP, R, T, EXPR)  \
-	adrp	T, EXPR;	    \
-	OP	R, [T, #:lo12:EXPR];\
+/* Load or store to/from a pc-relative EXPR into/from R, using T.
+   R and T are register numbers, not names; x<T> is clobbered.  */
+#define LDST_PCREL(OP, R, T, EXPR)			\
+	adrp	x##T, EXPR;				\
+	OP	PTR_REG (R), [x##T, #:lo12:EXPR];
 
-/* Load or store to/from a got-relative EXPR into/from R, using T.  */
-#define LDST_GLOBAL(OP, R, T, EXPR)     \
-	adrp	T, :got:EXPR;		\
-	ldr	T, [T, #:got_lo12:EXPR];\
-	OP	R, [T];
+/* Load or store to/from a got-relative EXPR into/from R, using T.
+   R and T are register numbers, not names; x<T> is clobbered.  */
+#define LDST_GLOBAL(OP, R, T, EXPR)			\
+	adrp	x##T, :got:EXPR;			\
+	ldr	PTR_REG (T), [x##T, #:got_lo12:EXPR];	\
+	OP	PTR_REG (R), [x##T];
 
 /* Since C identifiers are not normally prefixed with an underscore
    on this system, the asm identifier `syscall_error' intrudes on the
@@ -125,3 +144,5 @@ 
 #define mcount		_mcount
 
 #endif	/* __ASSEMBLER__ */
+
+#endif  /* _AARCH64_SYSDEP_H */
diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/fcntl.h b/sysdeps/unix/sysv/linux/aarch64/bits/fcntl.h
index 658f696..1717c35 100644
--- a/sysdeps/unix/sysv/linux/aarch64/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/aarch64/bits/fcntl.h
@@ -25,11 +25,17 @@ 
 #define __O_NOFOLLOW	0100000
 #define __O_DIRECT	0200000
 
-#define __O_LARGEFILE	0
+#ifdef __ILP32__
+# define __O_LARGEFILE	0400000
+#else
+# define __O_LARGEFILE	0
+#endif
 
+#ifdef __LP64__
 # define F_GETLK64	5
 # define F_SETLK64	6
 # define F_SETLKW64	7
+#endif
 
 struct flock
   {
diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S
index 76baa7a..220ec11 100644
--- a/sysdeps/unix/sysv/linux/aarch64/clone.S
+++ b/sysdeps/unix/sysv/linux/aarch64/clone.S
@@ -39,6 +39,13 @@ 
  */
         .text
 ENTRY(__clone)
+	DELOUSE (0)
+	DELOUSE (1)
+	DELOUSE (2)
+	DELOUSE (3)
+	DELOUSE (4)
+	DELOUSE (5)
+	DELOUSE (6)
 	/* Save args for the child.  */
 	mov	x10, x0
 	mov	x11, x2
diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-cache.h b/sysdeps/unix/sysv/linux/aarch64/dl-cache.h
index 9c7b271..044fdba 100644
--- a/sysdeps/unix/sysv/linux/aarch64/dl-cache.h
+++ b/sysdeps/unix/sysv/linux/aarch64/dl-cache.h
@@ -18,7 +18,11 @@ 
 
 #include <ldconfig.h>
 
-#define _DL_CACHE_DEFAULT_ID    (FLAG_AARCH64_LIB64 | FLAG_ELF_LIBC6)
+#ifdef __LP64__
+# define _DL_CACHE_DEFAULT_ID    (FLAG_AARCH64_LIB64 | FLAG_ELF_LIBC6)
+#else /* ILP32 */
+# define _DL_CACHE_DEFAULT_ID    (FLAG_AARCH64_LIB32 | FLAG_ELF_LIBC6)
+#endif /* __LP64__ */
 
 #define _dl_cache_check_flags(flags)                    \
   ((flags) == _DL_CACHE_DEFAULT_ID)
@@ -27,18 +31,25 @@ 
   do								\
     {								\
       size_t len = strlen (dir);				\
-      char path[len + 3];					\
+      char path[len + 6];					\
       memcpy (path, dir, len + 1);				\
       if (len >= 6 && ! memcmp (path + len - 6, "/lib64", 6))	\
 	{							\
 	  len -= 2;						\
 	  path[len] = '\0';					\
 	}							\
+      if (len >= 9 && ! memcmp (path + len - 9, "/libilp32", 9))\
+	{							\
+	  len -= 5;						\
+	  path[len] = '\0';					\
+	}							\
       add_dir (path);						\
       if (len >= 4 && ! memcmp (path + len - 4, "/lib", 4))	\
 	{							\
 	  memcpy (path + len, "64", 3);				\
 	  add_dir (path);					\
+	  memcpy (path + len, "ilp32", 6);			\
+	  add_dir (path);					\
 	}							\
     } while (0)
 
diff --git a/sysdeps/unix/sysv/linux/aarch64/getcontext.S b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
index c2dd5b8..f6bf24f 100644
--- a/sysdeps/unix/sysv/linux/aarch64/getcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
@@ -30,6 +30,7 @@ 
 	.text
 
 ENTRY(__getcontext)
+	DELOUSE (0)
 	/* The saved context will return to the getcontext() call point
 	   with a return value of 0 */
 	str	xzr,	  [x0, oX0 +  0 * SZREG]
@@ -90,7 +91,7 @@  ENTRY(__getcontext)
 
 	/* Grab the signal mask */
 	/* rt_sigprocmask (SIG_BLOCK, NULL, &ucp->uc_sigmask, _NSIG8) */
-	add	x2, x0, #UCONTEXT_SIGMASK
+	add	PTR_REG (2), PTR_REG (0), #UCONTEXT_SIGMASK
 	mov	x0, SIG_BLOCK
 	mov	x1, 0
 	mov	x3, _NSIG8
diff --git a/sysdeps/unix/sysv/linux/aarch64/init-first.c b/sysdeps/unix/sysv/linux/aarch64/init-first.c
index f7224a2..f7bfc4d 100644
--- a/sysdeps/unix/sysv/linux/aarch64/init-first.c
+++ b/sysdeps/unix/sysv/linux/aarch64/init-first.c
@@ -27,17 +27,21 @@  int (*VDSO_SYMBOL(clock_getres)) (clockid_t, struct timespec *);
 static inline void
 _libc_vdso_platform_setup (void)
 {
-  PREPARE_VERSION (linux2639, "LINUX_2.6.39", 123718537);
+#ifdef __LP64__
+  PREPARE_VERSION (linux_version, "LINUX_2.6.39", 123718537);
+#else
+  PREPARE_VERSION (linux_version, "LINUX_4.9", 61765625);
+#endif
 
-  void *p = _dl_vdso_vsym ("__kernel_gettimeofday", &linux2639);
+  void *p = _dl_vdso_vsym ("__kernel_gettimeofday", &linux_version);
   PTR_MANGLE (p);
   VDSO_SYMBOL(gettimeofday) = p;
 
-  p = _dl_vdso_vsym ("__kernel_clock_gettime", &linux2639);
+  p = _dl_vdso_vsym ("__kernel_clock_gettime", &linux_version);
   PTR_MANGLE (p);
   VDSO_SYMBOL(clock_gettime) = p;
 
-  p = _dl_vdso_vsym ("__kernel_clock_getres", &linux2639);
+  p = _dl_vdso_vsym ("__kernel_clock_getres", &linux_version);
   PTR_MANGLE (p);
   VDSO_SYMBOL(clock_getres) = p;
 }
diff --git a/sysdeps/unix/sysv/linux/aarch64/ldconfig.h b/sysdeps/unix/sysv/linux/aarch64/ldconfig.h
index ee91ef8..ac84194 100644
--- a/sysdeps/unix/sysv/linux/aarch64/ldconfig.h
+++ b/sysdeps/unix/sysv/linux/aarch64/ldconfig.h
@@ -21,6 +21,8 @@ 
 #define SYSDEP_KNOWN_INTERPRETER_NAMES \
   { "/lib/ld-linux-aarch64.so.1", FLAG_ELF_LIBC6 }, \
   { "/lib/ld-linux-aarch64_be.so.1", FLAG_ELF_LIBC6 }, \
+  { "/lib/ld-linux-aarch64_ilp32.so.1", FLAG_ELF_LIBC6 }, \
+  { "/lib/ld-linux-aarch64_be_ilp32.so.1", FLAG_ELF_LIBC6 }, \
   { "/lib/ld-linux.so.3", FLAG_ELF_LIBC6 }, \
   { "/lib/ld-linux-armhf.so.3", FLAG_ELF_LIBC6 },
 #define SYSDEP_KNOWN_LIBRARY_NAMES \
diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
index d17f8c8..c2bca26 100644
--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
@@ -34,6 +34,7 @@ 
 	.text
 
 ENTRY (__setcontext)
+	DELOUSE (0)
 	/* Save a copy of UCP.  */
 	mov	x9, x0
 
diff --git a/sysdeps/unix/sysv/linux/aarch64/sigcontextinfo.h b/sysdeps/unix/sysv/linux/aarch64/sigcontextinfo.h
index a579501..ee54222 100644
--- a/sysdeps/unix/sysv/linux/aarch64/sigcontextinfo.h
+++ b/sysdeps/unix/sysv/linux/aarch64/sigcontextinfo.h
@@ -19,7 +19,7 @@ 
 #include <sys/ucontext.h>
 
 #define SIGCONTEXT siginfo_t *_si, struct ucontext *
-#define GET_PC(ctx) ((void *) (ctx)->uc_mcontext.pc)
+#define GET_PC(ctx) ((void *) (size_t) (ctx)->uc_mcontext.pc)
 
 /* There is no reliable way to get the sigcontext unless we use a
    three-argument signal handler.  */
diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
index c1a16f3..8e2cadd 100644
--- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
@@ -27,6 +27,7 @@ 
 
 	.text
 ENTRY(__swapcontext)
+	DELOUSE (0)
 	/* Set the value returned when swapcontext() returns in this context. */
 	str	xzr,      [x0, oX0 +  0 * SZREG]
 
diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
index a397e50..1ffabc2 100644
--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
@@ -250,12 +250,14 @@ 
      (!defined SHARED && (IS_IN (libc) \
 			  || IS_IN (libpthread))))
 # ifdef __ASSEMBLER__
+/* Note, dst, src, guard, and tmp are all register numbers rather than
+   register names so they will work with both ILP32 and LP64.  */
 #  define PTR_MANGLE(dst, src, guard, tmp)                                \
   LDST_PCREL (ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local)); \
   PTR_MANGLE2 (dst, src, guard)
 /* Use PTR_MANGLE2 for efficiency if guard is already loaded.  */
 #  define PTR_MANGLE2(dst, src, guard)\
-  eor dst, src, guard
+  eor x##dst, x##src, x##guard
 #  define PTR_DEMANGLE(dst, src, guard, tmp)\
   PTR_MANGLE (dst, src, guard, tmp)
 #  define PTR_DEMANGLE2(dst, src, guard)\
@@ -268,12 +270,14 @@  extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden;
 # endif
 #else
 # ifdef __ASSEMBLER__
+/* Note, dst, src, guard, and tmp are all register numbers rather than
+   register names so they will work with both ILP32 and LP64.  */
 #  define PTR_MANGLE(dst, src, guard, tmp)                             \
   LDST_GLOBAL (ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard));   \
   PTR_MANGLE2 (dst, src, guard)
 /* Use PTR_MANGLE2 for efficiency if guard is already loaded.  */
 #  define PTR_MANGLE2(dst, src, guard)\
-  eor dst, src, guard
+  eor x##dst, x##src, x##guard
 #  define PTR_DEMANGLE(dst, src, guard, tmp)\
   PTR_MANGLE (dst, src, guard, tmp)
 #  define PTR_DEMANGLE2(dst, src, guard)\