diff mbox series

[v4,2/4] elf: Do not change stack permission on dlopen/dlmopen

Message ID 20240725201159.3286231-3-adhemerval.zanella@linaro.org
State New
Headers show
Series Improve executable stack handling | expand

Commit Message

Adhemerval Zanella July 25, 2024, 8:05 p.m. UTC
If some shared library loaded with dlopen/dlmopen requires an executable
stack, either implicitly because of a missing GNU_STACK ELF header
(where the ABI default flags implies in the executable bit) or explicitly
because of the executable bit from GNU_STACK; the loader will try to set
the both the main thread and all thread stacks (from the pthread cache)
as executable.

Besides the issue where any __nptl_change_stack_perm failure does not
undo the previous executable transition (meaning that if the library
fails to load, there can be thread stacks with executable stacks), this
behavior was used on recent CVE [1] as a vector for RCE.

This patch changes that if a shared library requires an executable
stack, and the current stack is not executable, dlopen fails.  The
change is done only for dynamically loaded modules, if the program
or any dependency requires an executable stack, the loader will still
change the main thread before program execution and any thread created
with default stack configuration.

[1] https://www.qualys.com/2023/07/19/cve-2023-38408/rce-openssh-forwarded-ssh-agent.txt

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 NEWS                                   |   6 +-
 elf/dl-load.c                          |  11 +-
 elf/tst-execstack.c                    | 142 ++++++++++---------------
 nptl/allocatestack.c                   |  19 ----
 sysdeps/nptl/pthreadP.h                |   6 --
 sysdeps/unix/sysv/linux/Versions       |   3 -
 sysdeps/unix/sysv/linux/dl-execstack.c |  67 +-----------
 sysdeps/unix/sysv/linux/mips/Makefile  |   7 ++
 8 files changed, 76 insertions(+), 185 deletions(-)
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index d488874aba..6218c45d41 100644
--- a/NEWS
+++ b/NEWS
@@ -27,7 +27,11 @@  Major new features:
 
 Deprecated and removed features, and other changes affecting compatibility:
 
-  [Add deprecations, removals and changes affecting compatibility here]
+* dlopen and dlmopen no longer make the stack executable if a shared
+  library requires it, either implicitly because of a missing GNU_STACK ELF
+  header (and default ABI permission having the executable bit set) or
+  explicitly because of the executable bit in GNU_STACK, and the stack is
+  not already executable.
 
 Changes to build and runtime requirements:
 
diff --git a/elf/dl-load.c b/elf/dl-load.c
index 8a89b71016..015595aac4 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1298,12 +1298,17 @@  _dl_map_object_from_fd (const char *name, const char *origname, int fd,
   if (__glibc_unlikely ((stack_flags &~ GL(dl_stack_flags)) & PF_X))
     {
       /* The stack is presently not executable, but this module
-	 requires that it be executable.  */
+	 requires that it be executable.  Only tries to change the
+	 stack protection during process startup.  */
+      if ((mode & __RTLD_DLOPEN) == 0)
 #if PTHREAD_IN_LIBC
-      errval = _dl_make_stacks_executable (stack_endp);
+	errval = _dl_make_stacks_executable (stack_endp);
 #else
-      errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
+        errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
 #endif
+     else
+       errval = EINVAL;
+
       if (errval)
 	{
 	  errstring = N_("\
diff --git a/elf/tst-execstack.c b/elf/tst-execstack.c
index 560b353918..cd758c089e 100644
--- a/elf/tst-execstack.c
+++ b/elf/tst-execstack.c
@@ -9,6 +9,11 @@ 
 #include <error.h>
 #include <stackinfo.h>
 
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/xthread.h>
+#include <support/xdlfcn.h>
+
 static void
 print_maps (void)
 {
@@ -20,11 +25,21 @@  print_maps (void)
 #endif
 }
 
-static void deeper (void (*f) (void));
+#ifndef DEFAULT_RWX_STACK
+# define DEFAULT_RWX_STACK 0
+#else
+static void
+deeper (void (*f) (void))
+{
+  char stack[1100 * 1024];
+  explicit_bzero (stack, sizeof stack);
+  (*f) ();
+  memfrob (stack, sizeof stack);
+}
+#endif
 
 #if USE_PTHREADS
-# include <pthread.h>
-
+# if DEFAULT_RWX_STACK
 static void *
 tryme_thread (void *f)
 {
@@ -32,16 +47,21 @@  tryme_thread (void *f)
 
   return 0;
 }
+# endif
 
 static pthread_barrier_t startup_barrier, go_barrier;
 static void *
 waiter_thread (void *arg)
 {
-  void **f = arg;
-  pthread_barrier_wait (&startup_barrier);
-  pthread_barrier_wait (&go_barrier);
+  xpthread_barrier_wait (&startup_barrier);
+  xpthread_barrier_wait (&go_barrier);
 
+# if DEFAULT_RWX_STACK
+  void **f = arg;
   (*((void (*) (void)) *f)) ();
+# else
+  abort ();
+# endif
 
   return 0;
 }
@@ -83,52 +103,36 @@  do_test (void)
 
   printf ("executable stacks %sallowed\n", allow_execstack ? "" : "not ");
 
+#if USE_PTHREADS || DEFAULT_RWX_STACK
   static void *f;		/* Address of this is used in other threads. */
+#endif
 
 #if USE_PTHREADS
   /* Create some threads while stacks are nonexecutable.  */
   #define N 5
-  pthread_t thr[N];
 
-  pthread_barrier_init (&startup_barrier, NULL, N + 1);
-  pthread_barrier_init (&go_barrier, NULL, N + 1);
+  xpthread_barrier_init (&startup_barrier, NULL, N + 1);
+  xpthread_barrier_init (&go_barrier, NULL, N + 1);
 
   for (int i = 0; i < N; ++i)
-    {
-      int rc = pthread_create (&thr[i], NULL, &waiter_thread, &f);
-      if (rc)
-	error (1, rc, "pthread_create");
-    }
+    xpthread_create (NULL, &waiter_thread, &f);
 
   /* Make sure they are all there using their stacks.  */
-  pthread_barrier_wait (&startup_barrier);
+  xpthread_barrier_wait (&startup_barrier);
   puts ("threads waiting");
 #endif
 
   print_maps ();
 
-#if USE_PTHREADS
+#if USE_PTHREADS && DEFAULT_RWX_STACK
   void *old_stack_addr, *new_stack_addr;
   size_t stack_size;
   pthread_t me = pthread_self ();
   pthread_attr_t attr;
-  int ret = 0;
-
-  ret = pthread_getattr_np (me, &attr);
-  if (ret)
-    {
-      printf ("before execstack: pthread_getattr_np returned error: %s\n",
-	      strerror (ret));
-      return 1;
-    }
 
-  ret = pthread_attr_getstack (&attr, &old_stack_addr, &stack_size);
-  if (ret)
-    {
-      printf ("before execstack: pthread_attr_getstack returned error: %s\n",
-	      strerror (ret));
-      return 1;
-    }
+  TEST_VERIFY_EXIT (pthread_getattr_np (me, &attr) == 0);
+  TEST_VERIFY_EXIT (pthread_attr_getstack (&attr, &old_stack_addr,
+					    &stack_size) == 0);
 # if _STACK_GROWS_DOWN
     old_stack_addr += stack_size;
 # else
@@ -143,18 +147,12 @@  do_test (void)
   const char *soname = "tst-execstack-mod.so";
 #endif
   void *h = dlopen (soname, RTLD_LAZY);
-  if (h == NULL)
-    {
-      printf ("cannot load: %s\n", dlerror ());
-      return allow_execstack;
-    }
+#if !DEFAULT_RWX_STACK
+  TEST_VERIFY_EXIT (h == NULL);
+#else
+  TEST_VERIFY_EXIT (h != NULL);
 
-  f = dlsym (h, "tryme");
-  if (f == NULL)
-    {
-      printf ("symbol not found: %s\n", dlerror ());
-      return 1;
-    }
+  f = xdlsym (h, "tryme");
 
   /* Test if that really made our stack executable.
      The `tryme' function should crash if not.  */
@@ -163,28 +161,15 @@  do_test (void)
 
   print_maps ();
 
-#if USE_PTHREADS
-  ret = pthread_getattr_np (me, &attr);
-  if (ret)
-    {
-      printf ("after execstack: pthread_getattr_np returned error: %s\n",
-	      strerror (ret));
-      return 1;
-    }
-
-  ret = pthread_attr_getstack (&attr, &new_stack_addr, &stack_size);
-  if (ret)
-    {
-      printf ("after execstack: pthread_attr_getstack returned error: %s\n",
-	      strerror (ret));
-      return 1;
-    }
-
-# if _STACK_GROWS_DOWN
+# if USE_PTHREADS
+  TEST_VERIFY_EXIT (pthread_getattr_np (me, &attr) == 0);
+  TEST_VERIFY_EXIT (pthread_attr_getstack (&attr, &new_stack_addr,
+					    &stack_size) == 0);
+#  if _STACK_GROWS_DOWN
     new_stack_addr += stack_size;
-# else
+#  else
     new_stack_addr -= stack_size;
-# endif
+#  endif
 
   /* It is possible that the dlopen'd module may have been mmapped just below
      the stack.  The stack size is taken as MIN(stack rlimit size, end of last
@@ -194,48 +179,29 @@  do_test (void)
      stacksize and stackaddr respectively.  If the size changes due to the
      above, then both stacksize and stackaddr can change, but the stack bottom
      should remain the same, which is computed as stackaddr + stacksize.  */
-  if (old_stack_addr != new_stack_addr)
-    {
-      printf ("Stack end changed, old: %p, new: %p\n",
-	      old_stack_addr, new_stack_addr);
-      return 1;
-    }
+  TEST_VERIFY_EXIT (old_stack_addr == new_stack_addr);
   printf ("Stack address remains the same: %p\n", old_stack_addr);
-#endif
+# endif
 
   /* Test that growing the stack region gets new executable pages too.  */
   deeper ((void (*) (void)) f);
 
   print_maps ();
 
-#if USE_PTHREADS
+# if USE_PTHREADS
   /* Test that a fresh thread now gets an executable stack.  */
-  {
-    pthread_t th;
-    int rc = pthread_create (&th, NULL, &tryme_thread, f);
-    if (rc)
-      error (1, rc, "pthread_create");
-  }
+  xpthread_create (NULL, &tryme_thread, f);
 
   puts ("threads go");
   /* The existing threads' stacks should have been changed.
      Let them run to test it.  */
-  pthread_barrier_wait (&go_barrier);
+  xpthread_barrier_wait (&go_barrier);
 
   pthread_exit ((void *) (long int) (! allow_execstack));
+# endif
 #endif
 
   return ! allow_execstack;
 }
 
-static void
-deeper (void (*f) (void))
-{
-  char stack[1100 * 1024];
-  explicit_bzero (stack, sizeof stack);
-  (*f) ();
-  memfrob (stack, sizeof stack);
-}
-
-
 #include <support/test-driver.c>
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index f35a8369bd..8069d3e0ae 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -446,25 +446,6 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 
 	  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
-
-	  /* There might have been a race.  Another thread might have
-	     caused the stacks to get exec permission while this new
-	     stack was prepared.  Detect if this was possible and
-	     change the permission if necessary.  */
-	  if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
-				&& (prot & PROT_EXEC) == 0, 0))
-	    {
-	      int err = __nptl_change_stack_perm (pd);
-	      if (err != 0)
-		{
-		  /* Free the stack memory we just allocated.  */
-		  (void) __munmap (mem, size);
-
-		  return err;
-		}
-	    }
-
-
 	  /* Note that all of the stack and the thread descriptor is
 	     zeroed.  This means we do not have to initialize fields
 	     with initial value zero.  This is specifically true for
diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
index 30e8a2d177..79af27449a 100644
--- a/sysdeps/nptl/pthreadP.h
+++ b/sysdeps/nptl/pthreadP.h
@@ -280,12 +280,6 @@  __do_cancel (void)
 extern void __nptl_free_tcb (struct pthread *pd);
 libc_hidden_proto (__nptl_free_tcb)
 
-/* Change the permissions of a thread stack.  Called from
-   _dl_make_stacks_executable and pthread_create.  */
-int
-__nptl_change_stack_perm (struct pthread *pd);
-rtld_hidden_proto (__nptl_change_stack_perm)
-
 /* longjmp handling.  */
 extern void __pthread_cleanup_upto (__jmp_buf target, char *targetframe);
 libc_hidden_proto (__pthread_cleanup_upto)
diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
index 268ba1b6ac..05297934d6 100644
--- a/sysdeps/unix/sysv/linux/Versions
+++ b/sysdeps/unix/sysv/linux/Versions
@@ -356,7 +356,4 @@  ld {
     __rseq_offset;
     __rseq_size;
   }
-  GLIBC_PRIVATE {
-    __nptl_change_stack_perm;
-  }
 }
diff --git a/sysdeps/unix/sysv/linux/dl-execstack.c b/sysdeps/unix/sysv/linux/dl-execstack.c
index b986898598..0b71ffca6d 100644
--- a/sysdeps/unix/sysv/linux/dl-execstack.c
+++ b/sysdeps/unix/sysv/linux/dl-execstack.c
@@ -16,19 +16,10 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <errno.h>
 #include <ldsodefs.h>
-#include <libintl.h>
-#include <list.h>
-#include <pthreadP.h>
-#include <stackinfo.h>
-#include <stdbool.h>
-#include <sys/mman.h>
-#include <sysdep.h>
-#include <unistd.h>
 
-static int
-make_main_stack_executable (void **stack_endp)
+int
+_dl_make_stacks_executable (void **stack_endp)
 {
   /* This gives us the highest/lowest page that needs to be changed.  */
   uintptr_t page = ((uintptr_t) *stack_endp
@@ -52,57 +43,3 @@  make_main_stack_executable (void **stack_endp)
 
   return 0;
 }
-
-int
-_dl_make_stacks_executable (void **stack_endp)
-{
-  /* First the main thread's stack.  */
-  int err = make_main_stack_executable (stack_endp);
-  if (err != 0)
-    return err;
-
-  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
-  list_t *runp;
-  list_for_each (runp, &GL (dl_stack_used))
-    {
-      err = __nptl_change_stack_perm (list_entry (runp, struct pthread, list));
-      if (err != 0)
-	break;
-    }
-
-  /* Also change the permission for the currently unused stacks.  This
-     might be wasted time but better spend it here than adding a check
-     in the fast path.  */
-  if (err == 0)
-    list_for_each (runp, &GL (dl_stack_cache))
-      {
-	err = __nptl_change_stack_perm (list_entry (runp, struct pthread,
-						    list));
-	if (err != 0)
-	  break;
-      }
-
-  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
-  return err;
-}
-
-int
-__nptl_change_stack_perm (struct pthread *pd)
-{
-#if _STACK_GROWS_DOWN
-  void *stack = pd->stackblock + pd->guardsize;
-  size_t len = pd->stackblock_size - pd->guardsize;
-#elif _STACK_GROWS_UP
-  void *stack = pd->stackblock;
-  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
-#else
-# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
-#endif
-  if (__mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
-    return errno;
-
-  return 0;
-}
-rtld_hidden_def (__nptl_change_stack_perm)
diff --git a/sysdeps/unix/sysv/linux/mips/Makefile b/sysdeps/unix/sysv/linux/mips/Makefile
index d5725c69d8..05ec9150b2 100644
--- a/sysdeps/unix/sysv/linux/mips/Makefile
+++ b/sysdeps/unix/sysv/linux/mips/Makefile
@@ -61,6 +61,7 @@  ifeq ($(subdir),elf)
 # this test is expected to fail.
 ifneq ($(mips-has-gnustack),yes)
 test-xfail-check-execstack = yes
+CFLAGS-tst-execstack.c += -DDEFAULT_RWX_STACK=1
 endif
 endif
 
@@ -68,6 +69,12 @@  ifeq ($(subdir),stdlib)
 gen-as-const-headers += ucontext_i.sym
 endif
 
+ifeq ($(subdir),nptl)
+ifeq ($(mips-force-execstack),yes)
+CFLAGS-tst-execstack-threads.c += -DDEFAULT_RWX_STACK=1
+endif
+endif
+
 ifeq ($(mips-force-execstack),yes)
 CFLAGS-.o += -Wa,-execstack
 CFLAGS-.os += -Wa,-execstack