Message ID | 20190731183136.21545-3-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | [v2,1/5] mips: Do not malloc on getdents64 fallback | expand |
Ping. On 31/07/2019 15:31, Adhemerval Zanella wrote: > Changes from previous version: > > * Move the logic of stack mapping creation to stackmap.h and > added a guard page allocation for the compatibility case. > > -- > > The current internal posix_spawn symbol for Linux (__spawni) requires > to allocate a dynamic stack based on input arguments to handle the > SPAWN_XFLAGS_USE_PATH internal flag, which re-issue the input binary > as a shell script if execve call return ENOEXEC (to execute shell > scripts with an initial shebang). > > This is done only for compatibility mode and the generic case does not > require the extra calculation plus the potential large mmap/munmap > call. For default case, a pre-defined buffer is sufficed to use on the > clone call instead. > > This patch optimizes Linux spawni by allocating a dynamic stack only > for compatibility symbol (SPAWN_XFLAGS_USE_PATH). For generic case, > an mmap allocated buffer is used along with a guard page, similar to > what NPTL uses for thread stacks hardening. > > For default case, it is a fixed code path with fixed stack usage in helper > process, so assuming a large enough stack buffer it would never overflow. > It also does not prevent to adapt to the vfork-like to re-use process > stack, once it is implemented. > > Checked x86_64-linux-gnu and i686-linux-gnu. > > * sysdeps/unix/sysv/linux/spawni.c (posix_spawn_args): Remove > argc member. > (maybe_script_execute): Remove function. > (execve_compat, __spawni_clone, __spawnix_compat): New function. > (__spawni_child): Remove maybe_script_execute call. > (__spawnix): Remove magic stack slack constant with stack_slack > identifier. > (__spawni): Only allocates a variable stack when > SPAWN_XFLAGS_TRY_SHELL is used. > * posix/stackmap.h: New file. > * sysdeps/ia64/nptl/pthreaddef.h (NEED_SEPARATE_REGISTER_STACK): Move > to ... > * sysdeps/ia64/stackinfo.h: ... here. > --- > posix/stackmap.h | 115 +++++++++++++ > sysdeps/ia64/nptl/pthreaddef.h | 3 - > sysdeps/ia64/stackinfo.h | 3 + > sysdeps/unix/sysv/linux/spawni.c | 277 +++++++++++++++++++------------ > 4 files changed, 285 insertions(+), 113 deletions(-) > create mode 100644 posix/stackmap.h > > diff --git a/posix/stackmap.h b/posix/stackmap.h > new file mode 100644 > index 0000000000..be500e378a > --- /dev/null > +++ b/posix/stackmap.h > @@ -0,0 +1,115 @@ > +/* Functions to create stack mappings for helper processes. > + Copyright (C) 2019 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _STACKMAP_H > +#define _STACKMAP_H > + > +#include <unistd.h> > +#include <sys/mman.h> > +#include <ldsodefs.h> > +#include <stdbool.h> > + > +static inline int > +stack_prot (void) > +{ > + return (PROT_READ | PROT_WRITE > + | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0)); > +} > + > +static inline size_t > +stack_guard_size (void) > +{ > + return GLRO (dl_pagesize); > +} > + > +/* Return a aligning mask based on system pagesize. */ > +static inline size_t > +stack_pagesize_m1_mask (void) > +{ > + size_t pagesize_m1 = __getpagesize () - 1; > + return ~pagesize_m1; > +} > + > +/* Return the guard page position on memory segment MEM with total size SIZE > + and with a guard page of size GUARDIZE. */ > +static inline void * > +stack_guard_position (void *mem, size_t size, size_t guardsize) > +{ > +#ifdef NEED_SEPARATE_REGISTER_STACK > + return mem + (((size - guardsize) / 2) & stack_pagesize_m1_mask ()); > +#elif _STACK_GROWS_DOWN > + return mem; > +#elif _STACK_GROWS_UP > + return (void *) (((uintptr_t)(mem + size)- guardsize) > + & stack_pagesize_m1_mask ()); > +#endif > +} > + > +/* Setup the expected stack memory protection value (based on stack_prot) > + for the memory segment MEM with size SIZE based on the guard page > + GUARD with size GUARDSIZE. The memory segment is expected to be allocated > + with PROT_NOTE. */ > +static inline bool > +stack_setup_prot (char *mem, size_t size, char *guard, size_t guardsize) > +{ > + const int prot = stack_prot (); > + > + char *guardend = guard + guardsize; > +#if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK) > + /* As defined at guard_position, for architectures with downward stack > + the guard page is always at start of the allocated area. */ > + if (__mprotect (guardend, size - guardsize, prot) != 0) > + return false; > +#else > + size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem; > + if (__mprotect (mem, mprots1, prot) != 0) > + return false; > + size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend; > + if (__mprotect (guardend, mprots2, prot) != 0) > + return false; > +#endif > + return true; > +} > + > +/* Allocated a memory segment with size SIZE plus GUARSIZE with mmap and > + setup the expected protection for both a guard page and the stack > + itself. */ > +static inline void * > +stack_allocate (size_t size, size_t guardsize) > +{ > + const int prot = stack_prot (); > + > + /* If a guard page is required, avoid committing memory by first > + allocate with PROT_NONE and then reserve with required permission > + excluding the guard page. */ > + void *mem = __mmap (NULL, size, (guardsize == 0) ? prot : PROT_NONE, > + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); > + if (guardsize) > + { > + void *guard = stack_guard_position (mem, size, guardsize); > + if (!stack_setup_prot (mem, size, guard, guardsize)) > + { > + __munmap (mem, size); > + return MAP_FAILED; > + } > + } > + > + return mem; > +} > + > +#endif /* _STACKMAP_H */ > diff --git a/sysdeps/ia64/nptl/pthreaddef.h b/sysdeps/ia64/nptl/pthreaddef.h > index bf52d5af62..11579f11b4 100644 > --- a/sysdeps/ia64/nptl/pthreaddef.h > +++ b/sysdeps/ia64/nptl/pthreaddef.h > @@ -18,9 +18,6 @@ > /* Default stack size. */ > #define ARCH_STACK_DEFAULT_SIZE (32 * 1024 * 1024) > > -/* IA-64 uses a normal stack and a register stack. */ > -#define NEED_SEPARATE_REGISTER_STACK > - > /* Required stack pointer alignment at beginning. */ > #define STACK_ALIGN 16 > > diff --git a/sysdeps/ia64/stackinfo.h b/sysdeps/ia64/stackinfo.h > index 6433a89945..d942426fcf 100644 > --- a/sysdeps/ia64/stackinfo.h > +++ b/sysdeps/ia64/stackinfo.h > @@ -30,4 +30,7 @@ > /* Default to a non-executable stack. */ > #define DEFAULT_STACK_PERMS (PF_R|PF_W) > > +/* IA-64 uses a normal stack and a register stack. */ > +#define NEED_SEPARATE_REGISTER_STACK > + > #endif /* stackinfo.h */ > diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c > index ca7bf99825..0f7a8ca5df 100644 > --- a/sysdeps/unix/sysv/linux/spawni.c > +++ b/sysdeps/unix/sysv/linux/spawni.c > @@ -23,10 +23,11 @@ > #include <not-cancel.h> > #include <local-setxid.h> > #include <shlib-compat.h> > -#include <sigsetops.h> > -#include <internal-signals.h> > -#include <ldsodefs.h> > +#include <nptl/pthreadP.h> > #include <ctype.h> > +#include <dl-sysdep.h> > +#include <libc-pointer-arith.h> > +#include <stackmap.h> > #include "spawn_int.h" > > /* The Linux implementation of posix_spawn{p} uses the clone syscall directly > @@ -70,7 +71,6 @@ > # define STACK(__stack, __stack_size) (__stack + __stack_size) > #endif > > - > struct posix_spawn_args > { > sigset_t oldmask; > @@ -79,37 +79,11 @@ struct posix_spawn_args > const posix_spawn_file_actions_t *fa; > const posix_spawnattr_t *restrict attr; > char *const *argv; > - ptrdiff_t argc; > char *const *envp; > int xflags; > int err; > }; > > -/* Older version requires that shell script without shebang definition > - to be called explicitly using /bin/sh (_PATH_BSHELL). */ > -static void > -maybe_script_execute (struct posix_spawn_args *args) > -{ > - if (SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) > - && (args->xflags & SPAWN_XFLAGS_TRY_SHELL) && errno == ENOEXEC) > - { > - char *const *argv = args->argv; > - ptrdiff_t argc = args->argc; > - > - /* Construct an argument list for the shell. */ > - char *new_argv[argc + 2]; > - new_argv[0] = (char *) _PATH_BSHELL; > - new_argv[1] = (char *) args->file; > - if (argc > 1) > - memcpy (new_argv + 2, argv + 1, argc * sizeof (char *)); > - else > - new_argv[2] = NULL; > - > - /* Execute the shell. */ > - args->exec (new_argv[0], new_argv, args->envp); > - } > -} > - > /* Close all file descriptor up to FROM by interacting /proc/self/fd. */ > static bool > spawn_closefrom (int from) > @@ -152,7 +126,7 @@ spawn_closefrom (int from) > attributes, and file actions. It run on its own stack (provided by the > posix_spawn call). */ > static int > -__spawni_child (void *arguments) > +spawni_child (void *arguments) > { > struct posix_spawn_args *args = arguments; > const posix_spawnattr_t *restrict attr = args->attr; > @@ -330,11 +304,6 @@ __spawni_child (void *arguments) > > args->exec (args->file, args->argv, args->envp); > > - /* This is compatibility function required to enable posix_spawn run > - script without shebang definition for older posix_spawn versions > - (2.15). */ > - maybe_script_execute (args); > - > fail: > /* errno should have an appropriate non-zero value; otherwise, > there's a bug in glibc or the kernel. For lack of an error code > @@ -345,71 +314,12 @@ fail: > _exit (SPAWN_ERROR); > } > > -/* Spawn a new process executing PATH with the attributes describes in *ATTRP. > - Before running the process perform the actions described in FILE-ACTIONS. */ > static int > -__spawnix (pid_t * pid, const char *file, > - const posix_spawn_file_actions_t * file_actions, > - const posix_spawnattr_t * attrp, char *const argv[], > - char *const envp[], int xflags, > - int (*exec) (const char *, char *const *, char *const *)) > +spawni_clone (struct posix_spawn_args *args, void *stack, size_t stack_size, > + pid_t *pid) > { > - pid_t new_pid; > - struct posix_spawn_args args; > int ec; > - > - /* To avoid imposing hard limits on posix_spawn{p} the total number of > - arguments is first calculated to allocate a mmap to hold all possible > - values. */ > - ptrdiff_t argc = 0; > - /* Linux allows at most max (0x7FFFFFFF, 1/4 stack size) arguments > - to be used in a execve call. We limit to INT_MAX minus one due the > - compatiblity code that may execute a shell script (maybe_script_execute) > - where it will construct another argument list with an additional > - argument. */ > - ptrdiff_t limit = INT_MAX - 1; > - while (argv[argc++] != NULL) > - if (argc == limit) > - { > - errno = E2BIG; > - return errno; > - } > - > - int prot = (PROT_READ | PROT_WRITE > - | ((GL (dl_stack_flags) & PF_X) ? PROT_EXEC : 0)); > - > - size_t argv_size = (argc * sizeof (void *)); > - /* We need at least a few pages in case the compiler's stack checking is > - enabled. In some configs, it is known to use at least 24KiB. We use > - 32KiB to be "safe" from anything the compiler might do. Besides, the > - extra pages won't actually be allocated unless they get used. > - It also acts the slack for spawn_closefrom (including MIPS64 getdents64 > - where it might use about 1k extra stack space. */ > - argv_size += (32 * 1024); > - size_t stack_size = ALIGN_UP (argv_size, GLRO(dl_pagesize)); > - void *stack = __mmap (NULL, stack_size, prot, > - MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); > - if (__glibc_unlikely (stack == MAP_FAILED)) > - return errno; > - > - /* Disable asynchronous cancellation. */ > - int state; > - __libc_ptf_call (__pthread_setcancelstate, > - (PTHREAD_CANCEL_DISABLE, &state), 0); > - > - /* Child must set args.err to something non-negative - we rely on > - the parent and child sharing VM. */ > - args.err = 0; > - args.file = file; > - args.exec = exec; > - args.fa = file_actions; > - args.attr = attrp ? attrp : &(const posix_spawnattr_t) { 0 }; > - args.argv = argv; > - args.argc = argc; > - args.envp = envp; > - args.xflags = xflags; > - > - __libc_signal_block_all (&args.oldmask); > + pid_t new_pid; > > /* The clone flags used will create a new child that will run in the same > memory space (CLONE_VM) and the execution of calling thread will be > @@ -419,8 +329,8 @@ __spawnix (pid_t * pid, const char *file, > need for CLONE_SETTLS. Although parent and child share the same TLS > namespace, there will be no concurrent access for TLS variables (errno > for instance). */ > - new_pid = CLONE (__spawni_child, STACK (stack, stack_size), stack_size, > - CLONE_VM | CLONE_VFORK | SIGCHLD, &args); > + new_pid = CLONE (spawni_child, STACK (stack, stack_size), stack_size, > + CLONE_VM | CLONE_VFORK | SIGCHLD, args); > > /* It needs to collect the case where the auxiliary process was created > but failed to execute the file (due either any preparation step or > @@ -433,7 +343,7 @@ __spawnix (pid_t * pid, const char *file, > only in case of failure, so in case of premature termination > due a signal args.err will remain zeroed and it will be up to > caller to actually collect it. */ > - ec = args.err; > + ec = args->err; > if (ec > 0) > /* There still an unlikely case where the child is cancelled after > setting args.err, due to a positive error value. Also there is > @@ -446,14 +356,139 @@ __spawnix (pid_t * pid, const char *file, > else > ec = -new_pid; > > - __munmap (stack, stack_size); > - > if ((ec == 0) && (pid != NULL)) > *pid = new_pid; > > - __libc_signal_restore_set (&args.oldmask); > + return ec; > +} > > - __libc_ptf_call (__pthread_setcancelstate, (state, NULL), 0); > +#if SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) > +/* This is compatibility function required to enable posix_spawn run > + script without shebang definition for older posix_spawn versions > + (2.15). */ > +static int > +execve_compat (const char *filename, char *const argv[], char *const envp[]) > +{ > + __execve (filename, argv, envp); > + > + if (errno == ENOEXEC) > + { > + char *const *cargv = argv; > + ptrdiff_t argc = 0; > + while (cargv[argc++] != NULL); > + > + /* Construct an argument list for the shell. */ > + char *new_argv[argc + 2]; > + new_argv[0] = (char *) _PATH_BSHELL; > + new_argv[1] = (char *) filename; > + if (argc > 1) > + memcpy (new_argv + 2, argv + 1, argc * sizeof (char *)); > + else > + new_argv[2] = NULL; > + > + /* Execute the shell. */ > + __execve (new_argv[0], new_argv, envp); > + } > + > + return -1; > +} > + > +/* Allocates a stack using mmap to call clone. The stack size is based on > + number of arguments since it would be used on compat mode which may call > + execvpe/execve_compat. */ > +static int > +spawnix_compat (struct posix_spawn_args *args, pid_t *pid) > +{ > + char *const *argv = args->argv; > + > + /* To avoid imposing hard limits on posix_spawn{p} the total number of > + arguments is first calculated to allocate a mmap to hold all possible > + values. */ > + ptrdiff_t argc = 0; > + /* Linux allows at most max (0x7FFFFFFF, 1/4 stack size) arguments > + to be used in a execve call. We limit to INT_MAX minus one due the > + compatiblity code that may execute a shell script (maybe_script_execute) > + where it will construct another argument list with an additional > + argument. */ > + ptrdiff_t limit = INT_MAX - 1; > + while (argv[argc++] != NULL) > + if (argc == limit) > + { > + errno = E2BIG; > + return errno; > + } > + > + size_t argv_size = (argc * sizeof (void *)); > + /* We need at least a few pages in case the compiler's stack checking is > + enabled. In some configs, it is known to use at least 24KiB. We use > + 32KiB to be "safe" from anything the compiler might do. Besides, the > + extra pages won't actually be allocated unless they get used. > + It also acts the slack for spawn_closefrom (including MIPS64 getdents64 > + where it might use about 1k extra stack space. */ > + argv_size += (32 * 1024); > + > + /* Allocate a stack with an extra guard page. */ > + size_t guard_size = stack_guard_size (); > + size_t stack_size = guard_size + ALIGN_UP (argv_size, __getpagesize ()); > + void *stack = stack_allocate (stack_size, guard_size); > + if (__glibc_unlikely (stack == MAP_FAILED)) > + return errno; > + > + int ec = spawni_clone (args, stack, stack_size, pid); > + > + __munmap (stack, stack_size); > + > + return ec; > +} > +#endif > + > +/* For SPAWN_XFLAGS_TRY_SHELL we need to execute a script even without > + a shebang. To accomplish it we pass as callback to spawni_child > + __execvpe (which call maybe_script_execute for such case) or > + execve_compat (which mimics the semantic using execve). */ > +static int > +spawn_process (struct posix_spawn_args *args, pid_t *pid) > +{ > + int ec; > + > +#if SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) > + if (args->xflags & SPAWN_XFLAGS_TRY_SHELL) > + { > + args->exec = args->xflags & SPAWN_XFLAGS_USE_PATH > + ? __execvpe : execve_compat; > + ec = spawnix_compat (args, pid); > + } > + else > +#endif > + { > + args->exec = args->xflags & SPAWN_XFLAGS_USE_PATH > + ? __execvpex : __execve; > + > + /* spawni_clone stack usage need to take in consideration spawni_child > + stack usage and subsequent functions called: > + > + - sigprocmask: might allocate an extra sigset_t (128 bytes). > + - __libc_sigaction: allocate a struct kernel_sigaction (144 bytes on > + 64-bit, 136 on 32-bit). > + - __sched_setparam, __sched_setscheduler, __setsig, __setpgid, > + local_seteuid, local_setegid, __close_nocancel, __getrlimit64, > + __close_nocancel, __open_nocancel, __dup2, __chdir, __fchdir: > + and direct syscall. > + - __fcntl: wrapper only uses local variables. > + - spawn_closefrom: uses up to 1024 bytes as local buffer > + - __direntries_read > + - __getdents64: MIPS64 uses up to buffer size used, 1024 in this > + specific usage. > + - __direntries_next: local variables. > + - __close_nocancel: direct syscall. > + - execvpe allocates at least (NAME_MAX + 1) + PATH_MAX to create the > + combination of PATH entry and program name (1024 + 255 + 1). > + > + It allocates 2048 plus some stack for automatic variables and function > + calls. */ > + char stack[2560]; > + ec = spawni_clone (args, stack, sizeof stack, pid); > + } > > return ec; > } > @@ -462,12 +497,34 @@ __spawnix (pid_t * pid, const char *file, > Before running the process perform the actions described in FILE-ACTIONS. */ > int > __spawni (pid_t * pid, const char *file, > - const posix_spawn_file_actions_t * acts, > + const posix_spawn_file_actions_t * file_actions, > const posix_spawnattr_t * attrp, char *const argv[], > char *const envp[], int xflags) > { > - /* It uses __execvpex to avoid run ENOEXEC in non compatibility mode (it > - will be handled by maybe_script_execute). */ > - return __spawnix (pid, file, acts, attrp, argv, envp, xflags, > - xflags & SPAWN_XFLAGS_USE_PATH ? __execvpex :__execve); > + /* Child must set args.err to something non-negative - we rely on > + the parent and child sharing VM. */ > + struct posix_spawn_args args = { > + .err = 0, > + .file = file, > + .fa = file_actions, > + .attr = attrp ? attrp : &(const posix_spawnattr_t) { 0 }, > + .argv = argv, > + .envp = envp, > + .xflags = xflags > + }; > + > + /* Disable asynchronous cancellation. */ > + int state; > + __libc_ptf_call (__pthread_setcancelstate, > + (PTHREAD_CANCEL_DISABLE, &state), 0); > + > + __libc_signal_block_all (&args.oldmask); > + > + int ec = spawn_process (&args, pid); > + > + __libc_signal_restore_set (&args.oldmask); > + > + __libc_ptf_call (__pthread_setcancelstate, (state, NULL), 0); > + > + return ec; > } >
Ping (x2). On 28/08/2019 11:09, Adhemerval Zanella wrote: > Ping. > > On 31/07/2019 15:31, Adhemerval Zanella wrote: >> Changes from previous version: >> >> * Move the logic of stack mapping creation to stackmap.h and >> added a guard page allocation for the compatibility case. >> >> -- >> >> The current internal posix_spawn symbol for Linux (__spawni) requires >> to allocate a dynamic stack based on input arguments to handle the >> SPAWN_XFLAGS_USE_PATH internal flag, which re-issue the input binary >> as a shell script if execve call return ENOEXEC (to execute shell >> scripts with an initial shebang). >> >> This is done only for compatibility mode and the generic case does not >> require the extra calculation plus the potential large mmap/munmap >> call. For default case, a pre-defined buffer is sufficed to use on the >> clone call instead. >> >> This patch optimizes Linux spawni by allocating a dynamic stack only >> for compatibility symbol (SPAWN_XFLAGS_USE_PATH). For generic case, >> an mmap allocated buffer is used along with a guard page, similar to >> what NPTL uses for thread stacks hardening. >> >> For default case, it is a fixed code path with fixed stack usage in helper >> process, so assuming a large enough stack buffer it would never overflow. >> It also does not prevent to adapt to the vfork-like to re-use process >> stack, once it is implemented. >> >> Checked x86_64-linux-gnu and i686-linux-gnu. >> >> * sysdeps/unix/sysv/linux/spawni.c (posix_spawn_args): Remove >> argc member. >> (maybe_script_execute): Remove function. >> (execve_compat, __spawni_clone, __spawnix_compat): New function. >> (__spawni_child): Remove maybe_script_execute call. >> (__spawnix): Remove magic stack slack constant with stack_slack >> identifier. >> (__spawni): Only allocates a variable stack when >> SPAWN_XFLAGS_TRY_SHELL is used. >> * posix/stackmap.h: New file. >> * sysdeps/ia64/nptl/pthreaddef.h (NEED_SEPARATE_REGISTER_STACK): Move >> to ... >> * sysdeps/ia64/stackinfo.h: ... here. >> --- >> posix/stackmap.h | 115 +++++++++++++ >> sysdeps/ia64/nptl/pthreaddef.h | 3 - >> sysdeps/ia64/stackinfo.h | 3 + >> sysdeps/unix/sysv/linux/spawni.c | 277 +++++++++++++++++++------------ >> 4 files changed, 285 insertions(+), 113 deletions(-) >> create mode 100644 posix/stackmap.h >> >> diff --git a/posix/stackmap.h b/posix/stackmap.h >> new file mode 100644 >> index 0000000000..be500e378a >> --- /dev/null >> +++ b/posix/stackmap.h >> @@ -0,0 +1,115 @@ >> +/* Functions to create stack mappings for helper processes. >> + Copyright (C) 2019 Free Software Foundation, Inc. >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. >> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <http://www.gnu.org/licenses/>. */ >> + >> +#ifndef _STACKMAP_H >> +#define _STACKMAP_H >> + >> +#include <unistd.h> >> +#include <sys/mman.h> >> +#include <ldsodefs.h> >> +#include <stdbool.h> >> + >> +static inline int >> +stack_prot (void) >> +{ >> + return (PROT_READ | PROT_WRITE >> + | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0)); >> +} >> + >> +static inline size_t >> +stack_guard_size (void) >> +{ >> + return GLRO (dl_pagesize); >> +} >> + >> +/* Return a aligning mask based on system pagesize. */ >> +static inline size_t >> +stack_pagesize_m1_mask (void) >> +{ >> + size_t pagesize_m1 = __getpagesize () - 1; >> + return ~pagesize_m1; >> +} >> + >> +/* Return the guard page position on memory segment MEM with total size SIZE >> + and with a guard page of size GUARDIZE. */ >> +static inline void * >> +stack_guard_position (void *mem, size_t size, size_t guardsize) >> +{ >> +#ifdef NEED_SEPARATE_REGISTER_STACK >> + return mem + (((size - guardsize) / 2) & stack_pagesize_m1_mask ()); >> +#elif _STACK_GROWS_DOWN >> + return mem; >> +#elif _STACK_GROWS_UP >> + return (void *) (((uintptr_t)(mem + size)- guardsize) >> + & stack_pagesize_m1_mask ()); >> +#endif >> +} >> + >> +/* Setup the expected stack memory protection value (based on stack_prot) >> + for the memory segment MEM with size SIZE based on the guard page >> + GUARD with size GUARDSIZE. The memory segment is expected to be allocated >> + with PROT_NOTE. */ >> +static inline bool >> +stack_setup_prot (char *mem, size_t size, char *guard, size_t guardsize) >> +{ >> + const int prot = stack_prot (); >> + >> + char *guardend = guard + guardsize; >> +#if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK) >> + /* As defined at guard_position, for architectures with downward stack >> + the guard page is always at start of the allocated area. */ >> + if (__mprotect (guardend, size - guardsize, prot) != 0) >> + return false; >> +#else >> + size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem; >> + if (__mprotect (mem, mprots1, prot) != 0) >> + return false; >> + size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend; >> + if (__mprotect (guardend, mprots2, prot) != 0) >> + return false; >> +#endif >> + return true; >> +} >> + >> +/* Allocated a memory segment with size SIZE plus GUARSIZE with mmap and >> + setup the expected protection for both a guard page and the stack >> + itself. */ >> +static inline void * >> +stack_allocate (size_t size, size_t guardsize) >> +{ >> + const int prot = stack_prot (); >> + >> + /* If a guard page is required, avoid committing memory by first >> + allocate with PROT_NONE and then reserve with required permission >> + excluding the guard page. */ >> + void *mem = __mmap (NULL, size, (guardsize == 0) ? prot : PROT_NONE, >> + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); >> + if (guardsize) >> + { >> + void *guard = stack_guard_position (mem, size, guardsize); >> + if (!stack_setup_prot (mem, size, guard, guardsize)) >> + { >> + __munmap (mem, size); >> + return MAP_FAILED; >> + } >> + } >> + >> + return mem; >> +} >> + >> +#endif /* _STACKMAP_H */ >> diff --git a/sysdeps/ia64/nptl/pthreaddef.h b/sysdeps/ia64/nptl/pthreaddef.h >> index bf52d5af62..11579f11b4 100644 >> --- a/sysdeps/ia64/nptl/pthreaddef.h >> +++ b/sysdeps/ia64/nptl/pthreaddef.h >> @@ -18,9 +18,6 @@ >> /* Default stack size. */ >> #define ARCH_STACK_DEFAULT_SIZE (32 * 1024 * 1024) >> >> -/* IA-64 uses a normal stack and a register stack. */ >> -#define NEED_SEPARATE_REGISTER_STACK >> - >> /* Required stack pointer alignment at beginning. */ >> #define STACK_ALIGN 16 >> >> diff --git a/sysdeps/ia64/stackinfo.h b/sysdeps/ia64/stackinfo.h >> index 6433a89945..d942426fcf 100644 >> --- a/sysdeps/ia64/stackinfo.h >> +++ b/sysdeps/ia64/stackinfo.h >> @@ -30,4 +30,7 @@ >> /* Default to a non-executable stack. */ >> #define DEFAULT_STACK_PERMS (PF_R|PF_W) >> >> +/* IA-64 uses a normal stack and a register stack. */ >> +#define NEED_SEPARATE_REGISTER_STACK >> + >> #endif /* stackinfo.h */ >> diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c >> index ca7bf99825..0f7a8ca5df 100644 >> --- a/sysdeps/unix/sysv/linux/spawni.c >> +++ b/sysdeps/unix/sysv/linux/spawni.c >> @@ -23,10 +23,11 @@ >> #include <not-cancel.h> >> #include <local-setxid.h> >> #include <shlib-compat.h> >> -#include <sigsetops.h> >> -#include <internal-signals.h> >> -#include <ldsodefs.h> >> +#include <nptl/pthreadP.h> >> #include <ctype.h> >> +#include <dl-sysdep.h> >> +#include <libc-pointer-arith.h> >> +#include <stackmap.h> >> #include "spawn_int.h" >> >> /* The Linux implementation of posix_spawn{p} uses the clone syscall directly >> @@ -70,7 +71,6 @@ >> # define STACK(__stack, __stack_size) (__stack + __stack_size) >> #endif >> >> - >> struct posix_spawn_args >> { >> sigset_t oldmask; >> @@ -79,37 +79,11 @@ struct posix_spawn_args >> const posix_spawn_file_actions_t *fa; >> const posix_spawnattr_t *restrict attr; >> char *const *argv; >> - ptrdiff_t argc; >> char *const *envp; >> int xflags; >> int err; >> }; >> >> -/* Older version requires that shell script without shebang definition >> - to be called explicitly using /bin/sh (_PATH_BSHELL). */ >> -static void >> -maybe_script_execute (struct posix_spawn_args *args) >> -{ >> - if (SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) >> - && (args->xflags & SPAWN_XFLAGS_TRY_SHELL) && errno == ENOEXEC) >> - { >> - char *const *argv = args->argv; >> - ptrdiff_t argc = args->argc; >> - >> - /* Construct an argument list for the shell. */ >> - char *new_argv[argc + 2]; >> - new_argv[0] = (char *) _PATH_BSHELL; >> - new_argv[1] = (char *) args->file; >> - if (argc > 1) >> - memcpy (new_argv + 2, argv + 1, argc * sizeof (char *)); >> - else >> - new_argv[2] = NULL; >> - >> - /* Execute the shell. */ >> - args->exec (new_argv[0], new_argv, args->envp); >> - } >> -} >> - >> /* Close all file descriptor up to FROM by interacting /proc/self/fd. */ >> static bool >> spawn_closefrom (int from) >> @@ -152,7 +126,7 @@ spawn_closefrom (int from) >> attributes, and file actions. It run on its own stack (provided by the >> posix_spawn call). */ >> static int >> -__spawni_child (void *arguments) >> +spawni_child (void *arguments) >> { >> struct posix_spawn_args *args = arguments; >> const posix_spawnattr_t *restrict attr = args->attr; >> @@ -330,11 +304,6 @@ __spawni_child (void *arguments) >> >> args->exec (args->file, args->argv, args->envp); >> >> - /* This is compatibility function required to enable posix_spawn run >> - script without shebang definition for older posix_spawn versions >> - (2.15). */ >> - maybe_script_execute (args); >> - >> fail: >> /* errno should have an appropriate non-zero value; otherwise, >> there's a bug in glibc or the kernel. For lack of an error code >> @@ -345,71 +314,12 @@ fail: >> _exit (SPAWN_ERROR); >> } >> >> -/* Spawn a new process executing PATH with the attributes describes in *ATTRP. >> - Before running the process perform the actions described in FILE-ACTIONS. */ >> static int >> -__spawnix (pid_t * pid, const char *file, >> - const posix_spawn_file_actions_t * file_actions, >> - const posix_spawnattr_t * attrp, char *const argv[], >> - char *const envp[], int xflags, >> - int (*exec) (const char *, char *const *, char *const *)) >> +spawni_clone (struct posix_spawn_args *args, void *stack, size_t stack_size, >> + pid_t *pid) >> { >> - pid_t new_pid; >> - struct posix_spawn_args args; >> int ec; >> - >> - /* To avoid imposing hard limits on posix_spawn{p} the total number of >> - arguments is first calculated to allocate a mmap to hold all possible >> - values. */ >> - ptrdiff_t argc = 0; >> - /* Linux allows at most max (0x7FFFFFFF, 1/4 stack size) arguments >> - to be used in a execve call. We limit to INT_MAX minus one due the >> - compatiblity code that may execute a shell script (maybe_script_execute) >> - where it will construct another argument list with an additional >> - argument. */ >> - ptrdiff_t limit = INT_MAX - 1; >> - while (argv[argc++] != NULL) >> - if (argc == limit) >> - { >> - errno = E2BIG; >> - return errno; >> - } >> - >> - int prot = (PROT_READ | PROT_WRITE >> - | ((GL (dl_stack_flags) & PF_X) ? PROT_EXEC : 0)); >> - >> - size_t argv_size = (argc * sizeof (void *)); >> - /* We need at least a few pages in case the compiler's stack checking is >> - enabled. In some configs, it is known to use at least 24KiB. We use >> - 32KiB to be "safe" from anything the compiler might do. Besides, the >> - extra pages won't actually be allocated unless they get used. >> - It also acts the slack for spawn_closefrom (including MIPS64 getdents64 >> - where it might use about 1k extra stack space. */ >> - argv_size += (32 * 1024); >> - size_t stack_size = ALIGN_UP (argv_size, GLRO(dl_pagesize)); >> - void *stack = __mmap (NULL, stack_size, prot, >> - MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); >> - if (__glibc_unlikely (stack == MAP_FAILED)) >> - return errno; >> - >> - /* Disable asynchronous cancellation. */ >> - int state; >> - __libc_ptf_call (__pthread_setcancelstate, >> - (PTHREAD_CANCEL_DISABLE, &state), 0); >> - >> - /* Child must set args.err to something non-negative - we rely on >> - the parent and child sharing VM. */ >> - args.err = 0; >> - args.file = file; >> - args.exec = exec; >> - args.fa = file_actions; >> - args.attr = attrp ? attrp : &(const posix_spawnattr_t) { 0 }; >> - args.argv = argv; >> - args.argc = argc; >> - args.envp = envp; >> - args.xflags = xflags; >> - >> - __libc_signal_block_all (&args.oldmask); >> + pid_t new_pid; >> >> /* The clone flags used will create a new child that will run in the same >> memory space (CLONE_VM) and the execution of calling thread will be >> @@ -419,8 +329,8 @@ __spawnix (pid_t * pid, const char *file, >> need for CLONE_SETTLS. Although parent and child share the same TLS >> namespace, there will be no concurrent access for TLS variables (errno >> for instance). */ >> - new_pid = CLONE (__spawni_child, STACK (stack, stack_size), stack_size, >> - CLONE_VM | CLONE_VFORK | SIGCHLD, &args); >> + new_pid = CLONE (spawni_child, STACK (stack, stack_size), stack_size, >> + CLONE_VM | CLONE_VFORK | SIGCHLD, args); >> >> /* It needs to collect the case where the auxiliary process was created >> but failed to execute the file (due either any preparation step or >> @@ -433,7 +343,7 @@ __spawnix (pid_t * pid, const char *file, >> only in case of failure, so in case of premature termination >> due a signal args.err will remain zeroed and it will be up to >> caller to actually collect it. */ >> - ec = args.err; >> + ec = args->err; >> if (ec > 0) >> /* There still an unlikely case where the child is cancelled after >> setting args.err, due to a positive error value. Also there is >> @@ -446,14 +356,139 @@ __spawnix (pid_t * pid, const char *file, >> else >> ec = -new_pid; >> >> - __munmap (stack, stack_size); >> - >> if ((ec == 0) && (pid != NULL)) >> *pid = new_pid; >> >> - __libc_signal_restore_set (&args.oldmask); >> + return ec; >> +} >> >> - __libc_ptf_call (__pthread_setcancelstate, (state, NULL), 0); >> +#if SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) >> +/* This is compatibility function required to enable posix_spawn run >> + script without shebang definition for older posix_spawn versions >> + (2.15). */ >> +static int >> +execve_compat (const char *filename, char *const argv[], char *const envp[]) >> +{ >> + __execve (filename, argv, envp); >> + >> + if (errno == ENOEXEC) >> + { >> + char *const *cargv = argv; >> + ptrdiff_t argc = 0; >> + while (cargv[argc++] != NULL); >> + >> + /* Construct an argument list for the shell. */ >> + char *new_argv[argc + 2]; >> + new_argv[0] = (char *) _PATH_BSHELL; >> + new_argv[1] = (char *) filename; >> + if (argc > 1) >> + memcpy (new_argv + 2, argv + 1, argc * sizeof (char *)); >> + else >> + new_argv[2] = NULL; >> + >> + /* Execute the shell. */ >> + __execve (new_argv[0], new_argv, envp); >> + } >> + >> + return -1; >> +} >> + >> +/* Allocates a stack using mmap to call clone. The stack size is based on >> + number of arguments since it would be used on compat mode which may call >> + execvpe/execve_compat. */ >> +static int >> +spawnix_compat (struct posix_spawn_args *args, pid_t *pid) >> +{ >> + char *const *argv = args->argv; >> + >> + /* To avoid imposing hard limits on posix_spawn{p} the total number of >> + arguments is first calculated to allocate a mmap to hold all possible >> + values. */ >> + ptrdiff_t argc = 0; >> + /* Linux allows at most max (0x7FFFFFFF, 1/4 stack size) arguments >> + to be used in a execve call. We limit to INT_MAX minus one due the >> + compatiblity code that may execute a shell script (maybe_script_execute) >> + where it will construct another argument list with an additional >> + argument. */ >> + ptrdiff_t limit = INT_MAX - 1; >> + while (argv[argc++] != NULL) >> + if (argc == limit) >> + { >> + errno = E2BIG; >> + return errno; >> + } >> + >> + size_t argv_size = (argc * sizeof (void *)); >> + /* We need at least a few pages in case the compiler's stack checking is >> + enabled. In some configs, it is known to use at least 24KiB. We use >> + 32KiB to be "safe" from anything the compiler might do. Besides, the >> + extra pages won't actually be allocated unless they get used. >> + It also acts the slack for spawn_closefrom (including MIPS64 getdents64 >> + where it might use about 1k extra stack space. */ >> + argv_size += (32 * 1024); >> + >> + /* Allocate a stack with an extra guard page. */ >> + size_t guard_size = stack_guard_size (); >> + size_t stack_size = guard_size + ALIGN_UP (argv_size, __getpagesize ()); >> + void *stack = stack_allocate (stack_size, guard_size); >> + if (__glibc_unlikely (stack == MAP_FAILED)) >> + return errno; >> + >> + int ec = spawni_clone (args, stack, stack_size, pid); >> + >> + __munmap (stack, stack_size); >> + >> + return ec; >> +} >> +#endif >> + >> +/* For SPAWN_XFLAGS_TRY_SHELL we need to execute a script even without >> + a shebang. To accomplish it we pass as callback to spawni_child >> + __execvpe (which call maybe_script_execute for such case) or >> + execve_compat (which mimics the semantic using execve). */ >> +static int >> +spawn_process (struct posix_spawn_args *args, pid_t *pid) >> +{ >> + int ec; >> + >> +#if SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) >> + if (args->xflags & SPAWN_XFLAGS_TRY_SHELL) >> + { >> + args->exec = args->xflags & SPAWN_XFLAGS_USE_PATH >> + ? __execvpe : execve_compat; >> + ec = spawnix_compat (args, pid); >> + } >> + else >> +#endif >> + { >> + args->exec = args->xflags & SPAWN_XFLAGS_USE_PATH >> + ? __execvpex : __execve; >> + >> + /* spawni_clone stack usage need to take in consideration spawni_child >> + stack usage and subsequent functions called: >> + >> + - sigprocmask: might allocate an extra sigset_t (128 bytes). >> + - __libc_sigaction: allocate a struct kernel_sigaction (144 bytes on >> + 64-bit, 136 on 32-bit). >> + - __sched_setparam, __sched_setscheduler, __setsig, __setpgid, >> + local_seteuid, local_setegid, __close_nocancel, __getrlimit64, >> + __close_nocancel, __open_nocancel, __dup2, __chdir, __fchdir: >> + and direct syscall. >> + - __fcntl: wrapper only uses local variables. >> + - spawn_closefrom: uses up to 1024 bytes as local buffer >> + - __direntries_read >> + - __getdents64: MIPS64 uses up to buffer size used, 1024 in this >> + specific usage. >> + - __direntries_next: local variables. >> + - __close_nocancel: direct syscall. >> + - execvpe allocates at least (NAME_MAX + 1) + PATH_MAX to create the >> + combination of PATH entry and program name (1024 + 255 + 1). >> + >> + It allocates 2048 plus some stack for automatic variables and function >> + calls. */ >> + char stack[2560]; >> + ec = spawni_clone (args, stack, sizeof stack, pid); >> + } >> >> return ec; >> } >> @@ -462,12 +497,34 @@ __spawnix (pid_t * pid, const char *file, >> Before running the process perform the actions described in FILE-ACTIONS. */ >> int >> __spawni (pid_t * pid, const char *file, >> - const posix_spawn_file_actions_t * acts, >> + const posix_spawn_file_actions_t * file_actions, >> const posix_spawnattr_t * attrp, char *const argv[], >> char *const envp[], int xflags) >> { >> - /* It uses __execvpex to avoid run ENOEXEC in non compatibility mode (it >> - will be handled by maybe_script_execute). */ >> - return __spawnix (pid, file, acts, attrp, argv, envp, xflags, >> - xflags & SPAWN_XFLAGS_USE_PATH ? __execvpex :__execve); >> + /* Child must set args.err to something non-negative - we rely on >> + the parent and child sharing VM. */ >> + struct posix_spawn_args args = { >> + .err = 0, >> + .file = file, >> + .fa = file_actions, >> + .attr = attrp ? attrp : &(const posix_spawnattr_t) { 0 }, >> + .argv = argv, >> + .envp = envp, >> + .xflags = xflags >> + }; >> + >> + /* Disable asynchronous cancellation. */ >> + int state; >> + __libc_ptf_call (__pthread_setcancelstate, >> + (PTHREAD_CANCEL_DISABLE, &state), 0); >> + >> + __libc_signal_block_all (&args.oldmask); >> + >> + int ec = spawn_process (&args, pid); >> + >> + __libc_signal_restore_set (&args.oldmask); >> + >> + __libc_ptf_call (__pthread_setcancelstate, (state, NULL), 0); >> + >> + return ec; >> } >>
diff --git a/posix/stackmap.h b/posix/stackmap.h new file mode 100644 index 0000000000..be500e378a --- /dev/null +++ b/posix/stackmap.h @@ -0,0 +1,115 @@ +/* Functions to create stack mappings for helper processes. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _STACKMAP_H +#define _STACKMAP_H + +#include <unistd.h> +#include <sys/mman.h> +#include <ldsodefs.h> +#include <stdbool.h> + +static inline int +stack_prot (void) +{ + return (PROT_READ | PROT_WRITE + | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0)); +} + +static inline size_t +stack_guard_size (void) +{ + return GLRO (dl_pagesize); +} + +/* Return a aligning mask based on system pagesize. */ +static inline size_t +stack_pagesize_m1_mask (void) +{ + size_t pagesize_m1 = __getpagesize () - 1; + return ~pagesize_m1; +} + +/* Return the guard page position on memory segment MEM with total size SIZE + and with a guard page of size GUARDIZE. */ +static inline void * +stack_guard_position (void *mem, size_t size, size_t guardsize) +{ +#ifdef NEED_SEPARATE_REGISTER_STACK + return mem + (((size - guardsize) / 2) & stack_pagesize_m1_mask ()); +#elif _STACK_GROWS_DOWN + return mem; +#elif _STACK_GROWS_UP + return (void *) (((uintptr_t)(mem + size)- guardsize) + & stack_pagesize_m1_mask ()); +#endif +} + +/* Setup the expected stack memory protection value (based on stack_prot) + for the memory segment MEM with size SIZE based on the guard page + GUARD with size GUARDSIZE. The memory segment is expected to be allocated + with PROT_NOTE. */ +static inline bool +stack_setup_prot (char *mem, size_t size, char *guard, size_t guardsize) +{ + const int prot = stack_prot (); + + char *guardend = guard + guardsize; +#if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK) + /* As defined at guard_position, for architectures with downward stack + the guard page is always at start of the allocated area. */ + if (__mprotect (guardend, size - guardsize, prot) != 0) + return false; +#else + size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem; + if (__mprotect (mem, mprots1, prot) != 0) + return false; + size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend; + if (__mprotect (guardend, mprots2, prot) != 0) + return false; +#endif + return true; +} + +/* Allocated a memory segment with size SIZE plus GUARSIZE with mmap and + setup the expected protection for both a guard page and the stack + itself. */ +static inline void * +stack_allocate (size_t size, size_t guardsize) +{ + const int prot = stack_prot (); + + /* If a guard page is required, avoid committing memory by first + allocate with PROT_NONE and then reserve with required permission + excluding the guard page. */ + void *mem = __mmap (NULL, size, (guardsize == 0) ? prot : PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (guardsize) + { + void *guard = stack_guard_position (mem, size, guardsize); + if (!stack_setup_prot (mem, size, guard, guardsize)) + { + __munmap (mem, size); + return MAP_FAILED; + } + } + + return mem; +} + +#endif /* _STACKMAP_H */ diff --git a/sysdeps/ia64/nptl/pthreaddef.h b/sysdeps/ia64/nptl/pthreaddef.h index bf52d5af62..11579f11b4 100644 --- a/sysdeps/ia64/nptl/pthreaddef.h +++ b/sysdeps/ia64/nptl/pthreaddef.h @@ -18,9 +18,6 @@ /* Default stack size. */ #define ARCH_STACK_DEFAULT_SIZE (32 * 1024 * 1024) -/* IA-64 uses a normal stack and a register stack. */ -#define NEED_SEPARATE_REGISTER_STACK - /* Required stack pointer alignment at beginning. */ #define STACK_ALIGN 16 diff --git a/sysdeps/ia64/stackinfo.h b/sysdeps/ia64/stackinfo.h index 6433a89945..d942426fcf 100644 --- a/sysdeps/ia64/stackinfo.h +++ b/sysdeps/ia64/stackinfo.h @@ -30,4 +30,7 @@ /* Default to a non-executable stack. */ #define DEFAULT_STACK_PERMS (PF_R|PF_W) +/* IA-64 uses a normal stack and a register stack. */ +#define NEED_SEPARATE_REGISTER_STACK + #endif /* stackinfo.h */ diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c index ca7bf99825..0f7a8ca5df 100644 --- a/sysdeps/unix/sysv/linux/spawni.c +++ b/sysdeps/unix/sysv/linux/spawni.c @@ -23,10 +23,11 @@ #include <not-cancel.h> #include <local-setxid.h> #include <shlib-compat.h> -#include <sigsetops.h> -#include <internal-signals.h> -#include <ldsodefs.h> +#include <nptl/pthreadP.h> #include <ctype.h> +#include <dl-sysdep.h> +#include <libc-pointer-arith.h> +#include <stackmap.h> #include "spawn_int.h" /* The Linux implementation of posix_spawn{p} uses the clone syscall directly @@ -70,7 +71,6 @@ # define STACK(__stack, __stack_size) (__stack + __stack_size) #endif - struct posix_spawn_args { sigset_t oldmask; @@ -79,37 +79,11 @@ struct posix_spawn_args const posix_spawn_file_actions_t *fa; const posix_spawnattr_t *restrict attr; char *const *argv; - ptrdiff_t argc; char *const *envp; int xflags; int err; }; -/* Older version requires that shell script without shebang definition - to be called explicitly using /bin/sh (_PATH_BSHELL). */ -static void -maybe_script_execute (struct posix_spawn_args *args) -{ - if (SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) - && (args->xflags & SPAWN_XFLAGS_TRY_SHELL) && errno == ENOEXEC) - { - char *const *argv = args->argv; - ptrdiff_t argc = args->argc; - - /* Construct an argument list for the shell. */ - char *new_argv[argc + 2]; - new_argv[0] = (char *) _PATH_BSHELL; - new_argv[1] = (char *) args->file; - if (argc > 1) - memcpy (new_argv + 2, argv + 1, argc * sizeof (char *)); - else - new_argv[2] = NULL; - - /* Execute the shell. */ - args->exec (new_argv[0], new_argv, args->envp); - } -} - /* Close all file descriptor up to FROM by interacting /proc/self/fd. */ static bool spawn_closefrom (int from) @@ -152,7 +126,7 @@ spawn_closefrom (int from) attributes, and file actions. It run on its own stack (provided by the posix_spawn call). */ static int -__spawni_child (void *arguments) +spawni_child (void *arguments) { struct posix_spawn_args *args = arguments; const posix_spawnattr_t *restrict attr = args->attr; @@ -330,11 +304,6 @@ __spawni_child (void *arguments) args->exec (args->file, args->argv, args->envp); - /* This is compatibility function required to enable posix_spawn run - script without shebang definition for older posix_spawn versions - (2.15). */ - maybe_script_execute (args); - fail: /* errno should have an appropriate non-zero value; otherwise, there's a bug in glibc or the kernel. For lack of an error code @@ -345,71 +314,12 @@ fail: _exit (SPAWN_ERROR); } -/* Spawn a new process executing PATH with the attributes describes in *ATTRP. - Before running the process perform the actions described in FILE-ACTIONS. */ static int -__spawnix (pid_t * pid, const char *file, - const posix_spawn_file_actions_t * file_actions, - const posix_spawnattr_t * attrp, char *const argv[], - char *const envp[], int xflags, - int (*exec) (const char *, char *const *, char *const *)) +spawni_clone (struct posix_spawn_args *args, void *stack, size_t stack_size, + pid_t *pid) { - pid_t new_pid; - struct posix_spawn_args args; int ec; - - /* To avoid imposing hard limits on posix_spawn{p} the total number of - arguments is first calculated to allocate a mmap to hold all possible - values. */ - ptrdiff_t argc = 0; - /* Linux allows at most max (0x7FFFFFFF, 1/4 stack size) arguments - to be used in a execve call. We limit to INT_MAX minus one due the - compatiblity code that may execute a shell script (maybe_script_execute) - where it will construct another argument list with an additional - argument. */ - ptrdiff_t limit = INT_MAX - 1; - while (argv[argc++] != NULL) - if (argc == limit) - { - errno = E2BIG; - return errno; - } - - int prot = (PROT_READ | PROT_WRITE - | ((GL (dl_stack_flags) & PF_X) ? PROT_EXEC : 0)); - - size_t argv_size = (argc * sizeof (void *)); - /* We need at least a few pages in case the compiler's stack checking is - enabled. In some configs, it is known to use at least 24KiB. We use - 32KiB to be "safe" from anything the compiler might do. Besides, the - extra pages won't actually be allocated unless they get used. - It also acts the slack for spawn_closefrom (including MIPS64 getdents64 - where it might use about 1k extra stack space. */ - argv_size += (32 * 1024); - size_t stack_size = ALIGN_UP (argv_size, GLRO(dl_pagesize)); - void *stack = __mmap (NULL, stack_size, prot, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); - if (__glibc_unlikely (stack == MAP_FAILED)) - return errno; - - /* Disable asynchronous cancellation. */ - int state; - __libc_ptf_call (__pthread_setcancelstate, - (PTHREAD_CANCEL_DISABLE, &state), 0); - - /* Child must set args.err to something non-negative - we rely on - the parent and child sharing VM. */ - args.err = 0; - args.file = file; - args.exec = exec; - args.fa = file_actions; - args.attr = attrp ? attrp : &(const posix_spawnattr_t) { 0 }; - args.argv = argv; - args.argc = argc; - args.envp = envp; - args.xflags = xflags; - - __libc_signal_block_all (&args.oldmask); + pid_t new_pid; /* The clone flags used will create a new child that will run in the same memory space (CLONE_VM) and the execution of calling thread will be @@ -419,8 +329,8 @@ __spawnix (pid_t * pid, const char *file, need for CLONE_SETTLS. Although parent and child share the same TLS namespace, there will be no concurrent access for TLS variables (errno for instance). */ - new_pid = CLONE (__spawni_child, STACK (stack, stack_size), stack_size, - CLONE_VM | CLONE_VFORK | SIGCHLD, &args); + new_pid = CLONE (spawni_child, STACK (stack, stack_size), stack_size, + CLONE_VM | CLONE_VFORK | SIGCHLD, args); /* It needs to collect the case where the auxiliary process was created but failed to execute the file (due either any preparation step or @@ -433,7 +343,7 @@ __spawnix (pid_t * pid, const char *file, only in case of failure, so in case of premature termination due a signal args.err will remain zeroed and it will be up to caller to actually collect it. */ - ec = args.err; + ec = args->err; if (ec > 0) /* There still an unlikely case where the child is cancelled after setting args.err, due to a positive error value. Also there is @@ -446,14 +356,139 @@ __spawnix (pid_t * pid, const char *file, else ec = -new_pid; - __munmap (stack, stack_size); - if ((ec == 0) && (pid != NULL)) *pid = new_pid; - __libc_signal_restore_set (&args.oldmask); + return ec; +} - __libc_ptf_call (__pthread_setcancelstate, (state, NULL), 0); +#if SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) +/* This is compatibility function required to enable posix_spawn run + script without shebang definition for older posix_spawn versions + (2.15). */ +static int +execve_compat (const char *filename, char *const argv[], char *const envp[]) +{ + __execve (filename, argv, envp); + + if (errno == ENOEXEC) + { + char *const *cargv = argv; + ptrdiff_t argc = 0; + while (cargv[argc++] != NULL); + + /* Construct an argument list for the shell. */ + char *new_argv[argc + 2]; + new_argv[0] = (char *) _PATH_BSHELL; + new_argv[1] = (char *) filename; + if (argc > 1) + memcpy (new_argv + 2, argv + 1, argc * sizeof (char *)); + else + new_argv[2] = NULL; + + /* Execute the shell. */ + __execve (new_argv[0], new_argv, envp); + } + + return -1; +} + +/* Allocates a stack using mmap to call clone. The stack size is based on + number of arguments since it would be used on compat mode which may call + execvpe/execve_compat. */ +static int +spawnix_compat (struct posix_spawn_args *args, pid_t *pid) +{ + char *const *argv = args->argv; + + /* To avoid imposing hard limits on posix_spawn{p} the total number of + arguments is first calculated to allocate a mmap to hold all possible + values. */ + ptrdiff_t argc = 0; + /* Linux allows at most max (0x7FFFFFFF, 1/4 stack size) arguments + to be used in a execve call. We limit to INT_MAX minus one due the + compatiblity code that may execute a shell script (maybe_script_execute) + where it will construct another argument list with an additional + argument. */ + ptrdiff_t limit = INT_MAX - 1; + while (argv[argc++] != NULL) + if (argc == limit) + { + errno = E2BIG; + return errno; + } + + size_t argv_size = (argc * sizeof (void *)); + /* We need at least a few pages in case the compiler's stack checking is + enabled. In some configs, it is known to use at least 24KiB. We use + 32KiB to be "safe" from anything the compiler might do. Besides, the + extra pages won't actually be allocated unless they get used. + It also acts the slack for spawn_closefrom (including MIPS64 getdents64 + where it might use about 1k extra stack space. */ + argv_size += (32 * 1024); + + /* Allocate a stack with an extra guard page. */ + size_t guard_size = stack_guard_size (); + size_t stack_size = guard_size + ALIGN_UP (argv_size, __getpagesize ()); + void *stack = stack_allocate (stack_size, guard_size); + if (__glibc_unlikely (stack == MAP_FAILED)) + return errno; + + int ec = spawni_clone (args, stack, stack_size, pid); + + __munmap (stack, stack_size); + + return ec; +} +#endif + +/* For SPAWN_XFLAGS_TRY_SHELL we need to execute a script even without + a shebang. To accomplish it we pass as callback to spawni_child + __execvpe (which call maybe_script_execute for such case) or + execve_compat (which mimics the semantic using execve). */ +static int +spawn_process (struct posix_spawn_args *args, pid_t *pid) +{ + int ec; + +#if SHLIB_COMPAT (libc, GLIBC_2_2, GLIBC_2_15) + if (args->xflags & SPAWN_XFLAGS_TRY_SHELL) + { + args->exec = args->xflags & SPAWN_XFLAGS_USE_PATH + ? __execvpe : execve_compat; + ec = spawnix_compat (args, pid); + } + else +#endif + { + args->exec = args->xflags & SPAWN_XFLAGS_USE_PATH + ? __execvpex : __execve; + + /* spawni_clone stack usage need to take in consideration spawni_child + stack usage and subsequent functions called: + + - sigprocmask: might allocate an extra sigset_t (128 bytes). + - __libc_sigaction: allocate a struct kernel_sigaction (144 bytes on + 64-bit, 136 on 32-bit). + - __sched_setparam, __sched_setscheduler, __setsig, __setpgid, + local_seteuid, local_setegid, __close_nocancel, __getrlimit64, + __close_nocancel, __open_nocancel, __dup2, __chdir, __fchdir: + and direct syscall. + - __fcntl: wrapper only uses local variables. + - spawn_closefrom: uses up to 1024 bytes as local buffer + - __direntries_read + - __getdents64: MIPS64 uses up to buffer size used, 1024 in this + specific usage. + - __direntries_next: local variables. + - __close_nocancel: direct syscall. + - execvpe allocates at least (NAME_MAX + 1) + PATH_MAX to create the + combination of PATH entry and program name (1024 + 255 + 1). + + It allocates 2048 plus some stack for automatic variables and function + calls. */ + char stack[2560]; + ec = spawni_clone (args, stack, sizeof stack, pid); + } return ec; } @@ -462,12 +497,34 @@ __spawnix (pid_t * pid, const char *file, Before running the process perform the actions described in FILE-ACTIONS. */ int __spawni (pid_t * pid, const char *file, - const posix_spawn_file_actions_t * acts, + const posix_spawn_file_actions_t * file_actions, const posix_spawnattr_t * attrp, char *const argv[], char *const envp[], int xflags) { - /* It uses __execvpex to avoid run ENOEXEC in non compatibility mode (it - will be handled by maybe_script_execute). */ - return __spawnix (pid, file, acts, attrp, argv, envp, xflags, - xflags & SPAWN_XFLAGS_USE_PATH ? __execvpex :__execve); + /* Child must set args.err to something non-negative - we rely on + the parent and child sharing VM. */ + struct posix_spawn_args args = { + .err = 0, + .file = file, + .fa = file_actions, + .attr = attrp ? attrp : &(const posix_spawnattr_t) { 0 }, + .argv = argv, + .envp = envp, + .xflags = xflags + }; + + /* Disable asynchronous cancellation. */ + int state; + __libc_ptf_call (__pthread_setcancelstate, + (PTHREAD_CANCEL_DISABLE, &state), 0); + + __libc_signal_block_all (&args.oldmask); + + int ec = spawn_process (&args, pid); + + __libc_signal_restore_set (&args.oldmask); + + __libc_ptf_call (__pthread_setcancelstate, (state, NULL), 0); + + return ec; }