Message ID | 20190731183136.21545-4-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | [v2,1/5] mips: Do not malloc on getdents64 fallback | expand |
Ping. On 31/07/2019 15:31, Adhemerval Zanella wrote: > The child helper process on Linux posix_spawn child must ensure that no signal > handler are enabled, so the signal disposition must be either SIG_DFL or > SIG_IGN. However, it requires a sigprocmask to obtain the current signal mask > and at least _NSIG sigaction calls to reset the signal handlers for each > posix_spawn call. > > This patch optimizes it by tracking on sigaction implementation when a signal > action is set different than SIG_DFL or SIG_IGN. It allows remove a > sigprocmask call and isse sigaction to reset the disposition only the signals > that has non-default actions set. > > It might incur in false positive, since it not easy to remove bits from the > mask without race conditions, but it does not allow false negative since the > mask is updated atomically prior the syscall. The false positive incur in > just extra sigactions on posix_spawn. > > Checked on x86_64 and i686. > > * include/atomic.h (atomic_fetch_or_seq_cst, atomic_fetch_or_seq_cst): > New macros. > * posix/Makefile (tests): Add tst-spawn6. > * posix/tst-spawn6.c: New file. > * sysdeps/generic/sigsetops.h (__sigorset_atomic): New macro. > * sysdeps/unix/sysv/linux/internal-signals.h (__get_sighandler_set): > New prototype. > * sysdeps/unix/sysv/linux/sigaction.c (__get_sighandler_set): New > function. > (__libc_sigaction): Set the internal handler_set for a new action. > * sysdeps/unix/sysv/linux/sigsetops.h (__sigorset_atomic, > __sigaddset_atomic): New macros. > * sysdeps/unix/sysv/linux/spawni.c (spawni_child): Replace > __sigprocmask with __get_sighandler_set. > --- > include/atomic.h | 10 + > posix/Makefile | 4 +- > posix/tst-spawn6.c | 220 +++++++++++++++++++++ > sysdeps/generic/sigsetops.h | 7 + > sysdeps/unix/sysv/linux/internal-signals.h | 3 + > sysdeps/unix/sysv/linux/sigaction.c | 17 ++ > sysdeps/unix/sysv/linux/sigsetops.h | 15 ++ > sysdeps/unix/sysv/linux/spawni.c | 9 +- > 8 files changed, 278 insertions(+), 7 deletions(-) > create mode 100644 posix/tst-spawn6.c > > diff --git a/include/atomic.h b/include/atomic.h > index ee1978eb3b..72609efde9 100644 > --- a/include/atomic.h > +++ b/include/atomic.h > @@ -646,6 +646,9 @@ void __atomic_link_error (void); > # define atomic_fetch_or_release(mem, operand) \ > ({ __atomic_check_size((mem)); \ > __atomic_fetch_or ((mem), (operand), __ATOMIC_RELEASE); }) > +# define atomic_fetch_or_seq_cst(mem, operand) \ > + ({ __atomic_check_size((mem)); \ > + __atomic_fetch_or ((mem), (operand), __ATOMIC_SEQ_CST); }) > > # define atomic_fetch_xor_release(mem, operand) \ > ({ __atomic_check_size((mem)); \ > @@ -791,6 +794,13 @@ void __atomic_link_error (void); > ({ atomic_thread_fence_release (); \ > atomic_fetch_or_acquire ((mem), (operand)); }) > # endif > +# ifndef atomic_fetch_or_seq_cst > +# define atomic_fetch_or_seq_cst(mem, operand) \ > + ({ atomic_thread_fence_acquire (); \ > + atomic_fetch_or_relaxed ((mem), (operand)); \ > + atomic_thread_fence_release (); }) > +# endif > + > > # ifndef atomic_fetch_xor_release > /* Failing the atomic_compare_exchange_weak_release reloads the value in > diff --git a/posix/Makefile b/posix/Makefile > index 1ac41ad85a..131ae052fd 100644 > --- a/posix/Makefile > +++ b/posix/Makefile > @@ -102,7 +102,8 @@ tests := test-errno tstgetopt testfnm runtests runptests \ > tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \ > tst-glob-tilde test-ssize-max tst-spawn4 bug-regex37 \ > bug-regex38 tst-regcomp-truncated tst-spawn-chdir \ > - tst-spawn5 > + tst-spawn5 \ > + tst-spawn6 > tests-internal := bug-regex5 bug-regex20 bug-regex33 \ > tst-rfc3484 tst-rfc3484-2 tst-rfc3484-3 \ > tst-glob_lstat_compat tst-spawn4-compat > @@ -255,6 +256,7 @@ tst-exec-ARGS = -- $(host-test-program-cmd) > tst-exec-static-ARGS = $(tst-exec-ARGS) > tst-execvpe5-ARGS = -- $(host-test-program-cmd) > tst-spawn-ARGS = -- $(host-test-program-cmd) > +tst-spawn6-ARGS = -- $(host-test-program-cmd) > tst-spawn-static-ARGS = $(tst-spawn-ARGS) > tst-spawn5-ARGS = -- $(host-test-program-cmd) > tst-dir-ARGS = `pwd` `cd $(common-objdir)/$(subdir); pwd` `cd $(common-objdir); pwd` $(objpfx)tst-dir > diff --git a/posix/tst-spawn6.c b/posix/tst-spawn6.c > new file mode 100644 > index 0000000000..466e66f104 > --- /dev/null > +++ b/posix/tst-spawn6.c > @@ -0,0 +1,220 @@ > +/* Tests for posix_spawn signal handling. > + Copyright (C) 2019 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <getopt.h> > +#include <spawn.h> > +#include <sys/wait.h> > + > +#include <support/check.h> > +#include <support/xunistd.h> > +#include <support/support.h> > +#include <array_length.h> > + > +/* Nonzero if the program gets called via `exec'. */ > +static int restart; > +#define CMDLINE_OPTIONS \ > + { "restart", no_argument, &restart, 1 }, > + > +enum spawn_test_t > +{ > + SPAWN_SETSIGMASK, > + SPAWN_SETSIGDEF > +}; > + > +static int signal_to_check[] = > +{ > + SIGHUP, SIGINT, SIGALRM, SIGUSR2 > +}; > + > +/* Called on process re-execution. */ > +static int > +handle_restart (enum spawn_test_t test) > +{ > + switch (test) > + { > + case SPAWN_SETSIGMASK: > + { > + sigset_t mask; > + sigprocmask (SIG_BLOCK, NULL, &mask); > + for (int i = 0; i < array_length (signal_to_check); i++) > + if (sigismember (&mask, signal_to_check[i]) != 1) > + exit (EXIT_FAILURE); > + } > + break; > + case SPAWN_SETSIGDEF: > + { > + for (int i = 0; i < array_length (signal_to_check); i++) > + { > + struct sigaction act; > + if (sigaction (signal_to_check[i], NULL, &act) != 0) > + exit (EXIT_FAILURE); > + if (act.sa_handler != SIG_DFL) > + exit (EXIT_FAILURE); > + } > + } > + break; > + } > + > + return 0; > +} > + > +/* Common argument used for process re-execution. */ > +static char *initial_spargv[5]; > +static size_t initial_spargv_size; > + > +/* Re-execute the test process with both '--direct', '--restart', and the > + TEST (as integer value) as arguments. */ > +static void > +reexecute (enum spawn_test_t test, const posix_spawnattr_t *attrp) > +{ > + char *spargv[8]; > + int i; > + > + for (i = 0; i < initial_spargv_size; i++) > + spargv[i] = initial_spargv[i]; > + /* Three digits per byte plus null terminator. */ > + char teststr[3 * sizeof (test) + 1]; > + snprintf (teststr, array_length (teststr), "%d", test); > + spargv[i++] = teststr; > + spargv[i] = NULL; > + TEST_VERIFY (i < 8); > + > + pid_t pid; > + int status; > + > + TEST_COMPARE (posix_spawn (&pid, spargv[0], NULL, attrp, spargv, environ), > + 0); > + TEST_COMPARE (xwaitpid (pid, &status, 0), pid); > + TEST_VERIFY (WIFEXITED (status)); > + TEST_VERIFY (!WIFSIGNALED (status)); > + TEST_COMPARE (WEXITSTATUS (status), 0); > +} > + > +/* Test if POSIX_SPAWN_SETSIGMASK change the spawn process signal mask to > + the value blocked signals defined by SIGNAL_TO_CHECK signals. */ > +static void > +do_test_setsigmask (void) > +{ > + posix_spawnattr_t attr; > + /* posix_spawnattr_init does not fail. */ > + posix_spawnattr_init (&attr); > + > + { > + sigset_t mask; > + TEST_COMPARE (sigemptyset (&mask), 0); > + for (int i = 0; i < array_length (signal_to_check); i++) > + TEST_COMPARE (sigaddset (&mask, signal_to_check[i]), 0); > + TEST_COMPARE (posix_spawnattr_setsigmask (&attr, &mask), 0); > + TEST_COMPARE (posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGMASK), 0); > + } > + > + /* Change current mask to be different than the one asked for spawned > + process. */ > + { > + sigset_t empty_mask, current_mask; > + TEST_COMPARE (sigemptyset (&empty_mask), 0); > + TEST_COMPARE (sigprocmask (SIG_BLOCK, &empty_mask, ¤t_mask), 0); > + > + reexecute (SPAWN_SETSIGMASK, &attr); > + > + TEST_COMPARE (sigprocmask (SIG_SETMASK, ¤t_mask, NULL), 0); > + } > +} > + > +/* Test if POSIX_SPAWN_SETSIGDEF change the spawn process signal actions > + defined by SIGNAL_TO_CHECK signals to default actions. */ > +static void > +do_test_setsigdef (void) > +{ > + posix_spawnattr_t attr; > + /* posix_spawnattr_init does not fail. */ > + posix_spawnattr_init (&attr); > + > + { > + sigset_t mask; > + TEST_COMPARE (sigemptyset (&mask), 0); > + for (int i = 0; i < array_length (signal_to_check); i++) > + TEST_COMPARE (sigaddset (&mask, signal_to_check[i]), 0); > + TEST_COMPARE (posix_spawnattr_setsigdefault (&attr, &mask), 0); > + TEST_COMPARE (posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF), 0); > + } > + > + /* Change current signal disposition to be different than the one asked for > + spawned process. */ > + struct sigaction default_act[array_length (signal_to_check)]; > + { > + sigset_t empty_mask; > + TEST_COMPARE (sigemptyset (&empty_mask), 0); > + for (int i = 0; i < array_length (signal_to_check); i++) > + TEST_COMPARE (sigaction (signal_to_check[i], > + &((struct sigaction) { .sa_handler = SIG_IGN, > + .sa_mask = empty_mask, > + .sa_flags = 0 }), > + &default_act[i]), > + 0); > + } > + > + reexecute (SPAWN_SETSIGDEF, &attr); > + > + /* Restore signal dispositions. */ > + for (int i = 0; i < array_length (signal_to_check); i++) > + TEST_COMPARE (sigaction (signal_to_check[i], &default_act[i], NULL), 0); > +} > + > +static int > +do_test (int argc, char *argv[]) > +{ > + /* We must have one or four parameters left if called initially: > + + path for ld.so optional > + + "--library-path" optional > + + the library path optional > + + the application name > + > + Plus one parameter to indicate which test to execute through > + re-execution. > + > + So for default usage without --enable-hardcoded-path-in-tests, it > + will be called initially with 5 arguments and later with 2. For > + --enable-hardcoded-path-in-tests it will be called with 2 arguments > + regardless. */ > + > + if (argc != (restart ? 2 : 5) && argc != 2) > + FAIL_EXIT1 ("wrong number of arguments (%d)", argc); > + > + if (restart) > + return handle_restart (atoi (argv[1])); > + > + { > + int i; > + for (i = 0; i < (argc == 5 ? 4 : 1); i++) > + initial_spargv[i] = argv[i + 1]; > + initial_spargv[i++] = (char *) "--direct"; > + initial_spargv[i++] = (char *) "--restart"; > + initial_spargv_size = i; > + } > + > + do_test_setsigmask (); > + do_test_setsigdef (); > + > + return 0; > +} > + > +#define TEST_FUNCTION_ARGV do_test > +#include <support/test-driver.c> > diff --git a/sysdeps/generic/sigsetops.h b/sysdeps/generic/sigsetops.h > index ddeeb0b0d5..9cae11771b 100644 > --- a/sysdeps/generic/sigsetops.h > +++ b/sysdeps/generic/sigsetops.h > @@ -66,6 +66,13 @@ > 0; \ > })) > > +# define __sigorset_atomic(set) \ > + (__extension__ ({ \ > + __sigset_t __mask = __sigmask (sig); \ > + atomic_fetch_or_seq_cst (set, mask); \ > + 0; \ > + })) > + > # define __sigdelset(set, sig) \ > (__extension__ ({ \ > __sigset_t __mask = __sigmask (sig); \ > diff --git a/sysdeps/unix/sysv/linux/internal-signals.h b/sysdeps/unix/sysv/linux/internal-signals.h > index 3562011d21..385442f81e 100644 > --- a/sysdeps/unix/sysv/linux/internal-signals.h > +++ b/sysdeps/unix/sysv/linux/internal-signals.h > @@ -88,4 +88,7 @@ __libc_signal_restore_set (const sigset_t *set) > /* Used to communicate with signal handler. */ > extern struct xid_command *__xidcmd attribute_hidden; > > +/* Used to obtained the modified signal handlers. */ > +extern void __get_sighandler_set (sigset_t *set) attribute_hidden; > + > #endif > diff --git a/sysdeps/unix/sysv/linux/sigaction.c b/sysdeps/unix/sysv/linux/sigaction.c > index 52722b08ae..3bcf3946ab 100644 > --- a/sysdeps/unix/sysv/linux/sigaction.c > +++ b/sysdeps/unix/sysv/linux/sigaction.c > @@ -20,6 +20,7 @@ > #include <string.h> > > #include <sysdep.h> > +#include <sigsetops.h> > #include <sys/syscall.h> > > /* New ports should not define the obsolete SA_RESTORER, however some > @@ -36,6 +37,13 @@ > # define STUB(act, sigsetsize) (sigsetsize) > #endif > > +static sigset_t handler_set; > + > +void __get_sighandler_set (sigset_t *set) > +{ > + *set = handler_set; > +} > + > /* If ACT is not NULL, change the action for SIG to *ACT. > If OACT is not NULL, put the old action for SIG in *OACT. */ > int > @@ -47,6 +55,15 @@ __libc_sigaction (int sig, const struct sigaction *act, struct sigaction *oact) > > if (act) > { > + /* Tracks which signal had a signal handler set different from default > + (SIG_DFL/SIG_IGN). It allows optimize posix_spawn to reset only > + those signals. It might incur in false positive, since it not easy > + to remove bits from the mask without race conditions, but it does not > + allow false negative since the mask is updated atomically prior the > + syscall. The false positive incur in just extra sigactions on > + posix_spawn. */ > + if (act->sa_handler != SIG_DFL && act->sa_handler != SIG_IGN) > + __sigaddset_atomic (&handler_set, sig); > kact.k_sa_handler = act->sa_handler; > memcpy (&kact.sa_mask, &act->sa_mask, sizeof (sigset_t)); > kact.sa_flags = act->sa_flags; > diff --git a/sysdeps/unix/sysv/linux/sigsetops.h b/sysdeps/unix/sysv/linux/sigsetops.h > index 713d4840d8..6c98c83e42 100644 > --- a/sysdeps/unix/sysv/linux/sigsetops.h > +++ b/sysdeps/unix/sysv/linux/sigsetops.h > @@ -20,6 +20,7 @@ > #define _SIGSETOPS_H 1 > > #include <signal.h> > +#include <atomic.h> > > /* Return a mask that includes the bit for SIG only. */ > # define __sigmask(sig) \ > @@ -80,6 +81,12 @@ > (void)0; \ > })) > > +# define __sigorset_atomic(dest, left, right) \ > + (__extension__ ({ \ > + atomic_fetch_or_seq_cst (dest, left, right); \ > + 0; \ > + })) > + > /* These macros needn't check for a bogus signal number; > error checking is done in the non-__ versions. */ > # define __sigismember(set, sig) \ > @@ -97,6 +104,14 @@ > (void)0; \ > })) > > +# define __sigaddset_atomic(set, sig) \ > + (__extension__ ({ \ > + unsigned long int __mask = __sigmask (sig); \ > + unsigned long int __word = __sigword (sig); \ > + atomic_fetch_or_seq_cst (&((set)->__val[__word]), __mask); \ > + (void)0; \ > + })) > + > # define __sigdelset(set, sig) \ > (__extension__ ({ \ > unsigned long int __mask = __sigmask (sig); \ > diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c > index 0f7a8ca5df..264edd09c6 100644 > --- a/sysdeps/unix/sysv/linux/spawni.c > +++ b/sysdeps/unix/sysv/linux/spawni.c > @@ -132,17 +132,14 @@ spawni_child (void *arguments) > const posix_spawnattr_t *restrict attr = args->attr; > const posix_spawn_file_actions_t *file_actions = args->fa; > > - /* The child must ensure that no signal handler are enabled because it shared > + /* The child must ensure that no signal handler are enabled because it share > memory with parent, so the signal disposition must be either SIG_DFL or > - SIG_IGN. It does by iterating over all signals and although it could > - possibly be more optimized (by tracking which signal potentially have a > - signal handler), it might requires system specific solutions (since the > - sigset_t data type can be very different on different architectures). */ > + SIG_IGN. */ > struct sigaction sa; > memset (&sa, '\0', sizeof (sa)); > > sigset_t hset; > - __sigprocmask (SIG_BLOCK, 0, &hset); > + __get_sighandler_set (&hset); > for (int sig = 1; sig < _NSIG; ++sig) > { > if ((attr->__flags & POSIX_SPAWN_SETSIGDEF) >
* Adhemerval Zanella: > * include/atomic.h (atomic_fetch_or_seq_cst, atomic_fetch_or_seq_cst): > New macros. Why isn't a regular release store/acquire load synchronization sufficient here? I wonder if we can get kernel support for this in the new clone system call with more flags. Then we don't have to complicate the sigaction implementation. Thanks, Florian
On 29/08/2019 05:38, Florian Weimer wrote: > * Adhemerval Zanella: > >> * include/atomic.h (atomic_fetch_or_seq_cst, atomic_fetch_or_seq_cst): >> New macros. > > Why isn't a regular release store/acquire load synchronization > sufficient here? It should works, my understanding is a weaker store barrier might incur in a slight more false positive in a highly concurrent sigaction call scenario. But I assume that this is not a common scenario, so I used the strongest barrier just to avoid the extra false positives. > > I wonder if we can get kernel support for this in the new clone system > call with more flags. Then we don't have to complicate the sigaction > implementation. Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal disposition to default SIG_IGN/SIG_DFL values may help us here. However afaik clone now is out of space on 'flags' for newer ones (it already defines 24 flags plus it reserve 8 bits for signal to be sent at process exit) and it would take time to use this feature on glibc.
* Adhemerval Zanella: > On 29/08/2019 05:38, Florian Weimer wrote: >> * Adhemerval Zanella: >> >>> * include/atomic.h (atomic_fetch_or_seq_cst, atomic_fetch_or_seq_cst): >>> New macros. >> >> Why isn't a regular release store/acquire load synchronization >> sufficient here? > > It should works, my understanding is a weaker store barrier might incur in > a slight more false positive in a highly concurrent sigaction call scenario. > But I assume that this is not a common scenario, so I used the strongest > barrier just to avoid the extra false positives. I don't see how false positives are possible. It would require bits getting set which have never been added to the mask, which would be a bug even for relaxed MO (as a QoI issue, the memory model is buggy and allows this). My main worry would be reading an outdated value in posix_spawn, but my understanding is that the release store/acquire load synchronization avoids that. >> I wonder if we can get kernel support for this in the new clone system >> call with more flags. Then we don't have to complicate the sigaction >> implementation. > > Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal > disposition to default SIG_IGN/SIG_DFL values may help us here. However > afaik clone now is out of space on 'flags' for newer ones (it already > defines 24 flags plus it reserve 8 bits for signal to be sent at process > exit) and it would take time to use this feature on glibc. Christian Brauner has been working on fixing this. Thanks, Florian
On 30/08/2019 07:07, Florian Weimer wrote: > * Adhemerval Zanella: > >> On 29/08/2019 05:38, Florian Weimer wrote: >>> * Adhemerval Zanella: >>> >>>> * include/atomic.h (atomic_fetch_or_seq_cst, atomic_fetch_or_seq_cst): >>>> New macros. >>> >>> Why isn't a regular release store/acquire load synchronization >>> sufficient here? >> >> It should works, my understanding is a weaker store barrier might incur in >> a slight more false positive in a highly concurrent sigaction call scenario. >> But I assume that this is not a common scenario, so I used the strongest >> barrier just to avoid the extra false positives. > > I don't see how false positives are possible. It would require bits > getting set which have never been added to the mask, which would be a > bug even for relaxed MO (as a QoI issue, the memory model is buggy and > allows this). > > My main worry would be reading an outdated value in posix_spawn, but my > understanding is that the release store/acquire load synchronization > avoids that. The false positives happens for the case where the signal disposition was set, the bit in the bitmask set, and the signal disposition reset to default value. The bit will be stick and posix_spawn will always issue the sigaction (even though strictly it is not required). The problem is in fact false negatives, where posix_spawn will get a mask *without* the bit set, but with a set signal disposition. In fact I think due the syscall, even relaxed operations would work (since the syscall acts a strong memory barrier). > >>> I wonder if we can get kernel support for this in the new clone system >>> call with more flags. Then we don't have to complicate the sigaction >>> implementation. >> >> Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal >> disposition to default SIG_IGN/SIG_DFL values may help us here. However >> afaik clone now is out of space on 'flags' for newer ones (it already >> defines 24 flags plus it reserve 8 bits for signal to be sent at process >> exit) and it would take time to use this feature on glibc. > > Christian Brauner has been working on fixing this. Which strategy he is proposing? Even with proper kernel support, it would take time to enable glibc to use it.
* Adhemerval Zanella: > The problem is in fact false negatives, where posix_spawn will get a mask > *without* the bit set, but with a set signal disposition. Hmm. Right. Incidentally, the Go routine should be fine with that: | // When using cgo, call the C library for sigaction, so that we call into | // any sanitizer interceptors. This supports using the memory | // sanitizer with Go programs. The memory sanitizer only applies to | // C/C++ code; this permits that code to see the Go runtime's existing signal | // handlers when registering new signal handlers for the process. | | //go:cgo_import_static x_cgo_sigaction | //go:linkname x_cgo_sigaction x_cgo_sigaction | //go:linkname _cgo_sigaction _cgo_sigaction | var x_cgo_sigaction byte | var _cgo_sigaction = &x_cgo_sigaction libjsig also keeps calling to glibc. Is there anything else we should check? > In fact I think due the syscall, even relaxed operations would work > (since the syscall acts a strong memory barrier). Only as a signal fence, not a thread fence. Some architectures can even keep cache inconsistency across fork system calls. I find it a bit counter-intuitive that calling sigaction or signal directly without the glibc wrappers could lead to data corruption, even when done for standard signals such as SIGINT. But that's what's going to happen with this change, unfortunately. >>>> I wonder if we can get kernel support for this in the new clone system >>>> call with more flags. Then we don't have to complicate the sigaction >>>> implementation. >>> >>> Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal >>> disposition to default SIG_IGN/SIG_DFL values may help us here. However >>> afaik clone now is out of space on 'flags' for newer ones (it already >>> defines 24 flags plus it reserve 8 bits for signal to be sent at process >>> exit) and it would take time to use this feature on glibc. >> >> Christian Brauner has been working on fixing this. > > Which strategy he is proposing? Even with proper kernel support, it would > take time to enable glibc to use it. Lots of flag arguments, with the reset of the arguments located indirectly via a pointer argument. For a pure optimization, I think it's not too bad to require kernel backports of system calls. Thanks, Florian
On 02/09/2019 10:14, Florian Weimer wrote: > * Adhemerval Zanella: > >> The problem is in fact false negatives, where posix_spawn will get a mask >> *without* the bit set, but with a set signal disposition. > > Hmm. Right. Incidentally, the Go routine should be fine with that: > > | // When using cgo, call the C library for sigaction, so that we call into > | // any sanitizer interceptors. This supports using the memory > | // sanitizer with Go programs. The memory sanitizer only applies to > | // C/C++ code; this permits that code to see the Go runtime's existing signal > | // handlers when registering new signal handlers for the process. > | > | //go:cgo_import_static x_cgo_sigaction > | //go:linkname x_cgo_sigaction x_cgo_sigaction > | //go:linkname _cgo_sigaction _cgo_sigaction > | var x_cgo_sigaction byte > | var _cgo_sigaction = &x_cgo_sigaction > > libjsig also keeps calling to glibc. > > Is there anything else we should check? No idea, my take on that is once you start to calling syscall directly where libbc provide a wrapper you are in your own. We had a similar discussing with clone usage by some container applications and their expectation regarding libc internal state afterwards. > >> In fact I think due the syscall, even relaxed operations would work >> (since the syscall acts a strong memory barrier). > > Only as a signal fence, not a thread fence. Some architectures can even > keep cache inconsistency across fork system calls. > > I find it a bit counter-intuitive that calling sigaction or signal > directly without the glibc wrappers could lead to data corruption, even > when done for standard signals such as SIGINT. But that's what's going > to happen with this change, unfortunately. What is counter-intuitive imho is to rely on libc to keep its internal consistency by bypassing it. This might be even worse if glibc start to wrapper the signal handler as a way to implement BZ#19702, for instance. One thing we may do it to make it clean on manual that an application is *not* expect to call sigaction using syscall(). > >>>>> I wonder if we can get kernel support for this in the new clone system >>>>> call with more flags. Then we don't have to complicate the sigaction >>>>> implementation. >>>> >>>> Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal >>>> disposition to default SIG_IGN/SIG_DFL values may help us here. However >>>> afaik clone now is out of space on 'flags' for newer ones (it already >>>> defines 24 flags plus it reserve 8 bits for signal to be sent at process >>>> exit) and it would take time to use this feature on glibc. >>> >>> Christian Brauner has been working on fixing this. >> >> Which strategy he is proposing? Even with proper kernel support, it would >> take time to enable glibc to use it. > > Lots of flag arguments, with the reset of the arguments located > indirectly via a pointer argument. > > For a pure optimization, I think it's not too bad to require kernel > backports of system calls. > > Thanks, > Florian >
Florian, do you still hold objection to this patch? On 02/09/2019 16:47, Adhemerval Zanella wrote: > > > On 02/09/2019 10:14, Florian Weimer wrote: >> * Adhemerval Zanella: >> >>> The problem is in fact false negatives, where posix_spawn will get a mask >>> *without* the bit set, but with a set signal disposition. >> >> Hmm. Right. Incidentally, the Go routine should be fine with that: >> >> | // When using cgo, call the C library for sigaction, so that we call into >> | // any sanitizer interceptors. This supports using the memory >> | // sanitizer with Go programs. The memory sanitizer only applies to >> | // C/C++ code; this permits that code to see the Go runtime's existing signal >> | // handlers when registering new signal handlers for the process. >> | >> | //go:cgo_import_static x_cgo_sigaction >> | //go:linkname x_cgo_sigaction x_cgo_sigaction >> | //go:linkname _cgo_sigaction _cgo_sigaction >> | var x_cgo_sigaction byte >> | var _cgo_sigaction = &x_cgo_sigaction >> >> libjsig also keeps calling to glibc. >> >> Is there anything else we should check? > > No idea, my take on that is once you start to calling syscall directly > where libbc provide a wrapper you are in your own. We had a similar > discussing with clone usage by some container applications and their > expectation regarding libc internal state afterwards. > >> >>> In fact I think due the syscall, even relaxed operations would work >>> (since the syscall acts a strong memory barrier). >> >> Only as a signal fence, not a thread fence. Some architectures can even >> keep cache inconsistency across fork system calls. >> >> I find it a bit counter-intuitive that calling sigaction or signal >> directly without the glibc wrappers could lead to data corruption, even >> when done for standard signals such as SIGINT. But that's what's going >> to happen with this change, unfortunately. > > What is counter-intuitive imho is to rely on libc to keep its internal > consistency by bypassing it. This might be even worse if glibc start to > wrapper the signal handler as a way to implement BZ#19702, for instance. > > One thing we may do it to make it clean on manual that an application is > *not* expect to call sigaction using syscall(). > >> >>>>>> I wonder if we can get kernel support for this in the new clone system >>>>>> call with more flags. Then we don't have to complicate the sigaction >>>>>> implementation. >>>>> >>>>> Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal >>>>> disposition to default SIG_IGN/SIG_DFL values may help us here. However >>>>> afaik clone now is out of space on 'flags' for newer ones (it already >>>>> defines 24 flags plus it reserve 8 bits for signal to be sent at process >>>>> exit) and it would take time to use this feature on glibc. >>>> >>>> Christian Brauner has been working on fixing this. >>> >>> Which strategy he is proposing? Even with proper kernel support, it would >>> take time to enable glibc to use it. >> >> Lots of flag arguments, with the reset of the arguments located >> indirectly via a pointer argument. >> >> For a pure optimization, I think it's not too bad to require kernel >> backports of system calls. >> >> Thanks, >> Florian >>
On Mon, Oct 07, 2019 at 02:50:56PM -0300, Adhemerval Zanella wrote: > Florian, do you still hold objection to this patch? > > On 02/09/2019 16:47, Adhemerval Zanella wrote: > > > > > > On 02/09/2019 10:14, Florian Weimer wrote: > >> * Adhemerval Zanella: > >> > >>> The problem is in fact false negatives, where posix_spawn will get a mask > >>> *without* the bit set, but with a set signal disposition. > >> > >> Hmm. Right. Incidentally, the Go routine should be fine with that: > >> > >> | // When using cgo, call the C library for sigaction, so that we call into > >> | // any sanitizer interceptors. This supports using the memory > >> | // sanitizer with Go programs. The memory sanitizer only applies to > >> | // C/C++ code; this permits that code to see the Go runtime's existing signal > >> | // handlers when registering new signal handlers for the process. > >> | > >> | //go:cgo_import_static x_cgo_sigaction > >> | //go:linkname x_cgo_sigaction x_cgo_sigaction > >> | //go:linkname _cgo_sigaction _cgo_sigaction > >> | var x_cgo_sigaction byte > >> | var _cgo_sigaction = &x_cgo_sigaction > >> > >> libjsig also keeps calling to glibc. > >> > >> Is there anything else we should check? > > > > No idea, my take on that is once you start to calling syscall directly > > where libbc provide a wrapper you are in your own. We had a similar > > discussing with clone usage by some container applications and their > > expectation regarding libc internal state afterwards. > > > >> > >>> In fact I think due the syscall, even relaxed operations would work > >>> (since the syscall acts a strong memory barrier). > >> > >> Only as a signal fence, not a thread fence. Some architectures can even > >> keep cache inconsistency across fork system calls. > >> > >> I find it a bit counter-intuitive that calling sigaction or signal > >> directly without the glibc wrappers could lead to data corruption, even > >> when done for standard signals such as SIGINT. But that's what's going > >> to happen with this change, unfortunately. > > > > What is counter-intuitive imho is to rely on libc to keep its internal > > consistency by bypassing it. This might be even worse if glibc start to > > wrapper the signal handler as a way to implement BZ#19702, for instance. > > > > One thing we may do it to make it clean on manual that an application is > > *not* expect to call sigaction using syscall(). > > > >> > >>>>>> I wonder if we can get kernel support for this in the new clone system > >>>>>> call with more flags. Then we don't have to complicate the sigaction > >>>>>> implementation. > >>>>> > >>>>> Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal > >>>>> disposition to default SIG_IGN/SIG_DFL values may help us here. However > >>>>> afaik clone now is out of space on 'flags' for newer ones (it already > >>>>> defines 24 flags plus it reserve 8 bits for signal to be sent at process > >>>>> exit) and it would take time to use this feature on glibc. > >>>> > >>>> Christian Brauner has been working on fixing this. > >>> > >>> Which strategy he is proposing? Even with proper kernel support, it would > >>> take time to enable glibc to use it. > >> > >> Lots of flag arguments, with the reset of the arguments located > >> indirectly via a pointer argument. > >> > >> For a pure optimization, I think it's not too bad to require kernel > >> backports of system calls. So I just accidently caught wind of this discussion. :) I'm open to extending clone3() to support something like the above. My new clone3() version has been released with Linux 5.3. It takes a struct clone_args. The structure is versioned by size and thus - in theory - extensible indefinitely. (I also sent a PR for v5.4-rc2 that got merged for the copy_struct_from_user() work from Aleksa. It adds a common helper for copying structure arguments version by size. This will guarantee that future syscalls will all use the same size-versioning logic (Yes, we need to be careful with unions.).) [1]: fork: add clone3 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7f192e3cd316ba58c88dfa26796cf77789dd9872 [2]: lib: introduce copy_struct_from_user() helper https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f5a1a536fa14895ccff4e94e6a5af90901ce86aa Christian
* Christian Brauner: >> >>>>> Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal >> >>>>> disposition to default SIG_IGN/SIG_DFL values may help us here. However >> >>>>> afaik clone now is out of space on 'flags' for newer ones (it already >> >>>>> defines 24 flags plus it reserve 8 bits for signal to be sent at process >> >>>>> exit) and it would take time to use this feature on glibc. >> >>>> >> >>>> Christian Brauner has been working on fixing this. >> >>> >> >>> Which strategy he is proposing? Even with proper kernel support, it would >> >>> take time to enable glibc to use it. >> >> >> >> Lots of flag arguments, with the reset of the arguments located >> >> indirectly via a pointer argument. >> >> >> >> For a pure optimization, I think it's not too bad to require kernel >> >> backports of system calls. > > So I just accidently caught wind of this discussion. :) Good. 8-) > I'm open to extending clone3() to support something like the above. > My new clone3() version has been released with Linux 5.3. It takes a > struct clone_args. The structure is versioned by size and thus - in > theory - extensible indefinitely. Christian, would you be able to implement the CLONE_RESET_SIGNALS flag for us? It should reset any handler which is not SIG_IGN or SIG_DFL to SIG_DFL. We'd also need a way to probe that the flag is supported, so that we can fall back to the current way of doing things otherwise. Thanks, Florian
On 07/10/2019 15:25, Christian Brauner wrote: > On Mon, Oct 07, 2019 at 02:50:56PM -0300, Adhemerval Zanella wrote: >> Florian, do you still hold objection to this patch? >> >> On 02/09/2019 16:47, Adhemerval Zanella wrote: >>> >>> >>> On 02/09/2019 10:14, Florian Weimer wrote: >>>> * Adhemerval Zanella: >>>> >>>>> The problem is in fact false negatives, where posix_spawn will get a mask >>>>> *without* the bit set, but with a set signal disposition. >>>> >>>> Hmm. Right. Incidentally, the Go routine should be fine with that: >>>> >>>> | // When using cgo, call the C library for sigaction, so that we call into >>>> | // any sanitizer interceptors. This supports using the memory >>>> | // sanitizer with Go programs. The memory sanitizer only applies to >>>> | // C/C++ code; this permits that code to see the Go runtime's existing signal >>>> | // handlers when registering new signal handlers for the process. >>>> | >>>> | //go:cgo_import_static x_cgo_sigaction >>>> | //go:linkname x_cgo_sigaction x_cgo_sigaction >>>> | //go:linkname _cgo_sigaction _cgo_sigaction >>>> | var x_cgo_sigaction byte >>>> | var _cgo_sigaction = &x_cgo_sigaction >>>> >>>> libjsig also keeps calling to glibc. >>>> >>>> Is there anything else we should check? >>> >>> No idea, my take on that is once you start to calling syscall directly >>> where libbc provide a wrapper you are in your own. We had a similar >>> discussing with clone usage by some container applications and their >>> expectation regarding libc internal state afterwards. >>> >>>> >>>>> In fact I think due the syscall, even relaxed operations would work >>>>> (since the syscall acts a strong memory barrier). >>>> >>>> Only as a signal fence, not a thread fence. Some architectures can even >>>> keep cache inconsistency across fork system calls. >>>> >>>> I find it a bit counter-intuitive that calling sigaction or signal >>>> directly without the glibc wrappers could lead to data corruption, even >>>> when done for standard signals such as SIGINT. But that's what's going >>>> to happen with this change, unfortunately. >>> >>> What is counter-intuitive imho is to rely on libc to keep its internal >>> consistency by bypassing it. This might be even worse if glibc start to >>> wrapper the signal handler as a way to implement BZ#19702, for instance. >>> >>> One thing we may do it to make it clean on manual that an application is >>> *not* expect to call sigaction using syscall(). >>> >>>> >>>>>>>> I wonder if we can get kernel support for this in the new clone system >>>>>>>> call with more flags. Then we don't have to complicate the sigaction >>>>>>>> implementation. >>>>>>> >>>>>>> Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal >>>>>>> disposition to default SIG_IGN/SIG_DFL values may help us here. However >>>>>>> afaik clone now is out of space on 'flags' for newer ones (it already >>>>>>> defines 24 flags plus it reserve 8 bits for signal to be sent at process >>>>>>> exit) and it would take time to use this feature on glibc. >>>>>> >>>>>> Christian Brauner has been working on fixing this. >>>>> >>>>> Which strategy he is proposing? Even with proper kernel support, it would >>>>> take time to enable glibc to use it. >>>> >>>> Lots of flag arguments, with the reset of the arguments located >>>> indirectly via a pointer argument. >>>> >>>> For a pure optimization, I think it's not too bad to require kernel >>>> backports of system calls. > > So I just accidently caught wind of this discussion. :) > I'm open to extending clone3() to support something like the above. > My new clone3() version has been released with Linux 5.3. It takes a > struct clone_args. The structure is versioned by size and thus - in > theory - extensible indefinitely. > > (I also sent a PR for v5.4-rc2 that got merged for the > copy_struct_from_user() work from Aleksa. It adds a common helper for > copying structure arguments version by size. This will guarantee that > future syscalls will all use the same size-versioning logic (Yes, we > need to be careful with unions.).) > > [1]: fork: add clone3 > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7f192e3cd316ba58c88dfa26796cf77789dd9872 > > [2]: lib: introduce copy_struct_from_user() helper > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f5a1a536fa14895ccff4e94e6a5af90901ce86aa > > Christian > Yeah, I am aware of it and ideally if my patch eventually gets merged I will probably add a code path to use clone3 when possible (not sure if it worth to be enable only for --enable-kernel or a main path with ENOSYS fallback to clone). However, glibc supports older kernels as old as v3.2 and it will take some years and releases to make v5.3 or new the minimum support kernel. And I think it would be nice to have this optimization even for older kernels.
* Adhemerval Zanella: > However, glibc supports older kernels as old as v3.2 and it will take > some years and releases to make v5.3 or new the minimum support kernel. > And I think it would be nice to have this optimization even for older > kernels. But wouldn't it make sense to backport clone3 to these older kernels, so that further enhancements are possible, in cooperation with the kernel. I'm all for supporting older kernels, but performance optimizations for old kernels, merely to work around missing system call/flag support for things which are straightforward to backport into the kernel seems not the right priority to me, sorry. If things can be fixed in the kernel, fix it there. That applies to both performance and functionality bugs. Thanks, Florian
* Adhemerval Zanella:
> Florian, do you still hold objection to this patch?
Yes, I still don't like it. Sorry.
Is this really important to you? Have seen this showing up in profiles?
Thanks,
Florian
On 07/10/2019 15:40, Florian Weimer wrote: > * Adhemerval Zanella: > >> However, glibc supports older kernels as old as v3.2 and it will take >> some years and releases to make v5.3 or new the minimum support kernel. >> And I think it would be nice to have this optimization even for older >> kernels. > > But wouldn't it make sense to backport clone3 to these older kernels, so > that further enhancements are possible, in cooperation with the kernel. For a kernel standpoint sure, for libc one it only make sense if it becomes de-facto kernel ABI. It can be quite feasible from a distribution standpoint, where it controls both kernel and userland deployment. But it is not the only scenario glibc aims to work neither we should prioritize it. > > I'm all for supporting older kernels, but performance optimizations for > old kernels, merely to work around missing system call/flag support for > things which are straightforward to backport into the kernel seems not > the right priority to me, sorry. Although the Linux idea is to use everything upstream, which means to have the latest kernel up and running all the time a new is release; this is far from reality and it won't change in nearby feature (although I do see some steps towards it in various projects). > > If things can be fixed in the kernel, fix it there. That applies to > both performance and functionality bugs. I do agree with you, but this is mostly an engineering decision where it aims to provide an optimization to a broader audience rather than to an specific scenario. > Is this really important to you? Have seen this showing up in profiles? Not right not, but mostly because projects usually do not use posix_spawn as the way to spawn process. But over the years we not only fixed all its various issues but also optimize in both memory and cpu usage and also added some extra extensions that some projects had that prevent them to use posix_spawn instead of the old fork plus execve. So I expect that more and more posix_spawn should be the expected way to spawn processes on Linux (some projects are indeed using it, such as gnome3) and by avoiding ~120 syscall each time a process is spawned is a nice performance improvement.
On Mon, 7 Oct 2019, Adhemerval Zanella wrote: > However, glibc supports older kernels as old as v3.2 and it will take > some years and releases to make v5.3 or new the minimum support kernel. > And I think it would be nice to have this optimization even for older > kernels. I should note there are two reasons I haven't proposed an increase from 3.2 to a more recent minimum kernel version, given that 3.2 is no longer a maintained Linux kernel release series. 1. An increase to 3.16, the current oldest longterm kernel at <https://www.kernel.org/category/releases.html>, doesn't really allow much in the way of cleanups. The next minimum kernel version that would allow many cleanups is 4.4 (most separate socket syscalls available on all socketcall architectures, so most socketcall support can be removed) - I think that's the next update that brings enough benefits to be worthwhile. (And after that, the next such update would be to require 64-bit time syscalls on 32-bit architectures once all older kernel series stop being maintained - in 2024 based on the dates on that page.) 2. I was hoping that before we next increase the minimum kernel version we can have the changes Carlos was proposing to stop giving an error at startup for a too-old kernel, instead allowing code to run with syscalls possibly failing, in order to work better in the case of (new userspace in a container running under) older kernel version numbers with features possibly backported or no relevant new features actually required by glibc. -- Joseph S. Myers joseph@codesourcery.com
On Mon, Oct 07, 2019 at 08:32:52PM +0200, Florian Weimer wrote: > * Christian Brauner: > > >> >>>>> Maybe a CLONE_RESET_SIGNALS where the cloned process sets its signal > >> >>>>> disposition to default SIG_IGN/SIG_DFL values may help us here. However > >> >>>>> afaik clone now is out of space on 'flags' for newer ones (it already > >> >>>>> defines 24 flags plus it reserve 8 bits for signal to be sent at process > >> >>>>> exit) and it would take time to use this feature on glibc. > >> >>>> > >> >>>> Christian Brauner has been working on fixing this. > >> >>> > >> >>> Which strategy he is proposing? Even with proper kernel support, it would > >> >>> take time to enable glibc to use it. > >> >> > >> >> Lots of flag arguments, with the reset of the arguments located > >> >> indirectly via a pointer argument. > >> >> > >> >> For a pure optimization, I think it's not too bad to require kernel > >> >> backports of system calls. > > > > So I just accidently caught wind of this discussion. :) > > Good. 8-) > > > I'm open to extending clone3() to support something like the above. > > My new clone3() version has been released with Linux 5.3. It takes a > > struct clone_args. The structure is versioned by size and thus - in > > theory - extensible indefinitely. > > Christian, would you be able to implement the CLONE_RESET_SIGNALS flag > for us? It should reset any handler which is not SIG_IGN or SIG_DFL to > SIG_DFL. We'd also need a way to probe that the flag is supported, so > that we can fall back to the current way of doing things otherwise. Yeah, I can implement this. It shouldn't be too much work and if I can Cc you and point out that you'd need/want this feature I can send a PR for my thread updates for the 5.5 merge window. Would that work for you? Thanks! Christian
* Adhemerval Zanella: > On 07/10/2019 15:40, Florian Weimer wrote: >> * Adhemerval Zanella: >> >>> However, glibc supports older kernels as old as v3.2 and it will take >>> some years and releases to make v5.3 or new the minimum support kernel. >>> And I think it would be nice to have this optimization even for older >>> kernels. >> >> But wouldn't it make sense to backport clone3 to these older kernels, so >> that further enhancements are possible, in cooperation with the kernel. > > For a kernel standpoint sure, for libc one it only make sense if it becomes > de-facto kernel ABI. It can be quite feasible from a distribution standpoint, > where it controls both kernel and userland deployment. But it is not the only > scenario glibc aims to work neither we should prioritize it. Sure. But I think we should keep in mind here that this is not a localized optimization. It optimizes posix_spawn with something that extends into something that is (at least superficially) completely unrelated. If we can get kernel assistance for the optimization (and it looks like we'll receive it), we can avoid that complexity. The patch Christian posted is very small. It's on top of clone3, sure, but I expect that people will want the system call anyway for some container support case soon enough, so long-term maintained kernels will get it essentially for free. Using the new clone3 flag looks inherently backportable to me on the glibc side. Compared to that, the sigaction changes look a bit risky to me. Thanks, Florian
On Wed, Oct 09, 2019 at 11:37:30AM +0200, Florian Weimer wrote: > * Adhemerval Zanella: > > > On 07/10/2019 15:40, Florian Weimer wrote: > >> * Adhemerval Zanella: > >> > >>> However, glibc supports older kernels as old as v3.2 and it will take > >>> some years and releases to make v5.3 or new the minimum support kernel. > >>> And I think it would be nice to have this optimization even for older > >>> kernels. > >> > >> But wouldn't it make sense to backport clone3 to these older kernels, so > >> that further enhancements are possible, in cooperation with the kernel. > > > > For a kernel standpoint sure, for libc one it only make sense if it becomes > > de-facto kernel ABI. It can be quite feasible from a distribution standpoint, > > where it controls both kernel and userland deployment. But it is not the only > > scenario glibc aims to work neither we should prioritize it. > > Sure. But I think we should keep in mind here that this is not a > localized optimization. It optimizes posix_spawn with something that > extends into something that is (at least superficially) completely > unrelated. > > If we can get kernel assistance for the optimization (and it looks like > we'll receive it), we can avoid that complexity. The patch Christian > posted is very small. It's on top of clone3, sure, but I expect that > people will want the system call anyway for some container support case > soon enough, so long-term maintained kernels will get it essentially for > free. Yeah, the time namespace patchset will introduce CLONE_NEWTIME and that flag will only be possible with clone3() for obvious reasons. And I'm pretty sure that a lot of database workloads will want that... > > Using the new clone3 flag looks inherently backportable to me on the > glibc side. Compared to that, the sigaction changes look a bit risky to > me. In the future I would like certain sets of changes that we currently do in a racy way in the child to be made right at process creation time. Another flag that I has been in my mind for a long time is e.g. pdeath_signal. There should probably be an extension to struct clone_args that introduces a new member pdeath_signal which won't be reset and will be delivered to the child once the parent dies. Christian
On 09/10/2019 06:37, Florian Weimer wrote: > * Adhemerval Zanella: > >> On 07/10/2019 15:40, Florian Weimer wrote: >>> * Adhemerval Zanella: >>> >>>> However, glibc supports older kernels as old as v3.2 and it will take >>>> some years and releases to make v5.3 or new the minimum support kernel. >>>> And I think it would be nice to have this optimization even for older >>>> kernels. >>> >>> But wouldn't it make sense to backport clone3 to these older kernels, so >>> that further enhancements are possible, in cooperation with the kernel. >> >> For a kernel standpoint sure, for libc one it only make sense if it becomes >> de-facto kernel ABI. It can be quite feasible from a distribution standpoint, >> where it controls both kernel and userland deployment. But it is not the only >> scenario glibc aims to work neither we should prioritize it. > > Sure. But I think we should keep in mind here that this is not a > localized optimization. It optimizes posix_spawn with something that > extends into something that is (at least superficially) completely > unrelated. > > If we can get kernel assistance for the optimization (and it looks like > we'll receive it), we can avoid that complexity. The patch Christian > posted is very small. It's on top of clone3, sure, but I expect that > people will want the system call anyway for some container support case > soon enough, so long-term maintained kernels will get it essentially for > free. > > Using the new clone3 flag looks inherently backportable to me on the > glibc side. Compared to that, the sigaction changes look a bit risky to > me. But still I don't see that this being more complex to backport as being a impeding reason to push it upstream. Even we push the minimum kernel version higher and remove the inherent minimum kernel version check, it would be case where the posix_spawn signals reset will still trigger in older kernels. I give you that eventually we might remove this optimization once we assume a minimum kernel version, but as I said this might take some time and I think optimizing posix_spawn (along with adding the required extensions developers see as useful) it a way to promote it over the fork plus execve and its deficiencies.
* Adhemerval Zanella: > But still I don't see that this being more complex to backport as being > a impeding reason to push it upstream. Your argument was that the glibc-only optimization would be available to users more quickly. That's why I brought backporting up. I still think the direction this optimization is taking quite wrong. We shouldn't add code to system call wrappers to collect secondary information if we can help it. openat had something like this, and it went wrong with O_TMPFILE. sigaction is not really simple, either. It has its own flags, and the kernel might enhance the system call in unexpected ways, too. Just to be clear, I think it's worthwhile to optimize this for the reasons you indicated. We merely disagree about the means. 8-) Thanks, Florian
diff --git a/include/atomic.h b/include/atomic.h index ee1978eb3b..72609efde9 100644 --- a/include/atomic.h +++ b/include/atomic.h @@ -646,6 +646,9 @@ void __atomic_link_error (void); # define atomic_fetch_or_release(mem, operand) \ ({ __atomic_check_size((mem)); \ __atomic_fetch_or ((mem), (operand), __ATOMIC_RELEASE); }) +# define atomic_fetch_or_seq_cst(mem, operand) \ + ({ __atomic_check_size((mem)); \ + __atomic_fetch_or ((mem), (operand), __ATOMIC_SEQ_CST); }) # define atomic_fetch_xor_release(mem, operand) \ ({ __atomic_check_size((mem)); \ @@ -791,6 +794,13 @@ void __atomic_link_error (void); ({ atomic_thread_fence_release (); \ atomic_fetch_or_acquire ((mem), (operand)); }) # endif +# ifndef atomic_fetch_or_seq_cst +# define atomic_fetch_or_seq_cst(mem, operand) \ + ({ atomic_thread_fence_acquire (); \ + atomic_fetch_or_relaxed ((mem), (operand)); \ + atomic_thread_fence_release (); }) +# endif + # ifndef atomic_fetch_xor_release /* Failing the atomic_compare_exchange_weak_release reloads the value in diff --git a/posix/Makefile b/posix/Makefile index 1ac41ad85a..131ae052fd 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -102,7 +102,8 @@ tests := test-errno tstgetopt testfnm runtests runptests \ tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \ tst-glob-tilde test-ssize-max tst-spawn4 bug-regex37 \ bug-regex38 tst-regcomp-truncated tst-spawn-chdir \ - tst-spawn5 + tst-spawn5 \ + tst-spawn6 tests-internal := bug-regex5 bug-regex20 bug-regex33 \ tst-rfc3484 tst-rfc3484-2 tst-rfc3484-3 \ tst-glob_lstat_compat tst-spawn4-compat @@ -255,6 +256,7 @@ tst-exec-ARGS = -- $(host-test-program-cmd) tst-exec-static-ARGS = $(tst-exec-ARGS) tst-execvpe5-ARGS = -- $(host-test-program-cmd) tst-spawn-ARGS = -- $(host-test-program-cmd) +tst-spawn6-ARGS = -- $(host-test-program-cmd) tst-spawn-static-ARGS = $(tst-spawn-ARGS) tst-spawn5-ARGS = -- $(host-test-program-cmd) tst-dir-ARGS = `pwd` `cd $(common-objdir)/$(subdir); pwd` `cd $(common-objdir); pwd` $(objpfx)tst-dir diff --git a/posix/tst-spawn6.c b/posix/tst-spawn6.c new file mode 100644 index 0000000000..466e66f104 --- /dev/null +++ b/posix/tst-spawn6.c @@ -0,0 +1,220 @@ +/* Tests for posix_spawn signal handling. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <stdlib.h> +#include <getopt.h> +#include <spawn.h> +#include <sys/wait.h> + +#include <support/check.h> +#include <support/xunistd.h> +#include <support/support.h> +#include <array_length.h> + +/* Nonzero if the program gets called via `exec'. */ +static int restart; +#define CMDLINE_OPTIONS \ + { "restart", no_argument, &restart, 1 }, + +enum spawn_test_t +{ + SPAWN_SETSIGMASK, + SPAWN_SETSIGDEF +}; + +static int signal_to_check[] = +{ + SIGHUP, SIGINT, SIGALRM, SIGUSR2 +}; + +/* Called on process re-execution. */ +static int +handle_restart (enum spawn_test_t test) +{ + switch (test) + { + case SPAWN_SETSIGMASK: + { + sigset_t mask; + sigprocmask (SIG_BLOCK, NULL, &mask); + for (int i = 0; i < array_length (signal_to_check); i++) + if (sigismember (&mask, signal_to_check[i]) != 1) + exit (EXIT_FAILURE); + } + break; + case SPAWN_SETSIGDEF: + { + for (int i = 0; i < array_length (signal_to_check); i++) + { + struct sigaction act; + if (sigaction (signal_to_check[i], NULL, &act) != 0) + exit (EXIT_FAILURE); + if (act.sa_handler != SIG_DFL) + exit (EXIT_FAILURE); + } + } + break; + } + + return 0; +} + +/* Common argument used for process re-execution. */ +static char *initial_spargv[5]; +static size_t initial_spargv_size; + +/* Re-execute the test process with both '--direct', '--restart', and the + TEST (as integer value) as arguments. */ +static void +reexecute (enum spawn_test_t test, const posix_spawnattr_t *attrp) +{ + char *spargv[8]; + int i; + + for (i = 0; i < initial_spargv_size; i++) + spargv[i] = initial_spargv[i]; + /* Three digits per byte plus null terminator. */ + char teststr[3 * sizeof (test) + 1]; + snprintf (teststr, array_length (teststr), "%d", test); + spargv[i++] = teststr; + spargv[i] = NULL; + TEST_VERIFY (i < 8); + + pid_t pid; + int status; + + TEST_COMPARE (posix_spawn (&pid, spargv[0], NULL, attrp, spargv, environ), + 0); + TEST_COMPARE (xwaitpid (pid, &status, 0), pid); + TEST_VERIFY (WIFEXITED (status)); + TEST_VERIFY (!WIFSIGNALED (status)); + TEST_COMPARE (WEXITSTATUS (status), 0); +} + +/* Test if POSIX_SPAWN_SETSIGMASK change the spawn process signal mask to + the value blocked signals defined by SIGNAL_TO_CHECK signals. */ +static void +do_test_setsigmask (void) +{ + posix_spawnattr_t attr; + /* posix_spawnattr_init does not fail. */ + posix_spawnattr_init (&attr); + + { + sigset_t mask; + TEST_COMPARE (sigemptyset (&mask), 0); + for (int i = 0; i < array_length (signal_to_check); i++) + TEST_COMPARE (sigaddset (&mask, signal_to_check[i]), 0); + TEST_COMPARE (posix_spawnattr_setsigmask (&attr, &mask), 0); + TEST_COMPARE (posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGMASK), 0); + } + + /* Change current mask to be different than the one asked for spawned + process. */ + { + sigset_t empty_mask, current_mask; + TEST_COMPARE (sigemptyset (&empty_mask), 0); + TEST_COMPARE (sigprocmask (SIG_BLOCK, &empty_mask, ¤t_mask), 0); + + reexecute (SPAWN_SETSIGMASK, &attr); + + TEST_COMPARE (sigprocmask (SIG_SETMASK, ¤t_mask, NULL), 0); + } +} + +/* Test if POSIX_SPAWN_SETSIGDEF change the spawn process signal actions + defined by SIGNAL_TO_CHECK signals to default actions. */ +static void +do_test_setsigdef (void) +{ + posix_spawnattr_t attr; + /* posix_spawnattr_init does not fail. */ + posix_spawnattr_init (&attr); + + { + sigset_t mask; + TEST_COMPARE (sigemptyset (&mask), 0); + for (int i = 0; i < array_length (signal_to_check); i++) + TEST_COMPARE (sigaddset (&mask, signal_to_check[i]), 0); + TEST_COMPARE (posix_spawnattr_setsigdefault (&attr, &mask), 0); + TEST_COMPARE (posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF), 0); + } + + /* Change current signal disposition to be different than the one asked for + spawned process. */ + struct sigaction default_act[array_length (signal_to_check)]; + { + sigset_t empty_mask; + TEST_COMPARE (sigemptyset (&empty_mask), 0); + for (int i = 0; i < array_length (signal_to_check); i++) + TEST_COMPARE (sigaction (signal_to_check[i], + &((struct sigaction) { .sa_handler = SIG_IGN, + .sa_mask = empty_mask, + .sa_flags = 0 }), + &default_act[i]), + 0); + } + + reexecute (SPAWN_SETSIGDEF, &attr); + + /* Restore signal dispositions. */ + for (int i = 0; i < array_length (signal_to_check); i++) + TEST_COMPARE (sigaction (signal_to_check[i], &default_act[i], NULL), 0); +} + +static int +do_test (int argc, char *argv[]) +{ + /* We must have one or four parameters left if called initially: + + path for ld.so optional + + "--library-path" optional + + the library path optional + + the application name + + Plus one parameter to indicate which test to execute through + re-execution. + + So for default usage without --enable-hardcoded-path-in-tests, it + will be called initially with 5 arguments and later with 2. For + --enable-hardcoded-path-in-tests it will be called with 2 arguments + regardless. */ + + if (argc != (restart ? 2 : 5) && argc != 2) + FAIL_EXIT1 ("wrong number of arguments (%d)", argc); + + if (restart) + return handle_restart (atoi (argv[1])); + + { + int i; + for (i = 0; i < (argc == 5 ? 4 : 1); i++) + initial_spargv[i] = argv[i + 1]; + initial_spargv[i++] = (char *) "--direct"; + initial_spargv[i++] = (char *) "--restart"; + initial_spargv_size = i; + } + + do_test_setsigmask (); + do_test_setsigdef (); + + return 0; +} + +#define TEST_FUNCTION_ARGV do_test +#include <support/test-driver.c> diff --git a/sysdeps/generic/sigsetops.h b/sysdeps/generic/sigsetops.h index ddeeb0b0d5..9cae11771b 100644 --- a/sysdeps/generic/sigsetops.h +++ b/sysdeps/generic/sigsetops.h @@ -66,6 +66,13 @@ 0; \ })) +# define __sigorset_atomic(set) \ + (__extension__ ({ \ + __sigset_t __mask = __sigmask (sig); \ + atomic_fetch_or_seq_cst (set, mask); \ + 0; \ + })) + # define __sigdelset(set, sig) \ (__extension__ ({ \ __sigset_t __mask = __sigmask (sig); \ diff --git a/sysdeps/unix/sysv/linux/internal-signals.h b/sysdeps/unix/sysv/linux/internal-signals.h index 3562011d21..385442f81e 100644 --- a/sysdeps/unix/sysv/linux/internal-signals.h +++ b/sysdeps/unix/sysv/linux/internal-signals.h @@ -88,4 +88,7 @@ __libc_signal_restore_set (const sigset_t *set) /* Used to communicate with signal handler. */ extern struct xid_command *__xidcmd attribute_hidden; +/* Used to obtained the modified signal handlers. */ +extern void __get_sighandler_set (sigset_t *set) attribute_hidden; + #endif diff --git a/sysdeps/unix/sysv/linux/sigaction.c b/sysdeps/unix/sysv/linux/sigaction.c index 52722b08ae..3bcf3946ab 100644 --- a/sysdeps/unix/sysv/linux/sigaction.c +++ b/sysdeps/unix/sysv/linux/sigaction.c @@ -20,6 +20,7 @@ #include <string.h> #include <sysdep.h> +#include <sigsetops.h> #include <sys/syscall.h> /* New ports should not define the obsolete SA_RESTORER, however some @@ -36,6 +37,13 @@ # define STUB(act, sigsetsize) (sigsetsize) #endif +static sigset_t handler_set; + +void __get_sighandler_set (sigset_t *set) +{ + *set = handler_set; +} + /* If ACT is not NULL, change the action for SIG to *ACT. If OACT is not NULL, put the old action for SIG in *OACT. */ int @@ -47,6 +55,15 @@ __libc_sigaction (int sig, const struct sigaction *act, struct sigaction *oact) if (act) { + /* Tracks which signal had a signal handler set different from default + (SIG_DFL/SIG_IGN). It allows optimize posix_spawn to reset only + those signals. It might incur in false positive, since it not easy + to remove bits from the mask without race conditions, but it does not + allow false negative since the mask is updated atomically prior the + syscall. The false positive incur in just extra sigactions on + posix_spawn. */ + if (act->sa_handler != SIG_DFL && act->sa_handler != SIG_IGN) + __sigaddset_atomic (&handler_set, sig); kact.k_sa_handler = act->sa_handler; memcpy (&kact.sa_mask, &act->sa_mask, sizeof (sigset_t)); kact.sa_flags = act->sa_flags; diff --git a/sysdeps/unix/sysv/linux/sigsetops.h b/sysdeps/unix/sysv/linux/sigsetops.h index 713d4840d8..6c98c83e42 100644 --- a/sysdeps/unix/sysv/linux/sigsetops.h +++ b/sysdeps/unix/sysv/linux/sigsetops.h @@ -20,6 +20,7 @@ #define _SIGSETOPS_H 1 #include <signal.h> +#include <atomic.h> /* Return a mask that includes the bit for SIG only. */ # define __sigmask(sig) \ @@ -80,6 +81,12 @@ (void)0; \ })) +# define __sigorset_atomic(dest, left, right) \ + (__extension__ ({ \ + atomic_fetch_or_seq_cst (dest, left, right); \ + 0; \ + })) + /* These macros needn't check for a bogus signal number; error checking is done in the non-__ versions. */ # define __sigismember(set, sig) \ @@ -97,6 +104,14 @@ (void)0; \ })) +# define __sigaddset_atomic(set, sig) \ + (__extension__ ({ \ + unsigned long int __mask = __sigmask (sig); \ + unsigned long int __word = __sigword (sig); \ + atomic_fetch_or_seq_cst (&((set)->__val[__word]), __mask); \ + (void)0; \ + })) + # define __sigdelset(set, sig) \ (__extension__ ({ \ unsigned long int __mask = __sigmask (sig); \ diff --git a/sysdeps/unix/sysv/linux/spawni.c b/sysdeps/unix/sysv/linux/spawni.c index 0f7a8ca5df..264edd09c6 100644 --- a/sysdeps/unix/sysv/linux/spawni.c +++ b/sysdeps/unix/sysv/linux/spawni.c @@ -132,17 +132,14 @@ spawni_child (void *arguments) const posix_spawnattr_t *restrict attr = args->attr; const posix_spawn_file_actions_t *file_actions = args->fa; - /* The child must ensure that no signal handler are enabled because it shared + /* The child must ensure that no signal handler are enabled because it share memory with parent, so the signal disposition must be either SIG_DFL or - SIG_IGN. It does by iterating over all signals and although it could - possibly be more optimized (by tracking which signal potentially have a - signal handler), it might requires system specific solutions (since the - sigset_t data type can be very different on different architectures). */ + SIG_IGN. */ struct sigaction sa; memset (&sa, '\0', sizeof (sa)); sigset_t hset; - __sigprocmask (SIG_BLOCK, 0, &hset); + __get_sighandler_set (&hset); for (int sig = 1; sig < _NSIG; ++sig) { if ((attr->__flags & POSIX_SPAWN_SETSIGDEF)