mbox series

[v2,0/3] Checkpoint Support for Syscall User Dispatch

Message ID 20230118201055.147228-1-gregory.price@memverge.com
Headers show
Series Checkpoint Support for Syscall User Dispatch | expand

Message

Gregory Price Jan. 18, 2023, 8:10 p.m. UTC
v2: Implements the getter/setter interface in ptrace rather than prctl

Syscall user dispatch makes it possible to cleanly intercept system
calls from user-land.  However, most transparent checkpoint software
presently leverages some combination of ptrace and system call
injection to place software in a ready-to-checkpoint state.

If Syscall User Dispatch is enabled at the time of being quiesced,
injected system calls will subsequently be interposed upon and
dispatched to the task's signal handler.

This patch set implements 3 features to enable software such as CRIU
to cleanly interpose upon software leveraging syscall user dispatch.

- Implement PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH, akin to a similar
  feature for SECCOMP.  This allows a ptracer to temporarily disable
  syscall user dispatch, making syscall injection possible.

- Implement an fs/proc extension that reports whether Syscall User
  Dispatch is being used in proc/status.  A similar value is present
  for SECCOMP, and is used to determine whether special logic is
  needed during checkpoint/resume.

- Implement a getter interface for Syscall User Dispatch config info.
  To resume successfully, the checkpoint/resume software has to
  save and restore this information.  Presently this configuration
  is write-only, with no way for C/R software to save it.
	This was done in ptrace because syscall user dispatch is not part of
  uapi.  The syscall_user_dispatch_config structure was added to the
  ptrace exports.


Signed-off-by: Gregory Price <gregory.price@memverge.com>

Gregory Price (3):
  ptrace,syscall_user_dispatch: Implement Syscall User Dispatch
    Suspension
  fs/proc/array: Add Syscall User Dispatch to proc status
  ptrace,syscall_user_dispatch: add a getter/setter for sud
    configuration

 .../admin-guide/syscall-user-dispatch.rst     |  5 +-
 fs/proc/array.c                               |  8 +++
 include/linux/ptrace.h                        |  2 +
 include/linux/syscall_user_dispatch.h         | 19 +++++++
 include/uapi/linux/ptrace.h                   | 16 +++++-
 kernel/entry/syscall_user_dispatch.c          | 54 +++++++++++++++++++
 kernel/ptrace.c                               | 14 +++++
 7 files changed, 116 insertions(+), 2 deletions(-)

Comments

Peter Zijlstra Jan. 20, 2023, 10:23 a.m. UTC | #1
On Wed, Jan 18, 2023 at 03:10:53PM -0500, Gregory Price wrote:
> Adds PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH to ptrace options, and
> modify Syscall User Dispatch to suspend interception when enabled.
> 
> This is modeled after the SUSPEND_SECCOMP feature, which suspends
> SECCOMP interposition.  Without doing this, software like CRIU will
> inject system calls into a process and be intercepted by Syscall
> User Dispatch, either causing a crash (due to blocked signals) or
> the delivery of those signals to a ptracer (not the intended behavior).
> 
> Since Syscall User Dispatch is not a privileged feature, a check
> for permissions is not required, however attempting to set this
> option when CONFIG_CHECKPOINT_RESTORE it not supported should be
> disallowed, as its intended use is checkpoint/resume.
> 
> Signed-off-by: Gregory Price <gregory.price@memverge.com>

One small nit -- see below, otherwise:

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

> ---
>  include/linux/ptrace.h               | 2 ++
>  include/uapi/linux/ptrace.h          | 6 +++++-
>  kernel/entry/syscall_user_dispatch.c | 5 +++++
>  kernel/ptrace.c                      | 5 +++++
>  4 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
> index eaaef3ffec22..461ae5c99d57 100644
> --- a/include/linux/ptrace.h
> +++ b/include/linux/ptrace.h
> @@ -45,6 +45,8 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
>  
>  #define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
>  #define PT_SUSPEND_SECCOMP	(PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
> +#define PT_SUSPEND_SYSCALL_USER_DISPATCH \
> +	(PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH << PT_OPT_FLAG_SHIFT)
>  
>  extern long arch_ptrace(struct task_struct *child, long request,
>  			unsigned long addr, unsigned long data);
> diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
> index 195ae64a8c87..ba9e3f19a22c 100644
> --- a/include/uapi/linux/ptrace.h
> +++ b/include/uapi/linux/ptrace.h
> @@ -146,9 +146,13 @@ struct ptrace_rseq_configuration {
>  /* eventless options */
>  #define PTRACE_O_EXITKILL		(1 << 20)
>  #define PTRACE_O_SUSPEND_SECCOMP	(1 << 21)
> +#define PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH	(1 << 22)
>  
>  #define PTRACE_O_MASK		(\
> -	0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP)
> +	0x000000ff | \
> +	PTRACE_O_EXITKILL | \
> +	PTRACE_O_SUSPEND_SECCOMP | \
> +	PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)
>  
>  #include <asm/ptrace.h>
>  
> diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
> index 0b6379adff6b..7607f4598dd8 100644
> --- a/kernel/entry/syscall_user_dispatch.c
> +++ b/kernel/entry/syscall_user_dispatch.c
> @@ -8,6 +8,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/signal.h>
>  #include <linux/elf.h>
> +#include <linux/ptrace.h>
>  
>  #include <linux/sched/signal.h>
>  #include <linux/sched/task_stack.h>
> @@ -36,6 +37,10 @@ bool syscall_user_dispatch(struct pt_regs *regs)
>  	struct syscall_user_dispatch *sd = &current->syscall_dispatch;
>  	char state;
>  
> +	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
> +		unlikely(current->ptrace & PT_SUSPEND_SYSCALL_USER_DISPATCH))

Align with the '(' pleaase.

> +		return false;
> +
>  	if (likely(instruction_pointer(regs) - sd->offset < sd->len))
>  		return false;
>  
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 54482193e1ed..a6ad815bd4be 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -370,6 +370,11 @@ static int check_ptrace_options(unsigned long data)
>  	if (data & ~(unsigned long)PTRACE_O_MASK)
>  		return -EINVAL;
>  
> +	if (unlikely(data & PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)) {
> +		if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTART))
> +			return -EINVAL;
> +	}
> +
>  	if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
>  		if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
>  		    !IS_ENABLED(CONFIG_SECCOMP))
> -- 
> 2.39.0
>
Damien Le Moal Jan. 20, 2023, 10:49 a.m. UTC | #2
On 1/20/23 19:23, Peter Zijlstra wrote:
> On Wed, Jan 18, 2023 at 03:10:53PM -0500, Gregory Price wrote:
>> Adds PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH to ptrace options, and
>> modify Syscall User Dispatch to suspend interception when enabled.
>>
>> This is modeled after the SUSPEND_SECCOMP feature, which suspends
>> SECCOMP interposition.  Without doing this, software like CRIU will
>> inject system calls into a process and be intercepted by Syscall
>> User Dispatch, either causing a crash (due to blocked signals) or
>> the delivery of those signals to a ptracer (not the intended behavior).
>>
>> Since Syscall User Dispatch is not a privileged feature, a check
>> for permissions is not required, however attempting to set this
>> option when CONFIG_CHECKPOINT_RESTORE it not supported should be
>> disallowed, as its intended use is checkpoint/resume.
>>
>> Signed-off-by: Gregory Price <gregory.price@memverge.com>
> 
> One small nit -- see below, otherwise:
> 
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> 
>> ---
>>  include/linux/ptrace.h               | 2 ++
>>  include/uapi/linux/ptrace.h          | 6 +++++-
>>  kernel/entry/syscall_user_dispatch.c | 5 +++++
>>  kernel/ptrace.c                      | 5 +++++
>>  4 files changed, 17 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
>> index eaaef3ffec22..461ae5c99d57 100644
>> --- a/include/linux/ptrace.h
>> +++ b/include/linux/ptrace.h
>> @@ -45,6 +45,8 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
>>  
>>  #define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
>>  #define PT_SUSPEND_SECCOMP	(PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
>> +#define PT_SUSPEND_SYSCALL_USER_DISPATCH \
>> +	(PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH << PT_OPT_FLAG_SHIFT)
>>  
>>  extern long arch_ptrace(struct task_struct *child, long request,
>>  			unsigned long addr, unsigned long data);
>> diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
>> index 195ae64a8c87..ba9e3f19a22c 100644
>> --- a/include/uapi/linux/ptrace.h
>> +++ b/include/uapi/linux/ptrace.h
>> @@ -146,9 +146,13 @@ struct ptrace_rseq_configuration {
>>  /* eventless options */
>>  #define PTRACE_O_EXITKILL		(1 << 20)
>>  #define PTRACE_O_SUSPEND_SECCOMP	(1 << 21)
>> +#define PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH	(1 << 22)
>>  
>>  #define PTRACE_O_MASK		(\
>> -	0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP)
>> +	0x000000ff | \
>> +	PTRACE_O_EXITKILL | \
>> +	PTRACE_O_SUSPEND_SECCOMP | \
>> +	PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)
>>  
>>  #include <asm/ptrace.h>
>>  
>> diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
>> index 0b6379adff6b..7607f4598dd8 100644
>> --- a/kernel/entry/syscall_user_dispatch.c
>> +++ b/kernel/entry/syscall_user_dispatch.c
>> @@ -8,6 +8,7 @@
>>  #include <linux/uaccess.h>
>>  #include <linux/signal.h>
>>  #include <linux/elf.h>
>> +#include <linux/ptrace.h>
>>  
>>  #include <linux/sched/signal.h>
>>  #include <linux/sched/task_stack.h>
>> @@ -36,6 +37,10 @@ bool syscall_user_dispatch(struct pt_regs *regs)
>>  	struct syscall_user_dispatch *sd = &current->syscall_dispatch;
>>  	char state;
>>  
>> +	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
>> +		unlikely(current->ptrace & PT_SUSPEND_SYSCALL_USER_DISPATCH))
> 
> Align with the '(' pleaase.
> 
>> +		return false;
>> +
>>  	if (likely(instruction_pointer(regs) - sd->offset < sd->len))
>>  		return false;
>>  
>> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
>> index 54482193e1ed..a6ad815bd4be 100644
>> --- a/kernel/ptrace.c
>> +++ b/kernel/ptrace.c
>> @@ -370,6 +370,11 @@ static int check_ptrace_options(unsigned long data)
>>  	if (data & ~(unsigned long)PTRACE_O_MASK)
>>  		return -EINVAL;
>>  
>> +	if (unlikely(data & PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)) {
>> +		if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTART))

Why not one if with a && ?

>> +			return -EINVAL;
>> +	}
>> +
>>  	if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
>>  		if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
>>  		    !IS_ENABLED(CONFIG_SECCOMP))
>> -- 
>> 2.39.0
>>