diff mbox series

[v4,2/2] selftests/vm/pkeys: Add a regression test for setting PKRU through ptrace

Message ID 20220805230158.39378-2-khuey@kylehuey.com
State New
Headers show
Series [v4,1/2] x86/fpu: Allow PKRU to be (once again) written by ptrace. | expand

Commit Message

Kyle Huey Aug. 5, 2022, 11:01 p.m. UTC
From: Kyle Huey <me@kylehuey.com>

This tests PTRACE_SETREGSET with NT_X86_XSTATE modifying PKRU directly and
removing the PKRU bit from XSTATE_BV.

Signed-off-by: Kyle Huey <me@kylehuey.com>
---
 tools/testing/selftests/vm/pkey-x86.h        | 12 +++
 tools/testing/selftests/vm/protection_keys.c | 88 +++++++++++++++++++-
 2 files changed, 98 insertions(+), 2 deletions(-)

Comments

Ingo Molnar Aug. 6, 2022, 8:52 a.m. UTC | #1
* Kyle Huey <me@kylehuey.com> wrote:

> From: Kyle Huey <me@kylehuey.com>
> 
> This tests PTRACE_SETREGSET with NT_X86_XSTATE modifying PKRU directly and
> removing the PKRU bit from XSTATE_BV.
> 
> Signed-off-by: Kyle Huey <me@kylehuey.com>
> ---
>  tools/testing/selftests/vm/pkey-x86.h        | 12 +++
>  tools/testing/selftests/vm/protection_keys.c | 88 +++++++++++++++++++-
>  2 files changed, 98 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
> index b078ce9c6d2a..72c14cd3ddc7 100644
> --- a/tools/testing/selftests/vm/pkey-x86.h
> +++ b/tools/testing/selftests/vm/pkey-x86.h
> @@ -104,6 +104,18 @@ static inline int cpu_has_pkeys(void)
>  	return 1;
>  }
>  
> +static inline int cpu_max_xsave_size(void)
> +{
> +	unsigned long XSTATE_CPUID = 0xd;
> +	unsigned int eax;
> +	unsigned int ebx;
> +	unsigned int ecx;
> +	unsigned int edx;
> +
> +	__cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx);
> +	return ecx;
> +}
> +
>  static inline u32 pkey_bit_position(int pkey)
>  {
>  	return pkey * PKEY_BITS_PER_PKEY;
> diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
> index 291bc1e07842..27759d3ed9cd 100644
> --- a/tools/testing/selftests/vm/protection_keys.c
> +++ b/tools/testing/selftests/vm/protection_keys.c
> @@ -18,12 +18,13 @@
>   *	do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
>   *
>   * Compile like this:
> - *	gcc      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> - *	gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> + *	gcc -mxsave      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> + *	gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
>   */
>  #define _GNU_SOURCE
>  #define __SANE_USERSPACE_TYPES__
>  #include <errno.h>
> +#include <linux/elf.h>
>  #include <linux/futex.h>
>  #include <time.h>
>  #include <sys/time.h>
> @@ -1550,6 +1551,86 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
>  	do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
>  }
>  
> +#if defined(__i386__) || defined(__x86_64__)
> +void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
> +{
> +	pid_t child;
> +	int status, ret;
> +	int pkey_offset = pkey_reg_xstate_offset();
> +	size_t xsave_size = cpu_max_xsave_size();
> +	void *xsave;
> +	u32 *pkey_register;
> +	u64 *xstate_bv;
> +	struct iovec iov;
> +
> +	child = fork();
> +	pkey_assert(child >= 0);
> +	dprintf3("[%d] fork() ret: %d\n", getpid(), child);
> +	if (!child) {
> +		u32 pkey_register = read_pkey_reg();
> +
> +		ptrace(PTRACE_TRACEME, 0, 0, 0);
> +		raise(SIGSTOP);
> +
> +		/*
> +		 * need __read_pkey_reg() version so we do not do shadow_pkey_reg
> +		 * checking
> +		 */
> +		if (pkey_register == __read_pkey_reg())
> +			exit(1);
> +
> +		raise(SIGSTOP);
> +
> +		exit(__read_pkey_reg());
> +	}
> +
> +	pkey_assert(child == waitpid(child, &status, 0));
> +	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> +	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> +
> +	xsave = (void *)malloc(xsave_size);
> +	pkey_assert(xsave > 0);
> +
> +	iov.iov_base = xsave;
> +	iov.iov_len = xsave_size;
> +	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> +	pkey_assert(ret == 0);
> +
> +	pkey_register = (u32 *)(xsave + pkey_offset);
> +	pkey_assert(*pkey_register == read_pkey_reg());
> +
> +	*pkey_register = !read_pkey_reg();
> +
> +	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> +	pkey_assert(ret == 0);
> +
> +	ret = ptrace(PTRACE_CONT, child, 0, 0);
> +	pkey_assert(ret == 0);
> +
> +	pkey_assert(child == waitpid(child, &status, 0));
> +	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> +	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> +
> +	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> +	pkey_assert(ret == 0);
> +
> +	xstate_bv = (u64 *)(xsave + 512);
> +	*xstate_bv &= ~(1 << 9);
> +
> +	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> +	pkey_assert(ret == 0);
> +
> +	ret = ptrace(PTRACE_CONT, child, 0, 0);
> +	pkey_assert(ret == 0);
> +
> +	pkey_assert(child == waitpid(child, &status, 0));
> +	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> +	pkey_assert(WIFEXITED(status));
> +	pkey_assert(WEXITSTATUS(status) == 0);
> +	free(xsave);

LGTM.

May I ask for a bit more in terms of testing the ABI: writing some 
non-trivial (not all-zero and not all-ones) value into the PKRU register, 
forcing the child task to go through a FPU save/restore context switch
and then reading it back and verifying the value, or something like that?

Thanks,

	Ingo
Kyle Huey Aug. 6, 2022, 12:46 p.m. UTC | #2
On Sat, Aug 6, 2022 at 1:52 AM Ingo Molnar <mingo@kernel.org> wrote:
>
>
> * Kyle Huey <me@kylehuey.com> wrote:
>
> > From: Kyle Huey <me@kylehuey.com>
> >
> > This tests PTRACE_SETREGSET with NT_X86_XSTATE modifying PKRU directly and
> > removing the PKRU bit from XSTATE_BV.
> >
> > Signed-off-by: Kyle Huey <me@kylehuey.com>
> > ---
> >  tools/testing/selftests/vm/pkey-x86.h        | 12 +++
> >  tools/testing/selftests/vm/protection_keys.c | 88 +++++++++++++++++++-
> >  2 files changed, 98 insertions(+), 2 deletions(-)
> >
> > diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
> > index b078ce9c6d2a..72c14cd3ddc7 100644
> > --- a/tools/testing/selftests/vm/pkey-x86.h
> > +++ b/tools/testing/selftests/vm/pkey-x86.h
> > @@ -104,6 +104,18 @@ static inline int cpu_has_pkeys(void)
> >       return 1;
> >  }
> >
> > +static inline int cpu_max_xsave_size(void)
> > +{
> > +     unsigned long XSTATE_CPUID = 0xd;
> > +     unsigned int eax;
> > +     unsigned int ebx;
> > +     unsigned int ecx;
> > +     unsigned int edx;
> > +
> > +     __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx);
> > +     return ecx;
> > +}
> > +
> >  static inline u32 pkey_bit_position(int pkey)
> >  {
> >       return pkey * PKEY_BITS_PER_PKEY;
> > diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
> > index 291bc1e07842..27759d3ed9cd 100644
> > --- a/tools/testing/selftests/vm/protection_keys.c
> > +++ b/tools/testing/selftests/vm/protection_keys.c
> > @@ -18,12 +18,13 @@
> >   *   do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
> >   *
> >   * Compile like this:
> > - *   gcc      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > - *   gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > + *   gcc -mxsave      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > + *   gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> >   */
> >  #define _GNU_SOURCE
> >  #define __SANE_USERSPACE_TYPES__
> >  #include <errno.h>
> > +#include <linux/elf.h>
> >  #include <linux/futex.h>
> >  #include <time.h>
> >  #include <sys/time.h>
> > @@ -1550,6 +1551,86 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
> >       do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
> >  }
> >
> > +#if defined(__i386__) || defined(__x86_64__)
> > +void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
> > +{
> > +     pid_t child;
> > +     int status, ret;
> > +     int pkey_offset = pkey_reg_xstate_offset();
> > +     size_t xsave_size = cpu_max_xsave_size();
> > +     void *xsave;
> > +     u32 *pkey_register;
> > +     u64 *xstate_bv;
> > +     struct iovec iov;
> > +
> > +     child = fork();
> > +     pkey_assert(child >= 0);
> > +     dprintf3("[%d] fork() ret: %d\n", getpid(), child);
> > +     if (!child) {
> > +             u32 pkey_register = read_pkey_reg();
> > +
> > +             ptrace(PTRACE_TRACEME, 0, 0, 0);
> > +             raise(SIGSTOP);
> > +
> > +             /*
> > +              * need __read_pkey_reg() version so we do not do shadow_pkey_reg
> > +              * checking
> > +              */
> > +             if (pkey_register == __read_pkey_reg())
> > +                     exit(1);
> > +
> > +             raise(SIGSTOP);
> > +
> > +             exit(__read_pkey_reg());
> > +     }
> > +
> > +     pkey_assert(child == waitpid(child, &status, 0));
> > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > +     pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> > +
> > +     xsave = (void *)malloc(xsave_size);
> > +     pkey_assert(xsave > 0);
> > +
> > +     iov.iov_base = xsave;
> > +     iov.iov_len = xsave_size;
> > +     ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > +     pkey_assert(ret == 0);
> > +
> > +     pkey_register = (u32 *)(xsave + pkey_offset);
> > +     pkey_assert(*pkey_register == read_pkey_reg());
> > +
> > +     *pkey_register = !read_pkey_reg();
> > +
> > +     ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > +     pkey_assert(ret == 0);
> > +
> > +     ret = ptrace(PTRACE_CONT, child, 0, 0);
> > +     pkey_assert(ret == 0);
> > +
> > +     pkey_assert(child == waitpid(child, &status, 0));
> > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > +     pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> > +
> > +     ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > +     pkey_assert(ret == 0);
> > +
> > +     xstate_bv = (u64 *)(xsave + 512);
> > +     *xstate_bv &= ~(1 << 9);
> > +
> > +     ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > +     pkey_assert(ret == 0);
> > +
> > +     ret = ptrace(PTRACE_CONT, child, 0, 0);
> > +     pkey_assert(ret == 0);
> > +
> > +     pkey_assert(child == waitpid(child, &status, 0));
> > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > +     pkey_assert(WIFEXITED(status));
> > +     pkey_assert(WEXITSTATUS(status) == 0);
> > +     free(xsave);
>
> LGTM.
>
> May I ask for a bit more in terms of testing the ABI: writing some
> non-trivial (not all-zero and not all-ones) value into the PKRU register,
> forcing the child task to go through a FPU save/restore context switch
> and then reading it back and verifying the value, or something like that?

Can you elaborate a bit on what you mean here? I'm not sure what "a
FPU save/restore context switch" is. The XSTATE (and everything else)
will be saved/restored at the ptrace stops (for the raise(SIGSTOP)s)
already.

- Kyle

> Thanks,
>
>         Ingo
Ingo Molnar Aug. 6, 2022, 6:55 p.m. UTC | #3
* Kyle Huey <me@kylehuey.com> wrote:

> On Sat, Aug 6, 2022 at 1:52 AM Ingo Molnar <mingo@kernel.org> wrote:
> >
> >
> > * Kyle Huey <me@kylehuey.com> wrote:
> >
> > > From: Kyle Huey <me@kylehuey.com>
> > >
> > > This tests PTRACE_SETREGSET with NT_X86_XSTATE modifying PKRU directly and
> > > removing the PKRU bit from XSTATE_BV.
> > >
> > > Signed-off-by: Kyle Huey <me@kylehuey.com>
> > > ---
> > >  tools/testing/selftests/vm/pkey-x86.h        | 12 +++
> > >  tools/testing/selftests/vm/protection_keys.c | 88 +++++++++++++++++++-
> > >  2 files changed, 98 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
> > > index b078ce9c6d2a..72c14cd3ddc7 100644
> > > --- a/tools/testing/selftests/vm/pkey-x86.h
> > > +++ b/tools/testing/selftests/vm/pkey-x86.h
> > > @@ -104,6 +104,18 @@ static inline int cpu_has_pkeys(void)
> > >       return 1;
> > >  }
> > >
> > > +static inline int cpu_max_xsave_size(void)
> > > +{
> > > +     unsigned long XSTATE_CPUID = 0xd;
> > > +     unsigned int eax;
> > > +     unsigned int ebx;
> > > +     unsigned int ecx;
> > > +     unsigned int edx;
> > > +
> > > +     __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx);
> > > +     return ecx;
> > > +}
> > > +
> > >  static inline u32 pkey_bit_position(int pkey)
> > >  {
> > >       return pkey * PKEY_BITS_PER_PKEY;
> > > diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
> > > index 291bc1e07842..27759d3ed9cd 100644
> > > --- a/tools/testing/selftests/vm/protection_keys.c
> > > +++ b/tools/testing/selftests/vm/protection_keys.c
> > > @@ -18,12 +18,13 @@
> > >   *   do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
> > >   *
> > >   * Compile like this:
> > > - *   gcc      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > - *   gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > + *   gcc -mxsave      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > + *   gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > >   */
> > >  #define _GNU_SOURCE
> > >  #define __SANE_USERSPACE_TYPES__
> > >  #include <errno.h>
> > > +#include <linux/elf.h>
> > >  #include <linux/futex.h>
> > >  #include <time.h>
> > >  #include <sys/time.h>
> > > @@ -1550,6 +1551,86 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
> > >       do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
> > >  }
> > >
> > > +#if defined(__i386__) || defined(__x86_64__)
> > > +void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
> > > +{
> > > +     pid_t child;
> > > +     int status, ret;
> > > +     int pkey_offset = pkey_reg_xstate_offset();
> > > +     size_t xsave_size = cpu_max_xsave_size();
> > > +     void *xsave;
> > > +     u32 *pkey_register;
> > > +     u64 *xstate_bv;
> > > +     struct iovec iov;
> > > +
> > > +     child = fork();
> > > +     pkey_assert(child >= 0);
> > > +     dprintf3("[%d] fork() ret: %d\n", getpid(), child);
> > > +     if (!child) {
> > > +             u32 pkey_register = read_pkey_reg();
> > > +
> > > +             ptrace(PTRACE_TRACEME, 0, 0, 0);
> > > +             raise(SIGSTOP);
> > > +
> > > +             /*
> > > +              * need __read_pkey_reg() version so we do not do shadow_pkey_reg
> > > +              * checking
> > > +              */
> > > +             if (pkey_register == __read_pkey_reg())
> > > +                     exit(1);
> > > +
> > > +             raise(SIGSTOP);
> > > +
> > > +             exit(__read_pkey_reg());
> > > +     }
> > > +
> > > +     pkey_assert(child == waitpid(child, &status, 0));
> > > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > > +     pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> > > +
> > > +     xsave = (void *)malloc(xsave_size);
> > > +     pkey_assert(xsave > 0);
> > > +
> > > +     iov.iov_base = xsave;
> > > +     iov.iov_len = xsave_size;
> > > +     ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > +     pkey_assert(ret == 0);
> > > +
> > > +     pkey_register = (u32 *)(xsave + pkey_offset);
> > > +     pkey_assert(*pkey_register == read_pkey_reg());
> > > +
> > > +     *pkey_register = !read_pkey_reg();
> > > +
> > > +     ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > +     pkey_assert(ret == 0);
> > > +
> > > +     ret = ptrace(PTRACE_CONT, child, 0, 0);
> > > +     pkey_assert(ret == 0);
> > > +
> > > +     pkey_assert(child == waitpid(child, &status, 0));
> > > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > > +     pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> > > +
> > > +     ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > +     pkey_assert(ret == 0);
> > > +
> > > +     xstate_bv = (u64 *)(xsave + 512);
> > > +     *xstate_bv &= ~(1 << 9);
> > > +
> > > +     ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > +     pkey_assert(ret == 0);
> > > +
> > > +     ret = ptrace(PTRACE_CONT, child, 0, 0);
> > > +     pkey_assert(ret == 0);
> > > +
> > > +     pkey_assert(child == waitpid(child, &status, 0));
> > > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > > +     pkey_assert(WIFEXITED(status));
> > > +     pkey_assert(WEXITSTATUS(status) == 0);
> > > +     free(xsave);
> >
> > LGTM.
> >
> > May I ask for a bit more in terms of testing the ABI: writing some
> > non-trivial (not all-zero and not all-ones) value into the PKRU register,
> > forcing the child task to go through a FPU save/restore context switch
> > and then reading it back and verifying the value, or something like that?
> 
> Can you elaborate a bit on what you mean here? I'm not sure what "a
> FPU save/restore context switch" is. The XSTATE (and everything else)
> will be saved/restored at the ptrace stops (for the raise(SIGSTOP)s)
> already.

Yeah, here I meant that the ptraced child actually has to execute to carry 
the new values - and AFAICS that already happens in your testcase, as 
there's a PTRACE_CONT+waitpid() between the PTRACE_SETREGSET and the second 
PTRACE_GETREGSET call, right?

If so, then the testcase should be mostly fine, except would it make sense 
to use something less trivial than clearing the permission bitmask:

> > > +     xstate_bv = (u64 *)(xsave + 512);
> > > +     *xstate_bv &= ~(1 << 9);

if I'm reading the code right? A 01010101 bitmask perhaps?

Thanks,

	Ingo
Kyle Huey Aug. 6, 2022, 7:20 p.m. UTC | #4
On Sat, Aug 6, 2022 at 11:55 AM Ingo Molnar <mingo@kernel.org> wrote:
>
>
> * Kyle Huey <me@kylehuey.com> wrote:
>
> > On Sat, Aug 6, 2022 at 1:52 AM Ingo Molnar <mingo@kernel.org> wrote:
> > >
> > >
> > > * Kyle Huey <me@kylehuey.com> wrote:
> > >
> > > > From: Kyle Huey <me@kylehuey.com>
> > > >
> > > > This tests PTRACE_SETREGSET with NT_X86_XSTATE modifying PKRU directly and
> > > > removing the PKRU bit from XSTATE_BV.
> > > >
> > > > Signed-off-by: Kyle Huey <me@kylehuey.com>
> > > > ---
> > > >  tools/testing/selftests/vm/pkey-x86.h        | 12 +++
> > > >  tools/testing/selftests/vm/protection_keys.c | 88 +++++++++++++++++++-
> > > >  2 files changed, 98 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
> > > > index b078ce9c6d2a..72c14cd3ddc7 100644
> > > > --- a/tools/testing/selftests/vm/pkey-x86.h
> > > > +++ b/tools/testing/selftests/vm/pkey-x86.h
> > > > @@ -104,6 +104,18 @@ static inline int cpu_has_pkeys(void)
> > > >       return 1;
> > > >  }
> > > >
> > > > +static inline int cpu_max_xsave_size(void)
> > > > +{
> > > > +     unsigned long XSTATE_CPUID = 0xd;
> > > > +     unsigned int eax;
> > > > +     unsigned int ebx;
> > > > +     unsigned int ecx;
> > > > +     unsigned int edx;
> > > > +
> > > > +     __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx);
> > > > +     return ecx;
> > > > +}
> > > > +
> > > >  static inline u32 pkey_bit_position(int pkey)
> > > >  {
> > > >       return pkey * PKEY_BITS_PER_PKEY;
> > > > diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
> > > > index 291bc1e07842..27759d3ed9cd 100644
> > > > --- a/tools/testing/selftests/vm/protection_keys.c
> > > > +++ b/tools/testing/selftests/vm/protection_keys.c
> > > > @@ -18,12 +18,13 @@
> > > >   *   do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
> > > >   *
> > > >   * Compile like this:
> > > > - *   gcc      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > > - *   gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > > + *   gcc -mxsave      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > > + *   gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > >   */
> > > >  #define _GNU_SOURCE
> > > >  #define __SANE_USERSPACE_TYPES__
> > > >  #include <errno.h>
> > > > +#include <linux/elf.h>
> > > >  #include <linux/futex.h>
> > > >  #include <time.h>
> > > >  #include <sys/time.h>
> > > > @@ -1550,6 +1551,86 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
> > > >       do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
> > > >  }
> > > >
> > > > +#if defined(__i386__) || defined(__x86_64__)
> > > > +void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
> > > > +{
> > > > +     pid_t child;
> > > > +     int status, ret;
> > > > +     int pkey_offset = pkey_reg_xstate_offset();
> > > > +     size_t xsave_size = cpu_max_xsave_size();
> > > > +     void *xsave;
> > > > +     u32 *pkey_register;
> > > > +     u64 *xstate_bv;
> > > > +     struct iovec iov;
> > > > +
> > > > +     child = fork();
> > > > +     pkey_assert(child >= 0);
> > > > +     dprintf3("[%d] fork() ret: %d\n", getpid(), child);
> > > > +     if (!child) {
> > > > +             u32 pkey_register = read_pkey_reg();
> > > > +
> > > > +             ptrace(PTRACE_TRACEME, 0, 0, 0);
> > > > +             raise(SIGSTOP);
> > > > +
> > > > +             /*
> > > > +              * need __read_pkey_reg() version so we do not do shadow_pkey_reg
> > > > +              * checking
> > > > +              */
> > > > +             if (pkey_register == __read_pkey_reg())
> > > > +                     exit(1);
> > > > +
> > > > +             raise(SIGSTOP);
> > > > +
> > > > +             exit(__read_pkey_reg());
> > > > +     }
> > > > +
> > > > +     pkey_assert(child == waitpid(child, &status, 0));
> > > > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > > > +     pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> > > > +
> > > > +     xsave = (void *)malloc(xsave_size);
> > > > +     pkey_assert(xsave > 0);
> > > > +
> > > > +     iov.iov_base = xsave;
> > > > +     iov.iov_len = xsave_size;
> > > > +     ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > > +     pkey_assert(ret == 0);
> > > > +
> > > > +     pkey_register = (u32 *)(xsave + pkey_offset);
> > > > +     pkey_assert(*pkey_register == read_pkey_reg());
> > > > +
> > > > +     *pkey_register = !read_pkey_reg();
> > > > +
> > > > +     ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > > +     pkey_assert(ret == 0);
> > > > +
> > > > +     ret = ptrace(PTRACE_CONT, child, 0, 0);
> > > > +     pkey_assert(ret == 0);
> > > > +
> > > > +     pkey_assert(child == waitpid(child, &status, 0));
> > > > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > > > +     pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> > > > +
> > > > +     ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > > +     pkey_assert(ret == 0);
> > > > +
> > > > +     xstate_bv = (u64 *)(xsave + 512);
> > > > +     *xstate_bv &= ~(1 << 9);
> > > > +
> > > > +     ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > > +     pkey_assert(ret == 0);
> > > > +
> > > > +     ret = ptrace(PTRACE_CONT, child, 0, 0);
> > > > +     pkey_assert(ret == 0);
> > > > +
> > > > +     pkey_assert(child == waitpid(child, &status, 0));
> > > > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > > > +     pkey_assert(WIFEXITED(status));
> > > > +     pkey_assert(WEXITSTATUS(status) == 0);
> > > > +     free(xsave);
> > >
> > > LGTM.
> > >
> > > May I ask for a bit more in terms of testing the ABI: writing some
> > > non-trivial (not all-zero and not all-ones) value into the PKRU register,
> > > forcing the child task to go through a FPU save/restore context switch
> > > and then reading it back and verifying the value, or something like that?
> >
> > Can you elaborate a bit on what you mean here? I'm not sure what "a
> > FPU save/restore context switch" is. The XSTATE (and everything else)
> > will be saved/restored at the ptrace stops (for the raise(SIGSTOP)s)
> > already.
>
> Yeah, here I meant that the ptraced child actually has to execute to carry
> the new values - and AFAICS that already happens in your testcase, as
> there's a PTRACE_CONT+waitpid() between the PTRACE_SETREGSET and the second
> PTRACE_GETREGSET call, right?

Yeah. The gdb command sequence I reported is essentially doing
PTRACE_SETREGSET immediately followed by PTRACE_GETREGSET with no
intervening execution. And while that is a visible manifestation of
the bug, what I really care about is the modifications being reflected
in the execution of the ptracee. I could add code to read back the
value via PTRACE_GETREGSET too if desired.

> If so, then the testcase should be mostly fine, except would it make sense
> to use something less trivial than clearing the permission bitmask:
>
> > > > +     xstate_bv = (u64 *)(xsave + 512);
> > > > +     *xstate_bv &= ~(1 << 9);
>
> if I'm reading the code right? A 01010101 bitmask perhaps?

So there's two tests (the two PTRACE_SETREGSET calls). The first one
tests modifying the PKRU value stored in the XSTATE. I actually meant
this to negate the existing PKRU value and flip all the bits but I
realize now I wrote ! instead of ~ (! is the bitwise negation operator
in Rust, which is what I do most of the time).  !PKRU is still
different from PKRU though, so the test does fail on affected kernels
and pass after the fix which the child sees that the PKRU value did
change somehow and doesn't take the early exit(1).

The second part, which you've highlighted, tests clearing the PKRU bit
in the XSTATE_BV field. This would cause an XRSTOR to set PKRU to the
hardware init value (0) and the test checks that the value is indeed
zero (by having the child exit with the PKRU register value as its
return code, and then checking that the child exited with code 0).

- Kyle

> Thanks,
>
>         Ingo
Ingo Molnar Aug. 6, 2022, 7:36 p.m. UTC | #5
* Kyle Huey <me@kylehuey.com> wrote:

> On Sat, Aug 6, 2022 at 11:55 AM Ingo Molnar <mingo@kernel.org> wrote:
> >
> >
> > * Kyle Huey <me@kylehuey.com> wrote:
> >
> > > On Sat, Aug 6, 2022 at 1:52 AM Ingo Molnar <mingo@kernel.org> wrote:
> > > >
> > > >
> > > > * Kyle Huey <me@kylehuey.com> wrote:
> > > >
> > > > > From: Kyle Huey <me@kylehuey.com>
> > > > >
> > > > > This tests PTRACE_SETREGSET with NT_X86_XSTATE modifying PKRU directly and
> > > > > removing the PKRU bit from XSTATE_BV.
> > > > >
> > > > > Signed-off-by: Kyle Huey <me@kylehuey.com>
> > > > > ---
> > > > >  tools/testing/selftests/vm/pkey-x86.h        | 12 +++
> > > > >  tools/testing/selftests/vm/protection_keys.c | 88 +++++++++++++++++++-
> > > > >  2 files changed, 98 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
> > > > > index b078ce9c6d2a..72c14cd3ddc7 100644
> > > > > --- a/tools/testing/selftests/vm/pkey-x86.h
> > > > > +++ b/tools/testing/selftests/vm/pkey-x86.h
> > > > > @@ -104,6 +104,18 @@ static inline int cpu_has_pkeys(void)
> > > > >       return 1;
> > > > >  }
> > > > >
> > > > > +static inline int cpu_max_xsave_size(void)
> > > > > +{
> > > > > +     unsigned long XSTATE_CPUID = 0xd;
> > > > > +     unsigned int eax;
> > > > > +     unsigned int ebx;
> > > > > +     unsigned int ecx;
> > > > > +     unsigned int edx;
> > > > > +
> > > > > +     __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx);
> > > > > +     return ecx;
> > > > > +}
> > > > > +
> > > > >  static inline u32 pkey_bit_position(int pkey)
> > > > >  {
> > > > >       return pkey * PKEY_BITS_PER_PKEY;
> > > > > diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
> > > > > index 291bc1e07842..27759d3ed9cd 100644
> > > > > --- a/tools/testing/selftests/vm/protection_keys.c
> > > > > +++ b/tools/testing/selftests/vm/protection_keys.c
> > > > > @@ -18,12 +18,13 @@
> > > > >   *   do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
> > > > >   *
> > > > >   * Compile like this:
> > > > > - *   gcc      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > > > - *   gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > > > + *   gcc -mxsave      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > > > + *   gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
> > > > >   */
> > > > >  #define _GNU_SOURCE
> > > > >  #define __SANE_USERSPACE_TYPES__
> > > > >  #include <errno.h>
> > > > > +#include <linux/elf.h>
> > > > >  #include <linux/futex.h>
> > > > >  #include <time.h>
> > > > >  #include <sys/time.h>
> > > > > @@ -1550,6 +1551,86 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
> > > > >       do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
> > > > >  }
> > > > >
> > > > > +#if defined(__i386__) || defined(__x86_64__)
> > > > > +void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
> > > > > +{
> > > > > +     pid_t child;
> > > > > +     int status, ret;
> > > > > +     int pkey_offset = pkey_reg_xstate_offset();
> > > > > +     size_t xsave_size = cpu_max_xsave_size();
> > > > > +     void *xsave;
> > > > > +     u32 *pkey_register;
> > > > > +     u64 *xstate_bv;
> > > > > +     struct iovec iov;
> > > > > +
> > > > > +     child = fork();
> > > > > +     pkey_assert(child >= 0);
> > > > > +     dprintf3("[%d] fork() ret: %d\n", getpid(), child);
> > > > > +     if (!child) {
> > > > > +             u32 pkey_register = read_pkey_reg();
> > > > > +
> > > > > +             ptrace(PTRACE_TRACEME, 0, 0, 0);
> > > > > +             raise(SIGSTOP);
> > > > > +
> > > > > +             /*
> > > > > +              * need __read_pkey_reg() version so we do not do shadow_pkey_reg
> > > > > +              * checking
> > > > > +              */
> > > > > +             if (pkey_register == __read_pkey_reg())
> > > > > +                     exit(1);
> > > > > +
> > > > > +             raise(SIGSTOP);
> > > > > +
> > > > > +             exit(__read_pkey_reg());
> > > > > +     }
> > > > > +
> > > > > +     pkey_assert(child == waitpid(child, &status, 0));
> > > > > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > > > > +     pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> > > > > +
> > > > > +     xsave = (void *)malloc(xsave_size);
> > > > > +     pkey_assert(xsave > 0);
> > > > > +
> > > > > +     iov.iov_base = xsave;
> > > > > +     iov.iov_len = xsave_size;
> > > > > +     ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > > > +     pkey_assert(ret == 0);
> > > > > +
> > > > > +     pkey_register = (u32 *)(xsave + pkey_offset);
> > > > > +     pkey_assert(*pkey_register == read_pkey_reg());
> > > > > +
> > > > > +     *pkey_register = !read_pkey_reg();
> > > > > +
> > > > > +     ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > > > +     pkey_assert(ret == 0);
> > > > > +
> > > > > +     ret = ptrace(PTRACE_CONT, child, 0, 0);
> > > > > +     pkey_assert(ret == 0);
> > > > > +
> > > > > +     pkey_assert(child == waitpid(child, &status, 0));
> > > > > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > > > > +     pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
> > > > > +
> > > > > +     ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > > > +     pkey_assert(ret == 0);
> > > > > +
> > > > > +     xstate_bv = (u64 *)(xsave + 512);
> > > > > +     *xstate_bv &= ~(1 << 9);
> > > > > +
> > > > > +     ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
> > > > > +     pkey_assert(ret == 0);
> > > > > +
> > > > > +     ret = ptrace(PTRACE_CONT, child, 0, 0);
> > > > > +     pkey_assert(ret == 0);
> > > > > +
> > > > > +     pkey_assert(child == waitpid(child, &status, 0));
> > > > > +     dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
> > > > > +     pkey_assert(WIFEXITED(status));
> > > > > +     pkey_assert(WEXITSTATUS(status) == 0);
> > > > > +     free(xsave);
> > > >
> > > > LGTM.
> > > >
> > > > May I ask for a bit more in terms of testing the ABI: writing some
> > > > non-trivial (not all-zero and not all-ones) value into the PKRU register,
> > > > forcing the child task to go through a FPU save/restore context switch
> > > > and then reading it back and verifying the value, or something like that?
> > >
> > > Can you elaborate a bit on what you mean here? I'm not sure what "a
> > > FPU save/restore context switch" is. The XSTATE (and everything else)
> > > will be saved/restored at the ptrace stops (for the raise(SIGSTOP)s)
> > > already.
> >
> > Yeah, here I meant that the ptraced child actually has to execute to carry
> > the new values - and AFAICS that already happens in your testcase, as
> > there's a PTRACE_CONT+waitpid() between the PTRACE_SETREGSET and the second
> > PTRACE_GETREGSET call, right?
> 
> Yeah. The gdb command sequence I reported is essentially doing
> PTRACE_SETREGSET immediately followed by PTRACE_GETREGSET with no
> intervening execution. And while that is a visible manifestation of
> the bug, what I really care about is the modifications being reflected
> in the execution of the ptracee. I could add code to read back the
> value via PTRACE_GETREGSET too if desired.

Yeah, that would be nice and completes the test cycle pretty robustly if 
the value written isn't "trivial" - and 'flipping' is fine:

> > If so, then the testcase should be mostly fine, except would it make sense
> > to use something less trivial than clearing the permission bitmask:
> >
> > > > > +     xstate_bv = (u64 *)(xsave + 512);
> > > > > +     *xstate_bv &= ~(1 << 9);
> >
> > if I'm reading the code right? A 01010101 bitmask perhaps?
> 
> So there's two tests (the two PTRACE_SETREGSET calls). The first one
> tests modifying the PKRU value stored in the XSTATE. I actually meant
> this to negate the existing PKRU value and flip all the bits but I
> realize now I wrote ! instead of ~ (! is the bitwise negation operator
> in Rust, which is what I do most of the time).  !PKRU is still
> different from PKRU though, so the test does fail on affected kernels
> and pass after the fix which the child sees that the PKRU value did
> change somehow and doesn't take the early exit(1).

Yeah, flipping the bits is even better, as it excercises all the bits.

> The second part, which you've highlighted, tests clearing the PKRU bit in 
> the XSTATE_BV field. This would cause an XRSTOR to set PKRU to the 
> hardware init value (0) and the test checks that the value is indeed zero 
> (by having the child exit with the PKRU register value as its return 
> code, and then checking that the child exited with code 0).

OK - I missed the 'PKRU == exit code' trick in the child, although 
*technically* the range of exit codes is restricted, with silent clipping 
of bits, so it's not a full 32-bit return code. I'd suggest exiting with a 
known exit code instead if the PKRU value departs from expectations. A 
debug session in that case will tell all the details - in the general case 
we don't really expect these tests to fail.

Thanks,

	Ingo
diff mbox series

Patch

diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index b078ce9c6d2a..72c14cd3ddc7 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -104,6 +104,18 @@  static inline int cpu_has_pkeys(void)
 	return 1;
 }
 
+static inline int cpu_max_xsave_size(void)
+{
+	unsigned long XSTATE_CPUID = 0xd;
+	unsigned int eax;
+	unsigned int ebx;
+	unsigned int ecx;
+	unsigned int edx;
+
+	__cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx);
+	return ecx;
+}
+
 static inline u32 pkey_bit_position(int pkey)
 {
 	return pkey * PKEY_BITS_PER_PKEY;
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 291bc1e07842..27759d3ed9cd 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -18,12 +18,13 @@ 
  *	do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
  *
  * Compile like this:
- *	gcc      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- *	gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ *	gcc -mxsave      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ *	gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
  */
 #define _GNU_SOURCE
 #define __SANE_USERSPACE_TYPES__
 #include <errno.h>
+#include <linux/elf.h>
 #include <linux/futex.h>
 #include <time.h>
 #include <sys/time.h>
@@ -1550,6 +1551,86 @@  void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
 	do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
 }
 
+#if defined(__i386__) || defined(__x86_64__)
+void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+{
+	pid_t child;
+	int status, ret;
+	int pkey_offset = pkey_reg_xstate_offset();
+	size_t xsave_size = cpu_max_xsave_size();
+	void *xsave;
+	u32 *pkey_register;
+	u64 *xstate_bv;
+	struct iovec iov;
+
+	child = fork();
+	pkey_assert(child >= 0);
+	dprintf3("[%d] fork() ret: %d\n", getpid(), child);
+	if (!child) {
+		u32 pkey_register = read_pkey_reg();
+
+		ptrace(PTRACE_TRACEME, 0, 0, 0);
+		raise(SIGSTOP);
+
+		/*
+		 * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+		 * checking
+		 */
+		if (pkey_register == __read_pkey_reg())
+			exit(1);
+
+		raise(SIGSTOP);
+
+		exit(__read_pkey_reg());
+	}
+
+	pkey_assert(child == waitpid(child, &status, 0));
+	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+	xsave = (void *)malloc(xsave_size);
+	pkey_assert(xsave > 0);
+
+	iov.iov_base = xsave;
+	iov.iov_len = xsave_size;
+	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+	pkey_assert(ret == 0);
+
+	pkey_register = (u32 *)(xsave + pkey_offset);
+	pkey_assert(*pkey_register == read_pkey_reg());
+
+	*pkey_register = !read_pkey_reg();
+
+	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+	pkey_assert(ret == 0);
+
+	ret = ptrace(PTRACE_CONT, child, 0, 0);
+	pkey_assert(ret == 0);
+
+	pkey_assert(child == waitpid(child, &status, 0));
+	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+	pkey_assert(ret == 0);
+
+	xstate_bv = (u64 *)(xsave + 512);
+	*xstate_bv &= ~(1 << 9);
+
+	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+	pkey_assert(ret == 0);
+
+	ret = ptrace(PTRACE_CONT, child, 0, 0);
+	pkey_assert(ret == 0);
+
+	pkey_assert(child == waitpid(child, &status, 0));
+	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+	pkey_assert(WIFEXITED(status));
+	pkey_assert(WEXITSTATUS(status) == 0);
+	free(xsave);
+}
+#endif
+
 void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
 {
 	int size = PAGE_SIZE;
@@ -1585,6 +1666,9 @@  void (*pkey_tests[])(int *ptr, u16 pkey) = {
 	test_pkey_syscalls_bad_args,
 	test_pkey_alloc_exhaust,
 	test_pkey_alloc_free_attach_pkey0,
+#if defined(__i386__) || defined(__x86_64__)
+	test_ptrace_modifies_pkru,
+#endif
 };
 
 void run_tests_once(void)