diff mbox series

[v3,3/4] selftests/bpf: Add tests verifying bpf lsm userns_create hook

Message ID 20220721172808.585539-4-fred@cloudflare.com
State New
Headers show
Series Introduce security_create_user_ns() | expand

Commit Message

Frederick Lawler July 21, 2022, 5:28 p.m. UTC
The LSM hook userns_create was introduced to provide LSM's an
opportunity to block or allow unprivileged user namespace creation. This
test serves two purposes: it provides a test eBPF implementation, and
tests the hook successfully blocks or allows user namespace creation.

This tests 4 cases:

        1. Unattached bpf program does not block unpriv user namespace
           creation.
        2. Attached bpf program allows user namespace creation given
           CAP_SYS_ADMIN privileges.
        3. Attached bpf program denies user namespace creation for a
           user without CAP_SYS_ADMIN.
        4. The sleepable implementation loads

Signed-off-by: Frederick Lawler <fred@cloudflare.com>

---
The generic deny_namespace file name is used for future namespace
expansion. I didn't want to limit these files to just the create_user_ns
hook.
Changes since v2:
- Rename create_user_ns hook to userns_create
Changes since v1:
- Introduce this patch
---
 .../selftests/bpf/prog_tests/deny_namespace.c | 88 +++++++++++++++++++
 .../selftests/bpf/progs/test_deny_namespace.c | 39 ++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/deny_namespace.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_deny_namespace.c

Comments

Martin KaFai Lau July 22, 2022, 6:07 a.m. UTC | #1
On Thu, Jul 21, 2022 at 12:28:07PM -0500, Frederick Lawler wrote:
> The LSM hook userns_create was introduced to provide LSM's an
> opportunity to block or allow unprivileged user namespace creation. This
> test serves two purposes: it provides a test eBPF implementation, and
> tests the hook successfully blocks or allows user namespace creation.
> 
> This tests 4 cases:
> 
>         1. Unattached bpf program does not block unpriv user namespace
>            creation.
>         2. Attached bpf program allows user namespace creation given
>            CAP_SYS_ADMIN privileges.
>         3. Attached bpf program denies user namespace creation for a
>            user without CAP_SYS_ADMIN.
>         4. The sleepable implementation loads
> 
> Signed-off-by: Frederick Lawler <fred@cloudflare.com>
> 
> ---
> The generic deny_namespace file name is used for future namespace
> expansion. I didn't want to limit these files to just the create_user_ns
> hook.
> Changes since v2:
> - Rename create_user_ns hook to userns_create
> Changes since v1:
> - Introduce this patch
> ---
>  .../selftests/bpf/prog_tests/deny_namespace.c | 88 +++++++++++++++++++
>  .../selftests/bpf/progs/test_deny_namespace.c | 39 ++++++++
>  2 files changed, 127 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/deny_namespace.c
>  create mode 100644 tools/testing/selftests/bpf/progs/test_deny_namespace.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
> new file mode 100644
> index 000000000000..9e4714295008
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#define _GNU_SOURCE
> +#include <test_progs.h>
> +#include "test_deny_namespace.skel.h"
> +#include <sched.h>
> +#include "cap_helpers.h"
> +
> +#define STACK_SIZE (1024 * 1024)
Does the child need 1M stack space ?

> +static char child_stack[STACK_SIZE];
> +
> +int clone_callback(void *arg)
static

> +{
> +	return 0;
> +}
> +
> +static int create_new_user_ns(void)
> +{
> +	int status;
> +	pid_t cpid;
> +
> +	cpid = clone(clone_callback, child_stack + STACK_SIZE,
> +		     CLONE_NEWUSER | SIGCHLD, NULL);
> +
> +	if (cpid == -1)
> +		return errno;
> +
> +	if (cpid == 0)
I'm not an expert on the clone() call, and it is not clear from the
man page what a return value of 0 means.  Could you explain?

> +		return 0;
> +
> +	waitpid(cpid, &status, 0);
> +	if (WIFEXITED(status))
> +		return WEXITSTATUS(status);
> +
> +	return -1;
> +}
> +
> +static void test_userns_create_bpf(void)
> +{
> +	__u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
> +	__u64 old_caps = 0;
> +
> +	ASSERT_OK(create_new_user_ns(), "priv new user ns");
Does it need to enable CAP_SYS_ADMIN first ?

> +
> +	cap_disable_effective(cap_mask, &old_caps);
> +
> +	ASSERT_EQ(create_new_user_ns(), EPERM, "unpriv new user ns");
> +
> +	if (cap_mask & old_caps)
> +		cap_enable_effective(cap_mask, NULL);
> +}
> +
> +static void test_unpriv_userns_create_no_bpf(void)
> +{
> +	__u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
> +	__u64 old_caps = 0;
> +
> +	cap_disable_effective(cap_mask, &old_caps);
> +
> +	ASSERT_OK(create_new_user_ns(), "no-bpf unpriv new user ns");
> +
> +	if (cap_mask & old_caps)
> +		cap_enable_effective(cap_mask, NULL);
> +}
> +
> +void test_deny_namespace(void)
> +{
> +	struct test_deny_namespace *skel = NULL;
> +	int err;
> +
> +	if (test__start_subtest("unpriv_userns_create_no_bpf"))
> +		test_unpriv_userns_create_no_bpf();
> +
> +	skel = test_deny_namespace__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "skel load"))
> +		goto close_prog;
> +
> +	err = test_deny_namespace__attach(skel);
> +	if (!ASSERT_OK(err, "attach"))
> +		goto close_prog;
> +
> +	if (test__start_subtest("userns_create_bpf"))
> +		test_userns_create_bpf();
> +
> +	test_deny_namespace__detach(skel);
> +
> +close_prog:
> +	test_deny_namespace__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
> new file mode 100644
> index 000000000000..9ec9dabc8372
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
> @@ -0,0 +1,39 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +#include <errno.h>
> +#include <linux/capability.h>
> +
> +struct kernel_cap_struct {
> +	__u32 cap[_LINUX_CAPABILITY_U32S_3];
> +} __attribute__((preserve_access_index));
> +
> +struct cred {
> +	struct kernel_cap_struct cap_effective;
> +} __attribute__((preserve_access_index));
> +
> +char _license[] SEC("license") = "GPL";
> +
> +SEC("lsm/userns_create")
> +int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
> +{
> +	struct kernel_cap_struct caps = cred->cap_effective;
> +	int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
> +	__u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
> +
> +	if (ret)
> +		return 0;
> +
> +	ret = -EPERM;
> +	if (caps.cap[cap_index] & cap_mask)
> +		return 0;
> +
> +	return -EPERM;
> +}
> +
> +SEC("lsm.s/userns_create")
> +int BPF_PROG(test_sleepable_userns_create, const struct cred *cred, int ret)
> +{
An empty program is weird.  If the intention is
to ensure a sleepable program can attach to userns_create,
move the test logic here and remove the non-sleepable
program above.

> +	return 0;
> +}
> -- 
> 2.30.2
>
Christian Brauner July 22, 2022, 8:15 a.m. UTC | #2
On Thu, Jul 21, 2022 at 12:28:07PM -0500, Frederick Lawler wrote:
> The LSM hook userns_create was introduced to provide LSM's an
> opportunity to block or allow unprivileged user namespace creation. This
> test serves two purposes: it provides a test eBPF implementation, and
> tests the hook successfully blocks or allows user namespace creation.
> 
> This tests 4 cases:
> 
>         1. Unattached bpf program does not block unpriv user namespace
>            creation.
>         2. Attached bpf program allows user namespace creation given
>            CAP_SYS_ADMIN privileges.
>         3. Attached bpf program denies user namespace creation for a
>            user without CAP_SYS_ADMIN.
>         4. The sleepable implementation loads

Sounds good!

> 
> Signed-off-by: Frederick Lawler <fred@cloudflare.com>
> 
> ---
> The generic deny_namespace file name is used for future namespace
> expansion. I didn't want to limit these files to just the create_user_ns
> hook.
> Changes since v2:
> - Rename create_user_ns hook to userns_create
> Changes since v1:
> - Introduce this patch
> ---
>  .../selftests/bpf/prog_tests/deny_namespace.c | 88 +++++++++++++++++++
>  .../selftests/bpf/progs/test_deny_namespace.c | 39 ++++++++
>  2 files changed, 127 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/deny_namespace.c
>  create mode 100644 tools/testing/selftests/bpf/progs/test_deny_namespace.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
> new file mode 100644
> index 000000000000..9e4714295008
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#define _GNU_SOURCE
> +#include <test_progs.h>
> +#include "test_deny_namespace.skel.h"
> +#include <sched.h>
> +#include "cap_helpers.h"
> +
> +#define STACK_SIZE (1024 * 1024)
> +static char child_stack[STACK_SIZE];
> +
> +int clone_callback(void *arg)
> +{
> +	return 0;
> +}
> +
> +static int create_new_user_ns(void)
> +{
> +	int status;
> +	pid_t cpid;
> +
> +	cpid = clone(clone_callback, child_stack + STACK_SIZE,
> +		     CLONE_NEWUSER | SIGCHLD, NULL);
> +
> +	if (cpid == -1)
> +		return errno;
> +
> +	if (cpid == 0)
> +		return 0;

Martin asked about this already but fwiw, this cannot happen with
clone(). The clone() function doesn't return twice. It always returns
the PID of the child process or an error.

> +
> +	waitpid(cpid, &status, 0);
> +	if (WIFEXITED(status))
> +		return WEXITSTATUS(status);
> +
> +	return -1;
> +}

You can also just avoid the clone() dance and simply do something like:

/*
 * Wait for the child identified by pid, retrying whenever waitpid() is
 * interrupted by a signal (EINTR).
 *
 * Returns the child's exit status if it exited normally, or -1 if
 * waitpid() failed for any other reason or the child did not exit
 * normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	/* Only EINTR is worth retrying; any other failure is reported as -1. */
	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}

/*
 * Fork a child that tries to unshare() into a new user namespace and
 * report the outcome to the parent through the child's exit status.
 *
 * Return value:
 *   negative -> some internal error
 *   positive -> userns creation failed
 *   0        -> userns creation succeeded
 */
static int create_user_ns(void)
{
	pid_t child = fork();

	if (child < 0)
		return -1;

	/* Parent: collect the child's verdict. */
	if (child > 0)
		return wait_for_pid(child);

	/* Child: the exit status encodes whether unshare() succeeded. */
	_exit(unshare(CLONE_NEWUSER) ? EXIT_FAILURE : EXIT_SUCCESS);
}

Same difference since both codepaths hit the right spot in the kernel.

> +
> +static void test_userns_create_bpf(void)
> +{
> +	__u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
> +	__u64 old_caps = 0;
> +
> +	ASSERT_OK(create_new_user_ns(), "priv new user ns");
> +
> +	cap_disable_effective(cap_mask, &old_caps);
> +
> +	ASSERT_EQ(create_new_user_ns(), EPERM, "unpriv new user ns");
> +
> +	if (cap_mask & old_caps)
> +		cap_enable_effective(cap_mask, NULL);
> +}
> +
> +static void test_unpriv_userns_create_no_bpf(void)
> +{
> +	__u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
> +	__u64 old_caps = 0;
> +
> +	cap_disable_effective(cap_mask, &old_caps);
> +
> +	ASSERT_OK(create_new_user_ns(), "no-bpf unpriv new user ns");
> +
> +	if (cap_mask & old_caps)
> +		cap_enable_effective(cap_mask, NULL);
> +}
> +
> +void test_deny_namespace(void)
> +{
> +	struct test_deny_namespace *skel = NULL;
> +	int err;
> +
> +	if (test__start_subtest("unpriv_userns_create_no_bpf"))
> +		test_unpriv_userns_create_no_bpf();
> +
> +	skel = test_deny_namespace__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "skel load"))
> +		goto close_prog;
> +
> +	err = test_deny_namespace__attach(skel);
> +	if (!ASSERT_OK(err, "attach"))
> +		goto close_prog;
> +
> +	if (test__start_subtest("userns_create_bpf"))
> +		test_userns_create_bpf();
> +
> +	test_deny_namespace__detach(skel);
> +
> +close_prog:
> +	test_deny_namespace__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
> new file mode 100644
> index 000000000000..9ec9dabc8372
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
> @@ -0,0 +1,39 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +#include <errno.h>
> +#include <linux/capability.h>
> +
> +struct kernel_cap_struct {
> +	__u32 cap[_LINUX_CAPABILITY_U32S_3];
> +} __attribute__((preserve_access_index));
> +
> +struct cred {
> +	struct kernel_cap_struct cap_effective;
> +} __attribute__((preserve_access_index));
> +
> +char _license[] SEC("license") = "GPL";
> +
> +SEC("lsm/userns_create")
> +int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
> +{
> +	struct kernel_cap_struct caps = cred->cap_effective;
> +	int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
> +	__u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
> +
> +	if (ret)
> +		return 0;
> +
> +	ret = -EPERM;
> +	if (caps.cap[cap_index] & cap_mask)
> +		return 0;
> +
> +	return -EPERM;
> +}

Looks nice and simple.
Acked-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Frederick Lawler July 22, 2022, 1:41 p.m. UTC | #3
On 7/22/22 1:07 AM, Martin KaFai Lau wrote:
> On Thu, Jul 21, 2022 at 12:28:07PM -0500, Frederick Lawler wrote:
>> The LSM hook userns_create was introduced to provide LSM's an
>> opportunity to block or allow unprivileged user namespace creation. This
>> test serves two purposes: it provides a test eBPF implementation, and
>> tests the hook successfully blocks or allows user namespace creation.
>>
>> This tests 4 cases:
>>
>>          1. Unattached bpf program does not block unpriv user namespace
>>             creation.
>>          2. Attached bpf program allows user namespace creation given
>>             CAP_SYS_ADMIN privileges.
>>          3. Attached bpf program denies user namespace creation for a
>>             user without CAP_SYS_ADMIN.
>>          4. The sleepable implementation loads
>>
>> Signed-off-by: Frederick Lawler <fred@cloudflare.com>
>>
>> ---
>> The generic deny_namespace file name is used for future namespace
>> expansion. I didn't want to limit these files to just the create_user_ns
>> hook.
>> Changes since v2:
>> - Rename create_user_ns hook to userns_create
>> Changes since v1:
>> - Introduce this patch
>> ---
>>   .../selftests/bpf/prog_tests/deny_namespace.c | 88 +++++++++++++++++++
>>   .../selftests/bpf/progs/test_deny_namespace.c | 39 ++++++++
>>   2 files changed, 127 insertions(+)
>>   create mode 100644 tools/testing/selftests/bpf/prog_tests/deny_namespace.c
>>   create mode 100644 tools/testing/selftests/bpf/progs/test_deny_namespace.c
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
>> new file mode 100644
>> index 000000000000..9e4714295008
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
>> @@ -0,0 +1,88 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#define _GNU_SOURCE
>> +#include <test_progs.h>
>> +#include "test_deny_namespace.skel.h"
>> +#include <sched.h>
>> +#include "cap_helpers.h"
>> +
>> +#define STACK_SIZE (1024 * 1024)
> Does the child need 1M stack space ?

No, I can reduce that.

> 
>> +static char child_stack[STACK_SIZE];
>> +
>> +int clone_callback(void *arg)
> static
> 
>> +{
>> +	return 0;
>> +}
>> +
>> +static int create_new_user_ns(void)
>> +{
>> +	int status;
>> +	pid_t cpid;
>> +
>> +	cpid = clone(clone_callback, child_stack + STACK_SIZE,
>> +		     CLONE_NEWUSER | SIGCHLD, NULL);
>> +
>> +	if (cpid == -1)
>> +		return errno;
>> +
>> +	if (cpid == 0)
> Not an expert in clone() call and it is not clear what 0
> return value mean from the man page.  Could you explain ?
> 

Good catch. This is using the libc clone().

>> +		return 0;
>> +
>> +	waitpid(cpid, &status, 0);
>> +	if (WIFEXITED(status))
>> +		return WEXITSTATUS(status);
>> +
>> +	return -1;
>> +}
>> +
>> +static void test_userns_create_bpf(void)
>> +{
>> +	__u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
>> +	__u64 old_caps = 0;
>> +
>> +	ASSERT_OK(create_new_user_ns(), "priv new user ns");
> Does it need to enable CAP_SYS_ADMIN first ?
> 

You're right, this should be set more explicitly. I ran the tests with the
vmtest.sh script supplied with selftests/bpf, which runs them as root. I
should always set CAP_SYS_ADMIN here to be consistent.

>> +
>> +	cap_disable_effective(cap_mask, &old_caps);
>> +
>> +	ASSERT_EQ(create_new_user_ns(), EPERM, "unpriv new user ns");
>> +
>> +	if (cap_mask & old_caps)
>> +		cap_enable_effective(cap_mask, NULL);
>> +}
>> +
>> +static void test_unpriv_userns_create_no_bpf(void)
>> +{
>> +	__u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
>> +	__u64 old_caps = 0;
>> +
>> +	cap_disable_effective(cap_mask, &old_caps);
>> +
>> +	ASSERT_OK(create_new_user_ns(), "no-bpf unpriv new user ns");
>> +
>> +	if (cap_mask & old_caps)
>> +		cap_enable_effective(cap_mask, NULL);
>> +}
>> +
>> +void test_deny_namespace(void)
>> +{
>> +	struct test_deny_namespace *skel = NULL;
>> +	int err;
>> +
>> +	if (test__start_subtest("unpriv_userns_create_no_bpf"))
>> +		test_unpriv_userns_create_no_bpf();
>> +
>> +	skel = test_deny_namespace__open_and_load();
>> +	if (!ASSERT_OK_PTR(skel, "skel load"))
>> +		goto close_prog;
>> +
>> +	err = test_deny_namespace__attach(skel);
>> +	if (!ASSERT_OK(err, "attach"))
>> +		goto close_prog;
>> +
>> +	if (test__start_subtest("userns_create_bpf"))
>> +		test_userns_create_bpf();
>> +
>> +	test_deny_namespace__detach(skel);
>> +
>> +close_prog:
>> +	test_deny_namespace__destroy(skel);
>> +}
>> diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
>> new file mode 100644
>> index 000000000000..9ec9dabc8372
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
>> @@ -0,0 +1,39 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include <linux/bpf.h>
>> +#include <bpf/bpf_helpers.h>
>> +#include <bpf/bpf_tracing.h>
>> +#include <errno.h>
>> +#include <linux/capability.h>
>> +
>> +struct kernel_cap_struct {
>> +	__u32 cap[_LINUX_CAPABILITY_U32S_3];
>> +} __attribute__((preserve_access_index));
>> +
>> +struct cred {
>> +	struct kernel_cap_struct cap_effective;
>> +} __attribute__((preserve_access_index));
>> +
>> +char _license[] SEC("license") = "GPL";
>> +
>> +SEC("lsm/userns_create")
>> +int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
>> +{
>> +	struct kernel_cap_struct caps = cred->cap_effective;
>> +	int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
>> +	__u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
>> +
>> +	if (ret)
>> +		return 0;
>> +
>> +	ret = -EPERM;
>> +	if (caps.cap[cap_index] & cap_mask)
>> +		return 0;
>> +
>> +	return -EPERM;
>> +}
>> +
>> +SEC("lsm.s/userns_create")
>> +int BPF_PROG(test_sleepable_userns_create, const struct cred *cred, int ret)
>> +{
> An empty program is weird.  If the intention is
> to ensure a sleepable program can attach to userns_create,
> move the test logic here and remove the non-sleepable
> program above.
> 

Sure, I can do that.

>> +	return 0;
>> +}
>> -- 
>> 2.30.2
>>
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
new file mode 100644
index 000000000000..9e4714295008
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
@@ -0,0 +1,88 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_deny_namespace.skel.h"
+#include <sched.h>
+#include "cap_helpers.h"
+
+#define STACK_SIZE (1024 * 1024)
+static char child_stack[STACK_SIZE];
+
+int clone_callback(void *arg)
+{
+	return 0;
+}
+
+static int create_new_user_ns(void)
+{
+	int status;
+	pid_t cpid;
+
+	cpid = clone(clone_callback, child_stack + STACK_SIZE,
+		     CLONE_NEWUSER | SIGCHLD, NULL);
+
+	if (cpid == -1)
+		return errno;
+
+	if (cpid == 0)
+		return 0;
+
+	waitpid(cpid, &status, 0);
+	if (WIFEXITED(status))
+		return WEXITSTATUS(status);
+
+	return -1;
+}
+
+static void test_userns_create_bpf(void)
+{
+	__u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
+	__u64 old_caps = 0;
+
+	ASSERT_OK(create_new_user_ns(), "priv new user ns");
+
+	cap_disable_effective(cap_mask, &old_caps);
+
+	ASSERT_EQ(create_new_user_ns(), EPERM, "unpriv new user ns");
+
+	if (cap_mask & old_caps)
+		cap_enable_effective(cap_mask, NULL);
+}
+
+static void test_unpriv_userns_create_no_bpf(void)
+{
+	__u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
+	__u64 old_caps = 0;
+
+	cap_disable_effective(cap_mask, &old_caps);
+
+	ASSERT_OK(create_new_user_ns(), "no-bpf unpriv new user ns");
+
+	if (cap_mask & old_caps)
+		cap_enable_effective(cap_mask, NULL);
+}
+
+void test_deny_namespace(void)
+{
+	struct test_deny_namespace *skel = NULL;
+	int err;
+
+	if (test__start_subtest("unpriv_userns_create_no_bpf"))
+		test_unpriv_userns_create_no_bpf();
+
+	skel = test_deny_namespace__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel load"))
+		goto close_prog;
+
+	err = test_deny_namespace__attach(skel);
+	if (!ASSERT_OK(err, "attach"))
+		goto close_prog;
+
+	if (test__start_subtest("userns_create_bpf"))
+		test_userns_create_bpf();
+
+	test_deny_namespace__detach(skel);
+
+close_prog:
+	test_deny_namespace__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
new file mode 100644
index 000000000000..9ec9dabc8372
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
@@ -0,0 +1,39 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+#include <linux/capability.h>
+
+struct kernel_cap_struct {
+	__u32 cap[_LINUX_CAPABILITY_U32S_3];
+} __attribute__((preserve_access_index));
+
+struct cred {
+	struct kernel_cap_struct cap_effective;
+} __attribute__((preserve_access_index));
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm/userns_create")
+int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
+{
+	struct kernel_cap_struct caps = cred->cap_effective;
+	int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
+	__u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
+
+	if (ret)
+		return 0;
+
+	ret = -EPERM;
+	if (caps.cap[cap_index] & cap_mask)
+		return 0;
+
+	return -EPERM;
+}
+
+SEC("lsm.s/userns_create")
+int BPF_PROG(test_sleepable_userns_create, const struct cred *cred, int ret)
+{
+	return 0;
+}