diff mbox series

[v2,2/4] capabilities: Add securebit to restrict userns caps

Message ID 20240609104355.442002-3-jcalmels@3xx0.net
State New
Headers show
Series Introduce user namespace capabilities | expand

Commit Message

Jonathan Calmels June 9, 2024, 10:43 a.m. UTC
This patch adds a new capability security bit designed to constrain a
task’s userns capability set to its bounding set. The reason for this is
twofold:

- This serves as a quick and easy way to lock down a set of capabilities
  for a task, thus ensuring that any namespace it creates will never be
  more privileged than itself is.
- This helps userspace transition to more secure defaults by not requiring
  specific logic for the userns capability set, or libcap support.

Example:

    # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \
            -c 'unshare -r grep Cap /proc/self/status'
    CapInh: 0000000000000000
    CapPrm: 000001fffffdffff
    CapEff: 000001fffffdffff
    CapBnd: 000001fffffdffff
    CapAmb: 0000000000000000
    CapUNs: 000001fffffdffff

Signed-off-by: Jonathan Calmels <jcalmels@3xx0.net>
---
 include/linux/securebits.h      |  1 +
 include/uapi/linux/securebits.h | 11 ++++++++++-
 kernel/user_namespace.c         |  5 +++++
 3 files changed, 16 insertions(+), 1 deletion(-)

Comments

Jarkko Sakkinen June 28, 2024, 2:43 p.m. UTC | #1
On Sun, 2024-06-09 at 03:43 -0700, Jonathan Calmels wrote:
> This patch adds a new capability security bit designed to constrain a


nit: if you think of it "This patch adds" could be just "add", right? :-)

Also name the exact thing/symbol/whatever here. This is not a HBO series.

> task’s userns capability set to its bounding set. The reason for this
> is
> twofold:
> 
> - This serves as a quick and easy way to lock down a set of
> capabilities
>   for a task, thus ensuring that any namespace it creates will never
> be
>   more privileged than itself is.
> - This helps userspace transition to more secure defaults by not
> requiring
>   specific logic for the userns capability set, or libcap support.
> 
> Example:
> 
>     # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \
>             -c 'unshare -r grep Cap /proc/self/status'
>     CapInh: 0000000000000000
>     CapPrm: 000001fffffdffff
>     CapEff: 000001fffffdffff
>     CapBnd: 000001fffffdffff
>     CapAmb: 0000000000000000
>     CapUNs: 000001fffffdffff
> 
> Signed-off-by: Jonathan Calmels <jcalmels@3xx0.net>
> ---
>  include/linux/securebits.h      |  1 +
>  include/uapi/linux/securebits.h | 11 ++++++++++-
>  kernel/user_namespace.c         |  5 +++++
>  3 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/securebits.h b/include/linux/securebits.h
> index 656528673983..5f9d85cd69c3 100644
> --- a/include/linux/securebits.h
> +++ b/include/linux/securebits.h
> @@ -5,4 +5,5 @@
>  #include <uapi/linux/securebits.h>
>  
>  #define issecure(X)		(issecure_mask(X) &
> current_cred_xxx(securebits))
> +#define iscredsecure(cred, X)	(issecure_mask(X) & cred-
> >securebits)
>  #endif /* !_LINUX_SECUREBITS_H */
> diff --git a/include/uapi/linux/securebits.h
> b/include/uapi/linux/securebits.h
> index d6d98877ff1a..2da3f4be4531 100644
> --- a/include/uapi/linux/securebits.h
> +++ b/include/uapi/linux/securebits.h
> @@ -52,10 +52,19 @@
>  #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \
>  			(issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_L
> OCKED))
>  
> +/* When set, user namespace capabilities are restricted to their
> parent's bounding set. */
> +#define SECURE_USERNS_STRICT_CAPS			8
> +#define SECURE_USERNS_STRICT_CAPS_LOCKED		9  /* make



> bit-8 immutable */
> +
> +#define SECBIT_USERNS_STRICT_CAPS
> (issecure_mask(SECURE_USERNS_STRICT_CAPS))
> +#define SECBIT_USERNS_STRICT_CAPS_LOCKED \
> +			(issecure_mask(SECURE_USERNS_STRICT_CAPS_LOC
> KED))
> +
>  #define
> SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
>  				
> issecure_mask(SECURE_NO_SETUID_FIXUP) | \
>  				 issecure_mask(SECURE_KEEP_CAPS) | \
> -				
> issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
> +				
> issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \
> +				

spurious new lines in the diff

please as first priority aim for absolute minimal diff or at least
do grow diff proactively like this.

If we really think after that, that we need some "extras" to the
patch set, then we decide that. These only take energy away from
reviewers.


> issecure_mask(SECURE_USERNS_STRICT_CAPS))
>  #define SECURE_ALL_LOCKS	(SECURE_ALL_BITS << 1)
>  
>  #endif /* _UAPI_LINUX_SECUREBITS_H */
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 7e624607330b..53848e2b68cd 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -10,6 +10,7 @@
>  #include <linux/cred.h>
>  #include <linux/securebits.h>
>  #include <linux/security.h>
> +#include <linux/capability.h>
>  #include <linux/keyctl.h>
>  #include <linux/key-type.h>
>  #include <keys/user-type.h>
> @@ -42,6 +43,10 @@ static void dec_user_namespaces(struct ucounts
> *ucounts)
>  
>  static void set_cred_user_ns(struct cred *cred, struct
> user_namespace *user_ns)
>  {
> +	/* Limit userns capabilities to our parent's bounding set.
> */
> +	if (iscredsecure(cred, SECURE_USERNS_STRICT_CAPS))
> +		cred->cap_userns = cap_intersect(cred->cap_userns,
> cred->cap_bset);
> +
>  	/* Start with the capabilities defined in the userns set. */
>  	cred->cap_bset = cred->cap_userns;
>  	cred->cap_permitted = cred->cap_userns;

Going for 4 week holiday starting for next week so focus in on nits
but since this is something to do access control:

1. Please go surgical with the diff's because this type of patches
also require a surgical review. Now reviewing this like riding on 
a bumpy road with a car of which suspension mechanics is broken
;-)

Hope you grab my argument here. I only want to look at the problem
and solution for that not random stuff..

BR, Jarkko
Jarkko Sakkinen June 28, 2024, 2:45 p.m. UTC | #2
On Fri, 2024-06-28 at 17:43 +0300, Jarkko Sakkinen wrote:
> On Sun, 2024-06-09 at 03:43 -0700, Jonathan Calmels wrote:
> > This patch adds a new capability security bit designed to constrain
> > a
> 
> 
> nit: if you think of it "This patch adds" could be just "add", right?
> :-)
> 
> Also name the exact thing/symbol/whatever here. This is not a HBO
> series.
> 
> > task’s userns capability set to its bounding set. The reason for
> > this
> > is
> > twofold:
> > 
> > - This serves as a quick and easy way to lock down a set of
> > capabilities
> >   for a task, thus ensuring that any namespace it creates will
> > never
> > be
> >   more privileged than itself is.
> > - This helps userspace transition to more secure defaults by not
> > requiring
> >   specific logic for the userns capability set, or libcap support.
> > 
> > Example:
> > 
> >     # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \
> >             -c 'unshare -r grep Cap /proc/self/status'
> >     CapInh: 0000000000000000
> >     CapPrm: 000001fffffdffff
> >     CapEff: 000001fffffdffff
> >     CapBnd: 000001fffffdffff
> >     CapAmb: 0000000000000000
> >     CapUNs: 000001fffffdffff
> > 
> > Signed-off-by: Jonathan Calmels <jcalmels@3xx0.net>
> > ---
> >  include/linux/securebits.h      |  1 +
> >  include/uapi/linux/securebits.h | 11 ++++++++++-
> >  kernel/user_namespace.c         |  5 +++++
> >  3 files changed, 16 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/linux/securebits.h
> > b/include/linux/securebits.h
> > index 656528673983..5f9d85cd69c3 100644
> > --- a/include/linux/securebits.h
> > +++ b/include/linux/securebits.h
> > @@ -5,4 +5,5 @@
> >  #include <uapi/linux/securebits.h>
> >  
> >  #define issecure(X)		(issecure_mask(X) &
> > current_cred_xxx(securebits))
> > +#define iscredsecure(cred, X)	(issecure_mask(X) & cred-
> > > securebits)
> >  #endif /* !_LINUX_SECUREBITS_H */
> > diff --git a/include/uapi/linux/securebits.h
> > b/include/uapi/linux/securebits.h
> > index d6d98877ff1a..2da3f4be4531 100644
> > --- a/include/uapi/linux/securebits.h
> > +++ b/include/uapi/linux/securebits.h
> > @@ -52,10 +52,19 @@
> >  #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \
> >  			(issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE
> > _L
> > OCKED))
> >  
> > +/* When set, user namespace capabilities are restricted to their
> > parent's bounding set. */
> > +#define SECURE_USERNS_STRICT_CAPS			8
> > +#define SECURE_USERNS_STRICT_CAPS_LOCKED		9  /* make
> 
> 
> 
> > bit-8 immutable */
> > +
> > +#define SECBIT_USERNS_STRICT_CAPS
> > (issecure_mask(SECURE_USERNS_STRICT_CAPS))
> > +#define SECBIT_USERNS_STRICT_CAPS_LOCKED \
> > +			(issecure_mask(SECURE_USERNS_STRICT_CAPS_L
> > OC
> > KED))
> > +
> >  #define
> > SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
> >  				
> > issecure_mask(SECURE_NO_SETUID_FIXUP) | \
> >  				 issecure_mask(SECURE_KEEP_CAPS) |
> > \
> > -				
> > issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
> > +				
> > issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \
> > +				
> 
> spurious new lines in the diff
> 
> please as first priority aim for absolute minimal diff or at least
> do grow diff proactively like this.
> 
> If we really think after that, that we need some "extras" to the
> patch set, then we decide that. These only take energy away from
> reviewers.
> 
> 
> > issecure_mask(SECURE_USERNS_STRICT_CAPS))
> >  #define SECURE_ALL_LOCKS	(SECURE_ALL_BITS << 1)
> >  
> >  #endif /* _UAPI_LINUX_SECUREBITS_H */
> > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> > index 7e624607330b..53848e2b68cd 100644
> > --- a/kernel/user_namespace.c
> > +++ b/kernel/user_namespace.c
> > @@ -10,6 +10,7 @@
> >  #include <linux/cred.h>
> >  #include <linux/securebits.h>
> >  #include <linux/security.h>
> > +#include <linux/capability.h>
> >  #include <linux/keyctl.h>
> >  #include <linux/key-type.h>
> >  #include <keys/user-type.h>
> > @@ -42,6 +43,10 @@ static void dec_user_namespaces(struct ucounts
> > *ucounts)
> >  
> >  static void set_cred_user_ns(struct cred *cred, struct
> > user_namespace *user_ns)
> >  {
> > +	/* Limit userns capabilities to our parent's bounding set.
> > */
> > +	if (iscredsecure(cred, SECURE_USERNS_STRICT_CAPS))
> > +		cred->cap_userns = cap_intersect(cred->cap_userns,
> > cred->cap_bset);
> > +
> >  	/* Start with the capabilities defined in the userns set.
> > */
> >  	cred->cap_bset = cred->cap_userns;
> >  	cred->cap_permitted = cred->cap_userns;
> 
> Going for 4 week holiday starting for next week so focus in on nits
> but since this is something to do access control:
> 
> 1. Please go surgical with the diff's because this type of patches
> also require a surgical review. Now reviewing this like riding on 
> a bumpy road with a car of which suspension mechanics is broken
> ;-)
> 
> Hope you grab my argument here. I only want to look at the problem
> and solution for that not random stuff..

I skip the other patches because of my eager to get on holiday but
my instinct tells me that at least some of this feedback applies
to all of the patches.

So put your solution in sight, not clean ups.


BR, Jarkko
diff mbox series

Patch

diff --git a/include/linux/securebits.h b/include/linux/securebits.h
index 656528673983..5f9d85cd69c3 100644
--- a/include/linux/securebits.h
+++ b/include/linux/securebits.h
@@ -5,4 +5,5 @@ 
 #include <uapi/linux/securebits.h>
 
 #define issecure(X)		(issecure_mask(X) & current_cred_xxx(securebits))
+#define iscredsecure(cred, X)	(issecure_mask(X) & cred->securebits)
 #endif /* !_LINUX_SECUREBITS_H */
diff --git a/include/uapi/linux/securebits.h b/include/uapi/linux/securebits.h
index d6d98877ff1a..2da3f4be4531 100644
--- a/include/uapi/linux/securebits.h
+++ b/include/uapi/linux/securebits.h
@@ -52,10 +52,19 @@ 
 #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \
 			(issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED))
 
+/* When set, user namespace capabilities are restricted to their parent's bounding set. */
+#define SECURE_USERNS_STRICT_CAPS			8
+#define SECURE_USERNS_STRICT_CAPS_LOCKED		9  /* make bit-8 immutable */
+
+#define SECBIT_USERNS_STRICT_CAPS (issecure_mask(SECURE_USERNS_STRICT_CAPS))
+#define SECBIT_USERNS_STRICT_CAPS_LOCKED \
+			(issecure_mask(SECURE_USERNS_STRICT_CAPS_LOCKED))
+
 #define SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
 				 issecure_mask(SECURE_NO_SETUID_FIXUP) | \
 				 issecure_mask(SECURE_KEEP_CAPS) | \
-				 issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+				 issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \
+				 issecure_mask(SECURE_USERNS_STRICT_CAPS))
 #define SECURE_ALL_LOCKS	(SECURE_ALL_BITS << 1)
 
 #endif /* _UAPI_LINUX_SECUREBITS_H */
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 7e624607330b..53848e2b68cd 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -10,6 +10,7 @@ 
 #include <linux/cred.h>
 #include <linux/securebits.h>
 #include <linux/security.h>
+#include <linux/capability.h>
 #include <linux/keyctl.h>
 #include <linux/key-type.h>
 #include <keys/user-type.h>
@@ -42,6 +43,10 @@  static void dec_user_namespaces(struct ucounts *ucounts)
 
 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 {
+	/* Limit userns capabilities to our parent's bounding set. */
+	if (iscredsecure(cred, SECURE_USERNS_STRICT_CAPS))
+		cred->cap_userns = cap_intersect(cred->cap_userns, cred->cap_bset);
+
 	/* Start with the capabilities defined in the userns set. */
 	cred->cap_bset = cred->cap_userns;
 	cred->cap_permitted = cred->cap_userns;