diff mbox series

[v4,2/4] pidfd: add PIDFD_SELF_* sentinels to refer to own thread/process

Message ID 3bf7f2d8efc768007b5de8122275405afc9942d4.1729198898.git.lorenzo.stoakes@oracle.com
State Superseded
Headers show
Series introduce PIDFD_SELF* sentinels | expand

Commit Message

Lorenzo Stoakes Oct. 17, 2024, 9:05 p.m. UTC
It is useful to be able to utilise the pidfd mechanism to reference the
current thread or process (from a userland point of view - thread group
leader from the kernel's point of view).

Therefore introduce PIDFD_SELF_THREAD to refer to the current thread, and
PIDFD_SELF_THREAD_GROUP to refer to the current thread group leader.

For convenience and to avoid confusion from userland's perspective we alias
these:

* PIDFD_SELF is an alias for PIDFD_SELF_THREAD - This is nearly always what
  the user will want to use, as they would find it surprising if for
  instance fd's were unshared()'d and they wanted to invoke pidfd_getfd()
  and that failed.

* PIDFD_SELF_PROCESS is an alias for PIDFD_SELF_THREAD_GROUP - Most users
  have no concept of thread groups or what a thread group leader is, and
  from userland's perspective and nomenclature this is what userland
  considers to be a process.

Due to the refactoring of the central __pidfd_get_pid() function we can
implement this functionality centrally, providing the use of this sentinel
in most functionality which utilises pidfd's.

We need to explicitly adjust kernel_waitid_prepare() to permit this (though
it wouldn't really make sense to use this there, we provide the ability for
consistency).

We explicitly disallow use of this in setns(), which would otherwise have
required explicit custom handling, as it doesn't make sense to set the
current calling thread to join the namespace of itself.

As the callers of pidfd_get_pid() expect an increased reference count on
the pid we do so in the self case, reducing churn and avoiding any breakage
from existing logic which decrements this reference count.

This change implicitly provides PIDFD_SELF_* support in the waitid(P_PIDFS,
...), process_madvise(), process_mrelease(), pidfd_send_signal(), and
pidfd_getfd() system calls.

Things such as polling a pidfs and general fd operations are not supported,
this strictly provides the sentinel for APIs which explicitly accept a
pidfd.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 include/linux/pid.h        |  8 ++++--
 include/uapi/linux/pidfd.h | 15 +++++++++++
 kernel/exit.c              |  3 ++-
 kernel/nsproxy.c           |  1 +
 kernel/pid.c               | 51 ++++++++++++++++++++++++--------------
 5 files changed, 57 insertions(+), 21 deletions(-)

Comments

Lorenzo Stoakes Oct. 21, 2024, 8:11 a.m. UTC | #1
On Thu, Oct 17, 2024 at 10:05:50PM +0100, Lorenzo Stoakes wrote:
> It is useful to be able to utilise the pidfd mechanism to reference the
> current thread or process (from a userland point of view - thread group
> leader from the kernel's point of view).
>
> Therefore introduce PIDFD_SELF_THREAD to refer to the current thread, and
> PIDFD_SELF_THREAD_GROUP to refer to the current thread group leader.
>
> For convenience and to avoid confusion from userland's perspective we alias
> these:
>
> * PIDFD_SELF is an alias for PIDFD_SELF_THREAD - This is nearly always what
>   the user will want to use, as they would find it surprising if for
>   instance fd's were unshared()'d and they wanted to invoke pidfd_getfd()
>   and that failed.
>
> * PIDFD_SELF_PROCESS is an alias for PIDFD_SELF_THREAD_GROUP - Most users
>   have no concept of thread groups or what a thread group leader is, and
>   from userland's perspective and nomenclature this is what userland
>   considers to be a process.
>
> Due to the refactoring of the central __pidfd_get_pid() function we can
> implement this functionality centrally, providing the use of this sentinel
> in most functionality which utilises pidfd's.
>
> We need to explicitly adjust kernel_waitid_prepare() to permit this (though
> it wouldn't really make sense to use this there, we provide the ability for
> consistency).
>
> We explicitly disallow use of this in setns(), which would otherwise have
> required explicit custom handling, as it doesn't make sense to set the
> current calling thread to join the namespace of itself.
>
> As the callers of pidfd_get_pid() expect an increased reference count on
> the pid we do so in the self case, reducing churn and avoiding any breakage
> from existing logic which decrements this reference count.
>
> This change implicitly provides PIDFD_SELF_* support in the waitid(P_PIDFS,
> ...), process_madvise(), process_mrelease(), pidfd_send_signal(), and
> pidfd_getfd() system calls.
>
> Things such as polling a pidfs and general fd operations are not supported,
> this strictly provides the sentinel for APIs which explicitly accept a
> pidfd.
>

We need a:

Suggested-by: Pedro Falcato <pedro.falcato@gmail.com>

here, will add if respin, otherwise could you add Christian?

My apologies Pedro, this was wholly an oversight and my mistake!

> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Shakeel Butt Oct. 23, 2024, 12:53 a.m. UTC | #2
On Thu, Oct 17, 2024 at 10:05:50PM GMT, Lorenzo Stoakes wrote:
> It is useful to be able to utilise the pidfd mechanism to reference the
> current thread or process (from a userland point of view - thread group
> leader from the kernel's point of view).
> 
> Therefore introduce PIDFD_SELF_THREAD to refer to the current thread, and
> PIDFD_SELF_THREAD_GROUP to refer to the current thread group leader.
> 
> For convenience and to avoid confusion from userland's perspective we alias
> these:
> 
> * PIDFD_SELF is an alias for PIDFD_SELF_THREAD - This is nearly always what
>   the user will want to use, as they would find it surprising if for
>   instance fd's were unshared()'d and they wanted to invoke pidfd_getfd()
>   and that failed.
> 
> * PIDFD_SELF_PROCESS is an alias for PIDFD_SELF_THREAD_GROUP - Most users
>   have no concept of thread groups or what a thread group leader is, and
>   from userland's perspective and nomenclature this is what userland
>   considers to be a process.

Should users use PIDFD_SELF_PROCESS in process_madvise() for self
madvise() (once the support is added)?

> 
[...]
>  
> +static struct pid *pidfd_get_pid_self(unsigned int pidfd, unsigned int *flags)
> +{
> +	bool is_thread = pidfd == PIDFD_SELF_THREAD;
> +	enum pid_type type = is_thread ? PIDTYPE_PID : PIDTYPE_TGID;
> +	struct pid *pid = *task_pid_ptr(current, type);
> +
> +	/* The caller expects an elevated reference count. */
> +	get_pid(pid);

Do you want this helper to work for scenarios where pid is used across
context? Otherwise can't we get rid of this get and later put for self?

> +	return pid;
> +}
> +

Overall looks good to me.

Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Lorenzo Stoakes Oct. 23, 2024, 7:18 a.m. UTC | #3
On Tue, Oct 22, 2024 at 05:53:00PM -0700, Shakeel Butt wrote:
> On Thu, Oct 17, 2024 at 10:05:50PM GMT, Lorenzo Stoakes wrote:
> > It is useful to be able to utilise the pidfd mechanism to reference the
> > current thread or process (from a userland point of view - thread group
> > leader from the kernel's point of view).
> >
> > Therefore introduce PIDFD_SELF_THREAD to refer to the current thread, and
> > PIDFD_SELF_THREAD_GROUP to refer to the current thread group leader.
> >
> > For convenience and to avoid confusion from userland's perspective we alias
> > these:
> >
> > * PIDFD_SELF is an alias for PIDFD_SELF_THREAD - This is nearly always what
> >   the user will want to use, as they would find it surprising if for
> >   instance fd's were unshared()'d and they wanted to invoke pidfd_getfd()
> >   and that failed.
> >
> > * PIDFD_SELF_PROCESS is an alias for PIDFD_SELF_THREAD_GROUP - Most users
> >   have no concept of thread groups or what a thread group leader is, and
> >   from userland's perspective and nomenclature this is what userland
> >   considers to be a process.
>
> Should users use PIDFD_SELF_PROCESS in process_madvise() for self
> madvise() (once the support is added)?

You can use either it will make no difference as both will get you to
current->mm which has to be shared. So I'd go with PIDFD_SELF for brevity
:)

This series and the prerequisites I already added to process_madvise()
already provide support so with this in you can just use this for that.

>
> >
> [...]
> >
> > +static struct pid *pidfd_get_pid_self(unsigned int pidfd, unsigned int *flags)
> > +{
> > +	bool is_thread = pidfd == PIDFD_SELF_THREAD;
> > +	enum pid_type type = is_thread ? PIDTYPE_PID : PIDTYPE_TGID;
> > +	struct pid *pid = *task_pid_ptr(current, type);
> > +
> > +	/* The caller expects an elevated reference count. */
> > +	get_pid(pid);
>
> Do you want this helper to work for scenarios where pid is used across
> context? Otherwise can't we get rid of this get and later put for self?

Yeah I hate doing this but it's to keep things as generic as possible and
to ensure that no user of this logic accidentally drops the reference count
unintentionally + to reduce churn.

I think doing it this way is better than trying to special case the put,
and in any case if you did the 'old' way of referencing yourself you'd have
ended up doing this anyway so it's at least no delta!

>
> > +	return pid;
> > +}
> > +
>
> Overall looks good to me.
>
> Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>

Thanks!
Shakeel Butt Oct. 23, 2024, 5:18 p.m. UTC | #4
On Wed, Oct 23, 2024 at 08:18:35AM GMT, Lorenzo Stoakes wrote:
> On Tue, Oct 22, 2024 at 05:53:00PM -0700, Shakeel Butt wrote:
> > On Thu, Oct 17, 2024 at 10:05:50PM GMT, Lorenzo Stoakes wrote:
> > > It is useful to be able to utilise the pidfd mechanism to reference the
> > > current thread or process (from a userland point of view - thread group
> > > leader from the kernel's point of view).
> > >
> > > Therefore introduce PIDFD_SELF_THREAD to refer to the current thread, and
> > > PIDFD_SELF_THREAD_GROUP to refer to the current thread group leader.
> > >
> > > For convenience and to avoid confusion from userland's perspective we alias
> > > these:
> > >
> > > * PIDFD_SELF is an alias for PIDFD_SELF_THREAD - This is nearly always what
> > >   the user will want to use, as they would find it surprising if for
> > >   instance fd's were unshared()'d and they wanted to invoke pidfd_getfd()
> > >   and that failed.
> > >
> > > * PIDFD_SELF_PROCESS is an alias for PIDFD_SELF_THREAD_GROUP - Most users
> > >   have no concept of thread groups or what a thread group leader is, and
> > >   from userland's perspective and nomenclature this is what userland
> > >   considers to be a process.
> >
> > Should users use PIDFD_SELF_PROCESS in process_madvise() for self
> > madvise() (once the support is added)?
> 
> You can use either it will make no difference as both will get you to
> current->mm which has to be shared. So I'd go with PIDFD_SELF for brevity
> :)
> 
> This series and the prerequisites I already added to process_madvise()
> already provide support so with this in you can just use this for that.

Thanks a lot, this is awesome. Is the plan for this series to go through
mm-tree or through Christian?
Lorenzo Stoakes Oct. 23, 2024, 5:24 p.m. UTC | #5
On Wed, Oct 23, 2024 at 10:18:28AM -0700, Shakeel Butt wrote:
> On Wed, Oct 23, 2024 at 08:18:35AM GMT, Lorenzo Stoakes wrote:
> > On Tue, Oct 22, 2024 at 05:53:00PM -0700, Shakeel Butt wrote:
> > > On Thu, Oct 17, 2024 at 10:05:50PM GMT, Lorenzo Stoakes wrote:
> > > > It is useful to be able to utilise the pidfd mechanism to reference the
> > > > current thread or process (from a userland point of view - thread group
> > > > leader from the kernel's point of view).
> > > >
> > > > Therefore introduce PIDFD_SELF_THREAD to refer to the current thread, and
> > > > PIDFD_SELF_THREAD_GROUP to refer to the current thread group leader.
> > > >
> > > > For convenience and to avoid confusion from userland's perspective we alias
> > > > these:
> > > >
> > > > * PIDFD_SELF is an alias for PIDFD_SELF_THREAD - This is nearly always what
> > > >   the user will want to use, as they would find it surprising if for
> > > >   instance fd's were unshared()'d and they wanted to invoke pidfd_getfd()
> > > >   and that failed.
> > > >
> > > > * PIDFD_SELF_PROCESS is an alias for PIDFD_SELF_THREAD_GROUP - Most users
> > > >   have no concept of thread groups or what a thread group leader is, and
> > > >   from userland's perspective and nomenclature this is what userland
> > > >   considers to be a process.
> > >
> > > Should users use PIDFD_SELF_PROCESS in process_madvise() for self
> > > madvise() (once the support is added)?
> >
> > You can use either it will make no difference as both will get you to
> > current->mm which has to be shared. So I'd go with PIDFD_SELF for brevity
> > :)
> >
> > This series and the prerequisites I already added to process_madvise()
> > already provide support so with this in you can just use this for that.
>
> Thanks a lot, this is awesome. Is the plan for this series to go through
> mm-tree or through Christian?
>

Thanks!

I am assuming this will go through Christian's tree as entirely within the
pidfd realm :)

Christian - I am assuming this is your expectation too right?

I cc'd linux-mm more for convenience as obviously am hoping to use this for
an mm thing myself.
diff mbox series

Patch

diff --git a/include/linux/pid.h b/include/linux/pid.h
index d466890e1b35..3b2ac7567a88 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -78,11 +78,15 @@  struct file;
  * __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd.
  *
  * @pidfd:      The pidfd whose pid we want, or the fd of a /proc/<pid> file if
- *              @alloc_proc is also set.
+ *              @alloc_proc is also set, or PIDFD_SELF_* to refer to the current
+ *              thread or thread group leader.
  * @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead
  *              of a pidfd, and this will be used to determine the pid.
+
  * @flags:      Output variable, if non-NULL, then the file->f_flags of the
- *              pidfd will be set here.
+ *              pidfd will be set here or If PIDFD_SELF_THREAD is set, this is
+ *              set to PIDFD_THREAD, otherwise if PIDFD_SELF_THREAD_GROUP then
+ *              this is set to zero.
  *
  * Returns: If successful, the pid associated with the pidfd, otherwise an
  *          error.
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 565fc0629fff..0ca2ebf906fd 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -29,4 +29,19 @@ 
 #define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
 #define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
 
+/*
+ * Special sentinel values which can be used to refer to the current thread or
+ * thread group leader (which from a userland perspective is the process).
+ */
+#define PIDFD_SELF		PIDFD_SELF_THREAD
+#define PIDFD_SELF_PROCESS	PIDFD_SELF_THREAD_GROUP
+
+#define PIDFD_SELF_THREAD	-100 /* Current thread. */
+#define PIDFD_SELF_THREAD_GROUP	-200 /* Current thread group leader. */
+
+static inline int pidfd_is_self_sentinel(pid_t pid)
+{
+	return pid == PIDFD_SELF_THREAD || pid == PIDFD_SELF_THREAD_GROUP;
+}
+
 #endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index 619f0014c33b..3eb20f8252ee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -71,6 +71,7 @@ 
 #include <linux/user_events.h>
 #include <linux/uaccess.h>
 
+#include <uapi/linux/pidfd.h>
 #include <uapi/linux/wait.h>
 
 #include <asm/unistd.h>
@@ -1739,7 +1740,7 @@  int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
 		break;
 	case P_PIDFD:
 		type = PIDTYPE_PID;
-		if (upid < 0)
+		if (upid < 0 && !pidfd_is_self_sentinel(upid))
 			return -EINVAL;
 
 		pid = pidfd_get_pid(upid, &f_flags);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index dc952c3b05af..d239f7eeaa1f 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -550,6 +550,7 @@  SYSCALL_DEFINE2(setns, int, fd, int, flags)
 	struct nsset nsset = {};
 	int err = 0;
 
+	/* If fd is PIDFD_SELF_*, implicitly fail here, as invalid. */
 	if (!fd_file(f))
 		return -EBADF;
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 94c97559e5c5..8742157b36f8 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -535,33 +535,48 @@  struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 }
 EXPORT_SYMBOL_GPL(find_ge_pid);
 
+static struct pid *pidfd_get_pid_self(unsigned int pidfd, unsigned int *flags)
+{
+	bool is_thread = pidfd == PIDFD_SELF_THREAD;
+	enum pid_type type = is_thread ? PIDTYPE_PID : PIDTYPE_TGID;
+	struct pid *pid = *task_pid_ptr(current, type);
+
+	/* The caller expects an elevated reference count. */
+	get_pid(pid);
+	return pid;
+}
+
 struct pid *__pidfd_get_pid(unsigned int pidfd, bool allow_proc,
 			    unsigned int *flags)
 {
-	struct pid *pid;
-	struct fd f = fdget(pidfd);
-	struct file *file = fd_file(f);
+	if (pidfd_is_self_sentinel(pidfd)) {
+		return pidfd_get_pid_self(pidfd, flags);
+	} else {
+		struct pid *pid;
+		struct fd f = fdget(pidfd);
+		struct file *file = fd_file(f);
 
-	if (!file)
-		return ERR_PTR(-EBADF);
+		if (!file)
+			return ERR_PTR(-EBADF);
 
-	pid = pidfd_pid(file);
-	/* If we allow opening a pidfd via /proc/<pid>, do so. */
-	if (IS_ERR(pid) && allow_proc)
-		pid = tgid_pidfd_to_pid(file);
+		pid = pidfd_pid(file);
+		/* If we allow opening a pidfd via /proc/<pid>, do so. */
+		if (IS_ERR(pid) && allow_proc)
+			pid = tgid_pidfd_to_pid(file);
 
-	if (IS_ERR(pid)) {
+		if (IS_ERR(pid)) {
+			fdput(f);
+			return pid;
+		}
+
+		/* Pin pid before we release fd. */
+		get_pid(pid);
+		if (flags)
+			*flags = file->f_flags;
 		fdput(f);
+
 		return pid;
 	}
-
-	/* Pin pid before we release fd. */
-	get_pid(pid);
-	if (flags)
-		*flags = file->f_flags;
-	fdput(f);
-
-	return pid;
 }
 
 /**