Message ID | 20220107021502.1121597-2-lgoncalv@redhat.com |
---|---|
State | New |
Headers | show |
Series | Linux v5.10.90-rt61-rc1 | expand |
Hi, OT: Btw for some reason you're sending a copy to "stable-rt@vger.kernel.org"@redhat.com. On Fri, Jan 7, 2022 at 8:04 PM Luis Claudio R. Goncalves <lgoncalv@redhat.com> wrote: > > From: Thomas Gleixner <tglx@linutronix.de> > > v5.10.90-rt61-rc1 stable review patch. > If anyone has any objections, please let me know. > > ----------- > > > Upstream commit b542e383d8c005f06a131e2b40d5889b812f19c6 > > The recursion protection for eventfd_signal() is based on a per CPU > variable and relies on the !RT semantics of spin_lock_irqsave() for > protecting this per CPU variable. On RT kernels spin_lock_irqsave() neither > disables preemption nor interrupts which allows the spin lock held section > to be preempted. If the preempting task invokes eventfd_signal() as well, > then the recursion warning triggers. > > Paolo suggested to protect the per CPU variable with a local lock, but > that's heavyweight and actually not necessary. The goal of this protection > is to prevent the task stack from overflowing, which can be achieved with a > per task recursion protection as well. > > Replace the per CPU variable with a per task bit similar to other recursion > protection bits like task_struct::in_page_owner. This works on both !RT and > RT kernels and removes as a side effect the extra per CPU storage. > > No functional change for !RT kernels. 
> > Reported-by: Daniel Bristot de Oliveira <bristot@redhat.com> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Tested-by: Daniel Bristot de Oliveira <bristot@redhat.com> > Acked-by: Jason Wang <jasowang@redhat.com> > Cc: Al Viro <viro@zeniv.linux.org.uk> > Link: https://lore.kernel.org/r/87wnp9idso.ffs@tglx > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> > --- > fs/aio.c | 2 +- > fs/eventfd.c | 12 +++++------- > include/linux/eventfd.h | 11 +++++------ > include/linux/sched.h | 4 ++++ > 4 files changed, 15 insertions(+), 14 deletions(-) > > diff --git a/fs/aio.c b/fs/aio.c > index c72b2c51b446c..f7d47c9ff6deb 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -1761,7 +1761,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, > list_del_init(&req->wait.entry); > list_del(&iocb->ki_list); > iocb->ki_res.res = mangle_poll(mask); > - if (iocb->ki_eventfd && eventfd_signal_count()) { > + if (iocb->ki_eventfd && eventfd_signal_allowed()) { IIUC, the logic changed and this should read !eventfd_signal_allowed(). Or am I missing something here? Actually checking other stable branches and upstream, this looks to be a typo only in v5.10.90-rt61-rc1 --nX > iocb = NULL; > INIT_WORK(&req->work, aio_poll_put_work); > schedule_work(&req->work); > diff --git a/fs/eventfd.c b/fs/eventfd.c > index df466ef81dddf..9035ca60bfcf3 100644 > --- a/fs/eventfd.c > +++ b/fs/eventfd.c > @@ -25,8 +25,6 @@ > #include <linux/idr.h> > #include <linux/uio.h> > > -DEFINE_PER_CPU(int, eventfd_wake_count); > - > static DEFINE_IDA(eventfd_ida); > > struct eventfd_ctx { > @@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) > * Deadlock or stack overflow issues can happen if we recurse here > * through waitqueue wakeup handlers. If the caller users potentially > * nested waitqueues with custom wakeup handlers, then it should > - * check eventfd_signal_count() before calling this function. 
If > - * it returns true, the eventfd_signal() call should be deferred to a > + * check eventfd_signal_allowed() before calling this function. If > + * it returns false, the eventfd_signal() call should be deferred to a > * safe context. > */ > - if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) > + if (WARN_ON_ONCE(current->in_eventfd_signal)) > return 0; > > spin_lock_irqsave(&ctx->wqh.lock, flags); > - this_cpu_inc(eventfd_wake_count); > + current->in_eventfd_signal = 1; > if (ULLONG_MAX - ctx->count < n) > n = ULLONG_MAX - ctx->count; > ctx->count += n; > if (waitqueue_active(&ctx->wqh)) > wake_up_locked_poll(&ctx->wqh, EPOLLIN); > - this_cpu_dec(eventfd_wake_count); > + current->in_eventfd_signal = 0; > spin_unlock_irqrestore(&ctx->wqh.lock, flags); > > return n; > diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h > index dc4fd8a6644dd..836b4c021a0a4 100644 > --- a/include/linux/eventfd.h > +++ b/include/linux/eventfd.h > @@ -14,6 +14,7 @@ > #include <linux/err.h> > #include <linux/percpu-defs.h> > #include <linux/percpu.h> > +#include <linux/sched.h> > > /* > * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining > @@ -42,11 +43,9 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); > int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, > __u64 *cnt); > > -DECLARE_PER_CPU(int, eventfd_wake_count); > - > -static inline bool eventfd_signal_count(void) > +static inline bool eventfd_signal_allowed(void) > { > - return this_cpu_read(eventfd_wake_count); > + return !current->in_eventfd_signal; > } > > #else /* CONFIG_EVENTFD */ > @@ -77,9 +76,9 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, > return -ENOSYS; > } > > -static inline bool eventfd_signal_count(void) > +static inline bool eventfd_signal_allowed(void) > { > - return false; > + return true; > } > > #endif > diff --git a/include/linux/sched.h b/include/linux/sched.h > index 409a24036952c..29e6ff1af1df9 100644 > --- 
a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -852,6 +852,10 @@ struct task_struct { > /* Stalled due to lack of memory */ > unsigned in_memstall:1; > #endif > +#ifdef CONFIG_EVENTFD > + /* Recursion prevention for eventfd_signal() */ > + unsigned in_eventfd_signal:1; > +#endif > > unsigned long atomic_flags; /* Flags requiring atomic access. */ > > -- > 2.33.1 >
Actually I see it was fixed later with 4b3749865374899e115aa8c48681709b086fe6d3 so I guess it should be included as well. --nX On Thu, Feb 3, 2022 at 9:59 AM Daniel Vacek <neelx.g@gmail.com> wrote: > > Hi, > > OT: Btw for some reason you're sending a copy to > "stable-rt@vger.kernel.org"@redhat.com. > > On Fri, Jan 7, 2022 at 8:04 PM Luis Claudio R. Goncalves > <lgoncalv@redhat.com> wrote: > > > > From: Thomas Gleixner <tglx@linutronix.de> > > > > v5.10.90-rt61-rc1 stable review patch. > > If anyone has any objections, please let me know. > > > > ----------- > > > > > > Upstream commit b542e383d8c005f06a131e2b40d5889b812f19c6 > > > > The recursion protection for eventfd_signal() is based on a per CPU > > variable and relies on the !RT semantics of spin_lock_irqsave() for > > protecting this per CPU variable. On RT kernels spin_lock_irqsave() neither > > disables preemption nor interrupts which allows the spin lock held section > > to be preempted. If the preempting task invokes eventfd_signal() as well, > > then the recursion warning triggers. > > > > Paolo suggested to protect the per CPU variable with a local lock, but > > that's heavyweight and actually not necessary. The goal of this protection > > is to prevent the task stack from overflowing, which can be achieved with a > > per task recursion protection as well. > > > > Replace the per CPU variable with a per task bit similar to other recursion > > protection bits like task_struct::in_page_owner. This works on both !RT and > > RT kernels and removes as a side effect the extra per CPU storage. > > > > No functional change for !RT kernels. 
> > > > Reported-by: Daniel Bristot de Oliveira <bristot@redhat.com> > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > > Tested-by: Daniel Bristot de Oliveira <bristot@redhat.com> > > Acked-by: Jason Wang <jasowang@redhat.com> > > Cc: Al Viro <viro@zeniv.linux.org.uk> > > Link: https://lore.kernel.org/r/87wnp9idso.ffs@tglx > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> > > --- > > fs/aio.c | 2 +- > > fs/eventfd.c | 12 +++++------- > > include/linux/eventfd.h | 11 +++++------ > > include/linux/sched.h | 4 ++++ > > 4 files changed, 15 insertions(+), 14 deletions(-) > > > > diff --git a/fs/aio.c b/fs/aio.c > > index c72b2c51b446c..f7d47c9ff6deb 100644 > > --- a/fs/aio.c > > +++ b/fs/aio.c > > @@ -1761,7 +1761,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, > > list_del_init(&req->wait.entry); > > list_del(&iocb->ki_list); > > iocb->ki_res.res = mangle_poll(mask); > > - if (iocb->ki_eventfd && eventfd_signal_count()) { > > + if (iocb->ki_eventfd && eventfd_signal_allowed()) { > > IIUC, the logic changed and this should read > !eventfd_signal_allowed(). Or am I missing something here? > > Actually checking other stable branches and upstream, this looks to be > a typo only in v5.10.90-rt61-rc1 > > --nX > > > > > > iocb = NULL; > > INIT_WORK(&req->work, aio_poll_put_work); > > schedule_work(&req->work); > > diff --git a/fs/eventfd.c b/fs/eventfd.c > > index df466ef81dddf..9035ca60bfcf3 100644 > > --- a/fs/eventfd.c > > +++ b/fs/eventfd.c > > @@ -25,8 +25,6 @@ > > #include <linux/idr.h> > > #include <linux/uio.h> > > > > -DEFINE_PER_CPU(int, eventfd_wake_count); > > - > > static DEFINE_IDA(eventfd_ida); > > > > struct eventfd_ctx { > > @@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) > > * Deadlock or stack overflow issues can happen if we recurse here > > * through waitqueue wakeup handlers. 
If the caller users potentially > > * nested waitqueues with custom wakeup handlers, then it should > > - * check eventfd_signal_count() before calling this function. If > > - * it returns true, the eventfd_signal() call should be deferred to a > > + * check eventfd_signal_allowed() before calling this function. If > > + * it returns false, the eventfd_signal() call should be deferred to a > > * safe context. > > */ > > - if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) > > + if (WARN_ON_ONCE(current->in_eventfd_signal)) > > return 0; > > > > spin_lock_irqsave(&ctx->wqh.lock, flags); > > - this_cpu_inc(eventfd_wake_count); > > + current->in_eventfd_signal = 1; > > if (ULLONG_MAX - ctx->count < n) > > n = ULLONG_MAX - ctx->count; > > ctx->count += n; > > if (waitqueue_active(&ctx->wqh)) > > wake_up_locked_poll(&ctx->wqh, EPOLLIN); > > - this_cpu_dec(eventfd_wake_count); > > + current->in_eventfd_signal = 0; > > spin_unlock_irqrestore(&ctx->wqh.lock, flags); > > > > return n; > > diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h > > index dc4fd8a6644dd..836b4c021a0a4 100644 > > --- a/include/linux/eventfd.h > > +++ b/include/linux/eventfd.h > > @@ -14,6 +14,7 @@ > > #include <linux/err.h> > > #include <linux/percpu-defs.h> > > #include <linux/percpu.h> > > +#include <linux/sched.h> > > > > /* > > * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining > > @@ -42,11 +43,9 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); > > int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, > > __u64 *cnt); > > > > -DECLARE_PER_CPU(int, eventfd_wake_count); > > - > > -static inline bool eventfd_signal_count(void) > > +static inline bool eventfd_signal_allowed(void) > > { > > - return this_cpu_read(eventfd_wake_count); > > + return !current->in_eventfd_signal; > > } > > > > #else /* CONFIG_EVENTFD */ > > @@ -77,9 +76,9 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, > > return -ENOSYS; > > 
} > > > > -static inline bool eventfd_signal_count(void) > > +static inline bool eventfd_signal_allowed(void) > > { > > - return false; > > + return true; > > } > > > > #endif > > diff --git a/include/linux/sched.h b/include/linux/sched.h > > index 409a24036952c..29e6ff1af1df9 100644 > > --- a/include/linux/sched.h > > +++ b/include/linux/sched.h > > @@ -852,6 +852,10 @@ struct task_struct { > > /* Stalled due to lack of memory */ > > unsigned in_memstall:1; > > #endif > > +#ifdef CONFIG_EVENTFD > > + /* Recursion prevention for eventfd_signal() */ > > + unsigned in_eventfd_signal:1; > > +#endif > > > > unsigned long atomic_flags; /* Flags requiring atomic access. */ > > > > -- > > 2.33.1 > >
On Thu, Feb 3, 2022 at 6:17 AM Daniel Vacek <neelx.g@gmail.com> wrote: > > Actually I see it was fixed later with > 4b3749865374899e115aa8c48681709b086fe6d3 so I guess it should be > included as well. Thank you for the feedback! I will proceed with the changes and testing, and will probably release the kernel on Monday. Also, thank you for the note about the wrong email address! Best regards, Luis
diff --git a/fs/aio.c b/fs/aio.c index c72b2c51b446c..f7d47c9ff6deb 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1761,7 +1761,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, list_del_init(&req->wait.entry); list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - if (iocb->ki_eventfd && eventfd_signal_count()) { + if (iocb->ki_eventfd && eventfd_signal_allowed()) { iocb = NULL; INIT_WORK(&req->work, aio_poll_put_work); schedule_work(&req->work); diff --git a/fs/eventfd.c b/fs/eventfd.c index df466ef81dddf..9035ca60bfcf3 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -25,8 +25,6 @@ #include <linux/idr.h> #include <linux/uio.h> -DEFINE_PER_CPU(int, eventfd_wake_count); - static DEFINE_IDA(eventfd_ida); struct eventfd_ctx { @@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) * Deadlock or stack overflow issues can happen if we recurse here * through waitqueue wakeup handlers. If the caller users potentially * nested waitqueues with custom wakeup handlers, then it should - * check eventfd_signal_count() before calling this function. If - * it returns true, the eventfd_signal() call should be deferred to a + * check eventfd_signal_allowed() before calling this function. If + * it returns false, the eventfd_signal() call should be deferred to a * safe context. 
*/ - if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) + if (WARN_ON_ONCE(current->in_eventfd_signal)) return 0; spin_lock_irqsave(&ctx->wqh.lock, flags); - this_cpu_inc(eventfd_wake_count); + current->in_eventfd_signal = 1; if (ULLONG_MAX - ctx->count < n) n = ULLONG_MAX - ctx->count; ctx->count += n; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLIN); - this_cpu_dec(eventfd_wake_count); + current->in_eventfd_signal = 0; spin_unlock_irqrestore(&ctx->wqh.lock, flags); return n; diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index dc4fd8a6644dd..836b4c021a0a4 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -14,6 +14,7 @@ #include <linux/err.h> #include <linux/percpu-defs.h> #include <linux/percpu.h> +#include <linux/sched.h> /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining @@ -42,11 +43,9 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); -DECLARE_PER_CPU(int, eventfd_wake_count); - -static inline bool eventfd_signal_count(void) +static inline bool eventfd_signal_allowed(void) { - return this_cpu_read(eventfd_wake_count); + return !current->in_eventfd_signal; } #else /* CONFIG_EVENTFD */ @@ -77,9 +76,9 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, return -ENOSYS; } -static inline bool eventfd_signal_count(void) +static inline bool eventfd_signal_allowed(void) { - return false; + return true; } #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 409a24036952c..29e6ff1af1df9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -852,6 +852,10 @@ struct task_struct { /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif +#ifdef CONFIG_EVENTFD + /* Recursion prevention for eventfd_signal() */ + unsigned in_eventfd_signal:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */