diff mbox

[tip/core/rcu,2/3] rcu: Disallow callback registry on offline CPUs

Message ID 1346353383-350-2-git-send-email-paulmck@linux.vnet.ibm.com
State Accepted
Commit 0d8ee37e2fcb7b77b9c5dee784beca5a215cad4c
Headers show

Commit Message

Paul E. McKenney Aug. 30, 2012, 7:03 p.m. UTC
From: "Paul E. McKenney" <paul.mckenney@linaro.org>

Posting a callback after the CPU_DEAD notifier effectively leaks
that callback unless/until that CPU comes back online.  Silence is
unhelpful when attempting to track down such leaks, so this commit emits
a WARN_ON_ONCE() and unconditionally leaks the callback when an offline
CPU attempts to register a callback.  The rdp->nxttail[RCU_NEXT_TAIL] is
set to NULL in the CPU_DEAD notifier and restored in the CPU_UP_PREPARE
notifier, allowing __call_rcu() to determine exactly when posting callbacks
is illegal.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

Comments

Josh Triplett Aug. 31, 2012, 4:21 p.m. UTC | #1
On Thu, Aug 30, 2012 at 12:03:02PM -0700, Paul E. McKenney wrote:
> From: "Paul E. McKenney" <paul.mckenney@linaro.org>
> 
> Posting a callback after the CPU_DEAD notifier effectively leaks
> that callback unless/until that CPU comes back online.  Silence is
> unhelpful when attempting to track down such leaks, so this commit emits
> a WARN_ON_ONCE() and unconditionally leaks the callback when an offline
> CPU attempts to register a callback.  The rdp->nxttail[RCU_NEXT_TAIL] is
> set to NULL in the CPU_DEAD notifier and restored in the CPU_UP_PREPARE
> notifier, allowing _call_rcu() to determine exactly when posting callbacks
> is illegal.
> 
> Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

One suggestion below; with or without that change:

Reviewed-by: Josh Triplett <josh@joshtriplett.org>

> ---
>  kernel/rcutree.c |   10 ++++++++++
>  1 files changed, 10 insertions(+), 0 deletions(-)
> 
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index 9854a00..5f8c4dd 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -1503,6 +1503,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
>  	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
>  		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
>  		  cpu, rdp->qlen, rdp->nxtlist);
> +	init_callback_list(rdp);
> +	/* Disallow further callbacks on this CPU. */
> +	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
>  }
>  
>  #else /* #ifdef CONFIG_HOTPLUG_CPU */
> @@ -1925,6 +1928,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
>  	rdp = this_cpu_ptr(rsp->rda);
>  
>  	/* Add the callback to our list. */
> +	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
> +		/* _call_rcu() is illegal on offline CPU; leak the callback. */
> +		WARN_ON_ONCE(1);

You can write this as:

if (WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] == NULL))

WARN_ON_ONCE also has a built-in unlikely() already.

> +		local_irq_restore(flags);
> +		return;
> +	}
>  	ACCESS_ONCE(rdp->qlen)++;
>  	if (lazy)
>  		rdp->qlen_lazy++;
> @@ -2462,6 +2471,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
>  	rdp->qlen_last_fqs_check = 0;
>  	rdp->n_force_qs_snap = rsp->n_force_qs;
>  	rdp->blimit = blimit;
> +	init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
>  	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
>  	atomic_set(&rdp->dynticks->dynticks,
>  		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
> -- 
> 1.7.8
>
Paul E. McKenney Sept. 4, 2012, 11:53 p.m. UTC | #2
On Fri, Aug 31, 2012 at 09:21:30AM -0700, Josh Triplett wrote:
> On Thu, Aug 30, 2012 at 12:03:02PM -0700, Paul E. McKenney wrote:
> > From: "Paul E. McKenney" <paul.mckenney@linaro.org>
> > 
> > Posting a callback after the CPU_DEAD notifier effectively leaks
> > that callback unless/until that CPU comes back online.  Silence is
> > unhelpful when attempting to track down such leaks, so this commit emits
> > a WARN_ON_ONCE() and unconditionally leaks the callback when an offline
> > CPU attempts to register a callback.  The rdp->nxttail[RCU_NEXT_TAIL] is
> > set to NULL in the CPU_DEAD notifier and restored in the CPU_UP_PREPARE
> > notifier, allowing _call_rcu() to determine exactly when posting callbacks
> > is illegal.
> > 
> > Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> One suggestion below; with or without that change:
> 
> Reviewed-by: Josh Triplett <josh@joshtriplett.org>
> 
> > ---
> >  kernel/rcutree.c |   10 ++++++++++
> >  1 files changed, 10 insertions(+), 0 deletions(-)
> > 
> > diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> > index 9854a00..5f8c4dd 100644
> > --- a/kernel/rcutree.c
> > +++ b/kernel/rcutree.c
> > @@ -1503,6 +1503,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
> >  	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
> >  		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
> >  		  cpu, rdp->qlen, rdp->nxtlist);
> > +	init_callback_list(rdp);
> > +	/* Disallow further callbacks on this CPU. */
> > +	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
> >  }
> >  
> >  #else /* #ifdef CONFIG_HOTPLUG_CPU */
> > @@ -1925,6 +1928,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
> >  	rdp = this_cpu_ptr(rsp->rda);
> >  
> >  	/* Add the callback to our list. */
> > +	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
> > +		/* _call_rcu() is illegal on offline CPU; leak the callback. */
> > +		WARN_ON_ONCE(1);
> 
> You can write this as:
> 
> if (WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] == NULL))
> 
> WARN_ON_ONCE also has a built-in unlikely() already.

Indeed, I missed the fact that WARN_ON_ONCE() returns its argument.
I believe that there are a number of other places in RCU where I could
bury a WARN_ON_ONCE(1) into the preceding "if" statement, so I added a
todo-list item to check all of them and convert as appropriate.

							Thanx, Paul

> > +		local_irq_restore(flags);
> > +		return;
> > +	}
> >  	ACCESS_ONCE(rdp->qlen)++;
> >  	if (lazy)
> >  		rdp->qlen_lazy++;
> > @@ -2462,6 +2471,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
> >  	rdp->qlen_last_fqs_check = 0;
> >  	rdp->n_force_qs_snap = rsp->n_force_qs;
> >  	rdp->blimit = blimit;
> > +	init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
> >  	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
> >  	atomic_set(&rdp->dynticks->dynticks,
> >  		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
> > -- 
> > 1.7.8
> > 
>
diff mbox

Patch

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 9854a00..5f8c4dd 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1503,6 +1503,9 @@  static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
 		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
 		  cpu, rdp->qlen, rdp->nxtlist);
+	init_callback_list(rdp);
+	/* Disallow further callbacks on this CPU. */
+	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1925,6 +1928,12 @@  __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
+	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
+		/* _call_rcu() is illegal on offline CPU; leak the callback. */
+		WARN_ON_ONCE(1);
+		local_irq_restore(flags);
+		return;
+	}
 	ACCESS_ONCE(rdp->qlen)++;
 	if (lazy)
 		rdp->qlen_lazy++;
@@ -2462,6 +2471,7 @@  rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
+	init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);