@@ -33,23 +33,23 @@ rcu/rcuboost:
The output of "cat rcu/rcudata" looks as follows:
rcu_sched:
- 0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
- 1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
- 2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
- 3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
- 4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
- 5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
- 6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
- 7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
+ 0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
+ 1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
+ 2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
+ 3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
+ 4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
+ 5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
+ 6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
+ 7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
rcu_bh:
- 0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
- 1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
- 2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
- 3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
- 4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
- 5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
- 6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
- 7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
+ 0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
+ 1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
+ 2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
+ 3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
+ 4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
+ 5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
+ 6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
+ 7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
The first section lists the rcu_data structures for rcu_sched, the second
for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
@@ -119,10 +119,6 @@ o "of" is the number of times that some other CPU has forced a
CPU is offline when it is really alive and kicking) is a fatal
error, so it makes sense to err conservatively.
-o "ri" is the number of times that RCU has seen fit to send a
- reschedule IPI to this CPU in order to get it to report a
- quiescent state.
-
o "ql" is the number of RCU callbacks currently residing on
this CPU. This is the total number of callbacks, regardless
of what state they are in (new, waiting for grace period to
@@ -320,25 +320,18 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
static int rcu_implicit_offline_qs(struct rcu_data *rdp)
{
/*
- * If the CPU is offline, it is in a quiescent state. We can
- * trust its state not to change because interrupts are disabled.
+ * If the CPU is offline for more than a jiffy, it is in a quiescent
+ * state. We can trust its state not to change because interrupts
+ * are disabled. The reason for the jiffy's worth of slack is to
+ * handle CPUs initializing on the way up and finding their way
+ * to the idle loop on the way down.
*/
- if (cpu_is_offline(rdp->cpu)) {
+ if (cpu_is_offline(rdp->cpu) &&
+ ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
rdp->offline_fqs++;
return 1;
}
-
- /*
- * The CPU is online, so send it a reschedule IPI. This forces
- * it through the scheduler, and (inefficiently) also handles cases
- * where idle loops fail to inform RCU about the CPU being idle.
- */
- if (rdp->cpu != smp_processor_id())
- smp_send_reschedule(rdp->cpu);
- else
- set_need_resched();
- rdp->resched_ipi++;
return 0;
}
@@ -601,19 +594,33 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
* this task being preempted, its old CPU being taken offline, resuming
* on some other CPU, then determining that its old CPU is now offline.
* It is OK to use RCU on an offline processor during initial boot, hence
- * the check for rcu_scheduler_fully_active.
+ * the check for rcu_scheduler_fully_active. Note also that it is OK
+ * for a CPU coming online to use RCU for one jiffy prior to marking itself
+ * online in the cpu_online_mask. Similarly, it is OK for a CPU going
+ * offline to continue to use RCU for one jiffy after marking itself
+ * offline in the cpu_online_mask. This leniency is necessary given the
+ * non-atomic nature of the online and offline processing, for example,
+ * the fact that a CPU enters the scheduler after completing the CPU_DYING
+ * notifiers.
+ *
+ * This is also why RCU internally marks CPUs online during the
+ * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
*
* Disable checking if in an NMI handler because we cannot safely report
* errors from NMI handlers anyway.
*/
bool rcu_lockdep_current_cpu_online(void)
{
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
bool ret;
if (in_nmi())
return 1;
preempt_disable();
- ret = cpu_online(smp_processor_id()) ||
+ rdp = &__get_cpu_var(rcu_sched_data);
+ rnp = rdp->mynode;
+ ret = (rdp->grpmask & rnp->qsmaskinit) ||
!rcu_scheduler_fully_active;
preempt_enable();
return ret;
@@ -1308,14 +1315,12 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
*/
static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
{
- unsigned long flags;
int i;
unsigned long mask;
- int need_report;
int receive_cpu = cpumask_any(cpu_online_mask);
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
- struct rcu_node *rnp = rdp->mynode; /* For dying CPU. */
+ RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */
/* First, adjust the counts. */
if (rdp->nxtlist != NULL) {
@@ -1381,32 +1386,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
"cpuofl");
rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
/* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
-
- /*
- * Remove the dying CPU from the bitmasks in the rcu_node
- * hierarchy. Because we are in stop_machine() context, we
- * automatically exclude ->onofflock critical sections.
- */
- do {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- rnp->qsmaskinit &= ~mask;
- if (rnp->qsmaskinit != 0) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
- break;
- }
- if (rnp == rdp->mynode) {
- need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
- if (need_report & RCU_OFL_TASKS_NORM_GP)
- rcu_report_unblock_qs_rnp(rnp, flags);
- else
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
- if (need_report & RCU_OFL_TASKS_EXP_GP)
- rcu_report_exp_rnp(rsp, rnp, true);
- } else
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
- mask = rnp->grpmask;
- rnp = rnp->parent;
- } while (rnp != NULL);
}
/*
@@ -1417,11 +1396,53 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
*/
static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
{
+ unsigned long flags;
+ unsigned long mask;
+ int need_report = 0;
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
- struct rcu_node *rnp = rdp->mynode;
+ struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rnp. */
+ /* Adjust any no-longer-needed kthreads. */
rcu_stop_cpu_kthread(cpu);
rcu_node_kthread_setaffinity(rnp, -1);
+
+ /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */
+
+ /* Exclude any attempts to start a new grace period. */
+ raw_spin_lock_irqsave(&rsp->onofflock, flags);
+
+ /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
+ mask = rdp->grpmask; /* rnp->grplo is constant. */
+ do {
+ raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+ rnp->qsmaskinit &= ~mask;
+ if (rnp->qsmaskinit != 0) {
+ if (rnp != rdp->mynode)
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ break;
+ }
+ if (rnp == rdp->mynode)
+ need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
+ else
+ raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+ mask = rnp->grpmask;
+ rnp = rnp->parent;
+ } while (rnp != NULL);
+
+ /*
+ * We still hold the leaf rcu_node structure lock here, and
+ * irqs are still disabled. The reason for this subterfuge is
+ * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
+ * held leads to deadlock.
+ */
+ raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+ rnp = rdp->mynode;
+ if (need_report & RCU_OFL_TASKS_NORM_GP)
+ rcu_report_unblock_qs_rnp(rnp, flags);
+ else
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ if (need_report & RCU_OFL_TASKS_EXP_GP)
+ rcu_report_exp_rnp(rsp, rnp, true);
}
#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -289,7 +289,6 @@ struct rcu_data {
/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
unsigned long offline_fqs; /* Kicked due to being offline. */
- unsigned long resched_ipi; /* Sent a resched IPI. */
/* 5) __rcu_pending() statistics. */
unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
@@ -610,7 +610,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
* absolutely necessary, but this is a good performance/complexity
* tradeoff.
*/
- if (rcu_preempt_blocked_readers_cgp(rnp))
+ if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
retval |= RCU_OFL_TASKS_NORM_GP;
if (rcu_preempted_readers_exp(rnp))
retval |= RCU_OFL_TASKS_EXP_GP;
@@ -72,7 +72,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
rdp->dynticks->dynticks_nesting,
rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
- seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
+ seq_printf(m, " of=%lu", rdp->offline_fqs);
seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
rdp->qlen_lazy, rdp->qlen,
".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
@@ -144,7 +144,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
rdp->dynticks->dynticks_nesting,
rdp->dynticks->dynticks_nmi_nesting,
rdp->dynticks_fqs);
- seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
+ seq_printf(m, ",%lu", rdp->offline_fqs);
seq_printf(m, ",%ld,%ld,\"%c%c%c%c\"", rdp->qlen_lazy, rdp->qlen,
".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
rdp->nxttail[RCU_NEXT_TAIL]],
@@ -168,7 +168,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
{
seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
- seq_puts(m, "\"of\",\"ri\",\"qll\",\"ql\",\"qs\"");
+ seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\"");
#ifdef CONFIG_RCU_BOOST
seq_puts(m, "\"kt\",\"ktl\"");
#endif /* #ifdef CONFIG_RCU_BOOST */