@@ -1994,8 +1994,40 @@ static void rcu_prepare_for_idle(int cpu)
#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-#define RCU_NEEDS_CPU_FLUSHES 5 /* Allow for callback self-repost. */
+/*
+ * This code is invoked when a CPU goes idle, at which point we want
+ * to have the CPU do everything required for RCU so that it can enter
+ * the energy-efficient dyntick-idle mode. This is handled by a
+ * state machine implemented by rcu_prepare_for_idle() below.
+ *
+ * The following three proprocessor symbols control this state machine:
+ *
+ * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
+ * to satisfy RCU. Beyond this point, it is better to incur a periodic
+ * scheduling-clock interrupt than to loop through the state machine
+ * at full power.
+ * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
+ * optional if RCU does not need anything immediately from this
+ * CPU, even if this CPU still has RCU callbacks queued. The first
+ * times through the state machine are mandatory: we need to give
+ * the state machine a chance to communicate a quiescent state
+ * to the RCU core.
+ * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
+ * to sleep in dyntick-idle mode with RCU callbacks pending. This
+ * is sized to be roughly one RCU grace period. Those energy-efficiency
+ * benchmarkers who might otherwise be tempted to set this to a large
+ * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
+ * system. And if you are -that- concerned about energy efficiency,
+ * just power the system down and be done with it!
+ *
+ * The values below work well in practice. If future workloads require
+ * adjustment, they can be converted into kernel config parameters, though
+ * making the state machine smarter might be a better option.
+ */
+#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
+#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
+
static DEFINE_PER_CPU(int, rcu_dyntick_drain);
static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer);
@@ -2110,17 +2142,17 @@ static void rcu_prepare_for_idle(int cpu)
/* Check and update the rcu_dyntick_drain sequencing. */
if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
/* First time through, initialize the counter. */
- per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
- } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+ per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES;
+ } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES &&
+ !rcu_pending(cpu)) {
/* Can we go dyntick-idle despite still having callbacks? */
- if (!rcu_pending(cpu)) {
- trace_rcu_prep_idle("Dyntick with callbacks");
- per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
- hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
- rcu_idle_gp_wait, HRTIMER_MODE_REL);
- return; /* Nothing more to do immediately. */
- }
-
+ trace_rcu_prep_idle("Dyntick with callbacks");
+ per_cpu(rcu_dyntick_drain, cpu) = 0;
+ per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
+ hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
+ rcu_idle_gp_wait, HRTIMER_MODE_REL);
+ return; /* Nothing more to do immediately. */
+ } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
/* We have hit the limit, so time to give up. */
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
local_irq_restore(flags);