@@ -129,6 +129,7 @@
* @state_bins: Idle state data bins for this CPU.
* @total: Grand total of the "intercepts" and "hits" metrics for all bins.
* @tick_intercepts: "Intercepts" before TICK_NSEC.
+ * @short_idle: Wakeups after short idle periods.
*/
struct teo_cpu {
s64 time_span_ns;
@@ -136,6 +137,7 @@
struct teo_bin state_bins[CPUIDLE_STATE_MAX];
unsigned int total;
unsigned int tick_intercepts;
+ unsigned int short_idle;
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
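The new field follows the governor's decaying-sum pattern: each wakeup ages short_idle by 1/2^DECAY_SHIFT and a qualifying wakeup adds PULSE. A minimal user-space sketch of that behavior, with the PULSE and DECAY_SHIFT values assumed for illustration (check the #defines in teo.c):

#include <stdio.h>

#define PULSE		1024	/* assumed for illustration */
#define DECAY_SHIFT	3	/* assumed for illustration */

int main(void)
{
	unsigned int short_idle = 0;
	int i;

	/* Every wakeup decays the metric; short-idle wakeups add PULSE. */
	for (i = 0; i < 32; i++) {
		short_idle -= short_idle >> DECAY_SHIFT;
		short_idle += PULSE;	/* pretend every wakeup was short */
		printf("event %2d: short_idle = %u\n", i, short_idle);
	}
	/* The sum converges toward PULSE << DECAY_SHIFT (8192 here). */
	return 0;
}

Because the decay runs on every update, the counter tracks recent history and old short-idle bursts fade away geometrically.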
@@ -152,12 +154,12 @@
s64 target_residency_ns;
u64 measured_ns;
- if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
+ cpu_data->short_idle -= cpu_data->short_idle >> DECAY_SHIFT;
+
+ if (cpu_data->time_span_ns < 0) {
/*
- * This causes the wakeup to be counted as a hit regardless of the
- * real idle duration which doesn't need to be computed because the
- * wakeup has been close enough to an anticipated timer.
+ * If one of the safety nets has triggered, assume that this
+ * might have been a long sleep.
*/
measured_ns = U64_MAX;
} else {
@@ -177,10 +179,14 @@
* time, so take 1/2 of the exit latency as a very rough
* approximation of the average of it.
*/
- if (measured_ns >= lat_ns)
+ if (measured_ns >= lat_ns) {
measured_ns -= lat_ns / 2;
- else
+ if (measured_ns < RESIDENCY_THRESHOLD_NS)
+ cpu_data->short_idle += PULSE;
+ } else {
measured_ns /= 2;
+ cpu_data->short_idle += PULSE;
+ }
}
cpu_data->total = 0;
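The branch above can be read as a predicate deciding whether a wakeup counts as "short". A hypothetical stand-alone helper, not how teo.c factors the code, with the RESIDENCY_THRESHOLD_NS value assumed for illustration:

#include <stdint.h>
#include <stdbool.h>

#define RESIDENCY_THRESHOLD_NS	(15 * 1000ULL)	/* assumed: 15 us in ns */

/* Returns true when the wakeup should add PULSE to short_idle. */
static bool wakeup_was_short(uint64_t *measured_ns, uint64_t lat_ns)
{
	if (*measured_ns >= lat_ns) {
		/* Charge half of the exit latency to the idle time. */
		*measured_ns -= lat_ns / 2;
		return *measured_ns < RESIDENCY_THRESHOLD_NS;
	}
	/* Exit latency dominated: keep half as a rough estimate. */
	*measured_ns /= 2;
	return true;
}

Note that the second branch always pulses: if the measured span did not even cover the exit latency, the idle period was short by definition.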
@@ -419,27 +425,35 @@
if (idx > constraint_idx)
idx = constraint_idx;
- if (!idx) {
- /*
- * Query the sleep length to be able to count the wakeup as a
- * hit if it is caused by a timer.
- */
- cpu_data->sleep_length_ns = tick_nohz_get_sleep_length(&delta_tick);
- goto out_tick;
- }
-
/*
- * If state 0 is a polling one, check if the target residency of
- * the current candidate state is low enough and skip the timers
- * check in that case too.
+ * If either the candidate state is state 0 or its target residency is
+ * low enough, there is basically nothing more to do, but if the sleep
+ * length is not updated, the subsequent wakeup will be counted as an
+ * "intercept" which may be problematic in the cases when timer wakeups
+ * are dominant. Namely, it may effectively prevent deeper idle states
+ * from being selected at one point even if no imminent timers are
+ * scheduled.
+ *
+ * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one
+ * CPU are unlikely (user space has a default 50 us slack value for
+ * hrtimers and there are relatively few timers with a lower deadline
+ * value in the kernel), and even if they did happen, the potential
+ * benefit from using a deep idle state in that case would be
+ * questionable anyway for latency reasons. Thus if the measured idle
+ * duration falls into that range in the majority of cases, assume
+ * non-timer wakeups to be dominant and skip updating the sleep length
+ * to reduce latency.
*/
- if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
- drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS)
+ if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
+ 2 * cpu_data->short_idle >= cpu_data->total)
goto out_tick;
duration_ns = tick_nohz_get_sleep_length(&delta_tick);
cpu_data->sleep_length_ns = duration_ns;
+ if (!idx)
+ goto out_tick;
+
/*
* If the closest expected timer is before the target residency of the
* candidate state, a shallower one needs to be found.
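The 2 * cpu_data->short_idle >= cpu_data->total test is a majority check. Assuming total is maintained the same way (decayed by DECAY_SHIFT and pulsed once per wakeup, as the bin metrics feeding it suggest), the ratio short_idle / total approximates the recent fraction of short-idle wakeups, so the test asks whether that fraction is at least one half. A simulation under those assumptions:

#include <stdio.h>
#include <stdbool.h>

#define PULSE		1024	/* assumed for illustration */
#define DECAY_SHIFT	3	/* assumed for illustration */

int main(void)
{
	unsigned int short_idle = 0, total = 0;
	int i;

	for (i = 0; i < 64; i++) {
		bool was_short = (i % 3) != 0;	/* ~2/3 short wakeups */

		short_idle -= short_idle >> DECAY_SHIFT;
		total -= total >> DECAY_SHIFT;
		total += PULSE;			/* every wakeup counts */
		if (was_short)
			short_idle += PULSE;
	}
	/* Roughly 2/3 of recent wakeups were short, so the check passes. */
	printf("skip sleep length query: %s\n",
	       2 * short_idle >= total ? "yes" : "no");
	return 0;
}

At steady state with a short-idle fraction f, short_idle settles near f * (PULSE << DECAY_SHIFT) while total settles near PULSE << DECAY_SHIFT, so the comparison reduces to f >= 1/2.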
@@ -501,7 +515,7 @@
if (dev->poll_time_limit ||
(tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
dev->poll_time_limit = false;
- cpu_data->time_span_ns = cpu_data->sleep_length_ns;
+ cpu_data->time_span_ns = KTIME_MIN;
} else {
cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns;
}
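This pairs with the new time_span_ns < 0 test in the update path: a negative span is a sentinel meaning "a safety net fired, the real duration is unknown", which teo_update() turns into measured_ns = U64_MAX. A compressed sketch of the handshake, with simplified signatures and KTIME_MIN standing in for the ktime.h constant:

#include <stdint.h>
#include <stdbool.h>

#define KTIME_MIN	INT64_MIN	/* stand-in for the ktime.h value */

struct cpu_state { int64_t time_span_ns; };

/* reflect: record either the measured span or the sentinel. */
static void reflect(struct cpu_state *s, bool safety_net, int64_t now)
{
	s->time_span_ns = safety_net ? KTIME_MIN : now - s->time_span_ns;
}

/* update: a negative span means "might have been a long sleep". */
static uint64_t measured(const struct cpu_state *s)
{
	return s->time_span_ns < 0 ? UINT64_MAX : (uint64_t)s->time_span_ns;
}

Using KTIME_MIN instead of copying sleep_length_ns (as the old code did) makes the sentinel explicit and keeps it from ever colliding with a real measured span.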