@@ -3017,12 +3017,15 @@ ___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
/*
* Step 2: update *_avg.
*/
- sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
+ sa->load_avg = div_u64((sa->load_sum - sa->period_contrib * weight),
+ (LOAD_AVG_MAX - 1024));
if (cfs_rq) {
cfs_rq->runnable_load_avg =
- div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
+ div_u64((cfs_rq->runnable_load_sum - sa->period_contrib * weight),
+ (LOAD_AVG_MAX - 1024));
}
- sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
+ sa->util_avg = (sa->util_sum - (running * sa->period_contrib << SCHED_CAPACITY_SHIFT)) /
+ (LOAD_AVG_MAX - 1024);
return 1;
}
In the current implementation of load/util_avg, we assume that the ongoing
time segment has fully elapsed, and util/load_sum is divided by LOAD_AVG_MAX,
even if part of the time segment still remains. As a consequence, this
remaining part is considered as idle time and generates unexpected variations
of util_avg of a busy CPU in the range ]1002..1024[ whereas util_avg should
stay at 1023. In order to keep the metric stable, we should not consider the
ongoing time segment when computing load/util_avg, but only the segments that
have already fully elapsed.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
Sorry, some unexpected characters appeared in the commit message of the
previous version.

 kernel/sched/fair.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

-- 
2.7.4
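
To make the range mentioned above concrete, here is a small userspace sketch
(not kernel code) that plugs the saturated sums of a fully busy CPU into both
the current and the patched util_avg formulas. LOAD_AVG_MAX = 47742 and
SCHED_CAPACITY_SHIFT = 10 are the values defined in the scheduler code around
this series; the saturation model below is simplified and ignores the
fixed-point truncation of the real decay, which is why the stable value comes
out as exactly 1024 here instead of the ~1023 observed in practice.

/*
 * Sketch of the arithmetic this patch changes, for a CPU that has been
 * 100% busy for a long time. The segments that have fully elapsed
 * saturate at roughly (LOAD_AVG_MAX - 1024); the ongoing segment adds
 * period_contrib on top of that.
 */
#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_MAX		47742	/* max of the PELT geometric series */
#define SCHED_CAPACITY_SHIFT	10	/* util is scaled by 1024 */

int main(void)
{
	/* contribution of all fully elapsed segments, scaled by capacity */
	uint64_t closed_sum = (uint64_t)(LOAD_AVG_MAX - 1024) << SCHED_CAPACITY_SHIFT;

	for (unsigned int period_contrib = 0; period_contrib < 1024; period_contrib += 341) {
		/* util_sum as maintained today: ongoing segment included */
		uint64_t util_sum = closed_sum +
			((uint64_t)period_contrib << SCHED_CAPACITY_SHIFT);

		/* current code: divide by LOAD_AVG_MAX as if the segment had elapsed */
		uint64_t old_avg = util_sum / LOAD_AVG_MAX;

		/* patched code: drop the ongoing segment, divide by the closed max */
		uint64_t new_avg = (util_sum -
			((uint64_t)period_contrib << SCHED_CAPACITY_SHIFT)) /
			(LOAD_AVG_MAX - 1024);

		printf("period_contrib=%4u  old util_avg=%llu  new util_avg=%llu\n",
		       period_contrib,
		       (unsigned long long)old_avg,
		       (unsigned long long)new_avg);
	}
	return 0;
}

With this model the old formula reports 1002, 1009, 1016 and 1023 as
period_contrib grows from 0 to 1023, i.e. the metric drifts within the busy
period, while the patched formula reports the same value at every sample
point, which is the stability the change is after.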