Message ID | 20220816130629.3178-1-lukasz.luba@arm.com |
---|---|
State | Accepted |
Commit | 6d5afdc97ea71958287364a1f1d07e59ef151b11 |
Headers | show |
Series | [v2] cpufreq: schedutil: Move max CPU capacity to sugov_policy | expand |
On Tue, Aug 16, 2022 at 3:06 PM Lukasz Luba <lukasz.luba@arm.com> wrote: > > There is no need to keep the max CPU capacity in the per_cpu instance. > Furthermore, there is no need to check and update that variable > (sg_cpu->max) every time in the frequency change request, which is part > of hot path. Instead use struct sugov_policy to store that information. > Initialize the max CPU capacity during the setup and start callback. > We can do that since all CPUs in the same frequency domain have the same > max capacity (capacity setup and thermal pressure are based on that). > > Acked-by: Viresh Kumar <viresh.kumar@linaro.org> > Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> > --- > Changes v2: > - collected ACK from Viresh > - re-based on top of latest mainline where the previously conflicting > change is now merged > > kernel/sched/cpufreq_schedutil.c | 30 +++++++++++++++--------------- > 1 file changed, 15 insertions(+), 15 deletions(-) > > diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c > index 1207c78f85c1..9161d1136d01 100644 > --- a/kernel/sched/cpufreq_schedutil.c > +++ b/kernel/sched/cpufreq_schedutil.c > @@ -25,6 +25,9 @@ struct sugov_policy { > unsigned int next_freq; > unsigned int cached_raw_freq; > > + /* max CPU capacity, which is equal for all CPUs in freq. domain */ > + unsigned long max; > + > /* The next fields are only needed if fast switch cannot be used: */ > struct irq_work irq_work; > struct kthread_work work; > @@ -48,7 +51,6 @@ struct sugov_cpu { > > unsigned long util; > unsigned long bw_dl; > - unsigned long max; > > /* The field below is for single-CPU policies only: */ > #ifdef CONFIG_NO_HZ_COMMON > @@ -158,7 +160,6 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) > { > struct rq *rq = cpu_rq(sg_cpu->cpu); > > - sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); > sg_cpu->bw_dl = cpu_bw_dl(rq); > sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), > FREQUENCY_UTIL, NULL); > @@ -253,6 +254,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, > */ > static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) > { > + struct sugov_policy *sg_policy = sg_cpu->sg_policy; > unsigned long boost; > > /* No boost currently required */ > @@ -280,7 +282,8 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) > * sg_cpu->util is already in capacity scale; convert iowait_boost > * into the same scale so we can compare. > */ > - boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; > + boost = sg_cpu->iowait_boost * sg_policy->max; > + boost >>= SCHED_CAPACITY_SHIFT; > boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); > if (sg_cpu->util < boost) > sg_cpu->util = boost; > @@ -337,7 +340,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, > if (!sugov_update_single_common(sg_cpu, time, flags)) > return; > > - next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); > + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max); > /* > * Do not reduce the frequency if the CPU has not been idle > * recently, as the reduction is likely to be premature then. > @@ -373,6 +376,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, > unsigned int flags) > { > struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); > + struct sugov_policy *sg_policy = sg_cpu->sg_policy; > unsigned long prev_util = sg_cpu->util; > > /* > @@ -399,7 +403,8 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, > sg_cpu->util = prev_util; > > cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), > - map_util_perf(sg_cpu->util), sg_cpu->max); > + map_util_perf(sg_cpu->util), > + sg_policy->max); > > sg_cpu->sg_policy->last_freq_update_time = time; > } > @@ -408,25 +413,19 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) > { > struct sugov_policy *sg_policy = sg_cpu->sg_policy; > struct cpufreq_policy *policy = sg_policy->policy; > - unsigned long util = 0, max = 1; > + unsigned long util = 0; > unsigned int j; > > for_each_cpu(j, policy->cpus) { > struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); > - unsigned long j_util, j_max; > > sugov_get_util(j_sg_cpu); > sugov_iowait_apply(j_sg_cpu, time); > - j_util = j_sg_cpu->util; > - j_max = j_sg_cpu->max; > > - if (j_util * max > j_max * util) { > - util = j_util; > - max = j_max; > - } > + util = max(j_sg_cpu->util, util); > } > > - return get_next_freq(sg_policy, util, max); > + return get_next_freq(sg_policy, util, sg_policy->max); > } > > static void > @@ -752,7 +751,7 @@ static int sugov_start(struct cpufreq_policy *policy) > { > struct sugov_policy *sg_policy = policy->governor_data; > void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); > - unsigned int cpu; > + unsigned int cpu = cpumask_first(policy->cpus); > > sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; > sg_policy->last_freq_update_time = 0; > @@ -760,6 +759,7 @@ static int sugov_start(struct cpufreq_policy *policy) > sg_policy->work_in_progress = false; > sg_policy->limits_changed = false; > sg_policy->cached_raw_freq = 0; > + sg_policy->max = arch_scale_cpu_capacity(cpu); > > sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); > > -- Applied as 6.1 material, thanks!
>We can do that since all CPUs in the same frequency domain have the >same max capacity Do they? In the Intel Alder Lake datasheet [1], it says that a single power rail supplies all IA ("Intel Architecture") cores, which includes both P cores and E cores. I don't have anything that new, but on Haswell and Skylake, despite the fact that each CPU has a separate policy that lists only itself in affected_cpus, with the userspace governor I find that every core runs at the the highest frequency set among all cores. For clarity: $ grep . cpufreq/policy*/scaling_{setspeed,cur_freq} cpufreq/policy0/scaling_setspeed:3000000 cpufreq/policy1/scaling_setspeed:2000000 cpufreq/policy0/scaling_cur_freq:2999997 cpufreq/policy1/scaling_cur_freq:3000001 It seems that these cores are in the same frequency domain, even if cpufreq doesn't know about it. I don't know if this affects the behavior of the governors in any way, but it might be a bug in intel_pstate that could one day be fixed. If it is, then any heterogeneous-uarch chips with both CPU types sharing a voltage rail would have CPUs with different max capacity in the same frequency domain. This might present a problem for any future attempt to harmonize treatment of big.LITTLE between ARM and x86. [1]: https://edc.intel.com/content/www/us/en/design/ipla/software-development-platforms/client/platforms/alder-lake-desktop/12th-generation-intel-core-processors-datasheet-volume-1-of-2/processor-power-rails_1/
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 1207c78f85c1..9161d1136d01 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -25,6 +25,9 @@ struct sugov_policy { unsigned int next_freq; unsigned int cached_raw_freq; + /* max CPU capacity, which is equal for all CPUs in freq. domain */ + unsigned long max; + /* The next fields are only needed if fast switch cannot be used: */ struct irq_work irq_work; struct kthread_work work; @@ -48,7 +51,6 @@ struct sugov_cpu { unsigned long util; unsigned long bw_dl; - unsigned long max; /* The field below is for single-CPU policies only: */ #ifdef CONFIG_NO_HZ_COMMON @@ -158,7 +160,6 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); - sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->bw_dl = cpu_bw_dl(rq); sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), FREQUENCY_UTIL, NULL); @@ -253,6 +254,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, */ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) { + struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long boost; /* No boost currently required */ @@ -280,7 +282,8 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) * sg_cpu->util is already in capacity scale; convert iowait_boost * into the same scale so we can compare. */ - boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; + boost = sg_cpu->iowait_boost * sg_policy->max; + boost >>= SCHED_CAPACITY_SHIFT; boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); if (sg_cpu->util < boost) sg_cpu->util = boost; @@ -337,7 +340,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, if (!sugov_update_single_common(sg_cpu, time, flags)) return; - next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max); /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. @@ -373,6 +376,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long prev_util = sg_cpu->util; /* @@ -399,7 +403,8 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, sg_cpu->util = prev_util; cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), - map_util_perf(sg_cpu->util), sg_cpu->max); + map_util_perf(sg_cpu->util), + sg_policy->max); sg_cpu->sg_policy->last_freq_update_time = time; } @@ -408,25 +413,19 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned long util = 0, max = 1; + unsigned long util = 0; unsigned int j; for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); - unsigned long j_util, j_max; sugov_get_util(j_sg_cpu); sugov_iowait_apply(j_sg_cpu, time); - j_util = j_sg_cpu->util; - j_max = j_sg_cpu->max; - if (j_util * max > j_max * util) { - util = j_util; - max = j_max; - } + util = max(j_sg_cpu->util, util); } - return get_next_freq(sg_policy, util, max); + return get_next_freq(sg_policy, util, sg_policy->max); } static void @@ -752,7 +751,7 @@ static int sugov_start(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); - unsigned int cpu; + unsigned int cpu = cpumask_first(policy->cpus); sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; sg_policy->last_freq_update_time = 0; @@ -760,6 +759,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->work_in_progress = false; sg_policy->limits_changed = false; sg_policy->cached_raw_freq = 0; + sg_policy->max = arch_scale_cpu_capacity(cpu); sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);