Message ID | 20220425123819.137735-3-pierre.gondois@arm.com |
---|---|
State | Accepted |
Commit | 740fcdc2c20ecf855b36b919d7fa1b872b5a7eae |
Headers | show |
Series | Enable EAS for CPPC/ACPI based systems | expand |
Hi, There is a warning on arm64 platform when CONFIG_ENERGY_MODEL is not set: drivers/cpufreq/cppc_cpufreq.c:550:12: error: ‘cppc_get_cpu_cost’ defined but not used [-Werror=unused-function] 550 | static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz, | ^~~~~~~~~~~~~~~~~ drivers/cpufreq/cppc_cpufreq.c:481:12: error: ‘cppc_get_cpu_power’ defined but not used [-Werror=unused-function] 481 | static int cppc_get_cpu_power(struct device *cpu_dev, | ^~~~~~~~~~~~~~~~~~ Thanks, Shaokun On 2022/4/25 20:38, Pierre Gondois wrote: > From: Pierre Gondois <Pierre.Gondois@arm.com> > > Performance states and energy consumption values are not advertised > in ACPI. In the GicC structure of the MADT table, the "Processor > Power Efficiency Class field" (called efficiency class from now on) > allows describing the relative energy efficiency of CPUs. > > To leverage the EM and EAS, the CPPC driver creates a set of > artificial performance states and registers them in the Energy Model > (EM), such as: > - Every 20 capacity units, a performance state is created. > - The energy cost of each performance state gradually increases. > No power value is generated as only the cost is used in the EM. > > During task placement, a task can raise the frequency of its whole > pd. This can make EAS place a task on a pd with CPUs that are > individually less energy efficient. > As cost values are artificial, and to place tasks on CPUs with the > lower efficiency class, a gap in cost values is generated for adjacent > efficiency classes. > E.g.: > - efficiency class = 0, capacity is in [0-1024], so cost values > are in [0: 51] (one performance state every 20 capacity units) > - efficiency class = 1, capacity is in [0-1024], cost values > are in [1*gap+0: 1*gap+51]. > > The value of the cost gap is chosen to absorb the energy of 4 CPUs > at their maximum capacity. This means that between: > 1- a pd of 4 CPUs, each of them being used at almost their full > capacity. 
Their efficiency class is N. > 2- a CPU using almost none of its capacity. Its efficiency class is > N+1 > EAS will choose the first option. > > This patch also populates the (struct cpufreq_driver).register_em > callback if the valid efficiency_class ACPI values are provided. > > Signed-off-by: Pierre Gondois <Pierre.Gondois@arm.com> > --- > drivers/cpufreq/cppc_cpufreq.c | 144 +++++++++++++++++++++++++++++++++ > 1 file changed, 144 insertions(+) > > diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c > index 3cd05651707d..3eaa23d1aaf5 100644 > --- a/drivers/cpufreq/cppc_cpufreq.c > +++ b/drivers/cpufreq/cppc_cpufreq.c > @@ -421,6 +421,134 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) > } > > static DEFINE_PER_CPU(unsigned int, efficiency_class); > +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy); > + > +/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */ > +#define CPPC_EM_CAP_STEP (20) > +/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */ > +#define CPPC_EM_COST_STEP (1) > +/* Add a cost gap correspnding to the energy of 4 CPUs. 
*/ > +#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \ > + / CPPC_EM_CAP_STEP) > + > +static unsigned int get_perf_level_count(struct cpufreq_policy *policy) > +{ > + struct cppc_perf_caps *perf_caps; > + unsigned int min_cap, max_cap; > + struct cppc_cpudata *cpu_data; > + int cpu = policy->cpu; > + > + cpu_data = policy->driver_data; > + perf_caps = &cpu_data->perf_caps; > + max_cap = arch_scale_cpu_capacity(cpu); > + min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf); > + if ((min_cap == 0) || (max_cap < min_cap)) > + return 0; > + return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP; > +} > + > +/* > + * The cost is defined as: > + * cost = power * max_frequency / frequency > + */ > +static inline unsigned long compute_cost(int cpu, int step) > +{ > + return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) + > + step * CPPC_EM_COST_STEP; > +} > + > +static int cppc_get_cpu_power(struct device *cpu_dev, > + unsigned long *power, unsigned long *KHz) > +{ > + unsigned long perf_step, perf_prev, perf, perf_check; > + unsigned int min_step, max_step, step, step_check; > + unsigned long prev_freq = *KHz; > + unsigned int min_cap, max_cap; > + struct cpufreq_policy *policy; > + > + struct cppc_perf_caps *perf_caps; > + struct cppc_cpudata *cpu_data; > + > + policy = cpufreq_cpu_get_raw(cpu_dev->id); > + cpu_data = policy->driver_data; > + perf_caps = &cpu_data->perf_caps; > + max_cap = arch_scale_cpu_capacity(cpu_dev->id); > + min_cap = div_u64(max_cap * perf_caps->lowest_perf, > + perf_caps->highest_perf); > + > + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; > + min_step = min_cap / CPPC_EM_CAP_STEP; > + max_step = max_cap / CPPC_EM_CAP_STEP; > + > + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); > + step = perf_prev / perf_step; > + > + if (step > max_step) > + return -EINVAL; > + > + if (min_step == max_step) { > + step = max_step; > + perf = perf_caps->highest_perf; > + } 
else if (step < min_step) { > + step = min_step; > + perf = perf_caps->lowest_perf; > + } else { > + step++; > + if (step == max_step) > + perf = perf_caps->highest_perf; > + else > + perf = step * perf_step; > + } > + > + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); > + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); > + step_check = perf_check / perf_step; > + > + /* > + * To avoid bad integer approximation, check that new frequency value > + * increased and that the new frequency will be converted to the > + * desired step value. > + */ > + while ((*KHz == prev_freq) || (step_check != step)) { > + perf++; > + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); > + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); > + step_check = perf_check / perf_step; > + } > + > + /* > + * With an artificial EM, only the cost value is used. Still the power > + * is populated such as 0 < power < EM_MAX_POWER. This allows to add > + * more sense to the artificial performance states. > + */ > + *power = compute_cost(cpu_dev->id, step); > + > + return 0; > +} > + > +static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz, > + unsigned long *cost) > +{ > + unsigned long perf_step, perf_prev; > + struct cppc_perf_caps *perf_caps; > + struct cpufreq_policy *policy; > + struct cppc_cpudata *cpu_data; > + unsigned int max_cap; > + int step; > + > + policy = cpufreq_cpu_get_raw(cpu_dev->id); > + cpu_data = policy->driver_data; > + perf_caps = &cpu_data->perf_caps; > + max_cap = arch_scale_cpu_capacity(cpu_dev->id); > + > + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz); > + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; > + step = perf_prev / perf_step; > + > + *cost = compute_cost(cpu_dev->id, step); > + > + return 0; > +} > > static int populate_efficiency_class(void) > { > @@ -453,10 +581,23 @@ static int populate_efficiency_class(void) > } > index++; > } > + cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em; > > return 0; 
> } > > +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) > +{ > + struct cppc_cpudata *cpu_data; > + struct em_data_callback em_cb = > + EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost); > + > + cpu_data = policy->driver_data; > + em_dev_register_perf_domain(get_cpu_device(policy->cpu), > + get_perf_level_count(policy), &em_cb, > + cpu_data->shared_cpu_map, 0); > +} > + > #else > > static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) > @@ -467,6 +608,9 @@ static int populate_efficiency_class(void) > { > return 0; > } > +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) > +{ > +} > #endif > > >
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 3cd05651707d..3eaa23d1aaf5 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -421,6 +421,134 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) } static DEFINE_PER_CPU(unsigned int, efficiency_class); +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy); + +/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */ +#define CPPC_EM_CAP_STEP (20) +/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */ +#define CPPC_EM_COST_STEP (1) +/* Add a cost gap correspnding to the energy of 4 CPUs. */ +#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \ + / CPPC_EM_CAP_STEP) + +static unsigned int get_perf_level_count(struct cpufreq_policy *policy) +{ + struct cppc_perf_caps *perf_caps; + unsigned int min_cap, max_cap; + struct cppc_cpudata *cpu_data; + int cpu = policy->cpu; + + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu); + min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf); + if ((min_cap == 0) || (max_cap < min_cap)) + return 0; + return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP; +} + +/* + * The cost is defined as: + * cost = power * max_frequency / frequency + */ +static inline unsigned long compute_cost(int cpu, int step) +{ + return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) + + step * CPPC_EM_COST_STEP; +} + +static int cppc_get_cpu_power(struct device *cpu_dev, + unsigned long *power, unsigned long *KHz) +{ + unsigned long perf_step, perf_prev, perf, perf_check; + unsigned int min_step, max_step, step, step_check; + unsigned long prev_freq = *KHz; + unsigned int min_cap, max_cap; + struct cpufreq_policy *policy; + + struct cppc_perf_caps *perf_caps; + struct cppc_cpudata *cpu_data; + + policy = cpufreq_cpu_get_raw(cpu_dev->id); + 
cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu_dev->id); + min_cap = div_u64(max_cap * perf_caps->lowest_perf, + perf_caps->highest_perf); + + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + min_step = min_cap / CPPC_EM_CAP_STEP; + max_step = max_cap / CPPC_EM_CAP_STEP; + + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step = perf_prev / perf_step; + + if (step > max_step) + return -EINVAL; + + if (min_step == max_step) { + step = max_step; + perf = perf_caps->highest_perf; + } else if (step < min_step) { + step = min_step; + perf = perf_caps->lowest_perf; + } else { + step++; + if (step == max_step) + perf = perf_caps->highest_perf; + else + perf = step * perf_step; + } + + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step_check = perf_check / perf_step; + + /* + * To avoid bad integer approximation, check that new frequency value + * increased and that the new frequency will be converted to the + * desired step value. + */ + while ((*KHz == prev_freq) || (step_check != step)) { + perf++; + *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf); + perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz); + step_check = perf_check / perf_step; + } + + /* + * With an artificial EM, only the cost value is used. Still the power + * is populated such as 0 < power < EM_MAX_POWER. This allows to add + * more sense to the artificial performance states. 
+ */ + *power = compute_cost(cpu_dev->id, step); + + return 0; +} + +static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz, + unsigned long *cost) +{ + unsigned long perf_step, perf_prev; + struct cppc_perf_caps *perf_caps; + struct cpufreq_policy *policy; + struct cppc_cpudata *cpu_data; + unsigned int max_cap; + int step; + + policy = cpufreq_cpu_get_raw(cpu_dev->id); + cpu_data = policy->driver_data; + perf_caps = &cpu_data->perf_caps; + max_cap = arch_scale_cpu_capacity(cpu_dev->id); + + perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz); + perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + step = perf_prev / perf_step; + + *cost = compute_cost(cpu_dev->id, step); + + return 0; +} static int populate_efficiency_class(void) { @@ -453,10 +581,23 @@ static int populate_efficiency_class(void) } index++; } + cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em; return 0; } +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) +{ + struct cppc_cpudata *cpu_data; + struct em_data_callback em_cb = + EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost); + + cpu_data = policy->driver_data; + em_dev_register_perf_domain(get_cpu_device(policy->cpu), + get_perf_level_count(policy), &em_cb, + cpu_data->shared_cpu_map, 0); +} + #else static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) @@ -467,6 +608,9 @@ static int populate_efficiency_class(void) { return 0; } +static void cppc_cpufreq_register_em(struct cpufreq_policy *policy) +{ +} #endif