Message ID | 1622804761-126737-1-git-send-email-vincent.donnefort@arm.com |
---|---|
Headers | show |
Series | EM / PM: Inefficient OPPs | expand |
On Fri, Jun 04, 2021 at 12:05:56PM +0100, Vincent Donnefort wrote: > Currently, a debug message is printed if an inefficient state is detected > in the Energy Model. Unfortunately, it won't detect if the first state is > inefficient or if two successive states are. Fix this behavior. > > Fixes: 27871f7a (PM: Introduce an Energy Model management framework) > Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com> > Reviewed-by: Quentin Perret <qperret@google.com> > Reviewed-by: Lukasz Luba <lukasz.luba@arm.com> > > diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c > index 0c620eb..c4871a8 100644 > --- a/kernel/power/energy_model.c > +++ b/kernel/power/energy_model.c > @@ -107,8 +107,7 @@ static void em_debug_remove_pd(struct device *dev) {} > static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, > int nr_states, struct em_data_callback *cb) > { > - unsigned long opp_eff, prev_opp_eff = ULONG_MAX; > - unsigned long power, freq, prev_freq = 0; > + unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX; > struct em_perf_state *table; > int i, ret; > u64 fmax; > @@ -153,25 +152,19 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, > > table[i].power = power; > table[i].frequency = prev_freq = freq; > - > - /* > - * The hertz/watts efficiency ratio should decrease as the > - * frequency grows on sane platforms. But this isn't always > - * true in practice so warn the user if a higher OPP is more > - * power efficient than a lower one. > - */ > - opp_eff = freq / power; > - if (opp_eff >= prev_opp_eff) > - dev_dbg(dev, "EM: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n", > - i, i - 1); > - prev_opp_eff = opp_eff; > } > > /* Compute the cost of each performance state. */ > fmax = (u64) table[nr_states - 1].frequency; > - for (i = 0; i < nr_states; i++) { > + for (i = nr_states - 1; i >= 0; i--) { > table[i].cost = div64_u64(fmax * table[i].power, > table[i].frequency); > + if (table[i].cost >= prev_cost) { > + dev_dbg(dev, "EM: OPP:%lu is inefficient\n", > + table[i].frequency); > + } else { > + prev_cost = table[i].cost; > + } nit: curly braces aren't needed, especially if you change the 'dev_dbg' statement to be a single line. Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
On Fri, Jun 04, 2021 at 12:05:58PM +0100, Vincent Donnefort wrote: > Some SoCs such as the sd855 have OPPs within the same policy whose cost is > higher than others with a higher frequency. Those OPPs are inefficients and > it might be interesting for a governor to not use them. > > Adding a flag, CPUFREQ_INEFFICIENT_FREQ, to mark such OPPs into the > frequency table, as well as a new cpufreq_frequency_table member > "efficient". This new member will allow a governor to quickly resolve an > inefficient frequency to an efficient one. > > Efficient OPPs point to themselves. Governors must also ensure that the > efficiency resolution does not break the policy maximum. > > Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com> > > diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c > index 67e56cf..0756d7d6 100644 > --- a/drivers/cpufreq/freq_table.c > +++ b/drivers/cpufreq/freq_table.c > @@ -351,6 +351,53 @@ static int set_freq_table_sorted(struct cpufreq_policy *policy) > return 0; > } > > +static void set_freq_table_efficiencies(struct cpufreq_policy *policy) > +{ > + struct cpufreq_frequency_table *pos, *table = policy->freq_table; > + enum cpufreq_table_sorting sort = policy->freq_table_sorted; > + int efficient, idx; > + > + /* Not supported */ > + if (sort == CPUFREQ_TABLE_UNSORTED) { > + cpufreq_for_each_entry_idx(pos, table, idx) > + pos->efficient = idx; > + return; > + } > + > + /* The highest frequency is always efficient */ > + cpufreq_for_each_entry_idx(pos, table, idx) { > + if (pos->frequency == CPUFREQ_ENTRY_INVALID) > + continue; > + > + efficient = idx; > + > + if (sort == CPUFREQ_TABLE_SORTED_DESCENDING) > + break; > + } > + > + for (;;) { > + pos = &table[idx]; > + > + if (pos->frequency == CPUFREQ_ENTRY_INVALID) > + continue; That would result in an infinite loop, you still want to execute the code at the bottom of the loop which increments/decrements 'idx' and exits the loop when the end of the table is reached. > + > + if (pos->flags & CPUFREQ_INEFFICIENT_FREQ) { > + pos->efficient = efficient; > + } else { > + pos->efficient = idx; > + efficient = idx; > + } > + > + if (sort == CPUFREQ_TABLE_SORTED_ASCENDING) { > + if (--idx < 0) > + break; > + } else { > + if (table[++idx].frequency == CPUFREQ_TABLE_END) > + break; > + } > + } > +} > + > int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) > { > int ret; > @@ -362,7 +409,13 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) > if (ret) > return ret; > > - return set_freq_table_sorted(policy); > + ret = set_freq_table_sorted(policy); > + if (ret) > + return ret; > + > + set_freq_table_efficiencies(policy); > + > + return ret; > } > > MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); > diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h > index 353969c..d10784c 100644 > --- a/include/linux/cpufreq.h > +++ b/include/linux/cpufreq.h > @@ -666,13 +666,15 @@ struct governor_attr { > #define CPUFREQ_ENTRY_INVALID ~0u > #define CPUFREQ_TABLE_END ~1u > /* Special Values of .flags field */ > -#define CPUFREQ_BOOST_FREQ (1 << 0) > +#define CPUFREQ_BOOST_FREQ (1 << 0) > +#define CPUFREQ_INEFFICIENT_FREQ (1 << 1) > > struct cpufreq_frequency_table { > unsigned int flags; > unsigned int driver_data; /* driver specific data, not used by core */ > unsigned int frequency; /* kHz - doesn't need to be in ascending > * order */ > + unsigned int efficient; /* idx of an efficient frequency */ > }; > > #if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) > -- > 2.7.4 >
On Fri, Jun 04, 2021 at 12:06:00PM +0100, Vincent Donnefort wrote: > The Energy Model has a 1:1 mapping between OPPs and performance states > (em_perf_state). We can then read which states are inefficient and use this > information to mark the cpufreq table. > > Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com> > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c > index 7431f40a..34d344d 100644 > --- a/drivers/cpufreq/cpufreq.c > +++ b/drivers/cpufreq/cpufreq.c > @@ -19,6 +19,7 @@ > #include <linux/cpu_cooling.h> > #include <linux/delay.h> > #include <linux/device.h> > +#include <linux/energy_model.h> > #include <linux/init.h> > #include <linux/kernel_stat.h> > #include <linux/module.h> > @@ -1317,6 +1318,31 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) > kfree(policy); > } > > +static void cpufreq_inefficiencies_from_em(struct cpufreq_policy *policy, > + struct em_perf_domain *em_pd) > +{ > + struct cpufreq_frequency_table *pos, *table = policy->freq_table; > + struct em_perf_state *em_table; > + int i; > + > + if (!em_pd) > + return; > + > + em_table = em_pd->table; > + > + for (i = 0; i < em_pd->nr_perf_states; i++) { > + if (!(em_table[i].flags & EM_PERF_STATE_INEFFICIENT)) > + continue; > + > + cpufreq_for_each_valid_entry(pos, table) { > + if (pos->frequency == em_table[i].frequency) { > + pos->flags |= CPUFREQ_INEFFICIENT_FREQ; > + break; > + } > + } > + } > +} > + > static int cpufreq_online(unsigned int cpu) > { > struct cpufreq_policy *policy; > @@ -1371,6 +1397,12 @@ static int cpufreq_online(unsigned int cpu) > goto out_free_policy; > } > > + /* > + * Sync potential inefficiencies with an Energy Model that the > + * driver might have registered. > + */ > + cpufreq_inefficiencies_from_em(policy, em_cpu_get(cpu)); > + > ret = cpufreq_table_validate_and_sort(policy); > if (ret) > goto out_exit_policy; Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
> > + for (;;) { > > + pos = &table[idx]; > > + > > + if (pos->frequency == CPUFREQ_ENTRY_INVALID) > > + continue; > > That would result in an infinite loop, you still want to execute the code > at the bottom of the loop which increments/decrements 'idx' and exits the > loop when the end of the table is reached. Arg, indeed, sorry for that ugly thing. I'll wait for more input from Viresh before submitting a fixed version. > > > + > > + if (pos->flags & CPUFREQ_INEFFICIENT_FREQ) { > > + pos->efficient = efficient; > > + } else { > > + pos->efficient = idx; > > + efficient = idx; > > + } > > + > > + if (sort == CPUFREQ_TABLE_SORTED_ASCENDING) { > > + if (--idx < 0) > > + break; > > + } else { > > + if (table[++idx].frequency == CPUFREQ_TABLE_END) > > + break; > > + } > > + } > > +} > > +