diff mbox series

[v8,3/6] cpufreq: amd-pstate: implement cpb_boost sysfs entry for boost control

Message ID 64eb440fd48d10a55525253bce2e9143db872f67.1714112854.git.perry.yuan@amd.com
State New
Headers show
Series AMD Pstate Driver Core Performance Boost | expand

Commit Message

Yuan, Perry April 26, 2024, 6:34 a.m. UTC
From: Perry Yuan <Perry.Yuan@amd.com>

With this new sysfs entry `cpb_boost`created, user can change CPU boost
state dynamically under `active`, `guided` and `passive` modes.
And the highest perf and frequency will also be updated as the boost
state changing.

0): check current boost state
cat /sys/devices/system/cpu/amd_pstate/cpb_boost

1): disable CPU boost
sudo bash -c "echo 0 > /sys/devices/system/cpu/amd_pstate/cpb_boost"

2): enable CPU boost
sudo bash -c "echo 1 > /sys/devices/system/cpu/amd_pstate/cpb_boost"

Link: https://bugzilla.kernel.org/show_bug.cgi?id=217931
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217618
Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
---
 drivers/cpufreq/amd-pstate.c | 99 ++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

Comments

Mario Limonciello April 26, 2024, 2:07 p.m. UTC | #1
On 4/26/2024 01:34, Perry Yuan wrote:
> From: Perry Yuan <Perry.Yuan@amd.com>
> 
> With this new sysfs entry `cpb_boost`created, user can change CPU boost
> state dynamically under `active`, `guided` and `passive` modes.
> And the highest perf and frequency will also be updated as the boost
> state changing.
> 
> 0): check current boost state
> cat /sys/devices/system/cpu/amd_pstate/cpb_boost
> 
> 1): disable CPU boost
> sudo bash -c "echo 0 > /sys/devices/system/cpu/amd_pstate/cpb_boost"
> 
> 2): enable CPU boost
> sudo bash -c "echo 1 > /sys/devices/system/cpu/amd_pstate/cpb_boost"
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=217931
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=217618
> Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> ---
>   drivers/cpufreq/amd-pstate.c | 99 ++++++++++++++++++++++++++++++++++++
>   1 file changed, 99 insertions(+)
> 
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index aa17a3134497..53ef39c6dbfa 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -1295,6 +1295,103 @@ static ssize_t prefcore_show(struct device *dev,
>   	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
>   }
>   
> +static int amd_cpu_boost_update(struct amd_cpudata *cpudata, u32 on)
> +{
> +	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
> +	struct cppc_perf_ctrls perf_ctrls;
> +	u32 highest_perf, nominal_perf, nominal_freq, max_freq;
> +	int ret;
> +
> +	if (!policy)
> +		return -ENODATA;
> +
> +	highest_perf = READ_ONCE(cpudata->highest_perf);
> +	nominal_perf = READ_ONCE(cpudata->nominal_perf);
> +	nominal_freq = READ_ONCE(cpudata->nominal_freq);
> +	max_freq = READ_ONCE(cpudata->max_freq);
> +
> +	if (boot_cpu_has(X86_FEATURE_CPPC)) {
> +		u64 value = READ_ONCE(cpudata->cppc_req_cached);
> +
> +		value &= ~GENMASK_ULL(7, 0);
> +		value |= on ? highest_perf : nominal_perf;
> +		WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> +		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> +	} else {
> +		perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
> +		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> +		if (ret) {
> +			cpufreq_cpu_release(policy);
> +			pr_debug("failed to set energy perf value (%d)\n", ret);
> +			return ret;
> +		}
> +	}
> +
> +	if (on)
> +		policy->cpuinfo.max_freq = max_freq;
> +	else
> +		policy->cpuinfo.max_freq = nominal_freq * 1000;
> +
> +	policy->max = policy->cpuinfo.max_freq;
> +
> +	if (cppc_state == AMD_PSTATE_PASSIVE) {
> +		ret = freq_qos_update_request(&cpudata->req[1],
> +				      policy->cpuinfo.max_freq);
> +	}
> +
> +	cpufreq_cpu_release(policy);
> +
> +	return ret;
> +}
> +
> +static ssize_t cpb_boost_show(struct device *dev,
> +			   struct device_attribute *attr, char *buf)
> +{
> +	return sysfs_emit(buf, "%u\n", amd_pstate_global_params.cpb_boost);
> +}
> +
> +static ssize_t cpb_boost_store(struct device *dev, struct device_attribute *b,
> +			    const char *buf, size_t count)
> +{
> +	bool new_state;
> +	ssize_t ret;
> +	int cpu;
> +
> +	if (!amd_pstate_global_params.cpb_supported) {
> +		pr_err("Boost mode is not supported by this processor or SBIOS\n");
> +		return -EINVAL;
> +	}

As this is information that will be known when you first probe, I feel 
it would be better to affect the visibility of the file than let a user 
discover it doesn't work when they try to write it.

Thinking down the road software like power-profiles-daemon and tuned 
will probably adapt to this new file and if their contract is that the 
file exists means they should write it that's going to turn into 
needless errors in any such system's log on every boot.

> +
> +	ret = kstrtobool(buf, &new_state);
> +	if (ret)
> +		return ret;
> +
> +	mutex_lock(&amd_pstate_driver_lock);
> +	amd_pstate_global_params.cpb_boost = !!new_state;
> +
> +	for_each_present_cpu(cpu) {

It seems to me that by making an attribute for every single CPU this is 
wrong.  It means that writing boost for one CPU's files applies to all 
other CPUs too.

If it's going to be a global attribute that is shared by all CPUs it 
should be a single file.

Otherwise this is going to be a problem if (for example) software tries 
to turn on boost for only 1 CPU.  Think of this sequence:

1) 16 CPUs, cpb_boost is currently turned off.
2) Software tries to turn CPB boost on for the "first" CPU only.
3) It reads the value of the first CPU and sees it's 0.  So It changes 
the value for the first CPU (which causes a global change).
4) It reads the value for the second CPU and sees it's 1.  It tries to 
change this back to zero, which again causes a global change.
5) It checks all the others and they're all 0 and it does nothing.

So you see by having a global attribute which is shared with every 
single CPU you now have a flow problem that userspace can introduce.

> +
> +		struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> +		struct amd_cpudata *cpudata = policy->driver_data;
> +
> +		if (!cpudata) {
> +			pr_err("cpudata is NULL\n");
> +			ret = -ENODATA;
> +			cpufreq_cpu_put(policy);
> +			goto err_exit;
> +		}
> +
> +		amd_cpu_boost_update(cpudata, amd_pstate_global_params.cpb_boost);
> +		refresh_frequency_limits(policy);
> +		cpufreq_cpu_put(policy);
> +	}
> +
> +err_exit:
> +	mutex_unlock(&amd_pstate_driver_lock);
> +	return ret < 0 ? ret : count;
> +}
> +
>   cpufreq_freq_attr_ro(amd_pstate_max_freq);
>   cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>   
> @@ -1305,6 +1402,7 @@ cpufreq_freq_attr_rw(energy_performance_preference);
>   cpufreq_freq_attr_ro(energy_performance_available_preferences);
>   static DEVICE_ATTR_RW(status);
>   static DEVICE_ATTR_RO(prefcore);
> +static DEVICE_ATTR_RW(cpb_boost);
>   
>   static struct freq_attr *amd_pstate_attr[] = {
>   	&amd_pstate_max_freq,
> @@ -1329,6 +1427,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
>   static struct attribute *pstate_global_attributes[] = {
>   	&dev_attr_status.attr,
>   	&dev_attr_prefcore.attr,
> +	&dev_attr_cpb_boost.attr,
>   	NULL
>   };
>
Mario Limonciello April 27, 2024, 8:24 p.m. UTC | #2
On 4/26/2024 09:07, Mario Limonciello wrote:
> On 4/26/2024 01:34, Perry Yuan wrote:
>> From: Perry Yuan <Perry.Yuan@amd.com>
>>
>> With this new sysfs entry `cpb_boost`created, user can change CPU boost
>> state dynamically under `active`, `guided` and `passive` modes.
>> And the highest perf and frequency will also be updated as the boost
>> state changing.
>>
>> 0): check current boost state
>> cat /sys/devices/system/cpu/amd_pstate/cpb_boost
>>
>> 1): disable CPU boost
>> sudo bash -c "echo 0 > /sys/devices/system/cpu/amd_pstate/cpb_boost"
>>
>> 2): enable CPU boost
>> sudo bash -c "echo 1 > /sys/devices/system/cpu/amd_pstate/cpb_boost"
>>
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=217931
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=217618
>> Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
>> ---
>>   drivers/cpufreq/amd-pstate.c | 99 ++++++++++++++++++++++++++++++++++++
>>   1 file changed, 99 insertions(+)
>>
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index aa17a3134497..53ef39c6dbfa 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -1295,6 +1295,103 @@ static ssize_t prefcore_show(struct device *dev,
>>       return sysfs_emit(buf, "%s\n", 
>> str_enabled_disabled(amd_pstate_prefcore));
>>   }
>> +static int amd_cpu_boost_update(struct amd_cpudata *cpudata, u32 on)
>> +{
>> +    struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
>> +    struct cppc_perf_ctrls perf_ctrls;
>> +    u32 highest_perf, nominal_perf, nominal_freq, max_freq;
>> +    int ret;
>> +
>> +    if (!policy)
>> +        return -ENODATA;
>> +
>> +    highest_perf = READ_ONCE(cpudata->highest_perf);
>> +    nominal_perf = READ_ONCE(cpudata->nominal_perf);
>> +    nominal_freq = READ_ONCE(cpudata->nominal_freq);
>> +    max_freq = READ_ONCE(cpudata->max_freq);
>> +
>> +    if (boot_cpu_has(X86_FEATURE_CPPC)) {
>> +        u64 value = READ_ONCE(cpudata->cppc_req_cached);
>> +
>> +        value &= ~GENMASK_ULL(7, 0);
>> +        value |= on ? highest_perf : nominal_perf;
>> +        WRITE_ONCE(cpudata->cppc_req_cached, value);
>> +
>> +        wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
>> +    } else {
>> +        perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
>> +        ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
>> +        if (ret) {
>> +            cpufreq_cpu_release(policy);
>> +            pr_debug("failed to set energy perf value (%d)\n", ret);
>> +            return ret;
>> +        }
>> +    }
>> +
>> +    if (on)
>> +        policy->cpuinfo.max_freq = max_freq;
>> +    else
>> +        policy->cpuinfo.max_freq = nominal_freq * 1000;
>> +
>> +    policy->max = policy->cpuinfo.max_freq;
>> +
>> +    if (cppc_state == AMD_PSTATE_PASSIVE) {
>> +        ret = freq_qos_update_request(&cpudata->req[1],
>> +                      policy->cpuinfo.max_freq);
>> +    }
>> +
>> +    cpufreq_cpu_release(policy);
>> +
>> +    return ret;
>> +}
>> +
>> +static ssize_t cpb_boost_show(struct device *dev,
>> +               struct device_attribute *attr, char *buf)
>> +{
>> +    return sysfs_emit(buf, "%u\n", amd_pstate_global_params.cpb_boost);
>> +}
>> +
>> +static ssize_t cpb_boost_store(struct device *dev, struct 
>> device_attribute *b,
>> +                const char *buf, size_t count)
>> +{
>> +    bool new_state;
>> +    ssize_t ret;
>> +    int cpu;
>> +
>> +    if (!amd_pstate_global_params.cpb_supported) {
>> +        pr_err("Boost mode is not supported by this processor or 
>> SBIOS\n");
>> +        return -EINVAL;
>> +    }
> 
> As this is information that will be known when you first probe, I feel 
> it would be better to affect the visibility of the file than let a user 
> discover it doesn't work when they try to write it.
> 
> Thinking down the road software like power-profiles-daemon and tuned 
> will probably adapt to this new file and if their contract is that the 
> file exists means they should write it that's going to turn into 
> needless errors in any such system's log on every boot.
> 
>> +
>> +    ret = kstrtobool(buf, &new_state);
>> +    if (ret)
>> +        return ret;
>> +
>> +    mutex_lock(&amd_pstate_driver_lock);
>> +    amd_pstate_global_params.cpb_boost = !!new_state;
>> +
>> +    for_each_present_cpu(cpu) {
> 
> It seems to me that by making an attribute for every single CPU this is 
> wrong.  It means that writing boost for one CPU's files applies to all 
> other CPUs too.
> 
> If it's going to be a global attribute that is shared by all CPUs it 
> should be a single file.
> 
> Otherwise this is going to be a problem if (for example) software tries 
> to turn on boost for only 1 CPU.  Think of this sequence:
> 
> 1) 16 CPUs, cpb_boost is currently turned off.
> 2) Software tries to turn CPB boost on for the "first" CPU only.
> 3) It reads the value of the first CPU and sees it's 0.  So It changes 
> the value for the first CPU (which causes a global change).
> 4) It reads the value for the second CPU and sees it's 1.  It tries to 
> change this back to zero, which again causes a global change.
> 5) It checks all the others and they're all 0 and it does nothing.
> 
> So you see by having a global attribute which is shared with every 
> single CPU you now have a flow problem that userspace can introduce.
> 

Hi,

Sorry, I can see now that it current IS a global attribute in this 
implementation (IE it's DEVICE_ATTR_RW not cpufreq_freq_attr_rw).

However as you're already actually applying it to each CPU individually, 
I do think it is better to make it a per CPU attribute.

There could be cases that software only wants some of the cores to be 
boosted, so could you please instead consider to convert to use 
cpufreq_freq_attr_rw for the attribute and only apply one CPU at a time?

Thanks!

>> +
>> +        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
>> +        struct amd_cpudata *cpudata = policy->driver_data;
>> +
>> +        if (!cpudata) {
>> +            pr_err("cpudata is NULL\n");
>> +            ret = -ENODATA;
>> +            cpufreq_cpu_put(policy);
>> +            goto err_exit;
>> +        }
>> +
>> +        amd_cpu_boost_update(cpudata, 
>> amd_pstate_global_params.cpb_boost);
>> +        refresh_frequency_limits(policy);
>> +        cpufreq_cpu_put(policy);
>> +    }
>> +
>> +err_exit:
>> +    mutex_unlock(&amd_pstate_driver_lock);
>> +    return ret < 0 ? ret : count;
>> +}
>> +
>>   cpufreq_freq_attr_ro(amd_pstate_max_freq);
>>   cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>> @@ -1305,6 +1402,7 @@ 
>> cpufreq_freq_attr_rw(energy_performance_preference);
>>   cpufreq_freq_attr_ro(energy_performance_available_preferences);
>>   static DEVICE_ATTR_RW(status);
>>   static DEVICE_ATTR_RO(prefcore);
>> +static DEVICE_ATTR_RW(cpb_boost);
>>   static struct freq_attr *amd_pstate_attr[] = {
>>       &amd_pstate_max_freq,
>> @@ -1329,6 +1427,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
>>   static struct attribute *pstate_global_attributes[] = {
>>       &dev_attr_status.attr,
>>       &dev_attr_prefcore.attr,
>> +    &dev_attr_cpb_boost.attr,
>>       NULL
>>   };
>
diff mbox series

Patch

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index aa17a3134497..53ef39c6dbfa 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1295,6 +1295,103 @@  static ssize_t prefcore_show(struct device *dev,
 	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
 }
 
+static int amd_cpu_boost_update(struct amd_cpudata *cpudata, u32 on)
+{
+	struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
+	struct cppc_perf_ctrls perf_ctrls;
+	u32 highest_perf, nominal_perf, nominal_freq, max_freq;
+	int ret;
+
+	if (!policy)
+		return -ENODATA;
+
+	highest_perf = READ_ONCE(cpudata->highest_perf);
+	nominal_perf = READ_ONCE(cpudata->nominal_perf);
+	nominal_freq = READ_ONCE(cpudata->nominal_freq);
+	max_freq = READ_ONCE(cpudata->max_freq);
+
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		u64 value = READ_ONCE(cpudata->cppc_req_cached);
+
+		value &= ~GENMASK_ULL(7, 0);
+		value |= on ? highest_perf : nominal_perf;
+		WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+	} else {
+		perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
+		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
+		if (ret) {
+			cpufreq_cpu_release(policy);
+			pr_debug("failed to set energy perf value (%d)\n", ret);
+			return ret;
+		}
+	}
+
+	if (on)
+		policy->cpuinfo.max_freq = max_freq;
+	else
+		policy->cpuinfo.max_freq = nominal_freq * 1000;
+
+	policy->max = policy->cpuinfo.max_freq;
+
+	if (cppc_state == AMD_PSTATE_PASSIVE) {
+		ret = freq_qos_update_request(&cpudata->req[1],
+				      policy->cpuinfo.max_freq);
+	}
+
+	cpufreq_cpu_release(policy);
+
+	return ret;
+}
+
+static ssize_t cpb_boost_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%u\n", amd_pstate_global_params.cpb_boost);
+}
+
+static ssize_t cpb_boost_store(struct device *dev, struct device_attribute *b,
+			    const char *buf, size_t count)
+{
+	bool new_state;
+	ssize_t ret;
+	int cpu;
+
+	if (!amd_pstate_global_params.cpb_supported) {
+		pr_err("Boost mode is not supported by this processor or SBIOS\n");
+		return -EINVAL;
+	}
+
+	ret = kstrtobool(buf, &new_state);
+	if (ret)
+		return ret;
+
+	mutex_lock(&amd_pstate_driver_lock);
+	amd_pstate_global_params.cpb_boost = !!new_state;
+
+	for_each_present_cpu(cpu) {
+
+		struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+		struct amd_cpudata *cpudata = policy->driver_data;
+
+		if (!cpudata) {
+			pr_err("cpudata is NULL\n");
+			ret = -ENODATA;
+			cpufreq_cpu_put(policy);
+			goto err_exit;
+		}
+
+		amd_cpu_boost_update(cpudata, amd_pstate_global_params.cpb_boost);
+		refresh_frequency_limits(policy);
+		cpufreq_cpu_put(policy);
+	}
+
+err_exit:
+	mutex_unlock(&amd_pstate_driver_lock);
+	return ret < 0 ? ret : count;
+}
+
 cpufreq_freq_attr_ro(amd_pstate_max_freq);
 cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
 
@@ -1305,6 +1402,7 @@  cpufreq_freq_attr_rw(energy_performance_preference);
 cpufreq_freq_attr_ro(energy_performance_available_preferences);
 static DEVICE_ATTR_RW(status);
 static DEVICE_ATTR_RO(prefcore);
+static DEVICE_ATTR_RW(cpb_boost);
 
 static struct freq_attr *amd_pstate_attr[] = {
 	&amd_pstate_max_freq,
@@ -1329,6 +1427,7 @@  static struct freq_attr *amd_pstate_epp_attr[] = {
 static struct attribute *pstate_global_attributes[] = {
 	&dev_attr_status.attr,
 	&dev_attr_prefcore.attr,
+	&dev_attr_cpb_boost.attr,
 	NULL
 };