diff mbox series

[1/2] thermal: intel: int340x: Add performance control for platform temperature control

Message ID 20250604203518.2330533-1-srinivas.pandruvada@linux.intel.com
State New
Headers show
Series [1/2] thermal: intel: int340x: Add performance control for platform temperature control | expand

Commit Message

Srinivas Pandruvada June 4, 2025, 8:35 p.m. UTC
Add additional attribute to control performance of platform temperature
control feature. Two attributes are added:

gain: 0-7 levels, with 0 being most aggressive.
	7 – graceful, favors performance at the expense of temperature
	overshoots
	0 – aggressive, favors tight regulation over performance

min_performance_level: A value from 0-255. Specifies minimum Performance
	level below which the there will be no throttling.
	0 - all levels of throttling allowed including survivability
	actions.
	255 - no throttling allowed.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 Documentation/driver-api/thermal/intel_dptf.rst   | 10 ++++++++++
 .../platform_temperature_control.c                | 15 ++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

Comments

Rafael J. Wysocki June 4, 2025, 8:55 p.m. UTC | #1
On Wed, Jun 4, 2025 at 10:35 PM Srinivas Pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> Add additional attribute to control performance of platform temperature
> control feature. Two attributes are added:

It would be good to explain why they are needed.

> gain: 0-7 levels, with 0 being most aggressive.
>         7 – graceful, favors performance at the expense of temperature
>         overshoots
>         0 – aggressive, favors tight regulation over performance
>
> min_performance_level: A value from 0-255. Specifies minimum Performance
>         level below which the there will be no throttling.
>         0 - all levels of throttling allowed including survivability
>         actions.
>         255 - no throttling allowed.

The description of min_performance_level above doesn't seem to be
consistent to me.  Specifically, the descriptions of the 0 and 255
values appear to indicate that this really is about what kind of
throttling can be applied.

Also, I gather that the units of this number are arbitrary and it is
not tied to anything specific.  I mean, 127 doesn't mean 50% of CPU
performance, for instance.

> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> ---
>  Documentation/driver-api/thermal/intel_dptf.rst   | 10 ++++++++++
>  .../platform_temperature_control.c                | 15 ++++++++++++++-
>  2 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
> index ec5769accae0..794f5cce548e 100644
> --- a/Documentation/driver-api/thermal/intel_dptf.rst
> +++ b/Documentation/driver-api/thermal/intel_dptf.rst
> @@ -206,6 +206,16 @@ All these controls needs admin privilege to update.
>         Update a new temperature target in milli degree celsius for hardware to
>         use for the temperature control.
>
> +``gain`` (RW)
> +       A value in the range 0-7. Sets the aggressiveness of control loop.
> +       7 – graceful, favors performance at the expense of temperature overshoots.
> +       0 – aggressive, favors tight regulation over performance.
> +
> +``min_performance_level`` (RW)
> +       Minimum Performance level below which the there will be no throttling.
> +       0 - all levels of throttling allowed including survivability actions.
> +       256 - no throttling allowed.
> +
>  Given that this is platform temperature control, it is expected that a
>  single user-level manager owns and manages the controls. If multiple
>  user-level software applications attempt to write different targets, it
> diff --git a/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
> index 2d6504514893..6cd05783a52d 100644
> --- a/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
> +++ b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
> @@ -49,7 +49,7 @@ struct mmio_reg {
>  };
>
>  #define MAX_ATTR_GROUP_NAME_LEN        32
> -#define PTC_MAX_ATTRS          3
> +#define PTC_MAX_ATTRS          5
>
>  struct ptc_data {
>         u32 offset;
> @@ -57,6 +57,8 @@ struct ptc_data {
>         struct attribute *ptc_attrs[PTC_MAX_ATTRS];
>         struct device_attribute temperature_target_attr;
>         struct device_attribute enable_attr;
> +       struct device_attribute gain_attr;
> +       struct device_attribute min_performance_level_attr;
>         char group_name[MAX_ATTR_GROUP_NAME_LEN];
>  };
>
> @@ -78,6 +80,8 @@ static u32 ptc_offsets[PTC_MAX_INSTANCES] = {0x5B20, 0x5B28, 0x5B30};
>  static const char * const ptc_strings[] = {
>         "temperature_target",
>         "enable",
> +       "gain",
> +       "min_performance_level",
>         NULL
>  };
>
> @@ -177,6 +181,11 @@ PTC_SHOW(temperature_target);
>  PTC_STORE(temperature_target);
>  PTC_SHOW(enable);
>  PTC_STORE(enable);
> +PTC_SHOW(gain);
> +PTC_STORE(gain);
> +PTC_SHOW(min_performance_level);
> +PTC_STORE(min_performance_level);
> +
>
>  #define ptc_init_attribute(_name)\
>         do {\
> @@ -193,9 +202,13 @@ static int ptc_create_groups(struct pci_dev *pdev, int instance, struct ptc_data
>
>         ptc_init_attribute(temperature_target);
>         ptc_init_attribute(enable);
> +       ptc_init_attribute(gain);
> +       ptc_init_attribute(min_performance_level);
>
>         data->ptc_attrs[index++] = &data->temperature_target_attr.attr;
>         data->ptc_attrs[index++] = &data->enable_attr.attr;
> +       data->ptc_attrs[index++] = &data->gain_attr.attr;
> +       data->ptc_attrs[index++] = &data->min_performance_level_attr.attr;
>         data->ptc_attrs[index] = NULL;
>
>         snprintf(data->group_name, MAX_ATTR_GROUP_NAME_LEN,
> --
> 2.49.0
>
Srinivas Pandruvada June 4, 2025, 9:40 p.m. UTC | #2
On Wed, 2025-06-04 at 22:55 +0200, Rafael J. Wysocki wrote:
> On Wed, Jun 4, 2025 at 10:35 PM Srinivas Pandruvada
> <srinivas.pandruvada@linux.intel.com> wrote:
> > 
> > Add additional attribute to control performance of platform
> > temperature
> > control feature. Two attributes are added:
> 
> It would be good to explain why they are needed.

This is to give user space some influence in the aggressiveness of
control loops in the firmware. Firmware may be aggressive but  there
can be some tolerance based on current conditions like ambient
temperature or whether the laptop is placed not on lap. Similar to
current adaptive thermal controls changing temperature limits and power
limits, with the PTC these controls can be used.

> 
> > gain: 0-7 levels, with 0 being most aggressive.
> >         7 – graceful, favors performance at the expense of
> > temperature
> >         overshoots
> >         0 – aggressive, favors tight regulation over performance
> > 
> > min_performance_level: A value from 0-255. Specifies minimum
> > Performance
> >         level below which the there will be no throttling.
> >         0 - all levels of throttling allowed including
> > survivability
> >         actions.
> >         255 - no throttling allowed.
> 
> The description of min_performance_level above doesn't seem to be
> consistent to me.  Specifically, the descriptions of the 0 and 255
> values appear to indicate that this really is about what kind of
> throttling can be applied.

At the end this is some power/frequency throttling to various sub
systems. 0 means that to meet temperature threshold, the power can be
lower than the lowest package power limit which is survivability mode.

> 
> Also, I gather that the units of this number are arbitrary and it is
> not tied to anything specific.  I mean, 127 doesn't mean 50% of CPU
> performance, for instance.
It is unit-less. It may not scale linearly to assume any value to
percent.

Thanks,
Srinivas

> 
> > Signed-off-by: Srinivas Pandruvada
> > <srinivas.pandruvada@linux.intel.com>
> > ---
> >  Documentation/driver-api/thermal/intel_dptf.rst   | 10 ++++++++++
> >  .../platform_temperature_control.c                | 15
> > ++++++++++++++-
> >  2 files changed, 24 insertions(+), 1 deletion(-)
> > 
> > diff --git a/Documentation/driver-api/thermal/intel_dptf.rst
> > b/Documentation/driver-api/thermal/intel_dptf.rst
> > index ec5769accae0..794f5cce548e 100644
> > --- a/Documentation/driver-api/thermal/intel_dptf.rst
> > +++ b/Documentation/driver-api/thermal/intel_dptf.rst
> > @@ -206,6 +206,16 @@ All these controls needs admin privilege to
> > update.
> >         Update a new temperature target in milli degree celsius for
> > hardware to
> >         use for the temperature control.
> > 
> > +``gain`` (RW)
> > +       A value in the range 0-7. Sets the aggressiveness of
> > control loop.
> > +       7 – graceful, favors performance at the expense of
> > temperature overshoots.
> > +       0 – aggressive, favors tight regulation over performance.
> > +
> > +``min_performance_level`` (RW)
> > +       Minimum Performance level below which the there will be no
> > throttling.
> > +       0 - all levels of throttling allowed including
> > survivability actions.
> > +       256 - no throttling allowed.
> > +
> >  Given that this is platform temperature control, it is expected
> > that a
> >  single user-level manager owns and manages the controls. If
> > multiple
> >  user-level software applications attempt to write different
> > targets, it
> > diff --git
> > a/drivers/thermal/intel/int340x_thermal/platform_temperature_contro
> > l.c
> > b/drivers/thermal/intel/int340x_thermal/platform_temperature_contro
> > l.c
> > index 2d6504514893..6cd05783a52d 100644
> > ---
> > a/drivers/thermal/intel/int340x_thermal/platform_temperature_contro
> > l.c
> > +++
> > b/drivers/thermal/intel/int340x_thermal/platform_temperature_contro
> > l.c
> > @@ -49,7 +49,7 @@ struct mmio_reg {
> >  };
> > 
> >  #define MAX_ATTR_GROUP_NAME_LEN        32
> > -#define PTC_MAX_ATTRS          3
> > +#define PTC_MAX_ATTRS          5
> > 
> >  struct ptc_data {
> >         u32 offset;
> > @@ -57,6 +57,8 @@ struct ptc_data {
> >         struct attribute *ptc_attrs[PTC_MAX_ATTRS];
> >         struct device_attribute temperature_target_attr;
> >         struct device_attribute enable_attr;
> > +       struct device_attribute gain_attr;
> > +       struct device_attribute min_performance_level_attr;
> >         char group_name[MAX_ATTR_GROUP_NAME_LEN];
> >  };
> > 
> > @@ -78,6 +80,8 @@ static u32 ptc_offsets[PTC_MAX_INSTANCES] =
> > {0x5B20, 0x5B28, 0x5B30};
> >  static const char * const ptc_strings[] = {
> >         "temperature_target",
> >         "enable",
> > +       "gain",
> > +       "min_performance_level",
> >         NULL
> >  };
> > 
> > @@ -177,6 +181,11 @@ PTC_SHOW(temperature_target);
> >  PTC_STORE(temperature_target);
> >  PTC_SHOW(enable);
> >  PTC_STORE(enable);
> > +PTC_SHOW(gain);
> > +PTC_STORE(gain);
> > +PTC_SHOW(min_performance_level);
> > +PTC_STORE(min_performance_level);
> > +
> > 
> >  #define ptc_init_attribute(_name)\
> >         do {\
> > @@ -193,9 +202,13 @@ static int ptc_create_groups(struct pci_dev
> > *pdev, int instance, struct ptc_data
> > 
> >         ptc_init_attribute(temperature_target);
> >         ptc_init_attribute(enable);
> > +       ptc_init_attribute(gain);
> > +       ptc_init_attribute(min_performance_level);
> > 
> >         data->ptc_attrs[index++] = &data-
> > >temperature_target_attr.attr;
> >         data->ptc_attrs[index++] = &data->enable_attr.attr;
> > +       data->ptc_attrs[index++] = &data->gain_attr.attr;
> > +       data->ptc_attrs[index++] = &data-
> > >min_performance_level_attr.attr;
> >         data->ptc_attrs[index] = NULL;
> > 
> >         snprintf(data->group_name, MAX_ATTR_GROUP_NAME_LEN,
> > --
> > 2.49.0
> >
Srinivas Pandruvada June 5, 2025, 5:20 p.m. UTC | #3
On Thu, 2025-06-05 at 02:18 +0000, Zhang, Rui wrote:
> On Wed, 2025-06-04 at 13:35 -0700, Srinivas Pandruvada wrote:
> > Add debugfs interface to override hardware provide temperature.
> > This
> > interface can be used primarily for debug. Alternatively this can
> > be also used to use hardware control loops to manage temperature
> > for
> > virtual sensors. Virtual sensors are soft sensors created by
> > kernel/
> > user space aggregating other sensors.
> > 
> > There are three attributes to override the maximum three instances
> > of
> > platform temperature control.
> > /sys/kernel/debug/plaftform_temperature_control/
> > ├── temperature_0
> > ├── temperature_1
> > └── temperature_2
> > 
> > These are write only attributes requires admin privilege. Any value
> > greater than 0, will override the temperature. A value of 0 will
> > stop overriding the temperature.
> > 
> > Signed-off-by: Srinivas Pandruvada
> > <srinivas.pandruvada@linux.intel.com>
> > ---
> >  .../platform_temperature_control.c            | 64
> > +++++++++++++++++++
> >  1 file changed, 64 insertions(+)
> > 
> > diff --git
> > a/drivers/thermal/intel/int340x_thermal/platform_temperature_contro
> > l.c
> > b/drivers/thermal/intel/int340x_thermal/platform_temperature_contro
> > l.c
> > index 6cd05783a52d..5dcfd2cc9082 100644
> > ---
> > a/drivers/thermal/intel/int340x_thermal/platform_temperature_contro
> > l.c
> > +++
> > b/drivers/thermal/intel/int340x_thermal/platform_temperature_contro
> > l.c
> > @@ -38,6 +38,7 @@
> >  
> >  #include <linux/kernel.h>
> >  #include <linux/module.h>
> > +#include <linux/debugfs.h>
> >  #include <linux/pci.h>
> >  #include "processor_thermal_device.h"
> >  
> > @@ -53,6 +54,7 @@ struct mmio_reg {
> >  
> >  struct ptc_data {
> >  	u32 offset;
> > +	struct pci_dev *pdev;
> >  	struct attribute_group ptc_attr_group;
> >  	struct attribute *ptc_attrs[PTC_MAX_ATTRS];
> >  	struct device_attribute temperature_target_attr;
> > @@ -222,6 +224,63 @@ static int ptc_create_groups(struct pci_dev
> > *pdev,
> > int instance, struct ptc_data
> >  }
> >  
> >  static struct ptc_data ptc_instance[PTC_MAX_INSTANCES];
> > +static struct dentry *ptc_debugfs;
> > +
> > +#define PTC_TEMP_OVERRIDE_ENABLE_INDEX	4
> > +#define PTC_TEMP_OVERRIDE_INDEX		5
> > +
> > +static ssize_t ptc_temperature_write(struct file *file, const char
> > __user *data,
> > +				     size_t count, loff_t *ppos)
> > +{
> > +	struct ptc_data *ptc_instance = file->private_data;
> > +	struct pci_dev *pdev = ptc_instance->pdev;
> > +	char buf[32];
> > +	ssize_t len;
> > +	u32 value;
> > +
> > +	len = min(count, sizeof(buf) - 1);
> > +	if (copy_from_user(buf, data, len))
> > +		return -EFAULT;
> > +
> > +	buf[len] = '\0';
> > +	if (kstrtouint(buf, 0, &value))
> > +		return -EINVAL;
> > +
> > +	if (ptc_mmio_regs[PTC_TEMP_OVERRIDE_INDEX].units)
> > +		value /=
> > ptc_mmio_regs[PTC_TEMP_OVERRIDE_INDEX].units;
> > +
> > +	if (value > ptc_mmio_regs[PTC_TEMP_OVERRIDE_INDEX].mask)
> > +		return -EINVAL;
> > +
> > +	if (!value) {
> > +		ptc_mmio_write(pdev, ptc_instance->offset,
> > PTC_TEMP_OVERRIDE_ENABLE_INDEX, 0);
> > +	} else {
> > +		ptc_mmio_write(pdev, ptc_instance->offset,
> > PTC_TEMP_OVERRIDE_INDEX, value);
> > +		ptc_mmio_write(pdev, ptc_instance->offset,
> > PTC_TEMP_OVERRIDE_ENABLE_INDEX, 1);
> > +	}
> > +
> > +	return count;
> > +}
> > +
> > +static const struct file_operations ptc_fops = {
> > +	.open = simple_open,
> > +	.write = ptc_temperature_write,
> > +	.llseek = generic_file_llseek,
> > +};
> > +
> > +static void ptc_create_debugfs(void)
> > +{
> > +	ptc_debugfs =
> > debugfs_create_dir("plaftform_temperature_control", NULL);
> 
> s/platform/plaftform
> 
correct.

> And same in the changelog.
> 
> > +
> > +	debugfs_create_file("temperature_0",  0200, ptc_debugfs, 
> > &ptc_instance[0], &ptc_fops);
> > +	debugfs_create_file("temperature_1",  0200, ptc_debugfs, 
> > &ptc_instance[1], &ptc_fops);
> > +	debugfs_create_file("temperature_2",  0200, ptc_debugfs, 
> > &ptc_instance[2], &ptc_fops);
> > +}
> > +
> > +static void ptc_delete_debugfs(void)
> > +{
> > +	debugfs_remove_recursive(ptc_debugfs);
> > +}
> >  
> >  int proc_thermal_ptc_add(struct pci_dev *pdev, struct
> > proc_thermal_device *proc_priv)
> >  {
> > @@ -230,10 +289,13 @@ int proc_thermal_ptc_add(struct pci_dev
> > *pdev,
> > struct proc_thermal_device *proc_
> >  
> >  		for (i = 0; i < PTC_MAX_INSTANCES; i++) {
> >  			ptc_instance[i].offset = ptc_offsets[i];
> > +			ptc_instance[i].pdev = pdev;
> >  			ptc_create_groups(pdev, i,
> > &ptc_instance[i]);
> >  		}
> >  	}
> >  
> > +	ptc_create_debugfs();
> > +
> 
> should we create the debugfs only when PROC_THERMAL_FEATURE_PTC is
> set?

This function is only called when
 if (feature_mask & PROC_THERMAL_FEATURE_PTC) {
}


> 
> >  	return 0;
> >  }
> >  EXPORT_SYMBOL_GPL(proc_thermal_ptc_add);
> > @@ -248,6 +310,8 @@ void proc_thermal_ptc_remove(struct pci_dev
> > *pdev)
> >  		for (i = 0; i < PTC_MAX_INSTANCES; i++)
> >  			sysfs_remove_group(&pdev->dev.kobj,
> > &ptc_instance[i].ptc_attr_group);
> >  	}
> > +
> > +	ptc_delete_debugfs();
> 
> ditto.
Same as above.

Thanks,
Srinivas

> 
> thanks,
> rui
diff mbox series

Patch

diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
index ec5769accae0..794f5cce548e 100644
--- a/Documentation/driver-api/thermal/intel_dptf.rst
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -206,6 +206,16 @@  All these controls needs admin privilege to update.
 	Update a new temperature target in milli degree celsius for hardware to
 	use for the temperature control.
 
+``gain`` (RW)
+	A value in the range 0-7. Sets the aggressiveness of control loop.
+	7 – graceful, favors performance at the expense of temperature overshoots.
+	0 – aggressive, favors tight regulation over performance.
+
+``min_performance_level`` (RW)
+	Minimum Performance level below which the there will be no throttling.
+	0 - all levels of throttling allowed including survivability actions.
+	256 - no throttling allowed.
+
 Given that this is platform temperature control, it is expected that a
 single user-level manager owns and manages the controls. If multiple
 user-level software applications attempt to write different targets, it
diff --git a/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
index 2d6504514893..6cd05783a52d 100644
--- a/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
+++ b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
@@ -49,7 +49,7 @@  struct mmio_reg {
 };
 
 #define MAX_ATTR_GROUP_NAME_LEN	32
-#define PTC_MAX_ATTRS		3
+#define PTC_MAX_ATTRS		5
 
 struct ptc_data {
 	u32 offset;
@@ -57,6 +57,8 @@  struct ptc_data {
 	struct attribute *ptc_attrs[PTC_MAX_ATTRS];
 	struct device_attribute temperature_target_attr;
 	struct device_attribute enable_attr;
+	struct device_attribute gain_attr;
+	struct device_attribute min_performance_level_attr;
 	char group_name[MAX_ATTR_GROUP_NAME_LEN];
 };
 
@@ -78,6 +80,8 @@  static u32 ptc_offsets[PTC_MAX_INSTANCES] = {0x5B20, 0x5B28, 0x5B30};
 static const char * const ptc_strings[] = {
 	"temperature_target",
 	"enable",
+	"gain",
+	"min_performance_level",
 	NULL
 };
 
@@ -177,6 +181,11 @@  PTC_SHOW(temperature_target);
 PTC_STORE(temperature_target);
 PTC_SHOW(enable);
 PTC_STORE(enable);
+PTC_SHOW(gain);
+PTC_STORE(gain);
+PTC_SHOW(min_performance_level);
+PTC_STORE(min_performance_level);
+
 
 #define ptc_init_attribute(_name)\
 	do {\
@@ -193,9 +202,13 @@  static int ptc_create_groups(struct pci_dev *pdev, int instance, struct ptc_data
 
 	ptc_init_attribute(temperature_target);
 	ptc_init_attribute(enable);
+	ptc_init_attribute(gain);
+	ptc_init_attribute(min_performance_level);
 
 	data->ptc_attrs[index++] = &data->temperature_target_attr.attr;
 	data->ptc_attrs[index++] = &data->enable_attr.attr;
+	data->ptc_attrs[index++] = &data->gain_attr.attr;
+	data->ptc_attrs[index++] = &data->min_performance_level_attr.attr;
 	data->ptc_attrs[index] = NULL;
 
 	snprintf(data->group_name, MAX_ATTR_GROUP_NAME_LEN,