Message ID | 20230519032719.2581689-3-evalenti@kernel.org |
---|---|
State | New |
Headers | show |
Series | thermal: enhancements on thermal stats | expand |
On Fri, May 19, 2023 at 5:27 AM Eduardo Valentin <evalenti@kernel.org> wrote: > > From: Eduardo Valentin <eduval@amazon.com> > > This patch improves the current cooling device > statistics by adding a new file under > cdev/stats/requests_of_thermal_zone > > to represent the number of times each thermal zone > requested the cooling device to effectively change. > If the request associated was not serviced because > another thermal zone asked for a higher cooling level, > this counter does not increase. What if the cdev is associated with two thermal zones asking for the same state of it? > The file format is: > thermal_zone: <type> <count> > > Samples: > $ cat cdev0/stats/requests_of_thermal_zone > thermal_zone: amb0 2 The "one value per attribute" sysfs rule violation. > > In this example, it means the thermal zone 'amb0' has requested > 2 times for cdev0 to change state. Like in the previous patch, it would be good to explain the use case. > Cc: "Rafael J. Wysocki" <rafael@kernel.org> (supporter:THERMAL) > Cc: Daniel Lezcano <daniel.lezcano@linaro.org> (supporter:THERMAL) > Cc: Amit Kucheria <amitk@kernel.org> (reviewer:THERMAL) > Cc: Zhang Rui <rui.zhang@intel.com> (reviewer:THERMAL) > Cc: Jonathan Corbet <corbet@lwn.net> (maintainer:DOCUMENTATION) > Cc: linux-pm@vger.kernel.org (open list:THERMAL) > Cc: linux-doc@vger.kernel.org (open list:DOCUMENTATION) > Cc: linux-kernel@vger.kernel.org (open list) > > Signed-off-by: Eduardo Valentin <eduval@amazon.com> > --- > .../driver-api/thermal/sysfs-api.rst | 2 + > drivers/thermal/thermal_core.h | 1 + > drivers/thermal/thermal_sysfs.c | 52 +++++++++++++++++++ > 3 files changed, 55 insertions(+) > > diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst > index caa50d61a5bc..75309a51d9b3 100644 > --- a/Documentation/driver-api/thermal/sysfs-api.rst > +++ b/Documentation/driver-api/thermal/sysfs-api.rst > @@ -369,6 +369,8 @@ Thermal cooling device sys I/F, created once it's registered:: > |---stats/trans_table: Cooling state transition table > |---stats/time_in_thermal_zone_ms: Time that this cooling device was driven > each associated thermal zone. > + |---stats/requests_of_thermal_zone: Total number of times this cooling device > + changed due to each associated thermal zone. The meaning of the above description is not clear to me. > > > Then next two dynamic attributes are created/removed in pairs. They represent > diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h > index 3cce60c6e065..ed6511c3b794 100644 > --- a/drivers/thermal/thermal_core.h > +++ b/drivers/thermal/thermal_core.h > @@ -103,6 +103,7 @@ struct thermal_instance { > unsigned int weight; /* The weight of the cooling device */ > bool upper_no_limit; > #if IS_ENABLED(CONFIG_THERMAL_STATISTICS) > + unsigned long total_requests; > ktime_t time_in; /* time spent in this instance */ > #endif > }; > diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c > index a3b71f03db75..0bce1415f7e8 100644 > --- a/drivers/thermal/thermal_sysfs.c > +++ b/drivers/thermal/thermal_sysfs.c > @@ -723,6 +723,7 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, > stats->trans_table[stats->state * (cdev->max_state + 1) + new_state]++; > stats->state = new_state; > stats->total_trans++; > + stats->curr_instance->total_requests++; > > unlock: > spin_unlock(&stats->lock); > @@ -867,6 +868,54 @@ time_in_thermal_zone_ms_show(struct device *dev, struct device_attribute *attr, > return ret < 0 ? ret : len; > } > > +static ssize_t > +requests_of_thermal_zone_show(struct device *dev, struct device_attribute *attr, > + char *buf) > +{ > + LIST_HEAD(cdev_thermal_zone_list); > + struct thermal_cooling_device *cdev = to_cooling_device(dev); > + struct cooling_dev_stats *stats = cdev->stats; > + struct cdev_thermal_zone_residency *res, *next; > + struct thermal_instance *instance; > + ssize_t len = 0, ret = 0; > + > + mutex_lock(&cdev->lock); > + > + spin_lock(&stats->lock); > + update_time_in_state(stats, stats->curr_instance); > + spin_unlock(&stats->lock); > + > + build_cdev_thermal_zone_residency(&cdev_thermal_zone_list, cdev); > + > + list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) > + list_for_each_entry(res, &cdev_thermal_zone_list, node) > + if (strncmp(res->thermal_zone, instance->tz->type, > + THERMAL_NAME_LENGTH) == 0) > + res->counter += instance->total_requests; > + > + mutex_unlock(&cdev->lock); > + > + list_for_each_entry_safe(res, next, &cdev_thermal_zone_list, node) { Why is the _safe variant needed here? > + ret = sprintf(buf + len, "thermal_zone: %s\t%lu\n", > + res->thermal_zone, res->counter); > + > + if (ret == 0) > + ret = -EOVERFLOW; > + > + if (ret < 0) > + break; > + > + len += ret; > + } > + > + list_for_each_entry_safe(res, next, &cdev_thermal_zone_list, node) { > + list_del(&res->node); > + kfree(res); > + } > + > + return ret < 0 ? ret : len; I would prefer if (ret < 0) return ret; return len; > +} > + > static ssize_t > reset_store(struct device *dev, struct device_attribute *attr, const char *buf, > size_t count) > @@ -897,6 +946,7 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, > > /* Make sure we reset all counters per instance */ > list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) { > + instance->total_requests = 0; > instance->time_in = ktime_set(0, 0); > } > > @@ -971,6 +1021,7 @@ static ssize_t trans_table_show(struct device *dev, > static DEVICE_ATTR_RO(total_trans); > static DEVICE_ATTR_RO(time_in_state_ms); > static DEVICE_ATTR_RO(time_in_thermal_zone_ms); > +static DEVICE_ATTR_RO(requests_of_thermal_zone); > static DEVICE_ATTR_WO(reset); > static DEVICE_ATTR_RO(trans_table); > > @@ -978,6 +1029,7 @@ static struct attribute *cooling_device_stats_attrs[] = { > &dev_attr_total_trans.attr, > &dev_attr_time_in_state_ms.attr, > &dev_attr_time_in_thermal_zone_ms.attr, > + &dev_attr_requests_of_thermal_zone.attr, > &dev_attr_reset.attr, > &dev_attr_trans_table.attr, > NULL > --
On Tue, Jun 20, 2023 at 07:12:52PM +0200, Rafael J. Wysocki wrote: > > > > On Fri, May 19, 2023 at 5:27 AM Eduardo Valentin <evalenti@kernel.org> wrote: > > > > From: Eduardo Valentin <eduval@amazon.com> > > > > This patch improves the current cooling device > > statistics by adding a new file under > > cdev/stats/requests_of_thermal_zone > > > > to represent the number of times each thermal zone > > requested the cooling device to effectively change. > > If the request associated was not serviced because > > another thermal zone asked for a higher cooling level, > > this counter does not increase. > > What if the cdev is associated with two thermal zones asking for the > same state of it? same as explained before, there will be always one thermal instance that is picked. This patch considers that. > > > The file format is: > > thermal_zone: <type> <count> > > > > Samples: > > $ cat cdev0/stats/requests_of_thermal_zone > > thermal_zone: amb0 2 > > The "one value per attribute" sysfs rule violation. > > > > > In this example, it means the thermal zone 'amb0' has requested > > 2 times for cdev0 to change state. > > Like in the previous patch, it would be good to explain the use case. > > > Cc: "Rafael J. Wysocki" <rafael@kernel.org> (supporter:THERMAL) > > Cc: Daniel Lezcano <daniel.lezcano@linaro.org> (supporter:THERMAL) > > Cc: Amit Kucheria <amitk@kernel.org> (reviewer:THERMAL) > > Cc: Zhang Rui <rui.zhang@intel.com> (reviewer:THERMAL) > > Cc: Jonathan Corbet <corbet@lwn.net> (maintainer:DOCUMENTATION) > > Cc: linux-pm@vger.kernel.org (open list:THERMAL) > > Cc: linux-doc@vger.kernel.org (open list:DOCUMENTATION) > > Cc: linux-kernel@vger.kernel.org (open list) > > > > Signed-off-by: Eduardo Valentin <eduval@amazon.com> > > --- > > .../driver-api/thermal/sysfs-api.rst | 2 + > > drivers/thermal/thermal_core.h | 1 + > > drivers/thermal/thermal_sysfs.c | 52 +++++++++++++++++++ > > 3 files changed, 55 insertions(+) > > > > diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst > > index caa50d61a5bc..75309a51d9b3 100644 > > --- a/Documentation/driver-api/thermal/sysfs-api.rst > > +++ b/Documentation/driver-api/thermal/sysfs-api.rst > > @@ -369,6 +369,8 @@ Thermal cooling device sys I/F, created once it's registered:: > > |---stats/trans_table: Cooling state transition table > > |---stats/time_in_thermal_zone_ms: Time that this cooling device was driven > > each associated thermal zone. > > + |---stats/requests_of_thermal_zone: Total number of times this cooling device > > + changed due to each associated thermal zone. > > The meaning of the above description is not clear to me. > > > > > > > Then next two dynamic attributes are created/removed in pairs. They represent > > diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h > > index 3cce60c6e065..ed6511c3b794 100644 > > --- a/drivers/thermal/thermal_core.h > > +++ b/drivers/thermal/thermal_core.h > > @@ -103,6 +103,7 @@ struct thermal_instance { > > unsigned int weight; /* The weight of the cooling device */ > > bool upper_no_limit; > > #if IS_ENABLED(CONFIG_THERMAL_STATISTICS) > > + unsigned long total_requests; > > ktime_t time_in; /* time spent in this instance */ > > #endif > > }; > > diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c > > index a3b71f03db75..0bce1415f7e8 100644 > > --- a/drivers/thermal/thermal_sysfs.c > > +++ b/drivers/thermal/thermal_sysfs.c > > @@ -723,6 +723,7 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, > > stats->trans_table[stats->state * (cdev->max_state + 1) + new_state]++; > > stats->state = new_state; > > stats->total_trans++; > > + stats->curr_instance->total_requests++; > > > > unlock: > > spin_unlock(&stats->lock); > > @@ -867,6 +868,54 @@ time_in_thermal_zone_ms_show(struct device *dev, struct device_attribute *attr, > > return ret < 0 ? ret : len; > > } > > > > +static ssize_t > > +requests_of_thermal_zone_show(struct device *dev, struct device_attribute *attr, > > + char *buf) > > +{ > > + LIST_HEAD(cdev_thermal_zone_list); > > + struct thermal_cooling_device *cdev = to_cooling_device(dev); > > + struct cooling_dev_stats *stats = cdev->stats; > > + struct cdev_thermal_zone_residency *res, *next; > > + struct thermal_instance *instance; > > + ssize_t len = 0, ret = 0; > > + > > + mutex_lock(&cdev->lock); > > + > > + spin_lock(&stats->lock); > > + update_time_in_state(stats, stats->curr_instance); > > + spin_unlock(&stats->lock); > > + > > + build_cdev_thermal_zone_residency(&cdev_thermal_zone_list, cdev); > > + > > + list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) > > + list_for_each_entry(res, &cdev_thermal_zone_list, node) > > + if (strncmp(res->thermal_zone, instance->tz->type, > > + THERMAL_NAME_LENGTH) == 0) > > + res->counter += instance->total_requests; > > + > > + mutex_unlock(&cdev->lock); > > + > > + list_for_each_entry_safe(res, next, &cdev_thermal_zone_list, node) { > > Why is the _safe variant needed here? > > > + ret = sprintf(buf + len, "thermal_zone: %s\t%lu\n", > > + res->thermal_zone, res->counter); > > + > > + if (ret == 0) > > + ret = -EOVERFLOW; > > + > > + if (ret < 0) > > + break; > > + > > + len += ret; > > + } > > + > > + list_for_each_entry_safe(res, next, &cdev_thermal_zone_list, node) { > > + list_del(&res->node); > > + kfree(res); > > + } > > + > > + return ret < 0 ? ret : len; > > I would prefer > > if (ret < 0) > return ret; > > return len; OK. I will fix this and enhance the explanations. And the same comments and replies of the previous patch applies here. > > > +} > > + > > static ssize_t > > reset_store(struct device *dev, struct device_attribute *attr, const char *buf, > > size_t count) > > @@ -897,6 +946,7 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, > > > > /* Make sure we reset all counters per instance */ > > list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) { > > + instance->total_requests = 0; > > instance->time_in = ktime_set(0, 0); > > } > > > > @@ -971,6 +1021,7 @@ static ssize_t trans_table_show(struct device *dev, > > static DEVICE_ATTR_RO(total_trans); > > static DEVICE_ATTR_RO(time_in_state_ms); > > static DEVICE_ATTR_RO(time_in_thermal_zone_ms); > > +static DEVICE_ATTR_RO(requests_of_thermal_zone); > > static DEVICE_ATTR_WO(reset); > > static DEVICE_ATTR_RO(trans_table); > > > > @@ -978,6 +1029,7 @@ static struct attribute *cooling_device_stats_attrs[] = { > > &dev_attr_total_trans.attr, > > &dev_attr_time_in_state_ms.attr, > > &dev_attr_time_in_thermal_zone_ms.attr, > > + &dev_attr_requests_of_thermal_zone.attr, > > &dev_attr_reset.attr, > > &dev_attr_trans_table.attr, > > NULL > > --
diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst index caa50d61a5bc..75309a51d9b3 100644 --- a/Documentation/driver-api/thermal/sysfs-api.rst +++ b/Documentation/driver-api/thermal/sysfs-api.rst @@ -369,6 +369,8 @@ Thermal cooling device sys I/F, created once it's registered:: |---stats/trans_table: Cooling state transition table |---stats/time_in_thermal_zone_ms: Time that this cooling device was driven each associated thermal zone. + |---stats/requests_of_thermal_zone: Total number of times this cooling device + changed due to each associated thermal zone. Then next two dynamic attributes are created/removed in pairs. They represent diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 3cce60c6e065..ed6511c3b794 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -103,6 +103,7 @@ struct thermal_instance { unsigned int weight; /* The weight of the cooling device */ bool upper_no_limit; #if IS_ENABLED(CONFIG_THERMAL_STATISTICS) + unsigned long total_requests; ktime_t time_in; /* time spent in this instance */ #endif }; diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index a3b71f03db75..0bce1415f7e8 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -723,6 +723,7 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, stats->trans_table[stats->state * (cdev->max_state + 1) + new_state]++; stats->state = new_state; stats->total_trans++; + stats->curr_instance->total_requests++; unlock: spin_unlock(&stats->lock); @@ -867,6 +868,54 @@ time_in_thermal_zone_ms_show(struct device *dev, struct device_attribute *attr, return ret < 0 ? ret : len; } +static ssize_t +requests_of_thermal_zone_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + LIST_HEAD(cdev_thermal_zone_list); + struct thermal_cooling_device *cdev = to_cooling_device(dev); + struct cooling_dev_stats *stats = cdev->stats; + struct cdev_thermal_zone_residency *res, *next; + struct thermal_instance *instance; + ssize_t len = 0, ret = 0; + + mutex_lock(&cdev->lock); + + spin_lock(&stats->lock); + update_time_in_state(stats, stats->curr_instance); + spin_unlock(&stats->lock); + + build_cdev_thermal_zone_residency(&cdev_thermal_zone_list, cdev); + + list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) + list_for_each_entry(res, &cdev_thermal_zone_list, node) + if (strncmp(res->thermal_zone, instance->tz->type, + THERMAL_NAME_LENGTH) == 0) + res->counter += instance->total_requests; + + mutex_unlock(&cdev->lock); + + list_for_each_entry_safe(res, next, &cdev_thermal_zone_list, node) { + ret = sprintf(buf + len, "thermal_zone: %s\t%lu\n", + res->thermal_zone, res->counter); + + if (ret == 0) + ret = -EOVERFLOW; + + if (ret < 0) + break; + + len += ret; + } + + list_for_each_entry_safe(res, next, &cdev_thermal_zone_list, node) { + list_del(&res->node); + kfree(res); + } + + return ret < 0 ? ret : len; +} + static ssize_t reset_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -897,6 +946,7 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, /* Make sure we reset all counters per instance */ list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) { + instance->total_requests = 0; instance->time_in = ktime_set(0, 0); } @@ -971,6 +1021,7 @@ static ssize_t trans_table_show(struct device *dev, static DEVICE_ATTR_RO(total_trans); static DEVICE_ATTR_RO(time_in_state_ms); static DEVICE_ATTR_RO(time_in_thermal_zone_ms); +static DEVICE_ATTR_RO(requests_of_thermal_zone); static DEVICE_ATTR_WO(reset); static DEVICE_ATTR_RO(trans_table); @@ -978,6 +1029,7 @@ static struct attribute *cooling_device_stats_attrs[] = { &dev_attr_total_trans.attr, &dev_attr_time_in_state_ms.attr, &dev_attr_time_in_thermal_zone_ms.attr, + &dev_attr_requests_of_thermal_zone.attr, &dev_attr_reset.attr, &dev_attr_trans_table.attr, NULL