diff mbox series

[v6,1/4] PM: Add sysfs files to represent time spent in hardware sleep state

Message ID 20230403211831.4010-2-mario.limonciello@amd.com
State New
Headers show
Series Add vendor agnostic mechanism to report hardware sleep | expand

Commit Message

Mario Limonciello April 3, 2023, 9:18 p.m. UTC
Userspace can't easily discover how much of a sleep cycle was spent in a
hardware sleep state without using kernel tracing and vendor specific sysfs
or debugfs files.

To make this information more discoverable, introduce two new sysfs files:
1) The time spent in a hw sleep state for last cycle.
2) The time spent in a hw sleep state since the kernel booted
Both of these files will be present only if the system supports s2idle.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
v5->v6:
 * Add total attribute as well
 * Change text for documentation
 * Adjust flow of is_visible callback.
 * If overflow was detected in total attribute return -EOVERFLOW
 * Rename symbol
 * Add stub for symbol for builds without CONFIG_PM_SLEEP
v4->v5:
 * Provide time in microseconds instead of percent. Userspace can convert
   this if desirable.
---
 Documentation/ABI/testing/sysfs-power | 27 ++++++++++++++++
 include/linux/suspend.h               |  5 +++
 kernel/power/main.c                   | 45 +++++++++++++++++++++++++++
 3 files changed, 77 insertions(+)

Comments

Box, David E April 5, 2023, 12:42 a.m. UTC | #1
Hi Mario,

On Mon, 2023-04-03 at 16:18 -0500, Mario Limonciello wrote:
> Userspace can't easily discover how much of a sleep cycle was spent in a
> hardware sleep state without using kernel tracing and vendor specific sysfs
> or debugfs files.
> 
> To make this information more discoverable, introduce two new sysfs files:
> 1) The time spent in a hw sleep state for last cycle.
> 2) The time spent in a hw sleep state since the kernel booted
> Both of these files will be present only if the system supports s2idle.
> 
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
> v5->v6:
>  * Add total attribute as well
>  * Change text for documentation
>  * Adjust flow of is_visible callback.
>  * If overflow was detected in total attribute return -EOVERFLOW
>  * Rename symbol
>  * Add stub for symbol for builds without CONFIG_PM_SLEEP
> v4->v5:
>  * Provide time in microseconds instead of percent. Userspace can convert
>    this if desirable.
> ---
>  Documentation/ABI/testing/sysfs-power | 27 ++++++++++++++++
>  include/linux/suspend.h               |  5 +++
>  kernel/power/main.c                   | 45 +++++++++++++++++++++++++++
>  3 files changed, 77 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-power
> b/Documentation/ABI/testing/sysfs-power
> index f99d433ff311..37240575d282 100644
> --- a/Documentation/ABI/testing/sysfs-power
> +++ b/Documentation/ABI/testing/sysfs-power
> @@ -413,6 +413,33 @@ Description:
>                 The /sys/power/suspend_stats/last_failed_step file contains
>                 the last failed step in the suspend/resume path.
>  
> +What:          /sys/power/suspend_stats/last_hw_sleep
> +Date:          June 2023
> +Contact:       Mario Limonciello <mario.limonciello@amd.com>
> +Description:
> +               The /sys/power/suspend_stats/last_hw_sleep file
> +               contains the duration of time spent in a hardware sleep
> +               state in the most recent system suspend-resume cycle.
> +               This number is measured in microseconds.
> +
> +               NOTE: Limitations in the size of the hardware counters may
> +               cause this value to be inaccurate in longer sleep cycles.
> +
> +What:          /sys/power/suspend_stats/total_hw_sleep
> +Date:          June 2023
> +Contact:       Mario Limonciello <mario.limonciello@amd.com>
> +Description:
> +               The /sys/power/suspend_stats/total_hw_sleep file
> +               contains the aggregate of time spent in a hardware sleep
> +               state since the kernel was booted. This number
> +               is measured in microseconds.
> +
> +               NOTE: Limitations in the size of the hardware counters may
> +               cause this value to be inaccurate in longer sleep cycles.
> +
> +               If an overflow has been detected this file will return
> +               -EOVERFLOW.
> +
>  What:          /sys/power/sync_on_suspend
>  Date:          October 2019
>  Contact:       Jonas Meurer <jonas@freesources.org>
> diff --git a/include/linux/suspend.h b/include/linux/suspend.h
> index cfe19a028918..069ef0c0ae57 100644
> --- a/include/linux/suspend.h
> +++ b/include/linux/suspend.h
> @@ -68,6 +68,8 @@ struct suspend_stats {
>         int     last_failed_errno;
>         int     errno[REC_FAILED_NUM];
>         int     last_failed_step;
> +       u64     last_hw_sleep;
> +       u64     total_hw_sleep;
>         enum suspend_stat_step  failed_steps[REC_FAILED_NUM];
>  };
>  
> @@ -489,6 +491,7 @@ void restore_processor_state(void);
>  extern int register_pm_notifier(struct notifier_block *nb);
>  extern int unregister_pm_notifier(struct notifier_block *nb);
>  extern void ksys_sync_helper(void);
> +extern void pm_report_hw_sleep_time(u64 t);
>  
>  #define pm_notifier(fn, pri) {                         \
>         static struct notifier_block fn##_nb =                  \
> @@ -526,6 +529,8 @@ static inline int unregister_pm_notifier(struct
> notifier_block *nb)
>         return 0;
>  }
>  
> +static inline void pm_report_hw_sleep_time(u64 t) {};
> +
>  static inline void ksys_sync_helper(void) {}
>  
>  #define pm_notifier(fn, pri)   do { (void)(fn); } while (0)
> diff --git a/kernel/power/main.c b/kernel/power/main.c
> index 31ec4a9b9d70..ffd4dd43cbdd 100644
> --- a/kernel/power/main.c
> +++ b/kernel/power/main.c
> @@ -6,6 +6,7 @@
>   * Copyright (c) 2003 Open Source Development Lab
>   */
>  
> +#include <linux/acpi.h>
>  #include <linux/export.h>
>  #include <linux/kobject.h>
>  #include <linux/string.h>
> @@ -83,6 +84,18 @@ int unregister_pm_notifier(struct notifier_block *nb)
>  }
>  EXPORT_SYMBOL_GPL(unregister_pm_notifier);
>  
> +void pm_report_hw_sleep_time(u64 t)
> +{
> +       suspend_stats.last_hw_sleep = t;
> +       if (suspend_stats.total_hw_sleep == -EOVERFLOW)
> +               return;
> +       if (suspend_stats.total_hw_sleep + t < suspend_stats.total_hw_sleep)
> +               suspend_stats.total_hw_sleep = -EOVERFLOW;
> +       else
> +               suspend_stats.total_hw_sleep += t;

total_hw_sleep is u64. At microsecond granularity it will never realistically
overflow and isn't worth the check IMO. The overflow concern comes from the u32
hardware counter, but I don't think there's a good way to detect it.

You could just report the maximum hardware counter time as max_hw_sleep so users
know the value isn't reliable if suspended for longer than that.

David

> +}
> +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
> +
>  int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long
> val_down)
>  {
>         int ret;
> @@ -377,6 +390,22 @@ static ssize_t last_failed_step_show(struct kobject
> *kobj,
>  }
>  static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
>  
> +static ssize_t last_hw_sleep_show(struct kobject *kobj,
> +               struct kobj_attribute *attr, char *buf)
> +{
> +       return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
> +}
> +static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
> +
> +static ssize_t total_hw_sleep_show(struct kobject *kobj,
> +               struct kobj_attribute *attr, char *buf)
> +{
> +       if (suspend_stats.total_hw_sleep == -EOVERFLOW)
> +               return suspend_stats.total_hw_sleep;
> +       return sysfs_emit(buf, "%llu\n", suspend_stats.total_hw_sleep);
> +}
> +static struct kobj_attribute total_hw_sleep = __ATTR_RO(total_hw_sleep);
> +
>  static struct attribute *suspend_attrs[] = {
>         &success.attr,
>         &fail.attr,
> @@ -391,12 +420,28 @@ static struct attribute *suspend_attrs[] = {
>         &last_failed_dev.attr,
>         &last_failed_errno.attr,
>         &last_failed_step.attr,
> +       &last_hw_sleep.attr,
> +       &total_hw_sleep.attr,
>         NULL,
>  };
>  
> +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute
> *attr, int idx)
> +{
> +       if (attr != &last_hw_sleep.attr &&
> +           attr != &total_hw_sleep.attr)
> +               return 0444;
> +
> +#ifdef CONFIG_ACPI
> +       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
> +               return 0444;
> +#endif
> +       return 0;
> +}
> +
>  static const struct attribute_group suspend_attr_group = {
>         .name = "suspend_stats",
>         .attrs = suspend_attrs,
> +       .is_visible = suspend_attr_is_visible,
>  };
>  
>  #ifdef CONFIG_DEBUG_FS
Hans de Goede April 6, 2023, 12:30 p.m. UTC | #2
Hi,

On 4/5/23 02:42, Box, David E wrote:
> Hi Mario,
> 
> On Mon, 2023-04-03 at 16:18 -0500, Mario Limonciello wrote:
>> Userspace can't easily discover how much of a sleep cycle was spent in a
>> hardware sleep state without using kernel tracing and vendor specific sysfs
>> or debugfs files.
>>
>> To make this information more discoverable, introduce two new sysfs files:
>> 1) The time spent in a hw sleep state for last cycle.
>> 2) The time spent in a hw sleep state since the kernel booted
>> Both of these files will be present only if the system supports s2idle.
>>
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>> ---
>> v5->v6:
>>  * Add total attribute as well
>>  * Change text for documentation
>>  * Adjust flow of is_visible callback.
>>  * If overflow was detected in total attribute return -EOVERFLOW
>>  * Rename symbol
>>  * Add stub for symbol for builds without CONFIG_PM_SLEEP
>> v4->v5:
>>  * Provide time in microseconds instead of percent. Userspace can convert
>>    this if desirable.
>> ---
>>  Documentation/ABI/testing/sysfs-power | 27 ++++++++++++++++
>>  include/linux/suspend.h               |  5 +++
>>  kernel/power/main.c                   | 45 +++++++++++++++++++++++++++
>>  3 files changed, 77 insertions(+)
>>
>> diff --git a/Documentation/ABI/testing/sysfs-power
>> b/Documentation/ABI/testing/sysfs-power
>> index f99d433ff311..37240575d282 100644
>> --- a/Documentation/ABI/testing/sysfs-power
>> +++ b/Documentation/ABI/testing/sysfs-power
>> @@ -413,6 +413,33 @@ Description:
>>                 The /sys/power/suspend_stats/last_failed_step file contains
>>                 the last failed step in the suspend/resume path.
>>  
>> +What:          /sys/power/suspend_stats/last_hw_sleep
>> +Date:          June 2023
>> +Contact:       Mario Limonciello <mario.limonciello@amd.com>
>> +Description:
>> +               The /sys/power/suspend_stats/last_hw_sleep file
>> +               contains the duration of time spent in a hardware sleep
>> +               state in the most recent system suspend-resume cycle.
>> +               This number is measured in microseconds.
>> +
>> +               NOTE: Limitations in the size of the hardware counters may
>> +               cause this value to be inaccurate in longer sleep cycles.
>> +
>> +What:          /sys/power/suspend_stats/total_hw_sleep
>> +Date:          June 2023
>> +Contact:       Mario Limonciello <mario.limonciello@amd.com>
>> +Description:
>> +               The /sys/power/suspend_stats/total_hw_sleep file
>> +               contains the aggregate of time spent in a hardware sleep
>> +               state since the kernel was booted. This number
>> +               is measured in microseconds.
>> +
>> +               NOTE: Limitations in the size of the hardware counters may
>> +               cause this value to be inaccurate in longer sleep cycles.
>> +
>> +               If an overflow has been detected this file will return
>> +               -EOVERFLOW.
>> +
>>  What:          /sys/power/sync_on_suspend
>>  Date:          October 2019
>>  Contact:       Jonas Meurer <jonas@freesources.org>
>> diff --git a/include/linux/suspend.h b/include/linux/suspend.h
>> index cfe19a028918..069ef0c0ae57 100644
>> --- a/include/linux/suspend.h
>> +++ b/include/linux/suspend.h
>> @@ -68,6 +68,8 @@ struct suspend_stats {
>>         int     last_failed_errno;
>>         int     errno[REC_FAILED_NUM];
>>         int     last_failed_step;
>> +       u64     last_hw_sleep;
>> +       u64     total_hw_sleep;
>>         enum suspend_stat_step  failed_steps[REC_FAILED_NUM];
>>  };
>>  
>> @@ -489,6 +491,7 @@ void restore_processor_state(void);
>>  extern int register_pm_notifier(struct notifier_block *nb);
>>  extern int unregister_pm_notifier(struct notifier_block *nb);
>>  extern void ksys_sync_helper(void);
>> +extern void pm_report_hw_sleep_time(u64 t);
>>  
>>  #define pm_notifier(fn, pri) {                         \
>>         static struct notifier_block fn##_nb =                  \
>> @@ -526,6 +529,8 @@ static inline int unregister_pm_notifier(struct
>> notifier_block *nb)
>>         return 0;
>>  }
>>  
>> +static inline void pm_report_hw_sleep_time(u64 t) {};
>> +
>>  static inline void ksys_sync_helper(void) {}
>>  
>>  #define pm_notifier(fn, pri)   do { (void)(fn); } while (0)
>> diff --git a/kernel/power/main.c b/kernel/power/main.c
>> index 31ec4a9b9d70..ffd4dd43cbdd 100644
>> --- a/kernel/power/main.c
>> +++ b/kernel/power/main.c
>> @@ -6,6 +6,7 @@
>>   * Copyright (c) 2003 Open Source Development Lab
>>   */
>>  
>> +#include <linux/acpi.h>
>>  #include <linux/export.h>
>>  #include <linux/kobject.h>
>>  #include <linux/string.h>
>> @@ -83,6 +84,18 @@ int unregister_pm_notifier(struct notifier_block *nb)
>>  }
>>  EXPORT_SYMBOL_GPL(unregister_pm_notifier);
>>  
>> +void pm_report_hw_sleep_time(u64 t)
>> +{
>> +       suspend_stats.last_hw_sleep = t;
>> +       if (suspend_stats.total_hw_sleep == -EOVERFLOW)
>> +               return;
>> +       if (suspend_stats.total_hw_sleep + t < suspend_stats.total_hw_sleep)
>> +               suspend_stats.total_hw_sleep = -EOVERFLOW;
>> +       else
>> +               suspend_stats.total_hw_sleep += t;
> 
> total_hw_sleep is u64. At microsecond granularity it will never realistically
> overflow and isn't worth the check IMO. The overflow concern comes from the u32
> hardware counter, but I don't think there's a good way to detect it.
> 
> You could just report the maximum hardware counter time as max_hw_sleep so users
> know the value isn't reliable if suspended for longer than that.

Yes I think we need to let userspace know the longest hw-sleep time
the hw can reliably record and then userspace can (and should) chose
to not use/check the counters when the total suspend time is bigger
then the max hw-sleep time.

Other then that this patch-set seems like a good idea to me
and I have no objections / remarks on the pdx86 bits.

Regards,

Hans



>> +}
>> +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
>> +
>>  int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long
>> val_down)
>>  {
>>         int ret;
>> @@ -377,6 +390,22 @@ static ssize_t last_failed_step_show(struct kobject
>> *kobj,
>>  }
>>  static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
>>  
>> +static ssize_t last_hw_sleep_show(struct kobject *kobj,
>> +               struct kobj_attribute *attr, char *buf)
>> +{
>> +       return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
>> +}
>> +static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
>> +
>> +static ssize_t total_hw_sleep_show(struct kobject *kobj,
>> +               struct kobj_attribute *attr, char *buf)
>> +{
>> +       if (suspend_stats.total_hw_sleep == -EOVERFLOW)
>> +               return suspend_stats.total_hw_sleep;
>> +       return sysfs_emit(buf, "%llu\n", suspend_stats.total_hw_sleep);
>> +}
>> +static struct kobj_attribute total_hw_sleep = __ATTR_RO(total_hw_sleep);
>> +
>>  static struct attribute *suspend_attrs[] = {
>>         &success.attr,
>>         &fail.attr,
>> @@ -391,12 +420,28 @@ static struct attribute *suspend_attrs[] = {
>>         &last_failed_dev.attr,
>>         &last_failed_errno.attr,
>>         &last_failed_step.attr,
>> +       &last_hw_sleep.attr,
>> +       &total_hw_sleep.attr,
>>         NULL,
>>  };
>>  
>> +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute
>> *attr, int idx)
>> +{
>> +       if (attr != &last_hw_sleep.attr &&
>> +           attr != &total_hw_sleep.attr)
>> +               return 0444;
>> +
>> +#ifdef CONFIG_ACPI
>> +       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
>> +               return 0444;
>> +#endif
>> +       return 0;
>> +}
>> +
>>  static const struct attribute_group suspend_attr_group = {
>>         .name = "suspend_stats",
>>         .attrs = suspend_attrs,
>> +       .is_visible = suspend_attr_is_visible,
>>  };
>>  
>>  #ifdef CONFIG_DEBUG_FS
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index f99d433ff311..37240575d282 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -413,6 +413,33 @@  Description:
 		The /sys/power/suspend_stats/last_failed_step file contains
 		the last failed step in the suspend/resume path.
 
+What:		/sys/power/suspend_stats/last_hw_sleep
+Date:		June 2023
+Contact:	Mario Limonciello <mario.limonciello@amd.com>
+Description:
+		The /sys/power/suspend_stats/last_hw_sleep file
+		contains the duration of time spent in a hardware sleep
+		state in the most recent system suspend-resume cycle.
+		This number is measured in microseconds.
+
+		NOTE: Limitations in the size of the hardware counters may
+		cause this value to be inaccurate in longer sleep cycles.
+
+What:		/sys/power/suspend_stats/total_hw_sleep
+Date:		June 2023
+Contact:	Mario Limonciello <mario.limonciello@amd.com>
+Description:
+		The /sys/power/suspend_stats/total_hw_sleep file
+		contains the aggregate of time spent in a hardware sleep
+		state since the kernel was booted. This number
+		is measured in microseconds.
+
+		NOTE: Limitations in the size of the hardware counters may
+		cause this value to be inaccurate in longer sleep cycles.
+
+		If an overflow has been detected this file will return
+		-EOVERFLOW.
+
 What:		/sys/power/sync_on_suspend
 Date:		October 2019
 Contact:	Jonas Meurer <jonas@freesources.org>
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index cfe19a028918..069ef0c0ae57 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -68,6 +68,8 @@  struct suspend_stats {
 	int	last_failed_errno;
 	int	errno[REC_FAILED_NUM];
 	int	last_failed_step;
+	u64	last_hw_sleep;
+	u64	total_hw_sleep;
 	enum suspend_stat_step	failed_steps[REC_FAILED_NUM];
 };
 
@@ -489,6 +491,7 @@  void restore_processor_state(void);
 extern int register_pm_notifier(struct notifier_block *nb);
 extern int unregister_pm_notifier(struct notifier_block *nb);
 extern void ksys_sync_helper(void);
+extern void pm_report_hw_sleep_time(u64 t);
 
 #define pm_notifier(fn, pri) {				\
 	static struct notifier_block fn##_nb =			\
@@ -526,6 +529,8 @@  static inline int unregister_pm_notifier(struct notifier_block *nb)
 	return 0;
 }
 
+static inline void pm_report_hw_sleep_time(u64 t) {};
+
 static inline void ksys_sync_helper(void) {}
 
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 31ec4a9b9d70..ffd4dd43cbdd 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -6,6 +6,7 @@ 
  * Copyright (c) 2003 Open Source Development Lab
  */
 
+#include <linux/acpi.h>
 #include <linux/export.h>
 #include <linux/kobject.h>
 #include <linux/string.h>
@@ -83,6 +84,18 @@  int unregister_pm_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(unregister_pm_notifier);
 
+void pm_report_hw_sleep_time(u64 t)
+{
+	suspend_stats.last_hw_sleep = t;
+	if (suspend_stats.total_hw_sleep == -EOVERFLOW)
+		return;
+	if (suspend_stats.total_hw_sleep + t < suspend_stats.total_hw_sleep)
+		suspend_stats.total_hw_sleep = -EOVERFLOW;
+	else
+		suspend_stats.total_hw_sleep += t;
+}
+EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
+
 int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down)
 {
 	int ret;
@@ -377,6 +390,22 @@  static ssize_t last_failed_step_show(struct kobject *kobj,
 }
 static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
 
+static ssize_t last_hw_sleep_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
+}
+static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
+
+static ssize_t total_hw_sleep_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	if (suspend_stats.total_hw_sleep == -EOVERFLOW)
+		return suspend_stats.total_hw_sleep;
+	return sysfs_emit(buf, "%llu\n", suspend_stats.total_hw_sleep);
+}
+static struct kobj_attribute total_hw_sleep = __ATTR_RO(total_hw_sleep);
+
 static struct attribute *suspend_attrs[] = {
 	&success.attr,
 	&fail.attr,
@@ -391,12 +420,28 @@  static struct attribute *suspend_attrs[] = {
 	&last_failed_dev.attr,
 	&last_failed_errno.attr,
 	&last_failed_step.attr,
+	&last_hw_sleep.attr,
+	&total_hw_sleep.attr,
 	NULL,
 };
 
+static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+	if (attr != &last_hw_sleep.attr &&
+	    attr != &total_hw_sleep.attr)
+		return 0444;
+
+#ifdef CONFIG_ACPI
+	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
+		return 0444;
+#endif
+	return 0;
+}
+
 static const struct attribute_group suspend_attr_group = {
 	.name = "suspend_stats",
 	.attrs = suspend_attrs,
+	.is_visible = suspend_attr_is_visible,
 };
 
 #ifdef CONFIG_DEBUG_FS