diff mbox series

[v2,2/8] thermal/debugfs: Do not extend mitigation episodes beyond system resume

Message ID 2337425.ElGaqSPkdT@kreacher
State New
Headers show
Series thermal/debugfs: Assorted improvements for the 6.11 cycle | expand

Commit Message

Rafael J. Wysocki May 28, 2024, 2:53 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Because thermal zone handling by the thermal core is started from
scratch during resume from system-wide suspend, prevent the debug
code from extending mitigation episodes beyond that point by ending
the mitigation episode currently in progress, if any, for each thermal
zone.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

v1 -> v2: Rebase.

---
 drivers/thermal/thermal_core.c    |    1 +
 drivers/thermal/thermal_debugfs.c |   36 ++++++++++++++++++++++++++++++++++++
 drivers/thermal/thermal_debugfs.h |    2 ++
 3 files changed, 39 insertions(+)

Comments

Rafael J. Wysocki June 10, 2024, 11:29 a.m. UTC | #1
On Mon, Jun 10, 2024 at 10:28 AM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> On 28/05/2024 16:53, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > Because thermal zone handling by the thermal core is started from
> > scratch during resume from system-wide suspend, prevent the debug
> > code from extending mitigation episodes beyond that point by ending
> > the mitigation episode currently in progress, if any, for each thermal
> > zone.
>
> Why it is done at resume time and not at suspend time ?

Because it is related to thermal_zone_device_init() which also runs at
the resume time, so IMV it's better to keep these two pieces together.

Why would it be better to run this during suspend?

> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
> >
> > v1 -> v2: Rebase.
> >
> > ---
> >   drivers/thermal/thermal_core.c    |    1 +
> >   drivers/thermal/thermal_debugfs.c |   36 ++++++++++++++++++++++++++++++++++++
> >   drivers/thermal/thermal_debugfs.h |    2 ++
> >   3 files changed, 39 insertions(+)
> >
> > Index: linux-pm/drivers/thermal/thermal_core.c
> > ===================================================================
> > --- linux-pm.orig/drivers/thermal/thermal_core.c
> > +++ linux-pm/drivers/thermal/thermal_core.c
> > @@ -1641,6 +1641,7 @@ static void thermal_zone_device_resume(s
> >
> >       tz->suspended = false;
> >
> > +     thermal_debug_tz_resume(tz);
> >       thermal_zone_device_init(tz);
> >       __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> >
> > Index: linux-pm/drivers/thermal/thermal_debugfs.c
> > ===================================================================
> > --- linux-pm.orig/drivers/thermal/thermal_debugfs.c
> > +++ linux-pm/drivers/thermal/thermal_debugfs.c
> > @@ -926,3 +926,39 @@ void thermal_debug_tz_remove(struct ther
> >       thermal_debugfs_remove_id(thermal_dbg);
> >       kfree(trips_crossed);
> >   }
> > +
> > +void thermal_debug_tz_resume(struct thermal_zone_device *tz)
> > +{
> > +     struct thermal_debugfs *thermal_dbg = tz->debugfs;
> > +     ktime_t now = ktime_get();
> > +     struct tz_debugfs *tz_dbg;
> > +     struct tz_episode *tze;
> > +     int i;
> > +
> > +     if (!thermal_dbg)
> > +             return;
> > +
> > +     mutex_lock(&thermal_dbg->lock);
> > +
> > +     tz_dbg = &thermal_dbg->tz_dbg;
> > +
> > +     if (!tz_dbg->nr_trips)
> > +             goto out;
> > +
> > +     /*
> > +      * A mitigation episode was in progress before the preceding system
> > +      * suspend transition, so close it because the zone handling is starting
> > +      * over from scratch.
> > +      */
> > +     tze = list_first_entry(&tz_dbg->tz_episodes, struct tz_episode, node);
> > +
> > +     for (i = 0; i < tz_dbg->nr_trips; i++)
> > +             tz_episode_close_trip(tze, tz_dbg->trips_crossed[i], now);
> > +
> > +     tze->duration = ktime_sub(now, tze->timestamp);
> > +
> > +     tz_dbg->nr_trips = 0;
> > +
> > +out:
> > +     mutex_unlock(&thermal_dbg->lock);
> > +}
> > Index: linux-pm/drivers/thermal/thermal_debugfs.h
> > ===================================================================
> > --- linux-pm.orig/drivers/thermal/thermal_debugfs.h
> > +++ linux-pm/drivers/thermal/thermal_debugfs.h
> > @@ -7,6 +7,7 @@ void thermal_debug_cdev_remove(struct th
> >   void thermal_debug_cdev_state_update(const struct thermal_cooling_device *cdev, int state);
> >   void thermal_debug_tz_add(struct thermal_zone_device *tz);
> >   void thermal_debug_tz_remove(struct thermal_zone_device *tz);
> > +void thermal_debug_tz_resume(struct thermal_zone_device *tz);
> >   void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
> >                             const struct thermal_trip *trip);
> >   void thermal_debug_tz_trip_down(struct thermal_zone_device *tz,
> > @@ -20,6 +21,7 @@ static inline void thermal_debug_cdev_st
> >                                                  int state) {}
> >   static inline void thermal_debug_tz_add(struct thermal_zone_device *tz) {}
> >   static inline void thermal_debug_tz_remove(struct thermal_zone_device *tz) {}
> > +static inline void thermal_debug_tz_resume(struct thermal_zone_device *tz) {}
> >   static inline void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
> >                                           const struct thermal_trip *trip) {};
> >   static inline void thermal_debug_tz_trip_down(struct thermal_zone_device *tz,
> >
> >
> >
>
> --
> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog
>
Rafael J. Wysocki June 11, 2024, 6:35 p.m. UTC | #2
On Mon, Jun 10, 2024 at 3:39 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> On 10/06/2024 13:29, Rafael J. Wysocki wrote:
> > On Mon, Jun 10, 2024 at 10:28 AM Daniel Lezcano
> > <daniel.lezcano@linaro.org> wrote:
> >>
> >> On 28/05/2024 16:53, Rafael J. Wysocki wrote:
> >>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>
> >>> Because thermal zone handling by the thermal core is started from
> >>> scratch during resume from system-wide suspend, prevent the debug
> >>> code from extending mitigation episodes beyond that point by ending
> >>> the mitigation episode currently in progress, if any, for each thermal
> >>> zone.
> >>
> >> Why it is done at resume time and not at suspend time ?
> >
> > Because it is related to thermal_zone_device_init() which also runs at
> > the resume time, so IMV it's better to keep these two pieces together.
> >
> > Why would it be better to run this during suspend?
>
>  From a logical point of view, it makes more sense to cancel something
> at suspend time rather than resume. That prevents future readers to be
> puzzled by an action done in an unexpected place.
>
> Technically speaking there is no difference if it is done during suspend
> or resume. Well... we want to prevent actions to be done at resume time
> in order to not increase the resume duration but I'm not sure this code
> is doing a big difference.
>
> If you want to keep it as is, feel free to add my:
>
> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>

I will, thank you!

And thanks for all of the other ACKs.
diff mbox series

Patch

Index: linux-pm/drivers/thermal/thermal_core.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.c
+++ linux-pm/drivers/thermal/thermal_core.c
@@ -1641,6 +1641,7 @@  static void thermal_zone_device_resume(s
 
 	tz->suspended = false;
 
+	thermal_debug_tz_resume(tz);
 	thermal_zone_device_init(tz);
 	__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
 
Index: linux-pm/drivers/thermal/thermal_debugfs.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_debugfs.c
+++ linux-pm/drivers/thermal/thermal_debugfs.c
@@ -926,3 +926,39 @@  void thermal_debug_tz_remove(struct ther
 	thermal_debugfs_remove_id(thermal_dbg);
 	kfree(trips_crossed);
 }
+
+void thermal_debug_tz_resume(struct thermal_zone_device *tz)
+{
+	struct thermal_debugfs *thermal_dbg = tz->debugfs;
+	ktime_t now = ktime_get();
+	struct tz_debugfs *tz_dbg;
+	struct tz_episode *tze;
+	int i;
+
+	if (!thermal_dbg)
+		return;
+
+	mutex_lock(&thermal_dbg->lock);
+
+	tz_dbg = &thermal_dbg->tz_dbg;
+
+	if (!tz_dbg->nr_trips)
+		goto out;
+
+	/*
+	 * A mitigation episode was in progress before the preceding system
+	 * suspend transition, so close it because the zone handling is starting
+	 * over from scratch.
+	 */
+	tze = list_first_entry(&tz_dbg->tz_episodes, struct tz_episode, node);
+
+	for (i = 0; i < tz_dbg->nr_trips; i++)
+		tz_episode_close_trip(tze, tz_dbg->trips_crossed[i], now);
+
+	tze->duration = ktime_sub(now, tze->timestamp);
+
+	tz_dbg->nr_trips = 0;
+
+out:
+	mutex_unlock(&thermal_dbg->lock);
+}
Index: linux-pm/drivers/thermal/thermal_debugfs.h
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_debugfs.h
+++ linux-pm/drivers/thermal/thermal_debugfs.h
@@ -7,6 +7,7 @@  void thermal_debug_cdev_remove(struct th
 void thermal_debug_cdev_state_update(const struct thermal_cooling_device *cdev, int state);
 void thermal_debug_tz_add(struct thermal_zone_device *tz);
 void thermal_debug_tz_remove(struct thermal_zone_device *tz);
+void thermal_debug_tz_resume(struct thermal_zone_device *tz);
 void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
 			      const struct thermal_trip *trip);
 void thermal_debug_tz_trip_down(struct thermal_zone_device *tz,
@@ -20,6 +21,7 @@  static inline void thermal_debug_cdev_st
 						   int state) {}
 static inline void thermal_debug_tz_add(struct thermal_zone_device *tz) {}
 static inline void thermal_debug_tz_remove(struct thermal_zone_device *tz) {}
+static inline void thermal_debug_tz_resume(struct thermal_zone_device *tz) {}
 static inline void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
 					    const struct thermal_trip *trip) {};
 static inline void thermal_debug_tz_trip_down(struct thermal_zone_device *tz,