diff mbox series

[v1,4/4] thermal: gov_bang_bang: Use governor_data to reduce overhead

Message ID 2285575.iZASKD2KPV@rjwysocki.net
State New
Headers show
Series thermal: gov_bang_bang: Prevent cooling devices from getting stuck in the "on" state | expand

Commit Message

Rafael J. Wysocki Aug. 13, 2024, 2:29 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

After running once, the for_each_trip_desc() loop in
bang_bang_manage() is pure needless overhead because it is not going to
make any changes unless a new cooling device has been bound to one of
the trips in the thermal zone or the system is resuming from sleep.

For this reason, make bang_bang_manage() set governor_data for the
thermal zone and check it upfront to decide whether or not it needs to
do anything.

However, governor_data needs to be reset in some cases to let
bang_bang_manage() know that it should walk the trips again, so add an
.update_tz() callback to the governor and make the core additionally
invoke it during system resume.

To avoid affecting the other users of that callback unnecessarily, add
a special notification reason for system resume, THERMAL_TZ_RESUME, and
also pass it to __thermal_zone_device_update() called during system
resume for consistency.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/gov_bang_bang.c |   18 ++++++++++++++++++
 drivers/thermal/thermal_core.c  |    3 ++-
 include/linux/thermal.h         |    1 +
 3 files changed, 21 insertions(+), 1 deletion(-)

Comments

Peter Kästle Aug. 13, 2024, 9:08 p.m. UTC | #1
On 13.08.24 16:29, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> After running once, the for_each_trip_desc() loop in
> bang_bang_manage() is pure needless overhead because it is not going to
> make any changes unless a new cooling device has been bound to one of
> the trips in the thermal zone or the system is resuming from sleep.
> 
> For this reason, make bang_bang_manage() set governor_data for the
> thermal zone and check it upfront to decide whether or not it needs to
> do anything.
> 
> However, governor_data needs to be reset in some cases to let
> bang_bang_manage() know that it should walk the trips again, so add an
> .update_tz() callback to the governor and make the core additionally
> invoke it during system resume.
> 
> To avoid affecting the other users of that callback unnecessarily, add
> a special notification reason for system resume, THERMAL_TZ_RESUME, and
> also pass it to __thermal_zone_device_update() called during system
> resume for consistency.
> 
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Acked-by: Peter Kästle <peter@piie.net>

> ---
>   drivers/thermal/gov_bang_bang.c |   18 ++++++++++++++++++
>   drivers/thermal/thermal_core.c  |    3 ++-
>   include/linux/thermal.h         |    1 +
>   3 files changed, 21 insertions(+), 1 deletion(-)
> 
> Index: linux-pm/drivers/thermal/gov_bang_bang.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/gov_bang_bang.c
> +++ linux-pm/drivers/thermal/gov_bang_bang.c
> @@ -86,6 +86,10 @@ static void bang_bang_manage(struct ther
>   	const struct thermal_trip_desc *td;
>   	struct thermal_instance *instance;
>   
> +	/* If the code below has run already, nothing needs to be done. */
> +	if (tz->governor_data)
> +		return;
> +
>   	for_each_trip_desc(tz, td) {
>   		const struct thermal_trip *trip = &td->trip;
>   
> @@ -107,11 +111,25 @@ static void bang_bang_manage(struct ther
>   				bang_bang_set_instance_target(instance, 0);
>   		}
>   	}
> +
> +	tz->governor_data = (void *)true;
> +}
> +
> +static void bang_bang_update_tz(struct thermal_zone_device *tz,
> +				enum thermal_notify_event reason)
> +{
> +	/*
> +	 * Let bang_bang_manage() know that it needs to walk trips after binding
> +	 * a new cdev and after system resume.
> +	 */
> +	if (reason == THERMAL_TZ_BIND_CDEV || reason == THERMAL_TZ_RESUME)
> +		tz->governor_data = NULL;
>   }
>   
>   static struct thermal_governor thermal_gov_bang_bang = {
>   	.name		= "bang_bang",
>   	.trip_crossed	= bang_bang_control,
>   	.manage		= bang_bang_manage,
> +	.update_tz	= bang_bang_update_tz,
>   };
>   THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang);
> Index: linux-pm/drivers/thermal/thermal_core.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_core.c
> +++ linux-pm/drivers/thermal/thermal_core.c
> @@ -1692,7 +1692,8 @@ static void thermal_zone_device_resume(s
>   
>   	thermal_debug_tz_resume(tz);
>   	thermal_zone_device_init(tz);
> -	__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> +	thermal_governor_update_tz(tz, THERMAL_TZ_RESUME);
> +	__thermal_zone_device_update(tz, THERMAL_TZ_RESUME);
>   
>   	complete(&tz->resume);
>   	tz->resuming = false;
> Index: linux-pm/include/linux/thermal.h
> ===================================================================
> --- linux-pm.orig/include/linux/thermal.h
> +++ linux-pm/include/linux/thermal.h
> @@ -55,6 +55,7 @@ enum thermal_notify_event {
>   	THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */
>   	THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */
>   	THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */
> +	THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */
>   };
>   
>   /**
> 
> 
>
Zhang, Rui Aug. 14, 2024, 6:09 a.m. UTC | #2
On Tue, 2024-08-13 at 16:29 +0200, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> After running once, the for_each_trip_desc() loop in
> bang_bang_manage() is pure needless overhead because it is not going
> to
> make any changes unless a new cooling device has been bound to one of
> the trips in the thermal zone or the system is resuming from sleep.
> 
> For this reason, make bang_bang_manage() set governor_data for the
> thermal zone and check it upfront to decide whether or not it needs
> to
> do anything.
> 
> However, governor_data needs to be reset in some cases to let
> bang_bang_manage() know that it should walk the trips again, so add
> an
> .update_tz() callback to the governor and make the core additionally
> invoke it during system resume.
> 
> To avoid affecting the other users of that callback unnecessarily,
> add
> a special notification reason for system resume, THERMAL_TZ_RESUME,
> and
> also pass it to __thermal_zone_device_update() called during system
> resume for consistency.
> 
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>  drivers/thermal/gov_bang_bang.c |   18 ++++++++++++++++++
>  drivers/thermal/thermal_core.c  |    3 ++-
>  include/linux/thermal.h         |    1 +
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> Index: linux-pm/drivers/thermal/gov_bang_bang.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/gov_bang_bang.c
> +++ linux-pm/drivers/thermal/gov_bang_bang.c
> @@ -86,6 +86,10 @@ static void bang_bang_manage(struct ther
>         const struct thermal_trip_desc *td;
>         struct thermal_instance *instance;
>  
> +       /* If the code below has run already, nothing needs to be
> done. */
> +       if (tz->governor_data)
> +               return;
> +
>         for_each_trip_desc(tz, td) {
>                 const struct thermal_trip *trip = &td->trip;
>  
> @@ -107,11 +111,25 @@ static void bang_bang_manage(struct ther
>                                 bang_bang_set_instance_target(instanc
> e, 0);
>                 }
>         }
> +
> +       tz->governor_data = (void *)true;
> +}
> +
> +static void bang_bang_update_tz(struct thermal_zone_device *tz,
> +                               enum thermal_notify_event reason)
> +{
> +       /*
> +        * Let bang_bang_manage() know that it needs to walk trips
> after binding
> +        * a new cdev and after system resume.
> +        */
> +       if (reason == THERMAL_TZ_BIND_CDEV || reason ==
> THERMAL_TZ_RESUME)
> +               tz->governor_data = NULL;
>  }

can we do the cdev initialization for BIND_CDEV and RESUME notification
in .update_tz() directly?

Then we don't need .manage() callback. This makes more sense to me
because bang_bang governor cares about trip point crossing only.

thanks,
rui
>  
>  static struct thermal_governor thermal_gov_bang_bang = {
>         .name           = "bang_bang",
>         .trip_crossed   = bang_bang_control,
>         .manage         = bang_bang_manage,
> +       .update_tz      = bang_bang_update_tz,
>  };
>  THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang);
> Index: linux-pm/drivers/thermal/thermal_core.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_core.c
> +++ linux-pm/drivers/thermal/thermal_core.c
> @@ -1692,7 +1692,8 @@ static void thermal_zone_device_resume(s
>  
>         thermal_debug_tz_resume(tz);
>         thermal_zone_device_init(tz);
> -       __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> +       thermal_governor_update_tz(tz, THERMAL_TZ_RESUME);
> +       __thermal_zone_device_update(tz, THERMAL_TZ_RESUME);
>  
>         complete(&tz->resume);
>         tz->resuming = false;
> Index: linux-pm/include/linux/thermal.h
> ===================================================================
> --- linux-pm.orig/include/linux/thermal.h
> +++ linux-pm/include/linux/thermal.h
> @@ -55,6 +55,7 @@ enum thermal_notify_event {
>         THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal
> zone */
>         THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the
> thermal zone */
>         THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight
> changed */
> +       THERMAL_TZ_RESUME, /* Thermal zone is resuming after system
> sleep */
>  };
>  
>  /**
> 
> 
>
Rafael J. Wysocki Aug. 14, 2024, 5:26 p.m. UTC | #3
On Wed, Aug 14, 2024 at 8:09 AM Zhang, Rui <rui.zhang@intel.com> wrote:
>
> On Tue, 2024-08-13 at 16:29 +0200, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > After running once, the for_each_trip_desc() loop in
> > bang_bang_manage() is pure needless overhead because it is not going
> > to
> > make any changes unless a new cooling device has been bound to one of
> > the trips in the thermal zone or the system is resuming from sleep.
> >
> > For this reason, make bang_bang_manage() set governor_data for the
> > thermal zone and check it upfront to decide whether or not it needs
> > to
> > do anything.
> >
> > However, governor_data needs to be reset in some cases to let
> > bang_bang_manage() know that it should walk the trips again, so add
> > an
> > .update_tz() callback to the governor and make the core additionally
> > invoke it during system resume.
> >
> > To avoid affecting the other users of that callback unnecessarily,
> > add
> > a special notification reason for system resume, THERMAL_TZ_RESUME,
> > and
> > also pass it to __thermal_zone_device_update() called during system
> > resume for consistency.
> >
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
> >  drivers/thermal/gov_bang_bang.c |   18 ++++++++++++++++++
> >  drivers/thermal/thermal_core.c  |    3 ++-
> >  include/linux/thermal.h         |    1 +
> >  3 files changed, 21 insertions(+), 1 deletion(-)
> >
> > Index: linux-pm/drivers/thermal/gov_bang_bang.c
> > ===================================================================
> > --- linux-pm.orig/drivers/thermal/gov_bang_bang.c
> > +++ linux-pm/drivers/thermal/gov_bang_bang.c
> > @@ -86,6 +86,10 @@ static void bang_bang_manage(struct ther
> >         const struct thermal_trip_desc *td;
> >         struct thermal_instance *instance;
> >
> > +       /* If the code below has run already, nothing needs to be
> > done. */
> > +       if (tz->governor_data)
> > +               return;
> > +
> >         for_each_trip_desc(tz, td) {
> >                 const struct thermal_trip *trip = &td->trip;
> >
> > @@ -107,11 +111,25 @@ static void bang_bang_manage(struct ther
> >                                 bang_bang_set_instance_target(instanc
> > e, 0);
> >                 }
> >         }
> > +
> > +       tz->governor_data = (void *)true;
> > +}
> > +
> > +static void bang_bang_update_tz(struct thermal_zone_device *tz,
> > +                               enum thermal_notify_event reason)
> > +{
> > +       /*
> > +        * Let bang_bang_manage() know that it needs to walk trips
> > after binding
> > +        * a new cdev and after system resume.
> > +        */
> > +       if (reason == THERMAL_TZ_BIND_CDEV || reason ==
> > THERMAL_TZ_RESUME)
> > +               tz->governor_data = NULL;
> >  }
>
> can we do the cdev initialization for BIND_CDEV and RESUME notification
> in .update_tz() directly?

That would be viable if the zone temperature was known at the time
.update_tz() runs, but it isn't.  See this message:

https://lore.kernel.org/linux-pm/CAJZ5v0ji_7Z-24iCO_Xxu4Zm4jgVFmR9jVp8QNiCOxzV9gqSnA@mail.gmail.com/

As long as the zone temperature is not known, it is not clear which
way to initialize the cooling devices.

Interestingly enough, the zone temperature is first checked by the
core when the zone is enabled and not when it is registered.

> Then we don't need .manage() callback. This makes more sense to me
> because bang_bang governor cares about trip point crossing only.

That's true, but this is all about a corner case in which no trip
points are crossed and the cooling devices need to be initialized
properly regardless.

> >  static struct thermal_governor thermal_gov_bang_bang = {
> >         .name           = "bang_bang",
> >         .trip_crossed   = bang_bang_control,
> >         .manage         = bang_bang_manage,
> > +       .update_tz      = bang_bang_update_tz,
> >  };
> >  THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang);
> > Index: linux-pm/drivers/thermal/thermal_core.c
> > ===================================================================
> > --- linux-pm.orig/drivers/thermal/thermal_core.c
> > +++ linux-pm/drivers/thermal/thermal_core.c
> > @@ -1692,7 +1692,8 @@ static void thermal_zone_device_resume(s
> >
> >         thermal_debug_tz_resume(tz);
> >         thermal_zone_device_init(tz);
> > -       __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> > +       thermal_governor_update_tz(tz, THERMAL_TZ_RESUME);
> > +       __thermal_zone_device_update(tz, THERMAL_TZ_RESUME);
> >
> >         complete(&tz->resume);
> >         tz->resuming = false;
> > Index: linux-pm/include/linux/thermal.h
> > ===================================================================
> > --- linux-pm.orig/include/linux/thermal.h
> > +++ linux-pm/include/linux/thermal.h
> > @@ -55,6 +55,7 @@ enum thermal_notify_event {
> >         THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal
> > zone */
> >         THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the
> > thermal zone */
> >         THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight
> > changed */
> > +       THERMAL_TZ_RESUME, /* Thermal zone is resuming after system
> > sleep */
> >  };
> >
> >  /**
> >
> >
> >
>
Zhang, Rui Aug. 15, 2024, 3:26 a.m. UTC | #4
On Wed, 2024-08-14 at 19:26 +0200, Rafael J. Wysocki wrote:
> On Wed, Aug 14, 2024 at 8:09 AM Zhang, Rui <rui.zhang@intel.com>
> wrote:
> > 
> > On Tue, 2024-08-13 at 16:29 +0200, Rafael J. Wysocki wrote:
> > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > 
> > > After running once, the for_each_trip_desc() loop in
> > > bang_bang_manage() is pure needless overhead because it is not
> > > going
> > > to
> > > make any changes unless a new cooling device has been bound to
> > > one of
> > > the trips in the thermal zone or the system is resuming from
> > > sleep.
> > > 
> > > For this reason, make bang_bang_manage() set governor_data for
> > > the
> > > thermal zone and check it upfront to decide whether or not it
> > > needs
> > > to
> > > do anything.
> > > 
> > > However, governor_data needs to be reset in some cases to let
> > > bang_bang_manage() know that it should walk the trips again, so
> > > add
> > > an
> > > .update_tz() callback to the governor and make the core
> > > additionally
> > > invoke it during system resume.
> > > 
> > > To avoid affecting the other users of that callback
> > > unnecessarily,
> > > add
> > > a special notification reason for system resume,
> > > THERMAL_TZ_RESUME,
> > > and
> > > also pass it to __thermal_zone_device_update() called during
> > > system
> > > resume for consistency.
> > > 
> > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > ---
> > >  drivers/thermal/gov_bang_bang.c |   18 ++++++++++++++++++
> > >  drivers/thermal/thermal_core.c  |    3 ++-
> > >  include/linux/thermal.h         |    1 +
> > >  3 files changed, 21 insertions(+), 1 deletion(-)
> > > 
> > > Index: linux-pm/drivers/thermal/gov_bang_bang.c
> > > =================================================================
> > > ==
> > > --- linux-pm.orig/drivers/thermal/gov_bang_bang.c
> > > +++ linux-pm/drivers/thermal/gov_bang_bang.c
> > > @@ -86,6 +86,10 @@ static void bang_bang_manage(struct ther
> > >         const struct thermal_trip_desc *td;
> > >         struct thermal_instance *instance;
> > > 
> > > +       /* If the code below has run already, nothing needs to be
> > > done. */
> > > +       if (tz->governor_data)
> > > +               return;
> > > +
> > >         for_each_trip_desc(tz, td) {
> > >                 const struct thermal_trip *trip = &td->trip;
> > > 
> > > @@ -107,11 +111,25 @@ static void bang_bang_manage(struct ther
> > >                                
> > > bang_bang_set_instance_target(instanc
> > > e, 0);
> > >                 }
> > >         }
> > > +
> > > +       tz->governor_data = (void *)true;
> > > +}
> > > +
> > > +static void bang_bang_update_tz(struct thermal_zone_device *tz,
> > > +                               enum thermal_notify_event reason)
> > > +{
> > > +       /*
> > > +        * Let bang_bang_manage() know that it needs to walk
> > > trips
> > > after binding
> > > +        * a new cdev and after system resume.
> > > +        */
> > > +       if (reason == THERMAL_TZ_BIND_CDEV || reason ==
> > > THERMAL_TZ_RESUME)
> > > +               tz->governor_data = NULL;
> > >  }
> > 
> > can we do the cdev initialization for BIND_CDEV and RESUME
> > notification
> > in .update_tz() directly?
> 
> That would be viable if the zone temperature was known at the time
> .update_tz() runs, but it isn't.  See this message:
> 
> https://lore.kernel.org/linux-pm/CAJZ5v0ji_7Z-24iCO_Xxu4Zm4jgVFmR9jVp8QNiCOxzV9gqSnA@mail.gmail.com/
> 
> As long as the zone temperature is not known, it is not clear which
> way to initialize the cooling devices.

right. Then the patch LGTM.

BTW, what do you think if we add handling for first temperature read in
handle_thermal_trip(), say, some draft code like below,

if (tz->last_temperature == THERMAL_TEMP_INIT) {
	if (tz->temperature < trip->temperature)
		list_add(&td->notify_list_node, way_down_list);
	else
		list_add_tail(&td->notify_list_node, way_up_list);
}

This should handle both the init and the resume case.

thanks,
rui
> 
> Interestingly enough, the zone temperature is first checked by the
> core when the zone is enabled and not when it is registered.
> 
> > Then we don't need .manage() callback. This makes more sense to me
> > because bang_bang governor cares about trip point crossing only.
> 
> That's true, but this is all about a corner case in which no trip
> points are crossed and the cooling devices need to be initialized
> properly regardless.
> 
> > >  static struct thermal_governor thermal_gov_bang_bang = {
> > >         .name           = "bang_bang",
> > >         .trip_crossed   = bang_bang_control,
> > >         .manage         = bang_bang_manage,
> > > +       .update_tz      = bang_bang_update_tz,
> > >  };
> > >  THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang);
> > > Index: linux-pm/drivers/thermal/thermal_core.c
> > > =================================================================
> > > ==
> > > --- linux-pm.orig/drivers/thermal/thermal_core.c
> > > +++ linux-pm/drivers/thermal/thermal_core.c
> > > @@ -1692,7 +1692,8 @@ static void thermal_zone_device_resume(s
> > > 
> > >         thermal_debug_tz_resume(tz);
> > >         thermal_zone_device_init(tz);
> > > -       __thermal_zone_device_update(tz,
> > > THERMAL_EVENT_UNSPECIFIED);
> > > +       thermal_governor_update_tz(tz, THERMAL_TZ_RESUME);
> > > +       __thermal_zone_device_update(tz, THERMAL_TZ_RESUME);
> > > 
> > >         complete(&tz->resume);
> > >         tz->resuming = false;
> > > Index: linux-pm/include/linux/thermal.h
> > > =================================================================
> > > ==
> > > --- linux-pm.orig/include/linux/thermal.h
> > > +++ linux-pm/include/linux/thermal.h
> > > @@ -55,6 +55,7 @@ enum thermal_notify_event {
> > >         THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the
> > > thermal
> > > zone */
> > >         THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the
> > > thermal zone */
> > >         THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance
> > > weight
> > > changed */
> > > +       THERMAL_TZ_RESUME, /* Thermal zone is resuming after
> > > system
> > > sleep */
> > >  };
> > > 
> > >  /**
> > > 
> > > 
> > > 
> > 
>
Rafael J. Wysocki Aug. 15, 2024, 12:35 p.m. UTC | #5
On Thu, Aug 15, 2024 at 5:26 AM Zhang, Rui <rui.zhang@intel.com> wrote:
>
> On Wed, 2024-08-14 at 19:26 +0200, Rafael J. Wysocki wrote:
> > On Wed, Aug 14, 2024 at 8:09 AM Zhang, Rui <rui.zhang@intel.com>
> > wrote:
> > >
> > > On Tue, 2024-08-13 at 16:29 +0200, Rafael J. Wysocki wrote:
> > > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > >
> > > > After running once, the for_each_trip_desc() loop in
> > > > bang_bang_manage() is pure needless overhead because it is not
> > > > going
> > > > to
> > > > make any changes unless a new cooling device has been bound to
> > > > one of
> > > > the trips in the thermal zone or the system is resuming from
> > > > sleep.
> > > >
> > > > For this reason, make bang_bang_manage() set governor_data for
> > > > the
> > > > thermal zone and check it upfront to decide whether or not it
> > > > needs
> > > > to
> > > > do anything.
> > > >
> > > > However, governor_data needs to be reset in some cases to let
> > > > bang_bang_manage() know that it should walk the trips again, so
> > > > add
> > > > an
> > > > .update_tz() callback to the governor and make the core
> > > > additionally
> > > > invoke it during system resume.
> > > >
> > > > To avoid affecting the other users of that callback
> > > > unnecessarily,
> > > > add
> > > > a special notification reason for system resume,
> > > > THERMAL_TZ_RESUME,
> > > > and
> > > > also pass it to __thermal_zone_device_update() called during
> > > > system
> > > > resume for consistency.
> > > >
> > > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > > ---
> > > >  drivers/thermal/gov_bang_bang.c |   18 ++++++++++++++++++
> > > >  drivers/thermal/thermal_core.c  |    3 ++-
> > > >  include/linux/thermal.h         |    1 +
> > > >  3 files changed, 21 insertions(+), 1 deletion(-)
> > > >
> > > > Index: linux-pm/drivers/thermal/gov_bang_bang.c
> > > > =================================================================
> > > > ==
> > > > --- linux-pm.orig/drivers/thermal/gov_bang_bang.c
> > > > +++ linux-pm/drivers/thermal/gov_bang_bang.c
> > > > @@ -86,6 +86,10 @@ static void bang_bang_manage(struct ther
> > > >         const struct thermal_trip_desc *td;
> > > >         struct thermal_instance *instance;
> > > >
> > > > +       /* If the code below has run already, nothing needs to be
> > > > done. */
> > > > +       if (tz->governor_data)
> > > > +               return;
> > > > +
> > > >         for_each_trip_desc(tz, td) {
> > > >                 const struct thermal_trip *trip = &td->trip;
> > > >
> > > > @@ -107,11 +111,25 @@ static void bang_bang_manage(struct ther
> > > >
> > > > bang_bang_set_instance_target(instanc
> > > > e, 0);
> > > >                 }
> > > >         }
> > > > +
> > > > +       tz->governor_data = (void *)true;
> > > > +}
> > > > +
> > > > +static void bang_bang_update_tz(struct thermal_zone_device *tz,
> > > > +                               enum thermal_notify_event reason)
> > > > +{
> > > > +       /*
> > > > +        * Let bang_bang_manage() know that it needs to walk
> > > > trips
> > > > after binding
> > > > +        * a new cdev and after system resume.
> > > > +        */
> > > > +       if (reason == THERMAL_TZ_BIND_CDEV || reason ==
> > > > THERMAL_TZ_RESUME)
> > > > +               tz->governor_data = NULL;
> > > >  }
> > >
> > > can we do the cdev initialization for BIND_CDEV and RESUME
> > > notification
> > > in .update_tz() directly?
> >
> > That would be viable if the zone temperature was known at the time
> > .update_tz() runs, but it isn't.  See this message:
> >
> > https://lore.kernel.org/linux-pm/CAJZ5v0ji_7Z-24iCO_Xxu4Zm4jgVFmR9jVp8QNiCOxzV9gqSnA@mail.gmail.com/
> >
> > As long as the zone temperature is not known, it is not clear which
> > way to initialize the cooling devices.
>
> right. Then the patch LGTM.

Great!

> BTW, what do you think if we add handling for first temperature read in
> handle_thermal_trip(), say, some draft code like below,
>
> if (tz->last_temperature == THERMAL_TEMP_INIT) {
>         if (tz->temperature < trip->temperature)
>                 list_add(&td->notify_list_node, way_down_list);
>         else
>                 list_add_tail(&td->notify_list_node, way_up_list);
> }
>
> This should handle both the init and the resume case.

I have considered doing something similar, but there are quite a few
arguments against it.

First, it would cause spurious notifications to be sent to user space.
Second, the debug code would need to be modified to take this case
into account explicitly.  Moreover, this would be extra overhead for
the other governors.

IMV it's better to limit the impact to the Bang-bang governor where
the problem is.
Zhang, Rui Aug. 16, 2024, 2:59 a.m. UTC | #6
On Thu, 2024-08-15 at 14:35 +0200, Rafael J. Wysocki wrote:
> On Thu, Aug 15, 2024 at 5:26 AM Zhang, Rui <rui.zhang@intel.com>
> wrote:
> > 
> > On Wed, 2024-08-14 at 19:26 +0200, Rafael J. Wysocki wrote:
> > > On Wed, Aug 14, 2024 at 8:09 AM Zhang, Rui <rui.zhang@intel.com>
> > > wrote:
> > > > 
> > > > On Tue, 2024-08-13 at 16:29 +0200, Rafael J. Wysocki wrote:
> > > > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > > > 
> > > > > After running once, the for_each_trip_desc() loop in
> > > > > bang_bang_manage() is pure needless overhead because it is
> > > > > not
> > > > > going
> > > > > to
> > > > > make any changes unless a new cooling device has been bound
> > > > > to
> > > > > one of
> > > > > the trips in the thermal zone or the system is resuming from
> > > > > sleep.
> > > > > 
> > > > > For this reason, make bang_bang_manage() set governor_data
> > > > > for
> > > > > the
> > > > > thermal zone and check it upfront to decide whether or not it
> > > > > needs
> > > > > to
> > > > > do anything.
> > > > > 
> > > > > However, governor_data needs to be reset in some cases to let
> > > > > bang_bang_manage() know that it should walk the trips again,
> > > > > so
> > > > > add
> > > > > an
> > > > > .update_tz() callback to the governor and make the core
> > > > > additionally
> > > > > invoke it during system resume.
> > > > > 
> > > > > To avoid affecting the other users of that callback
> > > > > unnecessarily,
> > > > > add
> > > > > a special notification reason for system resume,
> > > > > THERMAL_TZ_RESUME,
> > > > > and
> > > > > also pass it to __thermal_zone_device_update() called during
> > > > > system
> > > > > resume for consistency.
> > > > > 
> > > > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > > > ---
> > > > >  drivers/thermal/gov_bang_bang.c |   18 ++++++++++++++++++
> > > > >  drivers/thermal/thermal_core.c  |    3 ++-
> > > > >  include/linux/thermal.h         |    1 +
> > > > >  3 files changed, 21 insertions(+), 1 deletion(-)
> > > > > 
> > > > > Index: linux-pm/drivers/thermal/gov_bang_bang.c
> > > > > =============================================================
> > > > > ====
> > > > > ==
> > > > > --- linux-pm.orig/drivers/thermal/gov_bang_bang.c
> > > > > +++ linux-pm/drivers/thermal/gov_bang_bang.c
> > > > > @@ -86,6 +86,10 @@ static void bang_bang_manage(struct ther
> > > > >         const struct thermal_trip_desc *td;
> > > > >         struct thermal_instance *instance;
> > > > > 
> > > > > +       /* If the code below has run already, nothing needs
> > > > > to be
> > > > > done. */
> > > > > +       if (tz->governor_data)
> > > > > +               return;
> > > > > +
> > > > >         for_each_trip_desc(tz, td) {
> > > > >                 const struct thermal_trip *trip = &td->trip;
> > > > > 
> > > > > @@ -107,11 +111,25 @@ static void bang_bang_manage(struct
> > > > > ther
> > > > > 
> > > > > bang_bang_set_instance_target(instanc
> > > > > e, 0);
> > > > >                 }
> > > > >         }
> > > > > +
> > > > > +       tz->governor_data = (void *)true;
> > > > > +}
> > > > > +
> > > > > +static void bang_bang_update_tz(struct thermal_zone_device
> > > > > *tz,
> > > > > +                               enum thermal_notify_event
> > > > > reason)
> > > > > +{
> > > > > +       /*
> > > > > +        * Let bang_bang_manage() know that it needs to walk
> > > > > trips
> > > > > after binding
> > > > > +        * a new cdev and after system resume.
> > > > > +        */
> > > > > +       if (reason == THERMAL_TZ_BIND_CDEV || reason ==
> > > > > THERMAL_TZ_RESUME)
> > > > > +               tz->governor_data = NULL;
> > > > >  }
> > > > 
> > > > can we do the cdev initialization for BIND_CDEV and RESUME
> > > > notification
> > > > in .update_tz() directly?
> > > 
> > > That would be viable if the zone temperature was known at the
> > > time
> > > .update_tz() runs, but it isn't.  See this message:
> > > 
> > > https://lore.kernel.org/linux-pm/CAJZ5v0ji_7Z-24iCO_Xxu4Zm4jgVFmR9jVp8QNiCOxzV9gqSnA@mail.gmail.com/
> > > 
> > > As long as the zone temperature is not known, it is not clear
> > > which
> > > way to initialize the cooling devices.
> > 
> > right. Then the patch LGTM.
> 
> Great!
> 
> > BTW, what do you think if we add handling for first temperature
> > read in
> > handle_thermal_trip(), say, some draft code like below,
> > 
> > if (tz->last_temperature == THERMAL_TEMP_INIT) {
> >         if (tz->temperature < trip->temperature)
> >                 list_add(&td->notify_list_node, way_down_list);
> >         else
> >                 list_add_tail(&td->notify_list_node, way_up_list);
> > }
> > 
> > This should handle both the init and the resume case.
> 
> I have considered doing something similar, but there are quite a few
> arguments against it.
> 
> First, it would cause spurious notifications to be sent to user
> space.
> Second, the debug code would need to be modified to take this case
> into account explicitly.  Moreover, this would be extra overhead for
> the other governors.
> 
> IMV it's better to limit the impact to the Bang-bang governor where
> the problem is.

Agreed.

Reviewed-by: Zhang Rui <rui.zhang@intel.com>

thanks,
rui
diff mbox series

Patch

Index: linux-pm/drivers/thermal/gov_bang_bang.c
===================================================================
--- linux-pm.orig/drivers/thermal/gov_bang_bang.c
+++ linux-pm/drivers/thermal/gov_bang_bang.c
@@ -86,6 +86,10 @@  static void bang_bang_manage(struct ther
 	const struct thermal_trip_desc *td;
 	struct thermal_instance *instance;
 
+	/* If the code below has run already, nothing needs to be done. */
+	if (tz->governor_data)
+		return;
+
 	for_each_trip_desc(tz, td) {
 		const struct thermal_trip *trip = &td->trip;
 
@@ -107,11 +111,25 @@  static void bang_bang_manage(struct ther
 				bang_bang_set_instance_target(instance, 0);
 		}
 	}
+
+	tz->governor_data = (void *)true;
+}
+
+static void bang_bang_update_tz(struct thermal_zone_device *tz,
+				enum thermal_notify_event reason)
+{
+	/*
+	 * Let bang_bang_manage() know that it needs to walk trips after binding
+	 * a new cdev and after system resume.
+	 */
+	if (reason == THERMAL_TZ_BIND_CDEV || reason == THERMAL_TZ_RESUME)
+		tz->governor_data = NULL;
 }
 
 static struct thermal_governor thermal_gov_bang_bang = {
 	.name		= "bang_bang",
 	.trip_crossed	= bang_bang_control,
 	.manage		= bang_bang_manage,
+	.update_tz	= bang_bang_update_tz,
 };
 THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang);
Index: linux-pm/drivers/thermal/thermal_core.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.c
+++ linux-pm/drivers/thermal/thermal_core.c
@@ -1692,7 +1692,8 @@  static void thermal_zone_device_resume(s
 
 	thermal_debug_tz_resume(tz);
 	thermal_zone_device_init(tz);
-	__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
+	thermal_governor_update_tz(tz, THERMAL_TZ_RESUME);
+	__thermal_zone_device_update(tz, THERMAL_TZ_RESUME);
 
 	complete(&tz->resume);
 	tz->resuming = false;
Index: linux-pm/include/linux/thermal.h
===================================================================
--- linux-pm.orig/include/linux/thermal.h
+++ linux-pm/include/linux/thermal.h
@@ -55,6 +55,7 @@  enum thermal_notify_event {
 	THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */
 	THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */
 	THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */
+	THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */
 };
 
 /**