diff mbox series

[V4] thermal/core/power_allocator: reset thermal governor when trip point is changed

Message ID 20230619063534.12831-1-di.shen@unisoc.com
State New
Headers show
Series [V4] thermal/core/power_allocator: reset thermal governor when trip point is changed | expand

Commit Message

Di Shen June 19, 2023, 6:35 a.m. UTC
When the thermal trip point is changed, the governor should
be reset so that the policy algorithm is updated to adapt to the
new trip point.

This patch adds an ops to the thermal governor structure to reset
the governor. The ops is called when the trip point is changed.
For power allocator, the parameters of pid controller and the states
of power cooling devices can be reset when the passive trip point
is changed.

Signed-off-by: Di Shen <di.shen@unisoc.com>

---
V4:
- Compared to V3, handle it in thermal core instead of in governor.

- Add an ops to the governor structure, and call it when a trip
  point is changed.

- Define reset ops for power allocator.

V3:
- Add fix tag.

V2:
- Compared to v1, do not revert.

- Add a variable(last_switch_on_temp) in power_allocator_params
  to record the last switch_on_temp value.

- Adds a function to renew the update flag and update the
  last_switch_on_temp when thermal trips are writable.

V1:
- Revert commit 0952177f2a1f.
---
---
 drivers/thermal/gov_power_allocator.c | 21 +++++++++++++++++++++
 drivers/thermal/thermal_trip.c        |  6 ++++++
 include/linux/thermal.h               |  1 +
 3 files changed, 28 insertions(+)

Comments

Di Shen June 21, 2023, 4:51 a.m. UTC | #1
On Tue, Jun 20, 2023 at 6:39 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
> Hi Di,
>
> I have missed your v4 because it landed below your v3 thread.
>
> On 6/19/23 07:35, Di Shen wrote:
> > When the thermal trip point is changed, the governor should
> > be reset so that the policy algorithm be updated to adapt to the
> > new trip point.
> >
> > This patch adds an ops for thermal the governor structure to reset
>
> s/ops/callback
>
> > the governor. The ops is called when the trip point is changed.
> > For power allocator, the parameters of pid controller and the states
> > of power cooling devices can be reset when the passive trip point
> > is changed.
> >
> > Signed-off-by: Di Shen <di.shen@unisoc.com>
> >
> > ---
> > V4:
> > - Compared to V3, handle it in thermal core instead of in governor.
> >
> > - Add an ops to the governor structure, and call it when a trip
> >    point is changed.
> >
> > - Define reset ops for power allocator.
> >
> > V3:
> > - Add fix tag.
> >
> > V2:
> > - Compared to v1, do not revert.
> >
> > - Add a variable(last_switch_on_temp) in power_allocator_params
> >    to record the last switch_on_temp value.
> >
> > - Adds a function to renew the update flag and update the
> >    last_switch_on_temp when thermal trips are writable.
> >
> > V1:
> > - Revert commit 0952177f2a1f.
> > ---
> > ---
> >   drivers/thermal/gov_power_allocator.c | 21 +++++++++++++++++++++
> >   drivers/thermal/thermal_trip.c        |  6 ++++++
> >   include/linux/thermal.h               |  1 +
> >   3 files changed, 28 insertions(+)
> >
> > diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
> > index 8642f1096b91..41d155adc616 100644
> > --- a/drivers/thermal/gov_power_allocator.c
> > +++ b/drivers/thermal/gov_power_allocator.c
> > @@ -729,10 +729,31 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip_id)
> >       return allocate_power(tz, trip.temperature);
> >   }
> >
> > +static int power_allocator_reset(struct thermal_zone_device *tz, int trip_id)
> > +{
> > +     int ret = 0;
> > +     struct thermal_trip trip;
> > +     struct power_allocator_params *params = tz->governor_data;
> > +
> > +     ret = __thermal_zone_get_trip(tz, trip_id, &trip);
> > +     if (ret)
> > +             return ret;
> > +
> > +     /* Only need reset for passive trips */
> > +     if (trip.type != THERMAL_TRIP_PASSIVE)
> > +             return -EINVAL;
> > +
> > +     reset_pid_controller(params);
> > +     allow_maximum_power(tz, true);
> > +
> > +     return ret;
> > +}
> > +
> >   static struct thermal_governor thermal_gov_power_allocator = {
> >       .name           = "power_allocator",
> >       .bind_to_tz     = power_allocator_bind,
> >       .unbind_from_tz = power_allocator_unbind,
> >       .throttle       = power_allocator_throttle,
> > +     .reset          = power_allocator_reset,
> >   };
> >   THERMAL_GOVERNOR_DECLARE(thermal_gov_power_allocator);
> > diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
> > index 907f3a4d7bc8..52eb768fada8 100644
> > --- a/drivers/thermal/thermal_trip.c
> > +++ b/drivers/thermal/thermal_trip.c
> > @@ -173,6 +173,12 @@ int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
> >       if (tz->trips && (t.temperature != trip->temperature || t.hysteresis != trip->hysteresis))
> >               tz->trips[trip_id] = *trip;
> >
> > +     if (t.temperature != trip->temperature && tz->governor && tz->governor->reset) {
> > +             ret = tz->governor->reset(tz, trip_id);
> > +             if (ret)
> > +                     pr_warn_once("Failed to reset thermal governor\n");
> > +     }
>
> I agree with Rafael. Maybe change that to debug print, so that can be
> checked during the product testing. We cannot do much if that happens.
>
Right.

> > +
> >       thermal_notify_tz_trip_change(tz->id, trip_id, trip->type,
> >                                     trip->temperature, trip->hysteresis);
> >
> > diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> > index 87837094d549..155ce2291fa5 100644
> > --- a/include/linux/thermal.h
> > +++ b/include/linux/thermal.h
> > @@ -204,6 +204,7 @@ struct thermal_governor {
> >       int (*bind_to_tz)(struct thermal_zone_device *tz);
> >       void (*unbind_from_tz)(struct thermal_zone_device *tz);
> >       int (*throttle)(struct thermal_zone_device *tz, int trip);
> > +     int (*reset)(struct thermal_zone_device *tz, int trip);
> >       struct list_head        governor_list;
> >   };
> >
>
> That thermal_governor::reset() callback is what I had im mind while
> giving you the feedback for the v1. Now it's much cleaner what is going
> on and why.
>

Yes, it is necessary to do something for the governor if the trip point
is changed, especially for governors whose trips are strongly related to
the policy.

> Apart from some small bits, LGTM. Please adjust the comment in the patch
> header and this debug print and you can add:
>
> Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
>
> Please send the next version as separate new thread.
>
> Regards,
> Lukasz

Thank you Lukasz !
I couldn't agree with you more about your comments. What you have said is what
I want to express.

I'd love to send the next version. Thanks again.

Best regards,
Di
Rafael J. Wysocki June 22, 2023, 6:27 p.m. UTC | #2
On Tue, Jun 20, 2023 at 1:56 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 6/20/23 11:39, Rafael J. Wysocki wrote:
> > On Tue, Jun 20, 2023 at 12:19 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>
> >> Hi Rafael,
> >>
> >>
> >> On 6/20/23 11:07, Rafael J. Wysocki wrote:
> >>> On Tue, Jun 20, 2023 at 11:46 AM Rafael J. Wysocki <rafael@kernel.org> wrote:
> >>>>
> >>>> On Mon, Jun 19, 2023 at 8:36 AM Di Shen <di.shen@unisoc.com> wrote:
> >>>>>
> >>>>> When the thermal trip point is changed, the governor should
> >>>>> be reset so that the policy algorithm be updated to adapt to the
> >>>>> new trip point.
> >>>>>
> >>>>> This patch adds an ops for thermal the governor structure to reset
> >>>>> the governor. The ops is called when the trip point is changed.
> >>>>> For power allocator, the parameters of pid controller and the states
> >>>>> of power cooling devices can be reset when the passive trip point
> >>>>> is changed.
> >>>>>
> >>>>> Signed-off-by: Di Shen <di.shen@unisoc.com>
> >>>>>
> >>>>> ---
> >>>>> V4:
> >>>>> - Compared to V3, handle it in thermal core instead of in governor.
> >>>>>
> >>>>> - Add an ops to the governor structure, and call it when a trip
> >>>>>     point is changed.
> >>>>>
> >>>>> - Define reset ops for power allocator.
> >>>>>
> >>>>> V3:
> >>>>> - Add fix tag.
> >>>>>
> >>>>> V2:
> >>>>> - Compared to v1, do not revert.
> >>>>>
> >>>>> - Add a variable(last_switch_on_temp) in power_allocator_params
> >>>>>     to record the last switch_on_temp value.
> >>>>>
> >>>>> - Adds a function to renew the update flag and update the
> >>>>>     last_switch_on_temp when thermal trips are writable.
> >>>>>
> >>>>> V1:
> >>>>> - Revert commit 0952177f2a1f.
> >>>>> ---
> >>>>> ---
> >>>>>    drivers/thermal/gov_power_allocator.c | 21 +++++++++++++++++++++
> >>>>>    drivers/thermal/thermal_trip.c        |  6 ++++++
> >>>>>    include/linux/thermal.h               |  1 +
> >>>>>    3 files changed, 28 insertions(+)
> >>>>>
> >>>>> diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
> >>>>> index 8642f1096b91..41d155adc616 100644
> >>>>> --- a/drivers/thermal/gov_power_allocator.c
> >>>>> +++ b/drivers/thermal/gov_power_allocator.c
> >>>>> @@ -729,10 +729,31 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip_id)
> >>>>>           return allocate_power(tz, trip.temperature);
> >>>>>    }
> >>>>>
> >>>>> +static int power_allocator_reset(struct thermal_zone_device *tz, int trip_id)
> >>>>> +{
> >>>>> +       int ret = 0;
> >>>>> +       struct thermal_trip trip;
> >>>>> +       struct power_allocator_params *params = tz->governor_data;
> >>>>> +
> >>>>> +       ret = __thermal_zone_get_trip(tz, trip_id, &trip);
> >>>>> +       if (ret)
> >>>>> +               return ret;
> >>>>> +
> >>>>> +       /* Only need reset for passive trips */
> >>>>> +       if (trip.type != THERMAL_TRIP_PASSIVE)
> >>>>> +               return -EINVAL;
> >>>>> +
> >>>>> +       reset_pid_controller(params);
> >>>>> +       allow_maximum_power(tz, true);
> >>>>> +
> >>>>> +       return ret;
> >>>>> +}
> >>>>> +
> >>>>>    static struct thermal_governor thermal_gov_power_allocator = {
> >>>>>           .name           = "power_allocator",
> >>>>>           .bind_to_tz     = power_allocator_bind,
> >>>>>           .unbind_from_tz = power_allocator_unbind,
> >>>>>           .throttle       = power_allocator_throttle,
> >>>>> +       .reset          = power_allocator_reset,
> >>>>>    };
> >>>>>    THERMAL_GOVERNOR_DECLARE(thermal_gov_power_allocator);
> >>>>> diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
> >>>>> index 907f3a4d7bc8..52eb768fada8 100644
> >>>>> --- a/drivers/thermal/thermal_trip.c
> >>>>> +++ b/drivers/thermal/thermal_trip.c
> >>>>> @@ -173,6 +173,12 @@ int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
> >>>>>           if (tz->trips && (t.temperature != trip->temperature || t.hysteresis != trip->hysteresis))
> >>>>>                   tz->trips[trip_id] = *trip;
> >>>>>
> >>>>> +       if (t.temperature != trip->temperature && tz->governor && tz->governor->reset) {
> >>>>> +               ret = tz->governor->reset(tz, trip_id);
> >>>>> +               if (ret)
> >>>>> +                       pr_warn_once("Failed to reset thermal governor\n");
> >>>>
> >>>> I'm not really sure if it is useful to print this message here.
> >>>>
> >>>> First off, the governors may print more precise diagnostic messages if
> >>>> they care.
> >>>>
> >>>> Second, what is the sysadmin supposed to do in response to this message?
> >>>
> >>> In addition to the above, trip point temperatures may be updated in
> >>> other places too, for instance in response to notifications from
> >>> platform firmware and IMV this new callback should be also used in
> >>> those cases.  However, in those cases multiple trip points may change
> >>> at a time and the critical/hot trip point temperatures may be updated
> >>> too AFAICS.
> >>
> >> IIRC the critical/hot trip points are handled differently, not using the
> >> governors. The governors' 'throttle' callback would be called only
> >> after we pass the test of 'critical/hot' [1].
> >
> > OK, but is it actually useful to return an error code from the
> > ->reset() callback when passed a non-passive trip point?
>
> It will depend on the governor code. In our case the setup code
> w.r.t. trip types is quite confusing (to fit into many possible
> configurations). The non-passive trip point would be only
> possible to bind when there are not other passive trip points.
> That's is a really corner case and probably never used on any
> device. Therefore, IMO we can just bail out in such situation
> when then someone tries to update such single non-passive
> trip point (probably not aware what is doing with IPA?).

Because this is up to the governor, the core has no clue what to do
with the return value from ->reset() and so there should be none.

As I said, governors can print whatever diagnostic messages they like
in that callback, but returning anything from it to the core is just
not useful IMV.

> For the rest of the governors - it's up to them what they
> report in case non-passive trip is updated...

Sure.

> >
> >> What Di is facing is in the issue under the bucket of
> >> 'handle_non_critical_trips()' when the governor just tries to
> >> work on stale data - old trip temp.
> >
> > Well, fair enough, but what about the other governors?  Is this
> > problem limited to power_allocator?
>
> IIUC the core fwk code - non of the governors would be needed
> to handle the critical/hot trips. For the rest of the trip types
> I would say it's up to the governor. In our IPA case this stale
> data is used for power budget estimation - quite fundamental
> step. Therefore, the reset and start from scratch would make more
> sense.
>
> I think other governors don't try to 'estimate' such
> abstract power headroom based on temperature - so probably
> they don't have to even implement the 'reset()' callback
> (I don't know their logic that well).

So there seems to be a claim that IPA is the only governor needing the
->reset() callback, but I have not seen any solid analysis confirming
that.  It very well may be the case, but then the changelog should
clearly explain why this is the case IMO.

> >
> >> For the 2nd case IIUC the code, we pass the 'trip.temperature'
> >> and should be ready for what you said (modification of that value).
> >
> > Generally speaking, it needs to be prepared for a simultaneous change
> > of multiple trip points (including active), in which case it may not
> > be useful to invoke the ->reset() callback for each of them
> > individually.
>
> Although, that looks more cleaner IMO. Resetting one by one in
> a temperature order would be easily maintainable, won't be?

I wouldn't call it maintainable really.

First of all, the trips may not be ordered.  There are no guarantees
whatsoever that they will be ordered, so the caller may have to
determine the temperature order in the first place.  This would be an
extra requirement that currently is not there.

Apart from this, I don't see any fundamental difference between the
case when trip points are updated via sysfs and when they are updated
by the driver.  The governor should reset itself in any of those cases
and even if one trip point changes, the temperature order of all of
them may change, so the governor reset mechanism should be able to
handle the case when multiple trip points are updated at the same
time.  While you may argue that this is theoretical, the ACPI spec
clearly states that this is allowed to happen, for example.

If you want a generic reset callback for governors, that's fine, but
make it generic and not specific to a particular use case.
Lukasz Luba June 23, 2023, 7:43 a.m. UTC | #3
On 6/22/23 19:27, Rafael J. Wysocki wrote:
> On Tue, Jun 20, 2023 at 1:56 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>>
>>
>>
>> On 6/20/23 11:39, Rafael J. Wysocki wrote:
>>> On Tue, Jun 20, 2023 at 12:19 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>>>>
>>>> Hi Rafael,
>>>>
>>>>
>>>> On 6/20/23 11:07, Rafael J. Wysocki wrote:
>>>>> On Tue, Jun 20, 2023 at 11:46 AM Rafael J. Wysocki <rafael@kernel.org> wrote:

[snip]

> 
> Because this is up to the governor, the core has no clue what to do
> with the return value from ->reset() and so there should be none.
> 
> As I said, governors can print whatever diagnostic messages they like
> in that callback, but returning anything from it to the core is just
> not useful IMV.
> 
>> For the rest of the governors - it's up to them what they
>> report in case non-passive trip is updated...
> 
> Sure.
> 
>>>
>>>> What Di is facing is in the issue under the bucket of
>>>> 'handle_non_critical_trips()' when the governor just tries to
>>>> work on stale data - old trip temp.
>>>
>>> Well, fair enough, but what about the other governors?  Is this
>>> problem limited to power_allocator?
>>
>> IIUC the core fwk code - non of the governors would be needed
>> to handle the critical/hot trips. For the rest of the trip types
>> I would say it's up to the governor. In our IPA case this stale
>> data is used for power budget estimation - quite fundamental
>> step. Therefore, the reset and start from scratch would make more
>> sense.
>>
>> I think other governors don't try to 'estimate' such
>> abstract power headroom based on temperature - so probably
>> they don't have to even implement the 'reset()' callback
>> (I don't know their logic that well).
> 
> So there seems to be a claim that IPA is the only governor needing the
> ->reset() callback, but I have not seen any solid analysis confirming
> that.  It very well may be the case, but then the changelog should
> clearly explain why this is the case IMO.

I agree, the patch header doesn't explain that properly. Here is the
explanation for this Intelligent Power Allocator (IPA):

The IPA controls temperature using a PID mechanism. It's a closed
feedback loop. That algorithm can 'learn' from the reactions to its
control decisions 'observed' in the past, and it accumulates that
information in the part called the 'error integral'. Those accumulated
'error' gaps are the differences between the set target value and the
actually achieved value. In our case the target value is the target
temperature, which comes from the trip point. That part is then used
with the 'I' (of PID) component, so we can compensate for those
'learned' mistakes.
Now, when you change the target temperature value - all your previous
learned errors won't help you. That's why Intelligent Power Allocator
should reset previously accumulated history.

> 
>>>
>>>> For the 2nd case IIUC the code, we pass the 'trip.temperature'
>>>> and should be ready for what you said (modification of that value).
>>>
>>> Generally speaking, it needs to be prepared for a simultaneous change
>>> of multiple trip points (including active), in which case it may not
>>> be useful to invoke the ->reset() callback for each of them
>>> individually.
>>
>> Although, that looks more cleaner IMO. Resetting one by one in
>> a temperature order would be easily maintainable, won't be?
> 
> I wouldn't call it maintainable really.
> 
> First of all, the trips may not be ordered.  There are no guarantees
> whatsoever that they will be ordered, so the caller may have to
> determine the temperature order in the first place.  This would be an
> extra requirement that currently is not there.
> 
> Apart from this, I don't see any fundamental difference between the
> case when trip points are updated via sysfs and when they are updated
> by the driver.  The governor should reset itself in any of those cases
> and even if one trip point changes, the temperature order of all of
> them may change, so the governor reset mechanism should be able to
> handle the case when multiple trip points are updated at the same
> time.  While you may argue that this is theoretical, the ACPI spec
> clearly states that this is allowed to happen, for example.
> 
> If you want a generic reset callback for governors, that's fine, but
> make it generic and not specific to a particular use case.

I think we agree here, but probably have slightly different
implementations in mind. Based on your explanation I think you
simply want this API:
void (*reset)(struct thermal_zone_device *tz);

1. no return value
2. no specific trip ID as argument

Do you agree?
IMO such implementation and API would also work for this IPA
purpose. Would that work for the ACPI use case as well?
Rafael J. Wysocki June 23, 2023, 4:55 p.m. UTC | #4
On Fri, Jun 23, 2023 at 9:43 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 6/22/23 19:27, Rafael J. Wysocki wrote:
> > On Tue, Jun 20, 2023 at 1:56 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>
> >>
> >>
> >> On 6/20/23 11:39, Rafael J. Wysocki wrote:
> >>> On Tue, Jun 20, 2023 at 12:19 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>>>
> >>>> Hi Rafael,
> >>>>
> >>>>
> >>>> On 6/20/23 11:07, Rafael J. Wysocki wrote:
> >>>>> On Tue, Jun 20, 2023 at 11:46 AM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> [snip]
>
> >
> > Because this is up to the governor, the core has no clue what to do
> > with the return value from ->reset() and so there should be none.
> >
> > As I said, governors can print whatever diagnostic messages they like
> > in that callback, but returning anything from it to the core is just
> > not useful IMV.
> >
> >> For the rest of the governors - it's up to them what they
> >> report in case non-passive trip is updated...
> >
> > Sure.
> >
> >>>
> >>>> What Di is facing is in the issue under the bucket of
> >>>> 'handle_non_critical_trips()' when the governor just tries to
> >>>> work on stale data - old trip temp.
> >>>
> >>> Well, fair enough, but what about the other governors?  Is this
> >>> problem limited to power_allocator?
> >>
> >> IIUC the core fwk code - non of the governors would be needed
> >> to handle the critical/hot trips. For the rest of the trip types
> >> I would say it's up to the governor. In our IPA case this stale
> >> data is used for power budget estimation - quite fundamental
> >> step. Therefore, the reset and start from scratch would make more
> >> sense.
> >>
> >> I think other governors don't try to 'estimate' such
> >> abstract power headroom based on temperature - so probably
> >> they don't have to even implement the 'reset()' callback
> >> (I don't know their logic that well).
> >
> > So there seems to be a claim that IPA is the only governor needing the
> > ->reset() callback, but I have not seen any solid analysis confirming
> > that.  It very well may be the case, but then the changelog should
> > clearly explain why this is the case IMO.
>
> I agree, the patch header doesn't explain that properly. Here is the
> explanation for this Intelligent Power Allocator (IPA):
>
> The IPA controls temperature using PID mechanism. It's a closed
> feedback loop. That algorithm can 'learn' from the 'observed'
> in the past reaction for it's control decisions and accumulates that
> information in the part called 'error integral'. Those accumulated
> 'error' gaps are the differences between the set target value and the
> actually achieved value. In our case the target value is the target
> temperature which is coming from the trip point. That part is then used
> with the 'I' (of PID) component, so we can compensate for those
> 'learned' mistakes.
> Now, when you change the target temperature value - all your previous
> learned errors won't help you. That's why Intelligent Power Allocator
> should reset previously accumulated history.

Right.

And every other governor using information from the past for control
will have an analogous problem, won't it?

> >
> >>>
> >>>> For the 2nd case IIUC the code, we pass the 'trip.temperature'
> >>>> and should be ready for what you said (modification of that value).
> >>>
> >>> Generally speaking, it needs to be prepared for a simultaneous change
> >>> of multiple trip points (including active), in which case it may not
> >>> be useful to invoke the ->reset() callback for each of them
> >>> individually.
> >>
> >> Although, that looks more cleaner IMO. Resetting one by one in
> >> a temperature order would be easily maintainable, won't be?
> >
> > I wouldn't call it maintainable really.
> >
> > First of all, the trips may not be ordered.  There are no guarantees
> > whatsoever that they will be ordered, so the caller may have to
> > determine the temperature order in the first place.  This would be an
> > extra requirement that currently is not there.
> >
> > Apart from this, I don't see any fundamental difference between the
> > case when trip points are updated via sysfs and when they are updated
> > by the driver.  The governor should reset itself in any of those cases
> > and even if one trip point changes, the temperature order of all of
> > them may change, so the governor reset mechanism should be able to
> > handle the case when multiple trip points are updated at the same
> > time.  While you may argue that this is theoretical, the ACPI spec
> > clearly states that this is allowed to happen, for example.
> >
> > If you want a generic reset callback for governors, that's fine, but
> > make it generic and not specific to a particular use case.
>
> I think we agree here, but probably having slightly different
> implementation in mind. Based on you explanation I think you
> want simply this API:
> void (*reset)(struct thermal_zone_device *tz);
>
> 1. no return value
> 2. no specific trip ID as argument
>
> Do you agree?

Yes, I do.

> IMO such implementation and API would also work for this IPA
> purpose. Would that work for the ACPI use case as well?

It would AFAICS.
Lukasz Luba June 23, 2023, 5:34 p.m. UTC | #5
On 6/23/23 17:55, Rafael J. Wysocki wrote:
> On Fri, Jun 23, 2023 at 9:43 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
>>
>>
>>

[snip]

>>
>> I agree, the patch header doesn't explain that properly. Here is the
>> explanation for this Intelligent Power Allocator (IPA):
>>
>> The IPA controls temperature using PID mechanism. It's a closed
>> feedback loop. That algorithm can 'learn' from the 'observed'
>> in the past reaction for it's control decisions and accumulates that
>> information in the part called 'error integral'. Those accumulated
>> 'error' gaps are the differences between the set target value and the
>> actually achieved value. In our case the target value is the target
>> temperature which is coming from the trip point. That part is then used
>> with the 'I' (of PID) component, so we can compensate for those
>> 'learned' mistakes.
>> Now, when you change the target temperature value - all your previous
>> learned errors won't help you. That's why Intelligent Power Allocator
>> should reset previously accumulated history.
> 
> Right.
> 
> And every other governor using information from the past for control
> will have an analogous problem, won't it?

Not necessarily, but to play safe I would go case-by-case and make
sure other governors are aligned to this new feature.

E.g. the bang-bang governor operates only on current temperature and
current trip value + trip hysteresis. The flow graph describes it [1].
The control (state of the fan: ON or OFF) of that governor could be
simply adjusted to the new reality -> new trip point temp. That would
just mean 'toggling' the fan if needed. There are only 2 'target'
states: 0 or 1 for the fan. You can imagine a situation where the
temperature doesn't change, but we manipulate the trip value for that
governor. The governor would always react correctly in such a situation
w/o the need for a reset IMO.

> 
>>>
>>>>>
>>>>>> For the 2nd case IIUC the code, we pass the 'trip.temperature'
>>>>>> and should be ready for what you said (modification of that value).
>>>>>
>>>>> Generally speaking, it needs to be prepared for a simultaneous change
>>>>> of multiple trip points (including active), in which case it may not
>>>>> be useful to invoke the ->reset() callback for each of them
>>>>> individually.
>>>>
>>>> Although, that looks more cleaner IMO. Resetting one by one in
>>>> a temperature order would be easily maintainable, won't be?
>>>
>>> I wouldn't call it maintainable really.
>>>
>>> First of all, the trips may not be ordered.  There are no guarantees
>>> whatsoever that they will be ordered, so the caller may have to
>>> determine the temperature order in the first place.  This would be an
>>> extra requirement that currently is not there.
>>>
>>> Apart from this, I don't see any fundamental difference between the
>>> case when trip points are updated via sysfs and when they are updated
>>> by the driver.  The governor should reset itself in any of those cases
>>> and even if one trip point changes, the temperature order of all of
>>> them may change, so the governor reset mechanism should be able to
>>> handle the case when multiple trip points are updated at the same
>>> time.  While you may argue that this is theoretical, the ACPI spec
>>> clearly states that this is allowed to happen, for example.
>>>
>>> If you want a generic reset callback for governors, that's fine, but
>>> make it generic and not specific to a particular use case.
>>
>> I think we agree here, but probably having slightly different
>> implementation in mind. Based on you explanation I think you
>> want simply this API:
>> void (*reset)(struct thermal_zone_device *tz);
>>
>> 1. no return value
>> 2. no specific trip ID as argument
>>
>> Do you agree?
> 
> Yes, I do.

OK, thanks.

Di could you implement that 'reset()' API according to this description,
please?

> 
>> IMO such implementation and API would also work for this IPA
>> purpose. Would that work for the ACPI use case as well?
> 
> It would AFAICS.

Thanks Rafael for the comments and the progress that we made :)

Regards,
Lukasz

[1] 
https://elixir.bootlin.com/linux/v6.3/source/drivers/thermal/gov_bang_bang.c#L80
Di Shen June 25, 2023, 8:39 a.m. UTC | #6
On Fri, Jun 23, 2023 at 4:10 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 6/22/23 19:27, Rafael J. Wysocki wrote:
> > On Tue, Jun 20, 2023 at 1:56 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>
> >>
> >>
> >> On 6/20/23 11:39, Rafael J. Wysocki wrote:
> >>> On Tue, Jun 20, 2023 at 12:19 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>>>
> >>>> Hi Rafael,
> >>>>
> >>>>
> >>>> On 6/20/23 11:07, Rafael J. Wysocki wrote:
> >>>>> On Tue, Jun 20, 2023 at 11:46 AM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> [snip]
>
> >
> > Because this is up to the governor, the core has no clue what to do
> > with the return value from ->reset() and so there should be none.
> >
> > As I said, governors can print whatever diagnostic messages they like
> > in that callback, but returning anything from it to the core is just
> > not useful IMV.
> >
> >> For the rest of the governors - it's up to them what they
> >> report in case non-passive trip is updated...
> >
> > Sure.
> >
> >>>
> >>>> What Di is facing is in the issue under the bucket of
> >>>> 'handle_non_critical_trips()' when the governor just tries to
> >>>> work on stale data - old trip temp.
> >>>
> >>> Well, fair enough, but what about the other governors?  Is this
> >>> problem limited to power_allocator?
> >>
> >> IIUC the core fwk code - non of the governors would be needed
> >> to handle the critical/hot trips. For the rest of the trip types
> >> I would say it's up to the governor. In our IPA case this stale
> >> data is used for power budget estimation - quite fundamental
> >> step. Therefore, the reset and start from scratch would make more
> >> sense.
> >>
> >> I think other governors don't try to 'estimate' such
> >> abstract power headroom based on temperature - so probably
> >> they don't have to even implement the 'reset()' callback
> >> (I don't know their logic that well).
> >
> > So there seems to be a claim that IPA is the only governor needing the
> > ->reset() callback, but I have not seen any solid analysis confirming
> > that.  It very well may be the case, but then the changelog should
> > clearly explain why this is the case IMO.
>
> I agree, the patch header doesn't explain that properly. Here is the
> explanation for this Intelligent Power Allocator (IPA):
>
> The IPA controls temperature using PID mechanism. It's a closed
> feedback loop. That algorithm can 'learn' from the reaction 'observed'
> in the past to its control decisions and accumulates that
> information in the part called 'error integral'. Those accumulated
> 'error' gaps are the differences between the set target value and the
> actually achieved value. In our case the target value is the target
> temperature which is coming from the trip point. That part is then used
> with the 'I' (of PID) component, so we can compensate for those
> 'learned' mistakes.
> Now, when you change the target temperature value - all your previous
> learned errors won't help you. That's why Intelligent Power Allocator
> should reset previously accumulated history.
>

Yes, THAT's the point!
Maybe I need to write the commit message in more detail.

> >
> >>>
> >>>> For the 2nd case IIUC the code, we pass the 'trip.temperature'
> >>>> and should be ready for what you said (modification of that value).
> >>>
> >>> Generally speaking, it needs to be prepared for a simultaneous change
> >>> of multiple trip points (including active), in which case it may not
> >>> be useful to invoke the ->reset() callback for each of them
> >>> individually.
> >>
> >> Although, that looks cleaner IMO. Resetting one by one in
> >> temperature order would be easily maintainable, wouldn't it?
> >
> > I wouldn't call it maintainable really.
> >
> > First of all, the trips may not be ordered.  There are no guarantees
> > whatsoever that they will be ordered, so the caller may have to
> > determine the temperature order in the first place.  This would be an
> > extra requirement that currently is not there.
> >
> > Apart from this, I don't see any fundamental difference between the
> > case when trip points are updated via sysfs and when they are updated
> > by the driver.  The governor should reset itself in any of those cases
> > and even if one trip point changes, the temperature order of all of
> > them may change, so the governor reset mechanism should be able to
> > handle the case when multiple trip points are updated at the same
> > time.  While you may argue that this is theoretical, the ACPI spec
> > clearly states that this is allowed to happen, for example.
> >
> > If you want a generic reset callback for governors, that's fine, but
> > make it generic and not specific to a particular use case.
>
> I think we agree here, but probably having slightly different
> implementation in mind. Based on your explanation I think you
> want simply this API:
> void (*reset)(struct thermal_zone_device *tz);
>
> 1. no return value
> 2. no specific trip ID as argument
>
> Do you agree?
> IMO such implementation and API would also work for this IPA
> purpose. Would that work for the ACPI use case as well?
Di Shen June 25, 2023, 8:40 a.m. UTC | #7
On Sat, Jun 24, 2023 at 1:54 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 6/23/23 17:55, Rafael J. Wysocki wrote:
> > On Fri, Jun 23, 2023 at 9:43 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>
> >>
> >>
>
> [snip]
>
> >>
> >> I agree, the patch header doesn't explain that properly. Here is the
> >> explanation for this Intelligent Power Allocator (IPA):
> >>
> >> The IPA controls temperature using PID mechanism. It's a closed
> >> feedback loop. That algorithm can 'learn' from the reaction 'observed'
> >> in the past to its control decisions and accumulates that
> >> information in the part called 'error integral'. Those accumulated
> >> 'error' gaps are the differences between the set target value and the
> >> actually achieved value. In our case the target value is the target
> >> temperature which is coming from the trip point. That part is then used
> >> with the 'I' (of PID) component, so we can compensate for those
> >> 'learned' mistakes.
> >> Now, when you change the target temperature value - all your previous
> >> learned errors won't help you. That's why Intelligent Power Allocator
> >> should reset previously accumulated history.
> >
> > Right.
> >
> > And every other governor using information from the past for control
> > will have an analogous problem, won't it?
>
> Not necessarily, but to play safe I would go case-by-case and make
> sure other governors are aligned to this new feature.
>
> E.g. the bang-bang governor operates only on current temperature and
> current trip value + trip hysteresis. The flow graph describes it [1].
> The control (state of the fan: ON or OFF) of that governor could be
> simply adjusted to the new reality -> new trip point temp. That would
> just mean 'toggling' the fan if needed. There are only 2 'target'
> states: 0 or 1 for the fan. You can imagine a situation when the
> temperature doesn't change, but we manipulate the trip value for that
> governor. The governor would react correctly always in such situation
> w/o a need of a reset IMO.
>
> >
> >>>
> >>>>>
> >>>>>> For the 2nd case IIUC the code, we pass the 'trip.temperature'
> >>>>>> and should be ready for what you said (modification of that value).
> >>>>>
> >>>>> Generally speaking, it needs to be prepared for a simultaneous change
> >>>>> of multiple trip points (including active), in which case it may not
> >>>>> be useful to invoke the ->reset() callback for each of them
> >>>>> individually.
> >>>>
> >>>> Although, that looks cleaner IMO. Resetting one by one in
> >>>> temperature order would be easily maintainable, wouldn't it?
> >>>
> >>> I wouldn't call it maintainable really.
> >>>
> >>> First of all, the trips may not be ordered.  There are no guarantees
> >>> whatsoever that they will be ordered, so the caller may have to
> >>> determine the temperature order in the first place.  This would be an
> >>> extra requirement that currently is not there.
> >>>
> >>> Apart from this, I don't see any fundamental difference between the
> >>> case when trip points are updated via sysfs and when they are updated
> >>> by the driver.  The governor should reset itself in any of those cases
> >>> and even if one trip point changes, the temperature order of all of
> >>> them may change, so the governor reset mechanism should be able to
> >>> handle the case when multiple trip points are updated at the same
> >>> time.  While you may argue that this is theoretical, the ACPI spec
> >>> clearly states that this is allowed to happen, for example.
> >>>
> >>> If you want a generic reset callback for governors, that's fine, but
> >>> make it generic and not specific to a particular use case.
> >>
> >> I think we agree here, but probably having slightly different
> >> implementation in mind. Based on your explanation I think you
> >> want simply this API:
> >> void (*reset)(struct thermal_zone_device *tz);
> >>
> >> 1. no return value
> >> 2. no specific trip ID as argument
> >>
> >> Do you agree?
> >
> > Yes, I do.
>
> OK, thanks.
>
> Di could you implement that 'reset()' API according to this description,
> please?
>
Yes, happy to do that.

> >
> >> IMO such implementation and API would also work for this IPA
> >> purpose. Would that work for the ACPI use case as well?
> >
> > It would AFAICS.
>
> Thanks Rafael for the comments and the progress that we made :)
>
> Regards,
> Lukasz
>
> [1]
> https://elixir.bootlin.com/linux/v6.3/source/drivers/thermal/gov_bang_bang.c#L80

Thanks Lukasz and Rafael for the comments. I will send the next version later.

Best regards,
Di
Lukasz Luba June 26, 2023, 7:45 a.m. UTC | #8
On 6/25/23 09:39, Di Shen wrote:
> On Fri, Jun 23, 2023 at 4:10 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>>
>>
>>
>> On 6/22/23 19:27, Rafael J. Wysocki wrote:

[snip]

>>>
>>> So there seems to be a claim that IPA is the only governor needing the
>>> ->reset() callback, but I have not seen any solid analysis confirming
>>> that.  It very well may be the case, but then the changelog should
>>> clearly explain why this is the case IMO.
>>
>> I agree, the patch header doesn't explain that properly. Here is the
>> explanation for this Intelligent Power Allocator (IPA):
>>
>> The IPA controls temperature using PID mechanism. It's a closed
>> feedback loop. That algorithm can 'learn' from the reaction 'observed'
>> in the past to its control decisions and accumulates that
>> information in the part called 'error integral'. Those accumulated
>> 'error' gaps are the differences between the set target value and the
>> actually achieved value. In our case the target value is the target
>> temperature which is coming from the trip point. That part is then used
>> with the 'I' (of PID) component, so we can compensate for those
>> 'learned' mistakes.
>> Now, when you change the target temperature value - all your previous
>> learned errors won't help you. That's why Intelligent Power Allocator
>> should reset previously accumulated history.
>>
> 
> Yes, THAT's the point!
> Maybe I need to write the commit message in more detail.
> 

Yes, please extend that description.

Regards,
Lukasz
diff mbox series

Patch

diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 8642f1096b91..41d155adc616 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -729,10 +729,31 @@  static int power_allocator_throttle(struct thermal_zone_device *tz, int trip_id)
 	return allocate_power(tz, trip.temperature);
 }
 
+static int power_allocator_reset(struct thermal_zone_device *tz, int trip_id)
+{
+	int ret = 0;
+	struct thermal_trip trip;
+	struct power_allocator_params *params = tz->governor_data;
+
+	ret = __thermal_zone_get_trip(tz, trip_id, &trip);
+	if (ret)
+		return ret;
+
+	/* Only need reset for passive trips */
+	if (trip.type != THERMAL_TRIP_PASSIVE)
+		return -EINVAL;
+
+	reset_pid_controller(params);
+	allow_maximum_power(tz, true);
+
+	return ret;
+}
+
 static struct thermal_governor thermal_gov_power_allocator = {
 	.name		= "power_allocator",
 	.bind_to_tz	= power_allocator_bind,
 	.unbind_from_tz	= power_allocator_unbind,
 	.throttle	= power_allocator_throttle,
+	.reset		= power_allocator_reset,
 };
 THERMAL_GOVERNOR_DECLARE(thermal_gov_power_allocator);
diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
index 907f3a4d7bc8..52eb768fada8 100644
--- a/drivers/thermal/thermal_trip.c
+++ b/drivers/thermal/thermal_trip.c
@@ -173,6 +173,12 @@  int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
 	if (tz->trips && (t.temperature != trip->temperature || t.hysteresis != trip->hysteresis))
 		tz->trips[trip_id] = *trip;
 
+	if (t.temperature != trip->temperature && tz->governor && tz->governor->reset) {
+		ret = tz->governor->reset(tz, trip_id);
+		if (ret)
+			pr_warn_once("Failed to reset thermal governor\n");
+	}
+
 	thermal_notify_tz_trip_change(tz->id, trip_id, trip->type,
 				      trip->temperature, trip->hysteresis);
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 87837094d549..155ce2291fa5 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -204,6 +204,7 @@  struct thermal_governor {
 	int (*bind_to_tz)(struct thermal_zone_device *tz);
 	void (*unbind_from_tz)(struct thermal_zone_device *tz);
 	int (*throttle)(struct thermal_zone_device *tz, int trip);
+	int (*reset)(struct thermal_zone_device *tz, int trip);
 	struct list_head	governor_list;
 };