diff mbox series

[v3,21/24] thermal: intel: hfi: Implement model-specific checks for task classification

Message ID 20230207051105.11575-22-ricardo.neri-calderon@linux.intel.com
State New
Headers show
Series sched: Introduce classes of tasks for load balance | expand

Commit Message

Ricardo Neri Feb. 7, 2023, 5:11 a.m. UTC
In Alder Lake and Raptor Lake, the result of thread classification is more
accurate when only one SMT sibling is busy. Classification results for
class 2 and 3 are always reliable.

To avoid unnecessary migrations, only update the class of a task if it has
been the same during 4 consecutive user ticks.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
Changes since v2:
 * None

Changes since v1:
 * Adjusted the result the classification of Intel Thread Director to start
   at class 1. Class 0 for the scheduler means that the task is
   unclassified.
 * Used the new names of the IPC classes members in task_struct.
 * Reworked helper functions to use sched_smt_siblings_idle() to query
   the idle state of the SMT siblings of a CPU.
---
 drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

Comments

Rafael J. Wysocki March 27, 2023, 5:03 p.m. UTC | #1
On Tue, Feb 7, 2023 at 6:02 AM Ricardo Neri
<ricardo.neri-calderon@linux.intel.com> wrote:
>
> In Alder Lake and Raptor Lake, the result of thread classification is more
> accurate when only one SMT sibling is busy. Classification results for
> class 2 and 3 are always reliable.
>
> To avoid unnecessary migrations, only update the class of a task if it has
> been the same during 4 consecutive user ticks.
>
> Cc: Ben Segall <bsegall@google.com>
> Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> Cc: Ionela Voinescu <ionela.voinescu@arm.com>
> Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Lukasz Luba <lukasz.luba@arm.com>
> Cc: Mel Gorman <mgorman@suse.de>
> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Tim C. Chen <tim.c.chen@intel.com>
> Cc: Valentin Schneider <vschneid@redhat.com>
> Cc: x86@kernel.org
> Cc: linux-pm@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> ---
> Changes since v2:
>  * None
>
> Changes since v1:
>  * Adjusted the result the classification of Intel Thread Director to start
>    at class 1. Class 0 for the scheduler means that the task is
>    unclassified.
>  * Used the new names of the IPC classes members in task_struct.
>  * Reworked helper functions to use sched_smt_siblings_idle() to query
>    the idle state of the SMT siblings of a CPU.
> ---
>  drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
>  1 file changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index 35d947f47550..fdb53e4cabc1 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -40,6 +40,7 @@
>  #include <linux/workqueue.h>
>
>  #include <asm/msr.h>
> +#include <asm/intel-family.h>
>
>  #include "../thermal_core.h"
>  #include "intel_hfi.h"
> @@ -209,9 +210,64 @@ static int __percpu *hfi_ipcc_scores;
>   */
>  #define HFI_UNCLASSIFIED_DEFAULT 1
>
> +#define CLASS_DEBOUNCER_SKIPS 4
> +
> +/**
> + * debounce_and_update_class() - Process and update a task's classification
> + *
> + * @p:         The task of which the classification will be updated
> + * @new_ipcc:  The new IPC classification
> + *
> + * Update the classification of @p with the new value that hardware provides.
> + * Only update the classification of @p if it has been the same during
> + * CLASS_DEBOUNCER_SKIPS consecutive ticks.
> + */
> +static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
> +{
> +       u16 debounce_skip;
> +
> +       /* The class of @p changed. Only restart the debounce counter. */
> +       if (p->ipcc_tmp != new_ipcc) {
> +               p->ipcc_cntr = 1;
> +               goto out;
> +       }
> +
> +       /*
> +        * The class of @p did not change. Update it if it has been the same
> +        * for CLASS_DEBOUNCER_SKIPS user ticks.
> +        */
> +       debounce_skip = p->ipcc_cntr + 1;
> +       if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
> +               p->ipcc_cntr++;
> +       else
> +               p->ipcc = new_ipcc;
> +
> +out:
> +       p->ipcc_tmp = new_ipcc;
> +}

Why does the code above belong to the Intel HFI driver?  It doesn't
look like there is anything driver-specific in it.

> +
> +static bool classification_is_accurate(u8 hfi_class, bool smt_siblings_idle)
> +{
> +       switch (boot_cpu_data.x86_model) {
> +       case INTEL_FAM6_ALDERLAKE:
> +       case INTEL_FAM6_ALDERLAKE_L:
> +       case INTEL_FAM6_RAPTORLAKE:
> +       case INTEL_FAM6_RAPTORLAKE_P:
> +       case INTEL_FAM6_RAPTORLAKE_S:
> +               if (hfi_class == 3 || hfi_class == 2 || smt_siblings_idle)
> +                       return true;
> +
> +               return false;
> +
> +       default:
> +               return true;
> +       }
> +}
> +
>  void intel_hfi_update_ipcc(struct task_struct *curr)
>  {
>         union hfi_thread_feedback_char_msr msr;
> +       bool idle;
>
>         /* We should not be here if ITD is not supported. */
>         if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
> @@ -227,7 +283,9 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
>          * 0 is a valid classification for Intel Thread Director. A scheduler
>          * IPCC class of 0 means that the task is unclassified. Adjust.
>          */
> -       curr->ipcc = msr.split.classid + 1;
> +       idle = sched_smt_siblings_idle(task_cpu(curr));
> +       if (classification_is_accurate(msr.split.classid, idle))
> +               debounce_and_update_class(curr, msr.split.classid + 1);
>  }

I still think that this function should just return a number, possibly
including a special "no IPCC" value.  It may be passed a bool argument
indicating whether or not the SMT siblings are idle.

>
>  unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
> --
Ricardo Neri March 29, 2023, 12:15 a.m. UTC | #2
On Mon, Mar 27, 2023 at 07:03:08PM +0200, Rafael J. Wysocki wrote:
> On Tue, Feb 7, 2023 at 6:02 AM Ricardo Neri
> <ricardo.neri-calderon@linux.intel.com> wrote:
> >
> > In Alder Lake and Raptor Lake, the result of thread classification is more
> > accurate when only one SMT sibling is busy. Classification results for
> > class 2 and 3 are always reliable.
> >
> > To avoid unnecessary migrations, only update the class of a task if it has
> > been the same during 4 consecutive user ticks.
> >
> > Cc: Ben Segall <bsegall@google.com>
> > Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> > Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> > Cc: Ionela Voinescu <ionela.voinescu@arm.com>
> > Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
> > Cc: Len Brown <len.brown@intel.com>
> > Cc: Lukasz Luba <lukasz.luba@arm.com>
> > Cc: Mel Gorman <mgorman@suse.de>
> > Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> > Cc: Steven Rostedt <rostedt@goodmis.org>
> > Cc: Tim C. Chen <tim.c.chen@intel.com>
> > Cc: Valentin Schneider <vschneid@redhat.com>
> > Cc: x86@kernel.org
> > Cc: linux-pm@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> > ---
> > Changes since v2:
> >  * None
> >
> > Changes since v1:
> >  * Adjusted the result the classification of Intel Thread Director to start
> >    at class 1. Class 0 for the scheduler means that the task is
> >    unclassified.
> >  * Used the new names of the IPC classes members in task_struct.
> >  * Reworked helper functions to use sched_smt_siblings_idle() to query
> >    the idle state of the SMT siblings of a CPU.
> > ---
> >  drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
> >  1 file changed, 59 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> > index 35d947f47550..fdb53e4cabc1 100644
> > --- a/drivers/thermal/intel/intel_hfi.c
> > +++ b/drivers/thermal/intel/intel_hfi.c
> > @@ -40,6 +40,7 @@
> >  #include <linux/workqueue.h>
> >
> >  #include <asm/msr.h>
> > +#include <asm/intel-family.h>
> >
> >  #include "../thermal_core.h"
> >  #include "intel_hfi.h"
> > @@ -209,9 +210,64 @@ static int __percpu *hfi_ipcc_scores;
> >   */
> >  #define HFI_UNCLASSIFIED_DEFAULT 1
> >
> > +#define CLASS_DEBOUNCER_SKIPS 4
> > +
> > +/**
> > + * debounce_and_update_class() - Process and update a task's classification
> > + *
> > + * @p:         The task of which the classification will be updated
> > + * @new_ipcc:  The new IPC classification
> > + *
> > + * Update the classification of @p with the new value that hardware provides.
> > + * Only update the classification of @p if it has been the same during
> > + * CLASS_DEBOUNCER_SKIPS consecutive ticks.
> > + */
> > +static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
> > +{
> > +       u16 debounce_skip;
> > +
> > +       /* The class of @p changed. Only restart the debounce counter. */
> > +       if (p->ipcc_tmp != new_ipcc) {
> > +               p->ipcc_cntr = 1;
> > +               goto out;
> > +       }
> > +
> > +       /*
> > +        * The class of @p did not change. Update it if it has been the same
> > +        * for CLASS_DEBOUNCER_SKIPS user ticks.
> > +        */
> > +       debounce_skip = p->ipcc_cntr + 1;
> > +       if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
> > +               p->ipcc_cntr++;
> > +       else
> > +               p->ipcc = new_ipcc;
> > +
> > +out:
> > +       p->ipcc_tmp = new_ipcc;
> > +}
> 
> Why does the code above belong to the Intel HFI driver?  It doesn't
> look like there is anything driver-specific in it.

That is a good point. This post-processing is specific to the
implementation of IPCC classes using Intel Thread Director.

Maybe a new file called drivers/thermal/intel/intel_itd.c would be better?
Rafael J. Wysocki March 31, 2023, 5:08 p.m. UTC | #3
On Thu, Mar 30, 2023 at 4:31 AM Ricardo Neri
<ricardo.neri-calderon@linux.intel.com> wrote:
>
> On Wed, Mar 29, 2023 at 02:21:57PM +0200, Rafael J. Wysocki wrote:
> > On Wed, Mar 29, 2023 at 2:04 AM Ricardo Neri
> > <ricardo.neri-calderon@linux.intel.com> wrote:
> > >
> > > On Mon, Mar 27, 2023 at 07:03:08PM +0200, Rafael J. Wysocki wrote:
> > > > On Tue, Feb 7, 2023 at 6:02 AM Ricardo Neri
> > > > <ricardo.neri-calderon@linux.intel.com> wrote:
> > > > >
> > > > > In Alder Lake and Raptor Lake, the result of thread classification is more
> > > > > accurate when only one SMT sibling is busy. Classification results for
> > > > > class 2 and 3 are always reliable.
> > > > >
> > > > > To avoid unnecessary migrations, only update the class of a task if it has
> > > > > been the same during 4 consecutive user ticks.
> > > > >
> > > > > Cc: Ben Segall <bsegall@google.com>
> > > > > Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> > > > > Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> > > > > Cc: Ionela Voinescu <ionela.voinescu@arm.com>
> > > > > Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > Cc: Len Brown <len.brown@intel.com>
> > > > > Cc: Lukasz Luba <lukasz.luba@arm.com>
> > > > > Cc: Mel Gorman <mgorman@suse.de>
> > > > > Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > > > Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> > > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > > Cc: Tim C. Chen <tim.c.chen@intel.com>
> > > > > Cc: Valentin Schneider <vschneid@redhat.com>
> > > > > Cc: x86@kernel.org
> > > > > Cc: linux-pm@vger.kernel.org
> > > > > Cc: linux-kernel@vger.kernel.org
> > > > > Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> > > > > ---
> > > > > Changes since v2:
> > > > >  * None
> > > > >
> > > > > Changes since v1:
> > > > >  * Adjusted the result the classification of Intel Thread Director to start
> > > > >    at class 1. Class 0 for the scheduler means that the task is
> > > > >    unclassified.
> > > > >  * Used the new names of the IPC classes members in task_struct.
> > > > >  * Reworked helper functions to use sched_smt_siblings_idle() to query
> > > > >    the idle state of the SMT siblings of a CPU.
> > > > > ---
> > > > >  drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
> > > > >  1 file changed, 59 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> > > > > index 35d947f47550..fdb53e4cabc1 100644
> > > > > --- a/drivers/thermal/intel/intel_hfi.c
> > > > > +++ b/drivers/thermal/intel/intel_hfi.c
> > > > > @@ -40,6 +40,7 @@
> > > > >  #include <linux/workqueue.h>
> > > > >
> > > > >  #include <asm/msr.h>
> > > > > +#include <asm/intel-family.h>
> > > > >
> > > > >  #include "../thermal_core.h"
> > > > >  #include "intel_hfi.h"
> > > > > @@ -209,9 +210,64 @@ static int __percpu *hfi_ipcc_scores;
> > > > >   */
> > > > >  #define HFI_UNCLASSIFIED_DEFAULT 1
> > > > >
> > > > > +#define CLASS_DEBOUNCER_SKIPS 4
> > > > > +
> > > > > +/**
> > > > > + * debounce_and_update_class() - Process and update a task's classification
> > > > > + *
> > > > > + * @p:         The task of which the classification will be updated
> > > > > + * @new_ipcc:  The new IPC classification
> > > > > + *
> > > > > + * Update the classification of @p with the new value that hardware provides.
> > > > > + * Only update the classification of @p if it has been the same during
> > > > > + * CLASS_DEBOUNCER_SKIPS consecutive ticks.
> > > > > + */
> > > > > +static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
> > > > > +{
> > > > > +       u16 debounce_skip;
> > > > > +
> > > > > +       /* The class of @p changed. Only restart the debounce counter. */
> > > > > +       if (p->ipcc_tmp != new_ipcc) {
> > > > > +               p->ipcc_cntr = 1;
> > > > > +               goto out;
> > > > > +       }
> > > > > +
> > > > > +       /*
> > > > > +        * The class of @p did not change. Update it if it has been the same
> > > > > +        * for CLASS_DEBOUNCER_SKIPS user ticks.
> > > > > +        */
> > > > > +       debounce_skip = p->ipcc_cntr + 1;
> > > > > +       if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
> > > > > +               p->ipcc_cntr++;
> > > > > +       else
> > > > > +               p->ipcc = new_ipcc;
> > > > > +
> > > > > +out:
> > > > > +       p->ipcc_tmp = new_ipcc;
> > > > > +}
> > > >
> > > > Why does the code above belong to the Intel HFI driver?  It doesn't
> > > > look like there is anything driver-specific in it.
> > >
> > > That is a good point. This post-processing is specific to the
> > > implementation of IPCC classes using Intel Thread Director.
> >
> > Well, the implementation-specific part is the processor model check
> > whose only contribution is to say whether or not the classification is
> > valid.  The rest appears to be fairly generic to me.
>
> I meant to say that we use Intel Thread Director and the HFI driver to
> implement the interfaces defined in patch 2. Other architectures may
> implement those interfaces differently.
>
> For Intel, we may even need different filters and debouncers for different
> models.
>
> >
> > > Maybe a new file called drivers/thermal/intel/intel_itd.c would be better?
> >
> > So which part of this code other than the processor model check
> > mentioned above is Intel-specific?
>
> debounce_and_update_class() is needed for Intel processors, other
> architectures may not need it or have a different solution.

IMV, one general problem with this approach is that it is making a
more-or-less random thermal driver operate on task structure
internals, while drivers/thermal/ is not a usual place to look for CPU
scheduler code.

I'm not sure why it has to be done this way and none of the above
explains that IIUC.

Is it really the role of the thermal HFI driver to implement a task
classification algorithm?  I'm not convinced about that.  Personally,
I would probably introduce some proper arch code doing that and using
input from the HFI driver.
Ricardo Neri April 3, 2023, 2:12 p.m. UTC | #4
On Fri, Mar 31, 2023 at 07:08:55PM +0200, Rafael J. Wysocki wrote:
> On Thu, Mar 30, 2023 at 4:31 AM Ricardo Neri
> <ricardo.neri-calderon@linux.intel.com> wrote:
> >
> > On Wed, Mar 29, 2023 at 02:21:57PM +0200, Rafael J. Wysocki wrote:
> > > On Wed, Mar 29, 2023 at 2:04 AM Ricardo Neri
> > > <ricardo.neri-calderon@linux.intel.com> wrote:
> > > >
> > > > On Mon, Mar 27, 2023 at 07:03:08PM +0200, Rafael J. Wysocki wrote:
> > > > > On Tue, Feb 7, 2023 at 6:02 AM Ricardo Neri
> > > > > <ricardo.neri-calderon@linux.intel.com> wrote:
> > > > > >
> > > > > > In Alder Lake and Raptor Lake, the result of thread classification is more
> > > > > > accurate when only one SMT sibling is busy. Classification results for
> > > > > > class 2 and 3 are always reliable.
> > > > > >
> > > > > > To avoid unnecessary migrations, only update the class of a task if it has
> > > > > > been the same during 4 consecutive user ticks.
> > > > > >
> > > > > > Cc: Ben Segall <bsegall@google.com>
> > > > > > Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> > > > > > Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> > > > > > Cc: Ionela Voinescu <ionela.voinescu@arm.com>
> > > > > > Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > > > Cc: Len Brown <len.brown@intel.com>
> > > > > > Cc: Lukasz Luba <lukasz.luba@arm.com>
> > > > > > Cc: Mel Gorman <mgorman@suse.de>
> > > > > > Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > > > > Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> > > > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > > > Cc: Tim C. Chen <tim.c.chen@intel.com>
> > > > > > Cc: Valentin Schneider <vschneid@redhat.com>
> > > > > > Cc: x86@kernel.org
> > > > > > Cc: linux-pm@vger.kernel.org
> > > > > > Cc: linux-kernel@vger.kernel.org
> > > > > > Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> > > > > > ---
> > > > > > Changes since v2:
> > > > > >  * None
> > > > > >
> > > > > > Changes since v1:
> > > > > >  * Adjusted the result the classification of Intel Thread Director to start
> > > > > >    at class 1. Class 0 for the scheduler means that the task is
> > > > > >    unclassified.
> > > > > >  * Used the new names of the IPC classes members in task_struct.
> > > > > >  * Reworked helper functions to use sched_smt_siblings_idle() to query
> > > > > >    the idle state of the SMT siblings of a CPU.
> > > > > > ---
> > > > > >  drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
> > > > > >  1 file changed, 59 insertions(+), 1 deletion(-)
> > > > > >
> > > > > > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> > > > > > index 35d947f47550..fdb53e4cabc1 100644
> > > > > > --- a/drivers/thermal/intel/intel_hfi.c
> > > > > > +++ b/drivers/thermal/intel/intel_hfi.c
> > > > > > @@ -40,6 +40,7 @@
> > > > > >  #include <linux/workqueue.h>
> > > > > >
> > > > > >  #include <asm/msr.h>
> > > > > > +#include <asm/intel-family.h>
> > > > > >
> > > > > >  #include "../thermal_core.h"
> > > > > >  #include "intel_hfi.h"
> > > > > > @@ -209,9 +210,64 @@ static int __percpu *hfi_ipcc_scores;
> > > > > >   */
> > > > > >  #define HFI_UNCLASSIFIED_DEFAULT 1
> > > > > >
> > > > > > +#define CLASS_DEBOUNCER_SKIPS 4
> > > > > > +
> > > > > > +/**
> > > > > > + * debounce_and_update_class() - Process and update a task's classification
> > > > > > + *
> > > > > > + * @p:         The task of which the classification will be updated
> > > > > > + * @new_ipcc:  The new IPC classification
> > > > > > + *
> > > > > > + * Update the classification of @p with the new value that hardware provides.
> > > > > > + * Only update the classification of @p if it has been the same during
> > > > > > + * CLASS_DEBOUNCER_SKIPS consecutive ticks.
> > > > > > + */
> > > > > > +static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
> > > > > > +{
> > > > > > +       u16 debounce_skip;
> > > > > > +
> > > > > > +       /* The class of @p changed. Only restart the debounce counter. */
> > > > > > +       if (p->ipcc_tmp != new_ipcc) {
> > > > > > +               p->ipcc_cntr = 1;
> > > > > > +               goto out;
> > > > > > +       }
> > > > > > +
> > > > > > +       /*
> > > > > > +        * The class of @p did not change. Update it if it has been the same
> > > > > > +        * for CLASS_DEBOUNCER_SKIPS user ticks.
> > > > > > +        */
> > > > > > +       debounce_skip = p->ipcc_cntr + 1;
> > > > > > +       if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
> > > > > > +               p->ipcc_cntr++;
> > > > > > +       else
> > > > > > +               p->ipcc = new_ipcc;
> > > > > > +
> > > > > > +out:
> > > > > > +       p->ipcc_tmp = new_ipcc;
> > > > > > +}
> > > > >
> > > > > Why does the code above belong to the Intel HFI driver?  It doesn't
> > > > > look like there is anything driver-specific in it.
> > > >
> > > > That is a good point. This post-processing is specific to the
> > > > implementation of IPCC classes using Intel Thread Director.
> > >
> > > Well, the implementation-specific part is the processor model check
> > > whose only contribution is to say whether or not the classification is
> > > valid.  The rest appears to be fairly generic to me.
> >
> > I meant to say that we use Intel Thread Director and the HFI driver to
> > implement the interfaces defined in patch 2. Other architectures may
> > implement those interfaces differently.
> >
> > For Intel, we may even need different filters and debouncers for different
> > models.
> >
> > >
> > > > Maybe a new file called drivers/thermal/intel/intel_itd.c would be better?
> > >
> > > So which part of this code other than the processor model check
> > > mentioned above is Intel-specific?
> >
> > debounce_and_update_class() is needed for Intel processors, other
> > architectures may not need it or have a different solution.
> 
> IMV, one general problem with this approach is that it is making a
> more-or-less random thermal driver operate on task structure
> internals, while drivers/thermal/ is not a usual place to look for CPU
> scheduler code.

Fair point.

> 
> I'm not sure why it has to be done this way and none of the above
> explains that IIUC.
> 
> Is it really the role of the thermal HFI driver to implement a task
> classification algorithm?  I'm not convinced about that. 

Arguably, Intel Thread Director, an extension of the HFI driver provides
the classification.

> Personally,
> I would probably introduce some proper arch code doing that and using
> input from the HFI driver.

This makes sense to me. I will work on such updates.

Thanks and BR,
Ricardo
diff mbox series

Patch

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 35d947f47550..fdb53e4cabc1 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -40,6 +40,7 @@ 
 #include <linux/workqueue.h>
 
 #include <asm/msr.h>
+#include <asm/intel-family.h>
 
 #include "../thermal_core.h"
 #include "intel_hfi.h"
@@ -209,9 +210,64 @@  static int __percpu *hfi_ipcc_scores;
  */
 #define HFI_UNCLASSIFIED_DEFAULT 1
 
+#define CLASS_DEBOUNCER_SKIPS 4
+
+/**
+ * debounce_and_update_class() - Process and update a task's classification
+ *
+ * @p:		The task of which the classification will be updated
+ * @new_ipcc:	The new IPC classification
+ *
+ * Update the classification of @p with the new value that hardware provides.
+ * Only update the classification of @p if it has been the same during
+ * CLASS_DEBOUNCER_SKIPS consecutive ticks.
+ */
+static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
+{
+	u16 debounce_skip;
+
+	/* The class of @p changed. Only restart the debounce counter. */
+	if (p->ipcc_tmp != new_ipcc) {
+		p->ipcc_cntr = 1;
+		goto out;
+	}
+
+	/*
+	 * The class of @p did not change. Update it if it has been the same
+	 * for CLASS_DEBOUNCER_SKIPS user ticks.
+	 */
+	debounce_skip = p->ipcc_cntr + 1;
+	if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
+		p->ipcc_cntr++;
+	else
+		p->ipcc = new_ipcc;
+
+out:
+	p->ipcc_tmp = new_ipcc;
+}
+
+static bool classification_is_accurate(u8 hfi_class, bool smt_siblings_idle)
+{
+	switch (boot_cpu_data.x86_model) {
+	case INTEL_FAM6_ALDERLAKE:
+	case INTEL_FAM6_ALDERLAKE_L:
+	case INTEL_FAM6_RAPTORLAKE:
+	case INTEL_FAM6_RAPTORLAKE_P:
+	case INTEL_FAM6_RAPTORLAKE_S:
+		if (hfi_class == 3 || hfi_class == 2 || smt_siblings_idle)
+			return true;
+
+		return false;
+
+	default:
+		return true;
+	}
+}
+
 void intel_hfi_update_ipcc(struct task_struct *curr)
 {
 	union hfi_thread_feedback_char_msr msr;
+	bool idle;
 
 	/* We should not be here if ITD is not supported. */
 	if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
@@ -227,7 +283,9 @@  void intel_hfi_update_ipcc(struct task_struct *curr)
 	 * 0 is a valid classification for Intel Thread Director. A scheduler
 	 * IPCC class of 0 means that the task is unclassified. Adjust.
 	 */
-	curr->ipcc = msr.split.classid + 1;
+	idle = sched_smt_siblings_idle(task_cpu(curr));
+	if (classification_is_accurate(msr.split.classid, idle))
+		debounce_and_update_class(curr, msr.split.classid + 1);
 }
 
 unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)