diff mbox series

[2/2] tuna: Add idle-state control functionality

Message ID 20250128014551.15058-3-jwyatt@redhat.com
State New
Headers show
Series Add cpupower idle-state functionality | expand

Commit Message

John B. Wyatt IV Jan. 28, 2025, 1:45 a.m. UTC
Allows Tuna to control cpu idle-state functionality on the system,
including querying, enabling, disabling of cpu idle-states to control
power usage or to test functionality.

This requires cpupower, a utility in the Linux kernel repository and
the cpupower Python bindings added in Linux 6.12 to control cpu
idle-states. If cpupower is missing Tuna as a whole will continue to
function and idle-set functionality will error out.

Signed-off-by: John B. Wyatt IV <jwyatt@redhat.com>
Signed-off-by: John B. Wyatt IV <sageofredondo@gmail.com>
---
 tuna-cmd.py      |  33 +++++++-
 tuna/cpupower.py | 202 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 233 insertions(+), 2 deletions(-)
 create mode 100755 tuna/cpupower.py

Comments

Crystal Wood Feb. 13, 2025, 11:09 p.m. UTC | #1
On Mon, 2025-01-27 at 20:45 -0500, John B. Wyatt IV wrote:
> Allows Tuna to control cpu idle-state functionality on the system,
> including querying, enabling, disabling of cpu idle-states to control
> power usage or to test functionality.
> 
> This requires cpupower, a utility in the Linux kernel repository and
> the cpupower Python bindings added in Linux 6.12 to control cpu
> idle-states. If cpupower is missing Tuna as a whole will continue to
> function and idle-set functionality will error out.
> 
> Signed-off-by: John B. Wyatt IV <jwyatt@redhat.com>
> Signed-off-by: John B. Wyatt IV <sageofredondo@gmail.com>
> ---
>  tuna-cmd.py      |  33 +++++++-
>  tuna/cpupower.py | 202 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 233 insertions(+), 2 deletions(-)
>  create mode 100755 tuna/cpupower.py
> 
> diff --git a/tuna-cmd.py b/tuna-cmd.py
> index d0323f5..81d0f48 100755
> --- a/tuna-cmd.py
> +++ b/tuna-cmd.py
> @@ -25,6 +25,7 @@ from tuna import tuna, sysfs, utils
>  import logging
>  import time
>  import shutil
> +import tuna.cpupower as cpw

>  def get_loglevel(level):
>      if level.isdigit() and int(level) in range(0,5):
> @@ -115,8 +116,12 @@ def gen_parser():
>              "disable_perf": dict(action='store_true', help="Explicitly disable usage of perf in GUI for process view"),
>              "refresh": dict(default=2500, metavar='MSEC', type=int, help="Refresh the GUI every MSEC milliseconds"),
>              "priority": dict(default=(None, None), metavar="POLICY:RTPRIO", type=tuna.get_policy_and_rtprio, help="Set thread scheduler tunables: POLICY and RTPRIO"),
> -            "background": dict(action='store_true', help="Run command as background task")
> -         }
> +            "background": dict(action='store_true', help="Run command as background task"),
> +            "idle_state_disabled_status": dict(dest='idle_state_disabled_status', metavar='IDLESTATEDISABLEDSTATUS', type=int, help='Print if cpu idle state of the cpus in CPU-LIST is enabled or disabled. If CPU-LIST is not specified, default to all cpus.'),
> +            "idle_info": dict(dest='idle_info', action='store_const', const=True, help='Print general idle information on cpus in CPU-LIST. If CPU-LIST is not specified, default to all cpus.'),
> +            "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Disable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.'),
> +            "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Enable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.')
> +    }
>  
>      parser = HelpMessageParser(description="tuna - Application Tuning Program")
>  
> @@ -147,6 +152,10 @@ def gen_parser():
>      show_irqs = subparser.add_parser('show_irqs', description='Show IRQ list', help='Show IRQ list')
>      show_configs = subparser.add_parser('show_configs', description='List preloaded profiles', help='List preloaded profiles')
>  
> +    idle_set = subparser.add_parser('idle-set',
> +                                    description='Query and set all idle states on a given CPU list. Requires libcpupower to be installed',
> +                                    help='Set all idle states on a given CPU-LIST.')

idle_state would be a better name (or idle-state, but underscores are
already used elsewhere...), since it can both set and get.

It also mostly operates on individual states (-i being the exception),
not all at once.  How about just:

Manage CPU idle state disabling (requires libcpupower)

Also, don't forget to update the man page.

> @@ -635,6 +651,19 @@ def main():
>          my_logger.addHandler(add_handler("DEBUG", tofile=False))
>          my_logger.info("Debug option set")
>  
> +    if args.command == 'idle-set':
> +        if not cpw.have_cpupower:
> +            print(f"Error: libcpupower bindings are not detected; need {cpw.cpupower_required_kernel} at a minimum.")
> +            sys.exit(1)
> +
> +        if not args.cpu_list or args.cpu_list == []:
> +            args.cpu_list = cpw.Cpupower().get_all_cpu_list()
> +
> +        my_cpupower = cpw.Cpupower(args.cpu_list)
> +        ret = my_cpupower.idle_state_handler(args)
> +        if ret > 0:
> +            sys.exit(ret)

Why not just pass in cpu_list as is, and have cpw understand what
an empty or absent list is?  And it looks like it already partially
does check for None...  If the user specifically does something
like --cpu '' should we really treat that as equivalent to not
specifing --cpu?  I don't see other commands doing this.

Especially if you're going to delegate all other option parsing...
Why is cpulist special?  Just do something like:

elif args.command == 'idle_state'
    cpw.idle_state(args)

>      if args.loglevel:
>          if not args.debug:
>              my_logger = setup_logging("my_logger")

Why did you put the new command before log handling, rather than with
all the other commands?

> diff --git a/tuna/cpupower.py b/tuna/cpupower.py
> new file mode 100755
> index 0000000..b09dc2f
> --- /dev/null
> +++ b/tuna/cpupower.py
> @@ -0,0 +1,202 @@
> +# Copyright (C) 2024 John B. Wyatt IV
> +# SPDX-License-Identifier: GPL-2.0-only
> +
> +from typing import List
> +import tuna.utils as utils
> +
> +cpupower_required_kernel = "6.12"
> +have_cpupower = None
> +
> +
> +import raw_pylibcpupower as cpw

This is a bit confusing since you import this module as cpw
elsewhere...

Also, I got this even without trying to use the new functionality:

$ ./tuna-cmd.py 
Traceback (most recent call last):
  File "/home/crwood/git/tuna/./tuna-cmd.py", line 28, in <module>
    import tuna.cpupower as cpw
  File "/home/crwood/git/tuna/tuna/cpupower.py", line 11, in <module>
    import raw_pylibcpupower as cpw
ModuleNotFoundError: No module named 'raw_pylibcpupower'

Maybe something like:

try:
    import raw_pylibcpupower as lcpw
    lcpw.cpufreq_get_available_frequencies(0)
except:
    lcpw = None

> +        You must use have_cpupower variable to determine if the bindings were
> +        detected in your code."""

Instead of doing to the class/module user what SWIG did to the binding
user, why not just throw an exception if the binding is missing?  This
will automatically happen if lcpw is None, though you may want a
friendlier error message in the main entry point.

> +        def __init__(self, cpulist=None):
> +            if cpulist == None:
> +                self.__cpulist = self.get_all_cpu_list()
> +            else:
> +                self.__cpulist = cpulist
> +
> +        @classmethod
> +        def get_all_cpu_list(cls):
> +            return list(range(cls.get_idle_info()["all_cpus"]))

Is this really idle-state-specific?  Maybe just something like this
in tuna/utils.py:

def get_all_cpu_list():
    return list(range(get_nr_cpus()))

We shouldn't need to get all the idle state information just to get a
cpu list.

And all these class methods make me wonder why this is a class to begin
with, rather than just module functions.

> +
> +        @classmethod
> +        def get_idle_info(cls, cpu=0):

Why is cpu 0 special?

> +            idle_states, idle_states_amt = cls.get_idle_states(cpu)
> +            idle_states_list = []
> +            for idle_state in range(0, len(idle_states)):
> +                idle_states_list.append(
> +                    {
> +                        "CPU ID": cpu,
> +                        "Idle State Name": idle_states[idle_state],
> +                        "Flags/Description": cpw.cpuidle_state_desc(cpu, idle_state),
> +                        "Latency": cpw.cpuidle_state_latency(cpu, idle_state),
> +                        "Usage": cpw.cpuidle_state_usage(cpu, idle_state),
> +                        "Duration": cpw.cpuidle_state_time(cpu, idle_state)
> +                    }
> +                )
> +            idle_info = {
> +                "all_cpus": utils.get_nr_cpus(),
> +                "CPUidle-driver": cpw.cpuidle_get_driver(),
> +                "CPUidle-governor": cpw.cpuidle_get_governor(),
> +                "idle-states-count": idle_states_amt,
> +                "available-idle-states": idle_states,
> +                "cpu-states": idle_states_list
> +            }
> +            return idle_info

This seems overly complicated.  Why do you bundle all this stuff up
just to extract it elsewhere?  The call to get the data is simpler than
the data structure lookup.

> +
> +        @classmethod
> +        def print_idle_info(cls, cpu_list=[0]):

The only thing that instantiating this class does is to store a cpu
list, and here you're passing it into a class method instead...

> +        def idle_state_handler(self, args) -> int:
> +            if args.idle_state_disabled_status != None:
> +                cstate_index = args.idle_state_disabled_status
> +                cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state

The API doesn't make that assumption, so why should we?  Systems exist
with heterogeneous CPUs... could be a reason to support looking up states
by name, if and when there are systems we care about that actually have
different cpuidle states.

And you don't need this lookup anyay -- libcpupower already does the
bounds check, and you can get the name inside the loop.

> +                if cstate_index < 0 or cstate_index >= cstate_amt:
> +                    print(f"Invalid idle state range. Total for this cpu is {cstate_amt}")
> +                    return 1

"this cpu"?

> +                cstate_name = cstate_list[cstate_index]
> +                ret = self.is_disabled_idle_state(cstate_index)
> +                for i, e in enumerate(ret):
> +                    match e:
> +                        case 1:
> +                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.")
> +                        case 0:
> +                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.")
> +                        case -1:
> +                            print(f"Idlestate not available")
> +                        case -2:
> +                            print(f"Disabling is not supported by the kernel")
> +                        case _:
> +                            print(f"Not documented: {e}")
> +            elif args.idle_info != None:
> +                self.print_idle_info(args.cpu_list)
> +                return 0
> +            elif args.disable_idle_state != None:
> +                cstate_index = args.disable_idle_state
> +                cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state
> +                if cstate_index < 0 or cstate_index >= cstate_amt:
> +                    print(f"Invalid idle state range. Total for this cpu is {cstate_amt}")
> +                    return 1
> +                cstate_name = cstate_list[cstate_index]
> +                ret = self.disable_idle_state(cstate_index, 1)
> +                for i, e in enumerate(ret):
> +                    match e:
> +                        case 0:
> +                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.")
> +                        case -1:
> +                            print(f"Idlestate not available")
> +                        case -2:
> +                            print(f"Disabling is not supported by the kernel")
> +                        case -3:
> +                            print(f"No write access to disable/enable C-states: try using sudo")
> +                        case _:
> +                            print(f"Not documented: {e}")
> +            elif args.enable_idle_state != None:
> +                cstate_index = args.enable_idle_state
> +                cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state
> +                if cstate_index < 0 or cstate_index >= cstate_amt:
> +                    print(f"Invalid idle state range. Total for this cpu is {cstate_amt}")
> +                    return 1
> +                cstate_name = cstate_list[cstate_index]
> +                ret = self.disable_idle_state(cstate_index, 0)
> +                for i, e in enumerate(ret):
> +                    match e:
> +                        case 0:
> +                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.")
> +                        case -1:
> +                            print(f"Idlestate not available")
> +                        case -2:
> +                            print(f"Disabling is not supported by the kernel")
> +                        case -3:
> +                            print(f"No write access to disable/enable C-states: try using sudo")
> +                        case _:
> +                            print(f"Not documented: {e}")

Factor out the error messages so they're not duplicated between
suboperations.  Actually, pretty much the whole thing is duplicated between
enable/disable...

Do we want to print anything at all on success?  It looks like tuna
generally follows the Unix philosophy of not doing so.

And the error messages are a bit vague... imagine running a script and
something deep inside it spits out "Disabling is not supported by the
kernel".  Disabling what?

> +            else:
> +                print(args)
> +                print("idle-set error: you should not get here!")

"you should not get here" is not a useful error message... jut throw
an exception.

-Crystal
John B. Wyatt IV Feb. 19, 2025, 6:23 p.m. UTC | #2
On Thu, Feb 13, 2025 at 05:09:18PM -0600, Crystal Wood wrote:
> On Mon, 2025-01-27 at 20:45 -0500, John B. Wyatt IV wrote:
> > Allows Tuna to control cpu idle-state functionality on the system,
> > including querying, enabling, disabling of cpu idle-states to control
> > power usage or to test functionality.
> > 
> > This requires cpupower, a utility in the Linux kernel repository and
> > the cpupower Python bindings added in Linux 6.12 to control cpu
> > idle-states. If cpupower is missing Tuna as a whole will continue to
> > function and idle-set functionality will error out.
> > 
> > Signed-off-by: John B. Wyatt IV <jwyatt@redhat.com>
> > Signed-off-by: John B. Wyatt IV <sageofredondo@gmail.com>
> > ---
> >  tuna-cmd.py      |  33 +++++++-
> >  tuna/cpupower.py | 202 +++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 233 insertions(+), 2 deletions(-)
> >  create mode 100755 tuna/cpupower.py
> > 
> > diff --git a/tuna-cmd.py b/tuna-cmd.py
> > index d0323f5..81d0f48 100755
> > --- a/tuna-cmd.py
> > +++ b/tuna-cmd.py
> > @@ -25,6 +25,7 @@ from tuna import tuna, sysfs, utils
> >  import logging
> >  import time
> >  import shutil
> > +import tuna.cpupower as cpw
> 
> >  def get_loglevel(level):
> >      if level.isdigit() and int(level) in range(0,5):
> > @@ -115,8 +116,12 @@ def gen_parser():
> >              "disable_perf": dict(action='store_true', help="Explicitly disable usage of perf in GUI for process view"),
> >              "refresh": dict(default=2500, metavar='MSEC', type=int, help="Refresh the GUI every MSEC milliseconds"),
> >              "priority": dict(default=(None, None), metavar="POLICY:RTPRIO", type=tuna.get_policy_and_rtprio, help="Set thread scheduler tunables: POLICY and RTPRIO"),
> > -            "background": dict(action='store_true', help="Run command as background task")
> > -         }
> > +            "background": dict(action='store_true', help="Run command as background task"),
> > +            "idle_state_disabled_status": dict(dest='idle_state_disabled_status', metavar='IDLESTATEDISABLEDSTATUS', type=int, help='Print if cpu idle state of the cpus in CPU-LIST is enabled or disabled. If CPU-LIST is not specified, default to all cpus.'),
> > +            "idle_info": dict(dest='idle_info', action='store_const', const=True, help='Print general idle information on cpus in CPU-LIST. If CPU-LIST is not specified, default to all cpus.'),
> > +            "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Disable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.'),
> > +            "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Enable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.')
> > +    }
> >  
> >      parser = HelpMessageParser(description="tuna - Application Tuning Program")
> >  
> > @@ -147,6 +152,10 @@ def gen_parser():
> >      show_irqs = subparser.add_parser('show_irqs', description='Show IRQ list', help='Show IRQ list')
> >      show_configs = subparser.add_parser('show_configs', description='List preloaded profiles', help='List preloaded profiles')
> >  
> > +    idle_set = subparser.add_parser('idle-set',
> > +                                    description='Query and set all idle states on a given CPU list. Requires libcpupower to be installed',
> > +                                    help='Set all idle states on a given CPU-LIST.')
> 
> idle_state would be a better name (or idle-state, but underscores are
> already used elsewhere...), since it can both set and get.

Being used elsewhere is why I used it. Nothing else to add.

> 
> It also mostly operates on individual states (-i being the exception),
> not all at once.  How about just:
> 
> Manage CPU idle state disabling (requires libcpupower)

Will do.

> 
> Also, don't forget to update the man page.

The man page will be in a future patch.

> 
> > @@ -635,6 +651,19 @@ def main():
> >          my_logger.addHandler(add_handler("DEBUG", tofile=False))
> >          my_logger.info("Debug option set")
> >  
> > +    if args.command == 'idle-set':
> > +        if not cpw.have_cpupower:
> > +            print(f"Error: libcpupower bindings are not detected; need {cpw.cpupower_required_kernel} at a minimum.")
> > +            sys.exit(1)
> > +
> > +        if not args.cpu_list or args.cpu_list == []:
> > +            args.cpu_list = cpw.Cpupower().get_all_cpu_list()
> > +
> > +        my_cpupower = cpw.Cpupower(args.cpu_list)
> > +        ret = my_cpupower.idle_state_handler(args)
> > +        if ret > 0:
> > +            sys.exit(ret)
> 
> Why not just pass in cpu_list as is, and have cpw understand what
> an empty or absent list is?  And it looks like it already partially
> does check for None...  If the user specifically does something
> like --cpu '' should we really treat that as equivalent to not
> specifing --cpu?  I don't see other commands doing this.

Will move the logic to the class.

I do not understand what you wrote for the second part with checking for
--cpu. Would you please rephase?

Note I am still learning argparse; if you know of a better way, an
example would be welcome. :-)

> 
> Especially if you're going to delegate all other option parsing...
> Why is cpulist special?  Just do something like:
> 
> elif args.command == 'idle_state'
>     cpw.idle_state(args)
> 
> >      if args.loglevel:
> >          if not args.debug:
> >              my_logger = setup_logging("my_logger")
> 
> Why did you put the new command before log handling, rather than with
> all the other commands?

Will amend this. Looks like I misapplied a commit hunk.

> 
> > diff --git a/tuna/cpupower.py b/tuna/cpupower.py
> > new file mode 100755
> > index 0000000..b09dc2f
> > --- /dev/null
> > +++ b/tuna/cpupower.py
> > @@ -0,0 +1,202 @@
> > +# Copyright (C) 2024 John B. Wyatt IV
> > +# SPDX-License-Identifier: GPL-2.0-only
> > +
> > +from typing import List
> > +import tuna.utils as utils
> > +
> > +cpupower_required_kernel = "6.12"
> > +have_cpupower = None
> > +
> > +
> > +import raw_pylibcpupower as cpw
> 
> This is a bit confusing since you import this module as cpw
> elsewhere...
> 
> Also, I got this even without trying to use the new functionality:
> 
> $ ./tuna-cmd.py 
> Traceback (most recent call last):
>   File "/home/crwood/git/tuna/./tuna-cmd.py", line 28, in <module>
>     import tuna.cpupower as cpw
>   File "/home/crwood/git/tuna/tuna/cpupower.py", line 11, in <module>
>     import raw_pylibcpupower as cpw
> ModuleNotFoundError: No module named 'raw_pylibcpupower'
> 

This was pointed out to me in a separate email. I have already fixed it
for the v2 I will send.

> Maybe something like:
> 
> try:
>     import raw_pylibcpupower as lcpw
>     lcpw.cpufreq_get_available_frequencies(0)
> except:
>     lcpw = None

pylint does not like a module to be dynamically imported this way, but I
found a workaround using importlib. pylint's messaging is very valuable
so I rather not have to disable it. It gives a warning every time cpw is
used otherwise.

> 
> > +        You must use have_cpupower variable to determine if the bindings were
> > +        detected in your code."""
> 
> Instead of doing to the class/module user what SWIG did to the binding
> user, why not just throw an exception if the binding is missing?  This
> will automatically happen if lcpw is None, though you may want a
> friendlier error message in the main entry point.

Please see above.

> 
> > +        def __init__(self, cpulist=None):
> > +            if cpulist == None:
> > +                self.__cpulist = self.get_all_cpu_list()
> > +            else:
> > +                self.__cpulist = cpulist
> > +
> > +        @classmethod
> > +        def get_all_cpu_list(cls):
> > +            return list(range(cls.get_idle_info()["all_cpus"]))
> 
> Is this really idle-state-specific?  Maybe just something like this
> in tuna/utils.py:

It is in the cpupower.py file, not an idle-state.py file. The only part
specific to idle-state is idle-set is the only user.

I was thinking of that, but that can be a future patch. I wanted this
implementation to be kept simple.

> 
> def get_all_cpu_list():
>     return list(range(get_nr_cpus()))
> 
> We shouldn't need to get all the idle state information just to get a
> cpu list.

I mimicked the way cpupower reports this information. The information is
useful to report. I am not sure what your objection is.

> 
> And all these class methods make me wonder why this is a class to begin
> with, rather than just module functions.

I plan to add future functionality down the road; a class would be
appropriate for handling this.

> 
> > +
> > +        @classmethod
> > +        def get_idle_info(cls, cpu=0):
> 
> Why is cpu 0 special?

cpupower itself for this functionality picks a random cpu thread to report.
I choose cpu 0 as a stopgap until I find/upstream better functionality to
handle this. I did not feel a random cpu was any better.

> 
> > +            idle_states, idle_states_amt = cls.get_idle_states(cpu)
> > +            idle_states_list = []
> > +            for idle_state in range(0, len(idle_states)):
> > +                idle_states_list.append(
> > +                    {
> > +                        "CPU ID": cpu,
> > +                        "Idle State Name": idle_states[idle_state],
> > +                        "Flags/Description": cpw.cpuidle_state_desc(cpu, idle_state),
> > +                        "Latency": cpw.cpuidle_state_latency(cpu, idle_state),
> > +                        "Usage": cpw.cpuidle_state_usage(cpu, idle_state),
> > +                        "Duration": cpw.cpuidle_state_time(cpu, idle_state)
> > +                    }
> > +                )
> > +            idle_info = {
> > +                "all_cpus": utils.get_nr_cpus(),
> > +                "CPUidle-driver": cpw.cpuidle_get_driver(),
> > +                "CPUidle-governor": cpw.cpuidle_get_governor(),
> > +                "idle-states-count": idle_states_amt,
> > +                "available-idle-states": idle_states,
> > +                "cpu-states": idle_states_list
> > +            }
> > +            return idle_info
> 
> This seems overly complicated.  Why do you bundle all this stuff up
> just to extract it elsewhere?  The call to get the data is simpler than
> the data structure lookup.

I do not understand. There are multiple calls to get the data.

As I said above, I was mimicking how cpupower reports this information.
I thought it would be useful to report. I wrap this data up into a
hashmap that I hope may be useful to export as json for scripting in the
future.

> 
> > +
> > +        @classmethod
> > +        def print_idle_info(cls, cpu_list=[0]):
> 
> The only thing that instantiating this class does is to store a cpu
> list, and here you're passing it into a class method instead...

Yes, please see above.

> 
> > +        def idle_state_handler(self, args) -> int:
> > +            if args.idle_state_disabled_status != None:
> > +                cstate_index = args.idle_state_disabled_status
> > +                cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state
> 
> The API doesn't make that assumption, so why should we?  Systems exist
> with heterogeneous CPUs... could be a reason to support looking up states
> by name, if and when there are systems we care about that actually have
> different cpuidle states.

Heterogeneous CPU support will come in a future patch once I have
upstreamed some more work.

Note that the libcpupower API offers a way to get the number of cores of a
system (get_cpu_topology()), but the code to do this is commented out (see
the missing curly brace?) and returns a 0 for the number of cores
currently.

https://elixir.bootlin.com/linux/v6.13.3/source/tools/power/cpupower/lib/cpupower.c#L206-L213

> 
> And you don't need this lookup anyay -- libcpupower already does the
> bounds check, and you can get the name inside the loop.

I will check on that.

> 
> > +                if cstate_index < 0 or cstate_index >= cstate_amt:
> > +                    print(f"Invalid idle state range. Total for this cpu is {cstate_amt}")
> > +                    return 1
> 
> "this cpu"?

Will amend.

> 
> > +                cstate_name = cstate_list[cstate_index]
> > +                ret = self.is_disabled_idle_state(cstate_index)
> > +                for i, e in enumerate(ret):
> > +                    match e:
> > +                        case 1:
> > +                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.")
> > +                        case 0:
> > +                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.")
> > +                        case -1:
> > +                            print(f"Idlestate not available")
> > +                        case -2:
> > +                            print(f"Disabling is not supported by the kernel")
> > +                        case _:
> > +                            print(f"Not documented: {e}")
> > +            elif args.idle_info != None:
> > +                self.print_idle_info(args.cpu_list)
> > +                return 0
> > +            elif args.disable_idle_state != None:
> > +                cstate_index = args.disable_idle_state
> > +                cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state
> > +                if cstate_index < 0 or cstate_index >= cstate_amt:
> > +                    print(f"Invalid idle state range. Total for this cpu is {cstate_amt}")
> > +                    return 1
> > +                cstate_name = cstate_list[cstate_index]
> > +                ret = self.disable_idle_state(cstate_index, 1)
> > +                for i, e in enumerate(ret):
> > +                    match e:
> > +                        case 0:
> > +                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.")
> > +                        case -1:
> > +                            print(f"Idlestate not available")
> > +                        case -2:
> > +                            print(f"Disabling is not supported by the kernel")
> > +                        case -3:
> > +                            print(f"No write access to disable/enable C-states: try using sudo")
> > +                        case _:
> > +                            print(f"Not documented: {e}")
> > +            elif args.enable_idle_state != None:
> > +                cstate_index = args.enable_idle_state
> > +                cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state
> > +                if cstate_index < 0 or cstate_index >= cstate_amt:
> > +                    print(f"Invalid idle state range. Total for this cpu is {cstate_amt}")
> > +                    return 1
> > +                cstate_name = cstate_list[cstate_index]
> > +                ret = self.disable_idle_state(cstate_index, 0)
> > +                for i, e in enumerate(ret):
> > +                    match e:
> > +                        case 0:
> > +                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.")
> > +                        case -1:
> > +                            print(f"Idlestate not available")
> > +                        case -2:
> > +                            print(f"Disabling is not supported by the kernel")
> > +                        case -3:
> > +                            print(f"No write access to disable/enable C-states: try using sudo")
> > +                        case _:
> > +                            print(f"Not documented: {e}")
> 
> Factor out the error messages so they're not duplicated between
> suboperations.  Actually, pretty much the whole thing is duplicated between
> enable/disable...

Will do.

> 
> Do we want to print anything at all on success?  It looks like tuna
> generally follows the Unix philosophy of not doing so.

Good point. That will also make it easier to script.

> 
> And the error messages are a bit vague... imagine running a script and
> something deep inside it spits out "Disabling is not supported by the
> kernel".  Disabling what?
> 
> > +            else:
> > +                print(args)
> > +                print("idle-set error: you should not get here!")
> 
> "you should not get here" is not a useful error message... jut throw
> an exception.

Will amend.
diff mbox series

Patch

diff --git a/tuna-cmd.py b/tuna-cmd.py
index d0323f5..81d0f48 100755
--- a/tuna-cmd.py
+++ b/tuna-cmd.py
@@ -25,6 +25,7 @@  from tuna import tuna, sysfs, utils
 import logging
 import time
 import shutil
+import tuna.cpupower as cpw
 
 def get_loglevel(level):
     if level.isdigit() and int(level) in range(0,5):
@@ -115,8 +116,12 @@  def gen_parser():
             "disable_perf": dict(action='store_true', help="Explicitly disable usage of perf in GUI for process view"),
             "refresh": dict(default=2500, metavar='MSEC', type=int, help="Refresh the GUI every MSEC milliseconds"),
             "priority": dict(default=(None, None), metavar="POLICY:RTPRIO", type=tuna.get_policy_and_rtprio, help="Set thread scheduler tunables: POLICY and RTPRIO"),
-            "background": dict(action='store_true', help="Run command as background task")
-         }
+            "background": dict(action='store_true', help="Run command as background task"),
+            "idle_state_disabled_status": dict(dest='idle_state_disabled_status', metavar='IDLESTATEDISABLEDSTATUS', type=int, help='Print if cpu idle state of the cpus in CPU-LIST is enabled or disabled. If CPU-LIST is not specified, default to all cpus.'),
+            "idle_info": dict(dest='idle_info', action='store_const', const=True, help='Print general idle information on cpus in CPU-LIST. If CPU-LIST is not specified, default to all cpus.'),
+            "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Disable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.'),
+            "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLESTATEINDEX', type=int, help='Enable cpus in CPU-LIST\'s cpu idle (cpu sleep state). If CPU-LIST is not specified, default to all cpus.')
+    }
 
     parser = HelpMessageParser(description="tuna - Application Tuning Program")
 
@@ -147,6 +152,10 @@  def gen_parser():
     show_irqs = subparser.add_parser('show_irqs', description='Show IRQ list', help='Show IRQ list')
     show_configs = subparser.add_parser('show_configs', description='List preloaded profiles', help='List preloaded profiles')
 
+    idle_set = subparser.add_parser('idle-set',
+                                    description='Query and set all idle states on a given CPU list. Requires libcpupower to be installed',
+                                    help='Set all idle states on a given CPU-LIST.')
+
     what_is = subparser.add_parser('what_is', description='Provides help about selected entities', help='Provides help about selected entities')
     gui = subparser.add_parser('gui', description="Start the GUI", help="Start the GUI")
 
@@ -218,6 +227,13 @@  def gen_parser():
     show_irqs_group.add_argument('-S', '--sockets', **MODS['sockets'])
     show_irqs.add_argument('-q', '--irqs', **MODS['irqs'])
 
+    idle_set_group = idle_set.add_mutually_exclusive_group(required=True)
+    idle_set.add_argument('-c', '--cpus', **MODS['cpus'])
+    idle_set_group.add_argument('-s', '--status', **MODS['idle_state_disabled_status'])
+    idle_set_group.add_argument('-i', '--idle-info', **MODS['idle_info'])
+    idle_set_group.add_argument('-d', '--disable', **MODS['disable_idle_state'])
+    idle_set_group.add_argument('-e', '--enable', **MODS['enable_idle_state'])
+
     what_is.add_argument('thread_list', **POS['thread_list'])
 
     gui.add_argument('-d', '--disable_perf', **MODS['disable_perf'])
@@ -635,6 +651,19 @@  def main():
         my_logger.addHandler(add_handler("DEBUG", tofile=False))
         my_logger.info("Debug option set")
 
+    if args.command == 'idle-set':
+        if not cpw.have_cpupower:
+            print(f"Error: libcpupower bindings are not detected; need {cpw.cpupower_required_kernel} at a minimum.")
+            sys.exit(1)
+
+        if not args.cpu_list or args.cpu_list == []:
+            args.cpu_list = cpw.Cpupower().get_all_cpu_list()
+
+        my_cpupower = cpw.Cpupower(args.cpu_list)
+        ret = my_cpupower.idle_state_handler(args)
+        if ret > 0:
+            sys.exit(ret)
+
     if args.loglevel:
         if not args.debug:
             my_logger = setup_logging("my_logger")
diff --git a/tuna/cpupower.py b/tuna/cpupower.py
new file mode 100755
index 0000000..b09dc2f
--- /dev/null
+++ b/tuna/cpupower.py
@@ -0,0 +1,202 @@ 
+# Copyright (C) 2024 John B. Wyatt IV
+# SPDX-License-Identifier: GPL-2.0-only
+
+from typing import List
+import tuna.utils as utils
+
+cpupower_required_kernel = "6.12"
+have_cpupower = None
+
+
+import raw_pylibcpupower as cpw
+try:
+    # Noticed that SWIG generated bindings will simply complain a
+    # function does not exist instead of triggering an exception.
+    # Call some query function to trigger an AttributeError for
+    # when the bindings are missing.
+    cpw.cpufreq_get_available_frequencies(0)
+    have_cpupower = True
+except:
+    have_cpupower = False
+
+
+if have_cpupower:
+    class Cpupower:
+        """The Cpupower class allows you to query and change the power states of the
+        cpu.
+
+        You may query or change the cpus all at once or a list of the cpus provided to the constructor's cpulist argument.
+
+        The bindings must be detected on the $PYTHONPATH variable.
+
+        You must use have_cpupower variable to determine if the bindings were
+        detected in your code."""
+        def __init__(self, cpulist=None):
+            if cpulist == None:
+                self.__cpulist = self.get_all_cpu_list()
+            else:
+                self.__cpulist = cpulist
+
+        @classmethod
+        def get_all_cpu_list(cls):
+            return list(range(cls.get_idle_info()["all_cpus"]))
+
+        @classmethod
+        def get_idle_states(cls, cpu):
+            """
+            Get the c-states of a cpu.
+
+            You can capture the return values with:
+            states_list, states_amt = get_idle_states()
+
+            Returns
+                List[String]: list of cstates
+                Int: amt of cstates
+            """
+            ret = []
+            for cstate in range(cpw.cpuidle_state_count(cpu)):
+                ret.append(cpw.cpuidle_state_name(cpu,cstate))
+            return ret, cpw.cpuidle_state_count(cpu)
+
+        @classmethod
+        def get_idle_info(cls, cpu=0):
+            idle_states, idle_states_amt = cls.get_idle_states(cpu)
+            idle_states_list = []
+            for idle_state in range(0, len(idle_states)):
+                idle_states_list.append(
+                    {
+                        "CPU ID": cpu,
+                        "Idle State Name": idle_states[idle_state],
+                        "Flags/Description": cpw.cpuidle_state_desc(cpu, idle_state),
+                        "Latency": cpw.cpuidle_state_latency(cpu, idle_state),
+                        "Usage": cpw.cpuidle_state_usage(cpu, idle_state),
+                        "Duration": cpw.cpuidle_state_time(cpu, idle_state)
+                    }
+                )
+            idle_info = {
+                "all_cpus": utils.get_nr_cpus(),
+                "CPUidle-driver": cpw.cpuidle_get_driver(),
+                "CPUidle-governor": cpw.cpuidle_get_governor(),
+                "idle-states-count": idle_states_amt,
+                "available-idle-states": idle_states,
+                "cpu-states": idle_states_list
+            }
+            return idle_info
+
+        @classmethod
+        def print_idle_info(cls, cpu_list=[0]):
+            cpu_count = utils.get_nr_cpus()
+            for cpu in cpu_list:
+                idle_info = cls.get_idle_info(cpu)
+                print_str = f"""
+CPUidle driver: {idle_info["CPUidle-driver"]}
+CPUidle governor: {idle_info["CPUidle-governor"]}
+analyzing CPU {cpu}
+
+Number of idle states: {idle_info["idle-states-count"]}
+Available idle states: {idle_info["available-idle-states"]}
+"""
+                for state in idle_info["cpu-states"]:
+                    print_str += f"""{state["Idle State Name"]}
+Flags/Description: {state["Flags/Description"]}
+Latency: {state["Latency"]}
+Usage: {state["Usage"]}
+Duration: {state["Duration"]}
+"""
+                print(
+                    print_str
+                )
+
+        def idle_state_handler(self, args) -> int:
+            if args.idle_state_disabled_status != None:
+                cstate_index = args.idle_state_disabled_status
+                cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state
+                if cstate_index < 0 or cstate_index >= cstate_amt:
+                    print(f"Invalid idle state range. Total for this cpu is {cstate_amt}")
+                    return 1
+                cstate_name = cstate_list[cstate_index]
+                ret = self.is_disabled_idle_state(cstate_index)
+                for i, e in enumerate(ret):
+                    match e:
+                        case 1:
+                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.")
+                        case 0:
+                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.")
+                        case -1:
+                            print(f"Idlestate not available")
+                        case -2:
+                            print(f"Disabling is not supported by the kernel")
+                        case _:
+                            print(f"Not documented: {e}")
+            elif args.idle_info != None:
+                self.print_idle_info(args.cpu_list)
+                return 0
+            elif args.disable_idle_state != None:
+                cstate_index = args.disable_idle_state
+                cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state
+                if cstate_index < 0 or cstate_index >= cstate_amt:
+                    print(f"Invalid idle state range. Total for this cpu is {cstate_amt}")
+                    return 1
+                cstate_name = cstate_list[cstate_index]
+                ret = self.disable_idle_state(cstate_index, 1)
+                for i, e in enumerate(ret):
+                    match e:
+                        case 0:
+                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is disabled.")
+                        case -1:
+                            print(f"Idlestate not available")
+                        case -2:
+                            print(f"Disabling is not supported by the kernel")
+                        case -3:
+                            print(f"No write access to disable/enable C-states: try using sudo")
+                        case _:
+                            print(f"Not documented: {e}")
+            elif args.enable_idle_state != None:
+                cstate_index = args.enable_idle_state
+                cstate_list, cstate_amt = self.get_idle_states(args.cpu_list[0]) # Assumption, that all cpus have the same idle state
+                if cstate_index < 0 or cstate_index >= cstate_amt:
+                    print(f"Invalid idle state range. Total for this cpu is {cstate_amt}")
+                    return 1
+                cstate_name = cstate_list[cstate_index]
+                ret = self.disable_idle_state(cstate_index, 0)
+                for i, e in enumerate(ret):
+                    match e:
+                        case 0:
+                            print(f"CPU: {args.cpu_list[i]} Idle state \"{cstate_name}\" is enabled.")
+                        case -1:
+                            print(f"Idlestate not available")
+                        case -2:
+                            print(f"Disabling is not supported by the kernel")
+                        case -3:
+                            print(f"No write access to disable/enable C-states: try using sudo")
+                        case _:
+                            print(f"Not documented: {e}")
+            else:
+                print(args)
+                print("idle-set error: you should not get here!")
+            return 0
+
+        """
+        Disable or enable an idle state using the object's stored list of cpus.
+
+        Args:
+            state (int): The cpu idle state index to disable or enable as an int starting from 0.
+            disabled (int): set to 1 to disable or 0 to enable. Less than 0 is an error.
+        """
+        def disable_idle_state(self, state, disabled) -> List[int]:
+            ret = []
+            for cpu in self.__cpulist:
+                ret.append(cpw.cpuidle_state_disable(cpu, state, disabled))
+            return ret
+
+        """
+        Query the idle state.
+
+        Args:
+            state: The cpu idle state. 1 is disabled, 0 is enabled. Less than 0 is an error.
+        """
+        def is_disabled_idle_state(self, state) -> List[int]:
+            ret = []
+            for cpu in self.__cpulist:
+                ret.append(cpw.cpuidle_is_state_disabled(cpu, state))
+            return ret