@@ -29,4 +29,14 @@ config INTEL_RAPL
controller, CPU core (Power Plane 0), graphics uncore (Power Plane
1), etc.
+config IDLE_INJECTION
+ bool "Idle injection framework"
+ depends on CPU_IDLE
+ default n
+ help
+ This enables support for the idle injection framework. It
+ provides a way to force idle periods on a set of specified
+ CPUs for power capping. Idle periods can be injected
+ synchronously on a set of specified CPUs or, alternatively,
+ on a per-CPU basis.
endif
@@ -1,2 +1,3 @@
obj-$(CONFIG_POWERCAP) += powercap_sys.o
obj-$(CONFIG_INTEL_RAPL) += intel_rapl.o
+obj-$(CONFIG_IDLE_INJECTION) += idle_injection.o
new file mode 100644
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The idle injection framework provides a way to force a CPU to enter
+ * an idle state for a specified amount of time, repeated with a
+ * specified period.
+ *
+ * It relies on the smpboot kthreads, which handle, via their main
+ * loop, the common code for hotplugging and [un]parking.
+ *
+ * At init time, all the kthreads are created and parked.
+ *
+ * A cpumask is passed as a parameter to the idle injection
+ * registration function. The kthreads running on the CPUs of this
+ * cpumask will be synchronized with each other.
+ *
+ * The idle and run durations are specified via the helpers; the idle
+ * injection can then be started.
+ *
+ * Each kthread will call play_idle() with the idle duration specified
+ * above.
+ *
+ * Once all the tasks have completed their idle period, a timer is set
+ * to trigger the next idle injection cycle.
+ *
+ * The timer expiration handler will wake up all the kthreads
+ * belonging to the cpumask.
+ */
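+
+/*
+ * Illustrative usage sketch (an assumption for this example, not part
+ * of the framework): a hypothetical client, named "foo" below, caps
+ * the power of CPUs 0-3 by injecting 10ms of idle time out of every
+ * 50ms. idle_injection_set_duration() takes the run duration first
+ * and the idle duration second, hence (40, 10) below. The foo_* names,
+ * the cpumask and the durations are made up for the example; note that
+ * idle_injection_unregister() stops the injection itself, so no
+ * explicit idle_injection_stop() call is needed in foo_stop_capping().
+ *
+ *	static struct idle_injection_device *foo_ii_dev;
+ *
+ *	static int foo_start_capping(void)
+ *	{
+ *		struct cpumask mask;
+ *
+ *		cpumask_clear(&mask);
+ *		cpumask_set_cpu(0, &mask);
+ *		cpumask_set_cpu(1, &mask);
+ *		cpumask_set_cpu(2, &mask);
+ *		cpumask_set_cpu(3, &mask);
+ *
+ *		foo_ii_dev = idle_injection_register(&mask);
+ *		if (!foo_ii_dev)
+ *			return -ENOMEM;
+ *
+ *		idle_injection_set_duration(foo_ii_dev, 40, 10);
+ *
+ *		return idle_injection_start(foo_ii_dev);
+ *	}
+ *
+ *	static void foo_stop_capping(void)
+ *	{
+ *		idle_injection_unregister(foo_ii_dev);
+ *	}
+ */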
+#define pr_fmt(fmt) "ii_dev: " fmt
+
+#include <linux/cpu.h>
+#include <linux/freezer.h>
+#include <linux/hrtimer.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smpboot.h>
+
+#include <uapi/linux/sched/types.h>
+
+/**
+ * struct idle_injection_thread - task on/off switch structure
+ * @tsk: a pointer to a task_struct injecting the idle cycles
+ * @should_run: an integer used as a boolean by the smpboot kthread API
+ */
+struct idle_injection_thread {
+ struct task_struct *tsk;
+ int should_run;
+};
+
+/**
+ * struct idle_injection_device - data for the idle injection
+ * @cpumask: a cpumask containing the list of CPUs managed by the device
+ * @timer: a hrtimer giving the tempo for the idle injection
+ * @stop_complete: a completion used to synchronize the idle injection stop
+ * @count: an atomic to keep track of the last task exiting the idle cycle
+ * @idle_duration_ms: an unsigned int specifying the idle duration
+ * @run_duration_ms: an unsigned int specifying the running duration
+ */
+struct idle_injection_device {
+ cpumask_var_t cpumask;
+ struct hrtimer timer;
+ struct completion stop_complete;
+ unsigned int idle_duration_ms;
+ unsigned int run_duration_ms;
+ atomic_t count;
+};
+
+static DEFINE_PER_CPU(struct idle_injection_thread, idle_injection_thread);
+static DEFINE_PER_CPU(struct idle_injection_device *, idle_injection_device);
+
+/**
+ * __idle_injection_wakeup - Wake up all idle injection threads
+ * @ii_dev: the idle injection device
+ *
+ * Every idle injection task belonging to the idle injection device
+ * and running on an online CPU will be woken up by this call.
+ */
+static void __idle_injection_wakeup(struct idle_injection_device *ii_dev)
+{
+ struct idle_injection_thread *iit;
+ struct cpumask tmp;
+ unsigned int cpu;
+
+ cpumask_and(&tmp, ii_dev->cpumask, cpu_online_mask);
+
+ atomic_set(&ii_dev->count, cpumask_weight(&tmp));
+
+ for_each_cpu(cpu, &tmp) {
+ iit = per_cpu_ptr(&idle_injection_thread, cpu);
+ iit->should_run = 1;
+ wake_up_process(iit->tsk);
+ }
+}
+
+/**
+ * idle_injection_wakeup - Wake up all idle injection threads
+ * @ii_dev: the idle injection device
+ *
+ * This function wakes up all the idle injection tasks belonging to
+ * @ii_dev by invoking __idle_injection_wakeup() with CPU hotplug
+ * disabled.
+ */
+static void idle_injection_wakeup(struct idle_injection_device *ii_dev)
+{
+ get_online_cpus();
+ __idle_injection_wakeup(ii_dev);
+ put_online_cpus();
+}
+
+/**
+ * idle_injection_wakeup_fn - idle injection timer callback
+ * @timer: a hrtimer structure
+ *
+ * This function is called when the idle injection timer expires. It
+ * wakes up the idle injection tasks which, in turn, play idle for the
+ * specified amount of time.
+ *
+ * Return: HRTIMER_NORESTART.
+ */
+static enum hrtimer_restart idle_injection_wakeup_fn(struct hrtimer *timer)
+{
+ struct idle_injection_device *ii_dev =
+ container_of(timer, struct idle_injection_device, timer);
+
+ __idle_injection_wakeup(ii_dev);
+
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * idle_injection_last_man - operations by the last task
+ * @ii_dev: a pointer to an idle_injection_device structure
+ *
+ * This function groups the operations done by the last idle injection
+ * task. It can be called from the idle injection callback as well as
+ * from the park callback when the thread is parked at hotplug time.
+ */
+static void idle_injection_last_man(struct idle_injection_device *ii_dev)
+{
+ unsigned int run_duration_ms;
+
+ run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+ if (run_duration_ms) {
+ hrtimer_start(&ii_dev->timer, ms_to_ktime(run_duration_ms),
+ HRTIMER_MODE_REL_PINNED);
+ return;
+ }
+
+ complete(&ii_dev->stop_complete);
+}
+
+/**
+ * idle_injection_fn - idle injection routine
+ * @cpu: the CPU number the task belongs to
+ *
+ * The idle injection routine will stay idle for the specified amount
+ * of time.
+ */
+static void idle_injection_fn(unsigned int cpu)
+{
+ struct idle_injection_device *ii_dev;
+ struct idle_injection_thread *iit;
+ unsigned int idle_duration_ms;
+
+ ii_dev = per_cpu(idle_injection_device, cpu);
+ iit = per_cpu_ptr(&idle_injection_thread, cpu);
+
+ /*
+ * Boolean used by the smpboot main loop and as a flip-flop in
+ * this function
+ */
+ iit->should_run = 0;
+
+ idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+ if (idle_duration_ms)
+ play_idle(idle_duration_ms);
+
+ if (atomic_dec_and_test(&ii_dev->count))
+ idle_injection_last_man(ii_dev);
+}
+
+/**
+ * idle_injection_set_duration - idle and run duration helper
+ * @ii_dev: a pointer to an idle_injection_device structure
+ * @run_duration_ms: an unsigned int giving the running time in milliseconds
+ * @idle_duration_ms: an unsigned int giving the idle time in milliseconds
+ */
+void idle_injection_set_duration(struct idle_injection_device *ii_dev,
+ unsigned int run_duration_ms,
+ unsigned int idle_duration_ms)
+{
+ WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms);
+ WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms);
+}
+
+/**
+ * idle_injection_get_duration - idle and run duration helper
+ * @ii_dev: a pointer to an idle_injection_device structure
+ * @run_duration_ms: a pointer to an unsigned int to store the running time
+ * @idle_duration_ms: a pointer to an unsigned int to store the idle time
+ */
+void idle_injection_get_duration(struct idle_injection_device *ii_dev,
+ unsigned int *run_duration_ms,
+ unsigned int *idle_duration_ms)
+{
+ *run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+ *idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+}
+
+/**
+ * idle_injection_start - starts the idle injections
+ * @ii_dev: a pointer to an idle_injection_device structure
+ *
+ * The function starts the idle injection cycles by first waking up
+ * all the tasks the ii_dev is attached to and letting them handle the
+ * idle-run periods.
+ *
+ * Return: 0 on success, -EINVAL if the idle or the running duration
+ * is not set.
+ */
+int idle_injection_start(struct idle_injection_device *ii_dev)
+{
+ if (!READ_ONCE(ii_dev->idle_duration_ms))
+ return -EINVAL;
+
+ if (!READ_ONCE(ii_dev->run_duration_ms))
+ return -EINVAL;
+
+ pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
+ cpumask_pr_args(ii_dev->cpumask));
+
+ idle_injection_wakeup(ii_dev);
+
+ return 0;
+}
+
+/**
+ * idle_injection_stop - stops the idle injections
+ * @ii_dev: a pointer to an idle injection_device structure
+ *
+ * The function stops the idle injection by resetting the idle and
+ * running durations and waiting for the threads to complete. If an
+ * idle cycle is currently being injected, this will wait for the end
+ * of that cycle.
+ */
+void idle_injection_stop(struct idle_injection_device *ii_dev)
+{
+ pr_debug("Stopping injecting idle cycles on CPUs '%*pbl'\n",
+ cpumask_pr_args(ii_dev->cpumask));
+
+ idle_injection_set_duration(ii_dev, 0, 0);
+
+ wait_for_completion_interruptible(&ii_dev->stop_complete);
+}
+
+/**
+ * idle_injection_setup - initialize the current task as an RT task
+ * @cpu: the CPU number the kthread is running on (not used)
+ *
+ * Called once, this function is in charge of setting the task's
+ * scheduler parameters.
+ */
+static void idle_injection_setup(unsigned int cpu)
+{
+ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
+
+ set_freezable();
+
+ sched_setscheduler(current, SCHED_FIFO, &param);
+}
+
+/**
+ * idle_injection_park - move out of the idle injection threads pool
+ * @cpu: the CPU number the kthread is running on
+ *
+ * Catch the case where the CPU is hotplugged while we are about to
+ * inject an idle cycle.
+ */
+static void idle_injection_park(unsigned int cpu)
+{
+ struct idle_injection_device *ii_dev;
+ struct idle_injection_thread *iit;
+
+ ii_dev = per_cpu(idle_injection_device, cpu);
+ iit = per_cpu_ptr(&idle_injection_thread, cpu);
+
+ /*
+ * If we were supposed to run an idle cycle but cannot because
+ * we are in the process of parking, bail out of the pool of
+ * idle injection tasks; otherwise do nothing.
+ */
+ if (!iit->should_run)
+ return;
+
+ iit->should_run = 0;
+
+ if (atomic_dec_and_test(&ii_dev->count))
+ idle_injection_last_man(ii_dev);
+}
+
+/**
+ * idle_injection_should_run - function helper for the smpboot API
+ * @cpu: the CPU number the kthread is running on
+ *
+ * Return: a boolean telling if the thread can run.
+ */
+static int idle_injection_should_run(unsigned int cpu)
+{
+ struct idle_injection_thread *iit =
+ per_cpu_ptr(&idle_injection_thread, cpu);
+
+ return iit->should_run;
+}
+
+static struct idle_injection_device *ii_dev_alloc(void)
+{
+ struct idle_injection_device *ii_dev;
+
+ ii_dev = kzalloc(sizeof(*ii_dev), GFP_KERNEL);
+ if (!ii_dev)
+ return NULL;
+
+ if (!alloc_cpumask_var(&ii_dev->cpumask, GFP_KERNEL)) {
+ kfree(ii_dev);
+ return NULL;
+ }
+
+ return ii_dev;
+}
+
+static void ii_dev_free(struct idle_injection_device *ii_dev)
+{
+ free_cpumask_var(ii_dev->cpumask);
+ kfree(ii_dev);
+}
+
+/**
+ * idle_injection_register - idle injection init routine
+ * @cpumask: the list of CPUs managed by the idle injection device
+ *
+ * This is the initialization function in charge of allocating the
+ * structures and initializing the timer. It does not start the idle
+ * injection cycles.
+ *
+ * Return: a pointer to the idle injection device, or NULL if the
+ * allocation fails or a CPU of the cpumask is already registered.
+ */
+struct idle_injection_device *idle_injection_register(struct cpumask *cpumask)
+{
+ struct idle_injection_device *ii_dev;
+ int cpu;
+
+ ii_dev = ii_dev_alloc();
+ if (!ii_dev)
+ return NULL;
+
+ cpumask_copy(ii_dev->cpumask, cpumask);
+ hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ ii_dev->timer.function = idle_injection_wakeup_fn;
+ init_completion(&ii_dev->stop_complete);
+
+ for_each_cpu(cpu, ii_dev->cpumask) {
+
+ if (per_cpu(idle_injection_device, cpu)) {
+ pr_err("cpu%d is already registered\n", cpu);
+ goto out_rollback_per_cpu;
+ }
+
+ per_cpu(idle_injection_device, cpu) = ii_dev;
+ }
+
+ return ii_dev;
+
+out_rollback_per_cpu:
+ for_each_cpu(cpu, ii_dev->cpumask)
+ per_cpu(idle_injection_device, cpu) = NULL;
+
+ ii_dev_free(ii_dev);
+
+ return NULL;
+}
+
+/**
+ * idle_injection_unregister - Unregister the idle injection device
+ * @ii_dev: a pointer to an idle injection device
+ *
+ * The function is in charge of stopping the idle injection, clearing
+ * the per-CPU references to the device and freeing the memory
+ * allocated by the registration function.
+ */
+void idle_injection_unregister(struct idle_injection_device *ii_dev)
+{
+ int cpu;
+
+ idle_injection_stop(ii_dev);
+
+ for_each_cpu(cpu, ii_dev->cpumask)
+ per_cpu(idle_injection_device, cpu) = NULL;
+
+ ii_dev_free(ii_dev);
+}
+
+static struct smp_hotplug_thread idle_injection_threads = {
+ .store = &idle_injection_thread.tsk,
+ .setup = idle_injection_setup,
+ .park = idle_injection_park,
+ .thread_fn = idle_injection_fn,
+ .thread_comm = "idle_inject/%u",
+ .thread_should_run = idle_injection_should_run,
+};
+
+static int __init idle_injection_init(void)
+{
+ return smpboot_register_percpu_thread(&idle_injection_threads);
+}
+early_initcall(idle_injection_init);
new file mode 100644
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Linaro Ltd
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ */
+#ifndef __IDLE_INJECTION_H__
+#define __IDLE_INJECTION_H__
+
+struct cpumask;
+
+/* private idle injection device structure */
+struct idle_injection_device;
+
+struct idle_injection_device *idle_injection_register(struct cpumask *cpumask);
+
+void idle_injection_unregister(struct idle_injection_device *ii_dev);
+
+int idle_injection_start(struct idle_injection_device *ii_dev);
+
+void idle_injection_stop(struct idle_injection_device *ii_dev);
+
+void idle_injection_set_duration(struct idle_injection_device *ii_dev,
+ unsigned int run_duration_ms,
+ unsigned int idle_duration_ms);
+
+void idle_injection_get_duration(struct idle_injection_device *ii_dev,
+ unsigned int *run_duration_ms,
+ unsigned int *idle_duration_ms);
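+
+/*
+ * Usage sketch (illustrative only): a client registers an idle
+ * injection device for a cpumask, programs the run and idle durations
+ * with idle_injection_set_duration() and then calls
+ * idle_injection_start(). The durations may also be updated while the
+ * injection is running, as both sides use READ_ONCE()/WRITE_ONCE().
+ * The foo_set_idle_ms() helper below is a made-up example of such a
+ * runtime update:
+ *
+ *	static void foo_set_idle_ms(struct idle_injection_device *ii_dev,
+ *				    unsigned int idle_ms)
+ *	{
+ *		unsigned int run_ms, old_idle_ms;
+ *
+ *		idle_injection_get_duration(ii_dev, &run_ms, &old_idle_ms);
+ *		idle_injection_set_duration(ii_dev, run_ms, idle_ms);
+ *	}
+ */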
+#endif /* __IDLE_INJECTION_H__ */