diff mbox

[RFCv2,07/23] sched: Introduce system-wide sched_energy

Message ID 1404404770-323-8-git-send-email-morten.rasmussen@arm.com
State New
Headers show

Commit Message

Morten Rasmussen July 3, 2014, 4:25 p.m. UTC
From: Dietmar Eggemann <dietmar.eggemann@arm.com>

The energy-aware algorithm needs system-wide energy information on certain
platforms (e.g. a one-socket SMP system). Unfortunately, there is no
sched_group that covers all cpus in the system, so there is no place to
attach a system-wide sched_group_energy data structure. In such a system,
the energy data is only attached to the sched groups for the individual
cpus at the sched domain (sd) MC level.

This patch adds a _hack_ to provide system-wide energy data via the
sched_domain_topology_level table for such a system.

The problem is that the sched_domain_topology_level table is not an
interface intended for providing system-wide data, but we want to keep
the configuration of all energy-related data in one place.

The sched_domain_energy_f of the last entry (the one which is
initialized with {NULL, }) of the sched_domain_topology_level table is
set to cpu_sys_energy(). Since the sched_domain_mask_f of this entry
stays NULL, it is still ignored by the existing scheduler set-up
code (see for_each_sd_topology()).

A second call to init_sched_energy() with an sd pointer argument set to
NULL initializes the system-wide energy structure sse.

There is no system-wide power management on the example platform (ARM TC2)
which could potentially interact with the scheduler, so cpu_sys_energy()
returns NULL there. init_sched_energy() then returns early and struct
sched_group_energy *sse stays NULL.

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
---
 arch/arm/kernel/topology.c |    7 ++++++-
 kernel/sched/core.c        |   34 ++++++++++++++++++++++++++++++----
 kernel/sched/sched.h       |    2 ++
 3 files changed, 38 insertions(+), 5 deletions(-)
diff mbox

Patch

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index a7d5a6e..70915b1 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -386,6 +386,11 @@  static inline const struct sched_group_energy *cpu_core_energy(int cpu)
 			&energy_core_a15;
 }
 
+static inline const struct sched_group_energy *cpu_sys_energy(int cpu)
+{
+	return NULL;
+}
+
 static inline const int cpu_corepower_flags(void)
 {
 	return SD_SHARE_PKG_RESOURCES  | SD_SHARE_POWERDOMAIN;
@@ -396,7 +401,7 @@  static struct sched_domain_topology_level arm_topology[] = {
 	{ cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) },
 #endif
 	{ cpu_cpu_mask, 0, cpu_cluster_energy, SD_INIT_NAME(DIE) },
-	{ NULL, },
+	{ NULL,	0, cpu_sys_energy},
 };
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7fecc63..2d7544a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5954,20 +5954,44 @@  static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 	atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
+/* System-wide energy information. */
+struct sched_group_energy *sse;
+
 static void init_sched_energy(int cpu, struct sched_domain *sd,
 			      struct sched_domain_topology_level *tl)
 {
-	struct sched_group *sg = sd->groups;
-	struct sched_group_energy *energy = sg->sge;
+	struct sched_group *sg = sd ? sd->groups : NULL;
+	struct sched_group_energy *energy = sd ? sg->sge : sse;
 	sched_domain_energy_f fn = tl->energy;
-	struct cpumask *mask = sched_group_cpus(sg);
+	const struct cpumask *mask = sd ? sched_group_cpus(sg) :
+					  cpu_cpu_mask(cpu);
 
-	if (!fn || !fn(cpu))
+	if (!fn || !fn(cpu) || (!sd && energy))
 		return;
 
 	if (cpumask_weight(mask) > 1)
 		check_sched_energy_data(cpu, fn, mask);
 
+	if (!sd) {
+		energy = sse = kzalloc(sizeof(struct sched_group_energy) +
+				       fn(cpu)->nr_idle_states*
+				       sizeof(struct idle_state) +
+				       fn(cpu)->nr_cap_states*
+				       sizeof(struct capacity_state),
+				       GFP_KERNEL);
+		BUG_ON(!energy);
+
+		energy->idle_states = (struct idle_state *)
+				      ((void *)&energy->cap_states +
+				       sizeof(energy->cap_states));
+
+		energy->cap_states = (struct capacity_state *)
+				     ((void *)&energy->cap_states +
+				      sizeof(energy->cap_states) +
+				      fn(cpu)->nr_idle_states*
+				      sizeof(struct idle_state));
+	}
+
 	energy->nr_idle_states = fn(cpu)->nr_idle_states;
 	memcpy(energy->idle_states, fn(cpu)->idle_states,
 	       energy->nr_idle_states*sizeof(struct idle_state));
@@ -6655,6 +6679,8 @@  static int build_sched_domains(const struct cpumask *cpu_map,
 			claim_allocations(i, sd);
 			init_sched_groups_capacity(i, sd);
 		}
+
+		init_sched_energy(i, NULL, tl);
 	}
 
 	/* Attach the domains */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1a5f1ee..c971359 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -747,6 +747,8 @@  struct sched_group_capacity {
 	unsigned long cpumask[0]; /* iteration mask */
 };
 
+extern struct sched_group_energy *sse;
+
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
 	atomic_t ref;