diff mbox series

[API-NEXT,5/5] power: Adding a power governor implementation

Message ID 1484670521-28503-6-git-send-email-sergei.trofimov@arm.com
State New
Headers show
Series power management api | expand

Commit Message

Sergei Trofimov Jan. 17, 2017, 4:28 p.m. UTC
Adding an API for a power governor. A governor will try to optimise
power usage within its associated power domain by monitoring packet
queue(s) to determine how much "performance" is necessary at a given
point in time.

This also implements a PoC DVFS governor that will step down the
frequency periodically while the queue is under the threshold and jump
to max if the threshold is exceeded (this is similar behavior to the
Linux ondemand cpufreq governor).

Signed-off-by: Sergei Trofimov <sergei.trofimov@arm.com>

---
 doc/users-guide/users-guide.adoc                   |   7 +
 include/odp/api/spec/power.h                       |  37 ++++
 platform/linux-generic/Makefile.am                 |   1 +
 .../linux-generic/include/odp_config_internal.h    |   5 +
 platform/linux-generic/include/odp_internal.h      |   4 +
 platform/linux-generic/odp_init.c                  |  12 ++
 platform/linux-generic/odp_power_governor.c        | 220 +++++++++++++++++++++
 7 files changed, 286 insertions(+)
 create mode 100644 platform/linux-generic/odp_power_governor.c

-- 
1.9.1
diff mbox series

Patch

diff --git a/doc/users-guide/users-guide.adoc b/doc/users-guide/users-guide.adoc
index 41c57d1..00299cf 100755
--- a/doc/users-guide/users-guide.adoc
+++ b/doc/users-guide/users-guide.adoc
@@ -404,6 +404,13 @@  types of atomic variables. The ODP event model also makes use of queues to
 avoid the need for explicit locking in many cases. This will be discussed
 in the next section.
 
+=== Power Governor
+A power governor will try to minimize power usage of a set of cores while
+maintaining QoS on one or more queues. The API defines an interface for
+initializing a governor for a set of CPUs specified by a mask, and for
+registering and unregistering queues that will be monitored for QoS with an
+existing governor.
+
 == ODP Components ==
 Building on ODP concepts, ODP offers several components that relate to the
 flow of work through an ODP application. These include the Classifier,
diff --git a/include/odp/api/spec/power.h b/include/odp/api/spec/power.h
index 26dd64e..09a4b0a 100644
--- a/include/odp/api/spec/power.h
+++ b/include/odp/api/spec/power.h
@@ -18,12 +18,20 @@ 
 #include <limits.h>
 
 #include <odp/api/visibility_begin.h>
+
 #include <odp/api/cpumask.h>
+#include <odp/api/queue.h>
+#include <odp/api/std_types.h>
+#include <odp/api/plat/strong_types.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+typedef ODP_HANDLE_T(odp_power_governor_t);
+
+#define ODP_POWER_GOVERNOR_INVALID  _odp_cast_scalar(odp_power_governor_t, 0)
+
 
 /**
  * CPU power domain description.
@@ -108,6 +116,35 @@  int odp_power_domain_set_perf_level(odp_power_domain_t *domain, int level);
  */
 uint64_t odp_power_domain_get_perf_level(odp_power_domain_t *domain);
 
+/**
+ * Initialize a power governor for the power domain of the specified CPUs.
+ *
+ * @param cpus 	mask of the CPUs to be governed
+ * @return governor handle, or ODP_POWER_GOVERNOR_INVALID on failure
+ */
+odp_power_governor_t odp_power_governor_init(odp_cpumask_t *cpus);
+
+/**
+ *
+ * Add a queue to be monitored by the power governor. The governor will be notified when the
+ * queue hits the specified threshold and will boost performance if possible.
+ *
+ * @param governor 	power governor to which the queue will be added
+ * @param queue 	queue whose depth will be monitored to determine when power
+ * 			needs to be boosted.
+ * @param threshold	queue depth threshold at which power will be boosted (if possible).
+ */
+int odp_power_governor_add_queue(odp_power_governor_t governor, odp_queue_t queue, uint64_t threshold);
+
+/**
+ * Remove the queue from being monitored by the specified power governor.
+ *
+ * @param governor 	power governor from which the queue will be removed
+ * @param queue 	queue whose depth will no longer be monitored by the
+ * 			governor.
+ */
+int odp_power_governor_remove_queue(odp_power_governor_t governor, odp_queue_t queue);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
index 738c582..16832cd 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -207,6 +207,7 @@  __LIB__libodp_linux_la_SOURCES = \
 			   odp_pkt_queue.c \
 			   odp_pool.c \
 			   odp_power.c \
+			   odp_power_governor.c \
 			   odp_queue.c \
 			   odp_rwlock.c \
 			   odp_rwlock_recursive.c \
diff --git a/platform/linux-generic/include/odp_config_internal.h b/platform/linux-generic/include/odp_config_internal.h
index dadd59e..c0d12dd 100644
--- a/platform/linux-generic/include/odp_config_internal.h
+++ b/platform/linux-generic/include/odp_config_internal.h
@@ -130,6 +130,11 @@  extern "C" {
 #define CONFIG_BURST_SIZE 16
 
 /*
+ * Maximum number of power governors
+ */
+#define ODP_CONFIG_POWER_GOVERNORS 16
+
+/*
  * Maximum number of events in a pool
  */
 #define CONFIG_POOL_MAX_NUM (1 * 1024 * 1024)
diff --git a/platform/linux-generic/include/odp_internal.h b/platform/linux-generic/include/odp_internal.h
index b313b1f..c84239d 100644
--- a/platform/linux-generic/include/odp_internal.h
+++ b/platform/linux-generic/include/odp_internal.h
@@ -69,6 +69,7 @@  enum init_stage {
 	CLASSIFICATION_INIT,
 	TRAFFIC_MNGR_INIT,
 	NAME_TABLE_INIT,
+	POWER_GOVERNOR_INIT,
 	ALL_INIT      /* All init stages completed */
 };
 
@@ -116,6 +117,9 @@  int odp_time_term_global(void);
 int odp_tm_init_global(void);
 int odp_tm_term_global(void);
 
+int odp_power_governor_init_global(void);
+int odp_power_governor_term_global(void);
+
 int _odp_int_name_tbl_init_global(void);
 int _odp_int_name_tbl_term_global(void);
 
diff --git a/platform/linux-generic/odp_init.c b/platform/linux-generic/odp_init.c
index 06c6143..56df042 100644
--- a/platform/linux-generic/odp_init.c
+++ b/platform/linux-generic/odp_init.c
@@ -179,6 +179,13 @@  int odp_init_global(odp_instance_t *instance,
 		ODP_ERR("ODP name table init failed\n");
 		goto init_failed;
 	}
+	stage = NAME_TABLE_INIT;
+
+	if (odp_power_governor_init_global()) {
+		ODP_ERR("ODP power governor init failed\n");
+		goto init_failed;
+	}
+	stage = POWER_GOVERNOR_INIT;
 
 	*instance = (odp_instance_t)odp_global_data.main_pid;
 
@@ -204,6 +211,11 @@  int _odp_term_global(enum init_stage stage)
 
 	switch (stage) {
 	case ALL_INIT:
+	case POWER_GOVERNOR_INIT:
+		if (odp_power_governor_term_global()) {
+			ODP_ERR("Power governor term failed.\n");
+			rc = -1;
+		}
 	case NAME_TABLE_INIT:
 		if (_odp_int_name_tbl_term_global()) {
 			ODP_ERR("Name table term failed.\n");
diff --git a/platform/linux-generic/odp_power_governor.c b/platform/linux-generic/odp_power_governor.c
new file mode 100644
index 0000000..1f30ffd
--- /dev/null
+++ b/platform/linux-generic/odp_power_governor.c
@@ -0,0 +1,220 @@ 
+#define _POSIX_C_SOURCE 199309L
+
+#include <odp/api/power.h>
+#include <odp/api/spinlock.h>
+#include <odp/api/pool.h>
+#include <odp/api/buffer.h>
+#include <odp/api/hints.h>
+#include <odp/api/shared_memory.h>
+
+#include <odp_internal.h>
+#include <odp_config_internal.h>
+#include <odp_debug_internal.h>
+
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <time.h>
+
+struct power_governor_entry_s;	/* forward declaration for the self-referential 'next' member */
+typedef struct power_governor_entry_s {	/* state of one governor; one slot of power_governor_tbl */
+	struct power_governor_entry_s *next;	/* set to NULL by odp_power_governor_init(); not otherwise used in this file */
+	odp_spinlock_t lock;	/* held by the timer callback and the monitor thread while changing the perf level */
+	odp_power_domain_t *domain;	/* power domain whose performance level this governor sets */
+	odp_queue_t queue;	/* notification queue the monitor thread dequeues from */
+	int current_perf_level_idx;	/* index into domain->perf_levels of the level last selected */
+	int _stop;	/* monitor thread exits when it sees this non-zero; never set in this file */
+	timer_t timer;	/* one-shot POSIX timer; expiry runs governor_spindown_cores() */
+	pthread_t thread;	/* thread running governor_monitor() */
+	odp_pool_t pool;	/* single-buffer pool backing 'event' */
+	odp_event_t event;	/* event passed to odp_queue_threshold_arm() on 'queue' */
+} power_governor_entry_t;
+
+typedef struct power_governor_table_t {	/* table holding every governor slot */
+	power_governor_entry_t  power_governor[ODP_CONFIG_POWER_GOVERNORS];	/* fixed-size array of governor slots */
+	uint32_t num_entries;	/* number of slots handed out so far by odp_power_governor_init() */
+} power_governor_table_t;
+
+static power_governor_table_t *power_governor_tbl;	/* address of the "odp_power_governors" shm block; set by odp_power_governor_init_global() */
+
+/*
+ * Convert a table index into a public governor handle.
+ *
+ * ODP_POWER_GOVERNOR_INVALID is the handle value 0, so a valid handle must
+ * never be 0. Table index N is therefore published as handle N + 1;
+ * without the offset the first governor created (index 0) would be
+ * indistinguishable from an invalid handle.
+ */
+static inline odp_power_governor_t power_governor_index_to_handle(uint32_t power_governor_id)
+{
+	return _odp_cast_scalar(odp_power_governor_t, power_governor_id + 1);
+}
+
+/*
+ * Convert a public governor handle back into a table index (inverse of
+ * power_governor_index_to_handle()).
+ *
+ * ODP_POWER_GOVERNOR_INVALID (0) wraps around to the largest uint32_t
+ * value, which is outside the table, so a range check on the result
+ * rejects it.
+ */
+static inline uint32_t power_governor_handle_to_index(odp_power_governor_t power_governor_hdl)
+{
+	return _odp_typeval(power_governor_hdl) - 1;
+}
+
+/*
+ * Return the address of slot 'index' in the global governor table.
+ * No range check is done here; the caller must pass a valid index.
+ */
+static inline power_governor_entry_t *get_power_governor_entry(uint32_t index)
+{
+	power_governor_entry_t *slots = power_governor_tbl->power_governor;
+
+	return slots + index;
+}
+
+/*
+ * (Re)arm the governor's step-down timer.
+ *
+ * The timer is programmed as a relative one-shot (it_interval is zero)
+ * that expires 10 ms from now. Calling this again before expiry replaces
+ * the pending expiry time.
+ *
+ * @param the_governor	governor whose timer is to be armed
+ */
+static void governor_reset_timer(power_governor_entry_t *the_governor)
+{
+	/* Delay before the next step-down: 10 ms, expressed in nanoseconds. */
+	const long spindown_delay_ns = 10000000L;
+	struct itimerspec spec;
+
+	memset(&spec, 0, sizeof(spec));
+	spec.it_value.tv_nsec = spindown_delay_ns;
+
+	/*
+	 * If arming fails the governor stops stepping the frequency down,
+	 * so report the failure instead of dropping it silently.
+	 */
+	if (timer_settime(the_governor->timer, 0, &spec, NULL) == -1)
+		ODP_ERR("power governor: failed to arm timer\n");
+}
+
+/*
+ * Timer expiry callback (SIGEV_THREAD notification function).
+ *
+ * While the governor is above the lowest performance level, move one
+ * entry down in the domain's perf_levels table, apply that level and
+ * re-arm the timer for the next step. At the lowest level nothing is done
+ * and the timer is left disarmed.
+ *
+ * @param val	sival_ptr carries the power_governor_entry_t of the governor
+ */
+static void governor_spindown_cores(union sigval val)
+{
+	power_governor_entry_t *gov = val.sival_ptr;
+
+	odp_spinlock_lock(&gov->lock);
+
+	odp_power_domain_t *dom = gov->domain;
+
+	if (gov->current_perf_level_idx > 0) {
+		gov->current_perf_level_idx -= 1;
+
+		uint64_t lower_level = dom->perf_levels[gov->current_perf_level_idx];
+
+		odp_power_domain_set_perf_level(dom, lower_level);
+		governor_reset_timer(gov);
+	}
+
+	odp_spinlock_unlock(&gov->lock);
+}
+
+/*
+ * Body of the per-governor monitor thread.
+ *
+ * Each iteration dequeues one event from the governor's notification
+ * queue. If the governor is not already at the highest performance level
+ * it is moved there and the step-down timer is (re)armed. The dequeued
+ * event is then handed back through odp_queue_threshold_arm() so that the
+ * next notification can be delivered.
+ *
+ * The _stop flag is only examined right after a dequeue returns.
+ *
+ * @param arg	the power_governor_entry_t served by this thread
+ * @return always NULL
+ */
+static void *governor_monitor(void *arg)
+{
+	power_governor_entry_t *gov = arg;
+
+	for (;;) {
+		odp_event_t notif = odp_queue_deq_wait(gov->queue);
+
+		if (gov->_stop)
+			return NULL;
+
+		odp_spinlock_lock(&gov->lock);
+
+		odp_power_domain_t *dom = gov->domain;
+		uint64_t top_level = dom->perf_levels[dom->num_perf_levels - 1];
+
+		if (gov->current_perf_level_idx < dom->num_perf_levels - 1) {
+			/* Jump straight to the highest level. */
+			odp_power_domain_set_perf_level(dom, top_level);
+			gov->current_perf_level_idx = dom->num_perf_levels - 1;
+			governor_reset_timer(gov);
+		}
+
+		odp_spinlock_unlock(&gov->lock);
+
+		odp_queue_threshold_arm(gov->queue, notif);
+	}
+}
+
+/*
+ * Global initialisation of the power governor module.
+ *
+ * Reserves the "odp_power_governors" shared memory block, points
+ * power_governor_tbl at it and clears the whole table.
+ *
+ * @return 0 on success, -1 if the block's address could not be obtained
+ */
+int odp_power_governor_init_global(void)
+{
+	const odp_shm_t tbl_shm = odp_shm_reserve("odp_power_governors",
+						  sizeof(power_governor_table_t),
+						  sizeof(power_governor_entry_t), 0);
+
+	power_governor_tbl = odp_shm_addr(tbl_shm);
+	if (!power_governor_tbl)
+		return -1;
+
+	memset(power_governor_tbl, 0, sizeof(*power_governor_tbl));
+	return 0;
+}
+
+/*
+ * Global teardown of the power governor module.
+ *
+ * Looks up and frees the "odp_power_governors" shared memory block.
+ *
+ * NOTE(review): nothing here stops the monitor threads or deletes the
+ * timers created by odp_power_governor_init(), and the _stop flag is never
+ * set anywhere in this file - confirm how governors are meant to be shut
+ * down before their table is freed.
+ *
+ * @return 0 on success, the negative odp_shm_free() result on failure
+ */
+int odp_power_governor_term_global(void)
+{
+	int ret = odp_shm_free(odp_shm_lookup("odp_power_governors"));
+
+	if (ret < 0) {
+		/* Newline-terminated like the other ODP_ERR messages. */
+		ODP_ERR("shm free failed for odp_power_governors\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a power governor for the power domain of the first CPU in 'cpus'.
+ *
+ * Sets up, in this order: a free slot in the governor table, a
+ * notification queue, a one-buffer pool whose buffer carries a pointer to
+ * the governor entry, a one-shot CLOCK_MONOTONIC timer that runs
+ * governor_spindown_cores(), and a thread running governor_monitor().
+ * The timer is armed once everything is in place.
+ *
+ * The table slot is only claimed (num_entries advanced) after every step
+ * succeeded; on failure all resources created so far are released again.
+ *
+ * NOTE(review): every governor uses the same queue name
+ * ("power-governor-queue") and pool name ("power_governor_pool").
+ *
+ * @param cpus	mask of CPUs to govern; only the first set CPU is used to
+ *		select the power domain
+ * @return handle of the new governor, or ODP_POWER_GOVERNOR_INVALID with
+ *	   errno set (EINVAL: bad argument, no domain or queue creation
+ *	   failed; ENOMEM: table full or pool/buffer allocation failed;
+ *	   otherwise the error from timer_create()/pthread_create())
+ */
+odp_power_governor_t odp_power_governor_init(odp_cpumask_t *cpus)
+{
+	odp_power_domain_info_t cpu_info;
+	odp_power_domain_t *domain;
+	power_governor_entry_t *the_governor;
+	odp_queue_param_t qparams;
+	odp_pool_param_t params;
+	odp_buffer_t buffer;
+	struct sigevent sev;
+	uint64_t current_perf_level;
+	uint32_t index;
+	int first_cpu;
+	int saved_errno;
+	int ret;
+	int i;
+
+	if (power_governor_tbl == NULL || cpus == NULL) {
+		errno = EINVAL;
+		return ODP_POWER_GOVERNOR_INVALID;
+	}
+
+	index = power_governor_tbl->num_entries;
+	if (index >= ODP_CONFIG_POWER_GOVERNORS) {
+		errno = ENOMEM;
+		return ODP_POWER_GOVERNOR_INVALID;
+	}
+
+	/* An empty mask has no first CPU (negative return). */
+	first_cpu = odp_cpumask_first(cpus);
+	if (first_cpu < 0) {
+		errno = EINVAL;
+		return ODP_POWER_GOVERNOR_INVALID;
+	}
+
+	/*
+	 * NOTE(review): 'domain' is looked up in the stack-local cpu_info.
+	 * If odp_power_domain_for_cpu() returns a pointer into cpu_info, the
+	 * pointer kept in the governor entry dangles once this function
+	 * returns - confirm the lifetime of the returned domain.
+	 */
+	odp_power_domain_info_populate(&cpu_info);
+	domain = odp_power_domain_for_cpu(&cpu_info, first_cpu);
+	if (domain == NULL) {
+		errno = EINVAL;
+		return ODP_POWER_GOVERNOR_INVALID;
+	}
+
+	the_governor = &power_governor_tbl->power_governor[index];
+
+	odp_spinlock_init(&the_governor->lock);
+	the_governor->next = NULL;
+	the_governor->_stop = 0;
+	the_governor->domain = domain;
+
+	/*
+	 * Find the table position of the level the domain currently runs
+	 * at; start from the lowest entry if it is not in the table.
+	 */
+	the_governor->current_perf_level_idx = 0;
+	current_perf_level = odp_power_domain_get_perf_level(domain);
+	for (i = 0; i < domain->num_perf_levels; i++) {
+		if (current_perf_level == (uint64_t)domain->perf_levels[i]) {
+			the_governor->current_perf_level_idx = i;
+			break;
+		}
+	}
+
+	odp_queue_param_init(&qparams);
+	qparams.type = ODP_QUEUE_TYPE_NOTIF;
+	the_governor->queue = odp_queue_create("power-governor-queue", &qparams);
+	if (the_governor->queue == ODP_QUEUE_INVALID) {
+		errno = EINVAL;
+		return ODP_POWER_GOVERNOR_INVALID;
+	}
+
+	odp_pool_param_init(&params);
+	params.type = ODP_POOL_BUFFER;
+	params.buf.size = sizeof(void *);
+	params.buf.num = 1;
+	params.buf.align = 0;
+
+	the_governor->pool = odp_pool_create("power_governor_pool", &params);
+	if (the_governor->pool == ODP_POOL_INVALID) {
+		saved_errno = ENOMEM;
+		goto err_queue;
+	}
+
+	buffer = odp_buffer_alloc(the_governor->pool);
+	if (buffer == ODP_BUFFER_INVALID) {
+		saved_errno = ENOMEM;
+		goto err_pool;
+	}
+
+	/*
+	 * The buffer payload is a pointer to the governor entry itself. The
+	 * address of the local variable must not be stored here: it is a
+	 * stack address that is gone as soon as this function returns.
+	 */
+	*(void **)odp_buffer_addr(buffer) = (void *)the_governor;
+	the_governor->event = odp_buffer_to_event(buffer);
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_THREAD;
+	sev.sigev_value.sival_ptr = (void *)the_governor;
+	sev.sigev_notify_function = governor_spindown_cores;
+	sev.sigev_notify_attributes = NULL;
+
+	if (timer_create(CLOCK_MONOTONIC, &sev, &the_governor->timer) == -1) {
+		saved_errno = errno;
+		goto err_buffer;
+	}
+
+	/* pthread_create() returns the error number instead of setting errno. */
+	ret = pthread_create(&the_governor->thread, NULL, governor_monitor, the_governor);
+	if (ret) {
+		saved_errno = ret;
+		goto err_timer;
+	}
+
+	governor_reset_timer(the_governor);
+
+	/* Everything is in place: claim the slot. */
+	power_governor_tbl->num_entries = index + 1;
+
+	return power_governor_index_to_handle(index);
+
+err_timer:
+	timer_delete(the_governor->timer);
+err_buffer:
+	odp_buffer_free(buffer);
+err_pool:
+	odp_pool_destroy(the_governor->pool);
+err_queue:
+	odp_queue_destroy(the_governor->queue);
+	/* Cleanup calls may have changed errno; report the original cause. */
+	errno = saved_errno;
+	return ODP_POWER_GOVERNOR_INVALID;
+}
+
+/*
+ * Start monitoring 'targetq' with the given governor.
+ *
+ * Arms the governor's notification queue with its event, then sets the
+ * depth threshold on 'targetq' with the governor's queue as the
+ * notification target.
+ *
+ * @param handle	governor returned by odp_power_governor_init()
+ * @param targetq	queue whose depth is to be monitored
+ * @param threshold	queue depth at which the governor is notified
+ * @return result of odp_queue_threshold_set(), or -1 if 'handle' does not
+ *	   refer to a governor slot that has been handed out
+ */
+int odp_power_governor_add_queue(odp_power_governor_t handle, odp_queue_t targetq, uint64_t threshold)
+{
+	uint32_t index = power_governor_handle_to_index(handle);
+	power_governor_entry_t *the_governor;
+
+	/* Reject handles that would index outside the slots in use. */
+	if (power_governor_tbl == NULL || index >= power_governor_tbl->num_entries)
+		return -1;
+
+	the_governor = get_power_governor_entry(index);
+	odp_queue_threshold_arm(the_governor->queue, the_governor->event);
+	return odp_queue_threshold_set(targetq, threshold, the_governor->queue);
+}
+
+/*
+ * Stop monitoring 'targetq'.
+ *
+ * The governor handle is not used; the queue's threshold is simply reset.
+ *
+ * @param handle	unused
+ * @param targetq	queue whose threshold is to be reset
+ * @return result of odp_queue_threshold_reset()
+ */
+int odp_power_governor_remove_queue(odp_power_governor_t handle ODP_UNUSED, odp_queue_t targetq)
+{
+	int rc = odp_queue_threshold_reset(targetq);
+
+	return rc;
+}