diff mbox

[RFC,API-NEXT,09/12] linux-generic: tm: add timer_wheel support routines

Message ID 1437249827-578-10-git-send-email-bill.fischofer@linaro.org
State New
Headers show

Commit Message

Bill Fischofer July 18, 2015, 8:03 p.m. UTC
Signed-off-by: Barry Spinney <spinney@ezchip.com>
Signed-off-by: Mike Holmes <mike.holmes@linaro.org>
Signed-off-by: Bill Fischofer <bill.fischofer@linaro.org>
---
 platform/linux-generic/Makefile.am                 |   2 +
 .../include/odp_timer_wheel_internal.h             |  62 ++
 platform/linux-generic/odp_timer_wheel.c           | 897 +++++++++++++++++++++
 3 files changed, 961 insertions(+)
 create mode 100644 platform/linux-generic/include/odp_timer_wheel_internal.h
 create mode 100644 platform/linux-generic/odp_timer_wheel.c
diff mbox

Patch

diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
index 358a478..393c066 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -133,6 +133,7 @@  noinst_HEADERS = \
 	          ${srcdir}/include/odp_name_table_internal.h \
 		  ${srcdir}/include/odp_pkt_queue_internal.h \
 		  ${srcdir}/include/odp_sorted_list_internal.h \
+		  ${srcdir}/include/odp_timer_wheel_internal.h \
 		  ${srcdir}/Makefile.inc
 
 __LIB__libodp_la_SOURCES = \
@@ -147,6 +148,7 @@  __LIB__libodp_la_SOURCES = \
 			   odp_name_table.c \
 			   odp_pkt_queue.c \
 			   odp_sorted_list.c \
+			   odp_timer_wheel.c \
 			   odp_impl.c \
 			   odp_packet.c \
 			   odp_packet_flags.c \
diff --git a/platform/linux-generic/include/odp_timer_wheel_internal.h b/platform/linux-generic/include/odp_timer_wheel_internal.h
new file mode 100644
index 0000000..77744dc
--- /dev/null
+++ b/platform/linux-generic/include/odp_timer_wheel_internal.h
@@ -0,0 +1,62 @@ 
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef ODP_INT_TIMER_WHEEL_H_
+#define ODP_INT_TIMER_WHEEL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <odp.h>
+
+/* Note that ALL times in this API are in units of processor/cpu clock
+ * cycles!
+ */
+typedef uint64_t odp_timer_wheel_t;
+
+#define ODP_INT_TIMER_WHEEL_INVALID  0
+
+odp_timer_wheel_t odp_timer_wheel_create(uint32_t max_concurrent_timers,
+					 uint64_t current_time);
+
+/* odp_int_timer_wheel_curr_time_update should be called before the first
+ * call to odp_int_timer_wheel_insert, odp_int_timer_wheel_next, etc..
+ * It returns > 0 if there are timers expired.
+ *
+ */
+uint32_t odp_timer_wheel_curr_time_update(odp_timer_wheel_t timer_wheel,
+					  uint64_t          current_time);
+
+/* Maximum wakeup_time is 100 seconds in the future (though a wakeup time
+ * greater than a dozen seconds or so is of questionable value), and in
+ * addition the wakeup_time MUST represent a time in the future.  Note that
+ * the user_ptr cannot be NULL and must be aligned to an 4-byte boundary
+ * (i.e. the bottom 2 bits of this ptr must be 0).  AGAIN THIS MUST BE
+ * STRESSED - user_ptr is not an arbitrary 64-bit pointer, BUT MUST be
+ * non-zero and have its bottom two bits being 0!
+ */
+int odp_timer_wheel_insert(odp_timer_wheel_t timer_wheel,
+			   uint64_t          wakeup_time,
+			   void             *user_ptr);
+
+/* Returns the exact same user_ptr value as was passed to
+ * odp_int_timer_wheel_insert().
+ */
+void *odp_timer_wheel_next_expired(odp_timer_wheel_t timer_wheel);
+
+void odp_timer_wheel_stats_print(odp_timer_wheel_t timer_wheel);
+
+void odp_timer_wheel_destroy(odp_timer_wheel_t timer_wheel);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/platform/linux-generic/odp_timer_wheel.c b/platform/linux-generic/odp_timer_wheel.c
new file mode 100644
index 0000000..fbf3ffc
--- /dev/null
+++ b/platform/linux-generic/odp_timer_wheel.c
@@ -0,0 +1,897 @@ 
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <odp.h>
+#include <odp_timer_wheel_internal.h>
+#include <odp_debug_internal.h>
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+/* The following constants can be changed either at compile time or run time
+ * as long as the following constraints are met (by the way REV stands for
+ * REVOLUTION, i.e. one complete sweep through a specific timer wheel):
+ */
+#define CYCLES_TO_TICKS_SHIFT  8
+#define CYCLES_PER_TICK        (1 << CYCLES_TO_TICKS_SHIFT)
+#define CURRENT_TIMER_SLOTS    8192
+#define LEVEL1_TIMER_SLOTS     2048
+#define LEVEL2_TIMER_SLOTS     1024
+#define LEVEL3_TIMER_SLOTS     1024
+
+/* The following values are derived and should generally not be changed.  The
+ * basic idea is that the ticks per current timer wheel slot should always be
+ * 1, since that defines what a tick is - namely the time period of a single
+ * current timer wheel slot.  Then for all levels (including current), the
+ * ticks per revolution is clearly equal to the ticks per slot times the
+ * number of slots.  Finally the ticks per slot for levels 1 thru 3, must be
+ * the ticks per revolution of the previous level divided by a small power of
+ * 2 - e.g. 2, 4, 8, 16 - (but not 1).  The idea being that when an upper
+ * level slot is processed, its entries will be spread across this fraction of
+ * the lower level wheel and we want to make sure that none of these entries
+ * is near the lower level's current index.  Currently we use 4 for all
+ * levels.
+ */
+#define CURRENT_GEAR_RATIO     4
+#define LEVEL1_GEAR_RATIO      4
+#define LEVEL2_GEAR_RATIO      4
+#define TICKS_PER_CURRENT_SLOT 1
+#define TICKS_PER_CURRENT_REV  (CURRENT_TIMER_SLOTS * TICKS_PER_CURRENT_SLOT)
+#define TICKS_PER_LEVEL1_SLOT  (TICKS_PER_CURRENT_REV / CURRENT_GEAR_RATIO)
+#define TICKS_PER_LEVEL1_REV   (LEVEL1_TIMER_SLOTS * TICKS_PER_LEVEL1_SLOT)
+#define TICKS_PER_LEVEL2_SLOT  (TICKS_PER_LEVEL1_REV / LEVEL1_GEAR_RATIO)
+#define TICKS_PER_LEVEL2_REV   (LEVEL2_TIMER_SLOTS * TICKS_PER_LEVEL2_SLOT)
+#define TICKS_PER_LEVEL3_SLOT  (TICKS_PER_LEVEL2_REV / LEVEL2_GEAR_RATIO)
+#define TICKS_PER_LEVEL3_REV   (LEVEL3_TIMER_SLOTS * TICKS_PER_LEVEL3_SLOT)
+
+#define EXPIRED_RING_ENTRIES  64
+
+typedef struct timer_blk_s timer_blk_t;
+
+typedef struct { /* Should be 120 bytes long */
+	uint64_t user_data[15];
+} current_blk_t;
+
+typedef struct { /* Should be 120 bytes long */
+	uint64_t user_data[10];
+	uint32_t wakeup32[10];
+} general_blk_t;
+
+/* Must be exactly 128 bytes long AND cacheline aligned! */
+struct timer_blk_s {
+	timer_blk_t *next_timer_blk;
+	union {
+		current_blk_t current_blk;
+		general_blk_t general_blk;
+	};
+};
+
+/* Each current_timer_slot is 8 bytes long. */
+typedef union {
+	/* The selection of which union element to use is based on the LSB
+	 * bit, under the required assumption that both of these pointers are
+	 * at least 8-byte aligned.  If the entire 8 bytes is zero, then this
+	 * entry is empty, otherwise a LSB bit of 0 indicates a timer_blk_list
+	 * and a LSB bit of 1 indicates a single entry with a 64-bit user_data
+	 * word.
+	 */
+	uint64_t     user_data;
+	timer_blk_t *timer_blk_list;
+} current_timer_slot_t;
+
+typedef struct {
+	current_timer_slot_t slots[0];
+} current_wheel_t;
+
+typedef struct {
+	uint32_t             count;
+	uint32_t             peak_count;
+	uint32_t             expired_ring_full_cnt;
+	uint32_t             head_idx;
+	uint32_t             tail_idx;
+	uint32_t             max_idx;
+	current_timer_slot_t entries[0];
+} expired_ring_t;
+
+typedef struct {
+	uint32_t kind;                  /* 1 for single_entry_t */
+	uint32_t wakeup32;
+	uint64_t user_data;
+} single_entry_t;
+
+typedef struct {
+	uint32_t     kind;              /* 2 for list_entry_t */
+	uint32_t     num_blks;
+	timer_blk_t *timer_blk_list;
+} list_entry_t;
+
+typedef union  { /* Each general_timer_slot is 16 bytes long. */
+	/* The selection of which union element to use is based on the first 4
+	 * byte field which is always the "kind".  If kind is 0, then this
+	 * slot is empty, else if the kind is 1 then it is a single_entry and
+	 * if the kind is 2 then it is a list_entry.
+	 */
+	single_entry_t single_entry;
+	list_entry_t   list_entry;
+} general_timer_slot_t;
+
+typedef struct {
+	general_timer_slot_t slots[0];
+} general_wheel_t;
+
+typedef struct {
+	/* Note that rev stands for revolution - one complete sweep through
+	 * this timer wheel.
+	 */
+	uint16_t num_slots;        /* Must be a power of 2. */
+	uint16_t slot_idx;
+	uint16_t gear_mask;        /* = (num_slots / gear_ratio) - 1 */
+	uint16_t gear_ratio;       /* num of next level wheel updates per this
+				    * rev
+				    */
+	uint32_t ticks_shift;
+	uint32_t ticks_per_slot;   /* Must be a power of 2. */
+	uint64_t ticks_per_rev;    /* = num_slots * ticks_per_slot */
+	uint64_t max_ticks;        /* = current_ticks + ticks_per_rev */
+} wheel_desc_t;
+
+typedef struct {
+	uint32_t          last_slot_served;
+	uint32_t          free_list_size;
+	uint32_t          min_free_list_size;
+	uint32_t          peak_free_list_size;
+	timer_blk_t      *free_list_head;
+	uint64_t          total_timer_inserts;
+	uint64_t          insert_fail_cnt;
+	uint64_t          total_timer_removes;
+	uint64_t          total_promote_cnt;
+	uint64_t          promote_fail_cnt;
+	uint64_t          current_ticks;
+	wheel_desc_t      wheel_descs[4];
+	current_wheel_t  *current_wheel;
+	general_wheel_t  *general_wheels[3];
+	expired_ring_t   *expired_timers_ring;
+} timer_wheels_t;
+
+static uint32_t odp_internal_ilog2(uint64_t value)
+{
+	uint64_t pwr_of_2;
+	uint32_t bit_shift;
+
+	for (bit_shift = 0; bit_shift < 64; bit_shift++) {
+		pwr_of_2 = 1 << bit_shift;
+		if (value == pwr_of_2)
+			return bit_shift;
+		else if (value < pwr_of_2)
+			return bit_shift - 1;
+	}
+
+	return 64;
+}
+
+static inline uint32_t timer_wheel_slot_idx_inc(uint32_t slot_idx,
+						uint32_t num_slots)
+{
+	slot_idx++;
+	if (slot_idx < num_slots)
+		return slot_idx;
+	else
+		return 0;
+}
+
+static current_wheel_t *current_wheel_alloc(timer_wheels_t *timer_wheels,
+					    uint32_t        desc_idx)
+{
+	current_wheel_t *current_wheel;
+	wheel_desc_t    *wheel_desc;
+	uint64_t         ticks_per_slot, current_ticks, adjusted_ticks;
+	uint64_t         ticks_per_rev;
+	uint32_t         num_slots, malloc_len;
+
+	wheel_desc     = &timer_wheels->wheel_descs[desc_idx];
+	num_slots      = wheel_desc->num_slots;
+	ticks_per_slot = 1;
+	malloc_len     = num_slots * sizeof(current_timer_slot_t);
+	current_wheel  = malloc(malloc_len);
+	memset(current_wheel, 0, malloc_len);
+
+	current_ticks  = timer_wheels->current_ticks;
+	adjusted_ticks = current_ticks & (ticks_per_slot - 1);
+	ticks_per_rev  = num_slots;
+
+	wheel_desc->ticks_per_rev = ticks_per_rev;
+	wheel_desc->ticks_shift   = 0;
+	wheel_desc->max_ticks     = adjusted_ticks + ticks_per_rev;
+	wheel_desc->gear_mask     = (num_slots / wheel_desc->gear_ratio) - 1;
+	return current_wheel;
+}
+
+static general_wheel_t *general_wheel_alloc(timer_wheels_t *timer_wheels,
+					    uint32_t        desc_idx)
+{
+	general_wheel_t *general_wheel;
+	wheel_desc_t    *wheel_desc;
+	uint64_t         ticks_per_slot, current_ticks, adjusted_ticks;
+	uint64_t         ticks_per_rev;
+	uint32_t         num_slots, malloc_len;
+
+	wheel_desc     = &timer_wheels->wheel_descs[desc_idx];
+	num_slots      = wheel_desc->num_slots;
+	ticks_per_slot = (uint64_t)wheel_desc->ticks_per_slot;
+	malloc_len     = num_slots * sizeof(general_timer_slot_t);
+	general_wheel  = malloc(malloc_len);
+	memset(general_wheel, 0, malloc_len);
+
+	current_ticks  = timer_wheels->current_ticks;
+	adjusted_ticks = current_ticks & (ticks_per_slot - 1);
+	ticks_per_rev  = num_slots * ticks_per_slot;
+
+	wheel_desc->ticks_per_rev = ticks_per_rev;
+	wheel_desc->ticks_shift   = odp_internal_ilog2(ticks_per_slot);
+	wheel_desc->max_ticks     = adjusted_ticks + ticks_per_rev;
+	wheel_desc->gear_mask     = (num_slots / wheel_desc->gear_ratio) - 1;
+	return general_wheel;
+}
+
+static int expired_ring_create(timer_wheels_t *timer_wheels,
+			       uint32_t        num_entries)
+{
+	expired_ring_t *expired_ring;
+	uint32_t        malloc_len;
+
+	malloc_len = sizeof(expired_ring_t) +
+		(sizeof(current_timer_slot_t) * num_entries);
+	expired_ring = malloc(malloc_len);
+	memset(expired_ring, 0, malloc_len);
+	expired_ring->max_idx = num_entries - 1;
+
+	timer_wheels->expired_timers_ring = expired_ring;
+	return 0;
+}
+
+static int free_list_add(timer_wheels_t *timer_wheels,
+			 uint32_t        num_timer_blks)
+{
+	timer_blk_t *block_of_timer_blks, *timer_blk, *next_timer_blk;
+	uint32_t     malloc_len, idx;
+
+	malloc_len = num_timer_blks * sizeof(timer_blk_t);
+	/* Should be cacheline aligned! */
+	block_of_timer_blks = malloc(malloc_len);
+	if (!block_of_timer_blks)
+		return -1;
+
+	memset(block_of_timer_blks, 0, malloc_len);
+
+	/* Link these timer_blks together. */
+	timer_blk      = block_of_timer_blks;
+	next_timer_blk = timer_blk + 1;
+	for (idx = 0; idx < num_timer_blks; idx++) {
+		timer_blk->next_timer_blk = next_timer_blk;
+		timer_blk = next_timer_blk;
+		next_timer_blk++;
+	}
+
+	timer_blk->next_timer_blk     = timer_wheels->free_list_head;
+	timer_wheels->free_list_size += num_timer_blks;
+	timer_wheels->free_list_head  = block_of_timer_blks;
+	if (timer_wheels->peak_free_list_size < timer_wheels->free_list_size)
+		timer_wheels->peak_free_list_size =
+			timer_wheels->free_list_size;
+	return 0;
+}
+
+static timer_blk_t *timer_blk_alloc(timer_wheels_t *timer_wheels)
+{
+	timer_blk_t *head_timer_blk;
+	int         rc;
+
+	if (timer_wheels->free_list_size <= 1) {
+		/* Replenish the timer_blk_t free list. */
+		timer_wheels->min_free_list_size = timer_wheels->free_list_size;
+		rc = free_list_add(timer_wheels, 1024);
+		if (rc < 0)
+			return NULL;
+	}
+
+	head_timer_blk = timer_wheels->free_list_head;
+	timer_wheels->free_list_size--;
+	timer_wheels->free_list_head = head_timer_blk->next_timer_blk;
+	if (timer_wheels->free_list_size < timer_wheels->min_free_list_size)
+		timer_wheels->min_free_list_size = timer_wheels->free_list_size;
+
+	memset(head_timer_blk, 0, sizeof(timer_blk_t));
+	return head_timer_blk;
+}
+
+static void timer_blk_free(timer_wheels_t *timer_wheels,
+			   timer_blk_t    *timer_blk)
+{
+	timer_blk->next_timer_blk = timer_wheels->free_list_head;
+	timer_wheels->free_list_head = timer_blk;
+	timer_wheels->free_list_size++;
+	if (timer_wheels->peak_free_list_size < timer_wheels->free_list_size)
+		timer_wheels->peak_free_list_size =
+			timer_wheels->free_list_size;
+}
+
+static int current_wheel_insert(timer_wheels_t *timer_wheels,
+				uint32_t        wakeup_ticks,
+				uint64_t        user_data)
+{
+	current_timer_slot_t *timer_slot;
+	current_wheel_t      *current_wheel;
+	wheel_desc_t         *wheel_desc;
+	timer_blk_t          *new_timer_blk, *timer_blk;
+	uint64_t              num_slots;
+	uint32_t              wheel_idx, idx;
+
+	/* To reach here it must be the case that (wakeup_ticks -
+	 * current_ticks) is < current_wheel->num_slots.
+	 */
+	wheel_desc    = &timer_wheels->wheel_descs[0];
+	current_wheel = timer_wheels->current_wheel;
+	num_slots     = (uint64_t)wheel_desc->num_slots;
+	wheel_idx     = (uint32_t)(wakeup_ticks & (num_slots - 1));
+	timer_slot    = &current_wheel->slots[wheel_idx];
+
+	/* Three cases: (a) the timer_slot is currently empty, (b) the
+	 * timer_slot contains a single user_data word directly inside it or
+	 * (c) the timer_slot contains a pointer to a linked list of
+	 * timer_blk's.
+	 */
+	if (timer_slot->user_data == 0) {                    /* case (a) */
+		timer_slot->user_data = user_data | 0x01;
+	} else if ((timer_slot->user_data & 0x3) == 0x01)  { /* case (b) */
+		/* Need to promote this pre-existing single user_data plus the
+		 * new user_data into a timer_blk with two entries.
+		 */
+		new_timer_blk = timer_blk_alloc(timer_wheels);
+		if (!new_timer_blk)
+			return -1;
+
+		new_timer_blk->next_timer_blk           = NULL;
+		new_timer_blk->current_blk.user_data[0] = timer_slot->user_data;
+		new_timer_blk->current_blk.user_data[1] = user_data;
+		timer_slot->timer_blk_list              = new_timer_blk;
+	} else {   /* case (c) */
+		/* First try to find an empty slot in the current timer_blk. */
+		timer_blk = timer_slot->timer_blk_list;
+		for (idx = 0; idx < 15; idx++) {
+			if (timer_blk->current_blk.user_data[idx] == 0) {
+				timer_blk->current_blk.user_data[idx] =
+					user_data;
+				return 0;
+			}
+		}
+
+		/* current timer_blk is full, so we need to allocate a new one
+		 * and link it in.
+		 */
+		new_timer_blk = timer_blk_alloc(timer_wheels);
+		if (!new_timer_blk)
+			return -1;
+
+		new_timer_blk->next_timer_blk           = timer_blk;
+		new_timer_blk->current_blk.user_data[0] = user_data;
+		timer_slot->timer_blk_list              = new_timer_blk;
+	}
+
+	return 0;
+}
+
+static int general_wheel_insert(timer_wheels_t *timer_wheels,
+				uint32_t        desc_idx,
+				uint32_t        wakeup_ticks,
+				uint64_t        user_data)
+{
+	general_timer_slot_t *timer_slot;
+	general_wheel_t      *general_wheel;
+	wheel_desc_t         *wheel_desc;
+	timer_blk_t          *new_timer_blk, *timer_blk;
+	uint64_t              num_slots, old_user_data;
+	uint32_t              wheel_idx, wakeup32, kind, old_wakeup32, idx;
+
+	wheel_desc    = &timer_wheels->wheel_descs[desc_idx];
+	general_wheel = timer_wheels->general_wheels[desc_idx - 1];
+	num_slots     = (uint64_t)wheel_desc->num_slots;
+	wakeup32      = (uint32_t)(wakeup_ticks & 0xFFFFFFFF);
+	wakeup_ticks  = wakeup_ticks >> wheel_desc->ticks_shift;
+	wheel_idx     = (uint32_t)(wakeup_ticks & (num_slots - 1));
+	timer_slot    = &general_wheel->slots[wheel_idx];
+	kind          = timer_slot->single_entry.kind;
+
+	/* Three cases: (a) the timer_slot is currently empty, (b) the
+	 * timer_slot contains a single user_data word directly inside it or
+	 * (c) the timer_slot contains a pointer to a linked list of
+	 * timer_blk's.
+	 */
+	if (kind == 0) {                 /* case (a) */
+		timer_slot->single_entry.kind      = 1;
+		timer_slot->single_entry.wakeup32  = wakeup32;
+		timer_slot->single_entry.user_data = user_data;
+	} else if (kind == 1) {          /* case (b) */
+		/* Need to promote this single entry plus the new user_data
+		 * into a timer_blk with two entries.
+		 */
+		new_timer_blk = timer_blk_alloc(timer_wheels);
+		if (!new_timer_blk)
+			return -1;
+
+		old_user_data = timer_slot->single_entry.user_data;
+		old_wakeup32  = timer_slot->single_entry.wakeup32;
+
+		new_timer_blk->next_timer_blk           = NULL;
+		new_timer_blk->general_blk.user_data[0] = old_user_data;
+		new_timer_blk->general_blk.wakeup32[0]  = old_wakeup32;
+		new_timer_blk->general_blk.user_data[1] = user_data;
+		new_timer_blk->general_blk.wakeup32[1]  = wakeup32;
+		timer_slot->list_entry.kind             = 2;
+		timer_slot->list_entry.num_blks         = 1;
+		timer_slot->list_entry.timer_blk_list   = new_timer_blk;
+	} else  { /* case (c) */
+		/* First try to find an empty slot in the first timer_blk. */
+		timer_blk = timer_slot->list_entry.timer_blk_list;
+		for (idx = 0; idx < 10; idx++) {
+			if (timer_blk->general_blk.user_data[idx] == 0) {
+				timer_blk->general_blk.user_data[idx] =
+					user_data;
+				timer_blk->general_blk.wakeup32[idx]  =
+					wakeup32;
+				return 0;
+			}
+		}
+
+		/* The first timer_blk is full, so we need to allocate a new
+		 * one and link it in.
+		 */
+		new_timer_blk = timer_blk_alloc(timer_wheels);
+		if (!new_timer_blk)
+			return -1;
+
+		new_timer_blk->next_timer_blk           = timer_blk;
+		new_timer_blk->general_blk.user_data[0] = user_data;
+		new_timer_blk->general_blk.wakeup32[0]  = wakeup32;
+		timer_slot->list_entry.kind             = 2;
+		timer_slot->list_entry.num_blks++;
+		timer_slot->list_entry.timer_blk_list   = new_timer_blk;
+	}
+
+	return 0;
+}
+
+static int expired_timers_append(timer_wheels_t       *timer_wheels,
+				 current_timer_slot_t *timer_slot)
+{
+	expired_ring_t *expired_ring;
+	uint32_t        tail_idx;
+
+	/* Append either this single entry or the entire timer_blk_list to the
+	 * ring of expired_timers.
+	 */
+	expired_ring = timer_wheels->expired_timers_ring;
+	if (expired_ring->max_idx <= expired_ring->count)
+		return -1;
+
+	tail_idx = expired_ring->tail_idx;
+	expired_ring->entries[tail_idx] = *timer_slot;
+	tail_idx++;
+	if (expired_ring->max_idx < tail_idx)
+		tail_idx = 0;
+
+	expired_ring->tail_idx = tail_idx;
+	expired_ring->count++;
+	if (expired_ring->peak_count < expired_ring->count)
+		expired_ring->peak_count = expired_ring->count;
+	return 0;
+}
+
+static int timer_blk_list_search(timer_wheels_t       *timer_wheels,
+				 current_timer_slot_t *head_entry,
+				 uint64_t             *user_data_ptr)
+{
+	timer_blk_t *timer_blk, *next_timer_blk;
+	uint64_t     user_data;
+	uint32_t     idx;
+
+	timer_blk = head_entry->timer_blk_list;
+	while (timer_blk) {
+		for (idx = 0; idx < 15; idx++) {
+			user_data = timer_blk->current_blk.user_data[idx];
+			if (user_data != 0) {
+				*user_data_ptr =
+					timer_blk->current_blk.user_data[idx];
+				timer_blk->current_blk.user_data[idx] = 0;
+				return 1;
+			}
+		}
+
+		next_timer_blk = timer_blk->next_timer_blk;
+		timer_blk_free(timer_wheels, timer_blk);
+		timer_blk = next_timer_blk;
+	}
+
+	return 0;
+}
+
+static int expired_timers_remove(timer_wheels_t *timer_wheels,
+				 uint64_t       *user_data_ptr)
+{
+	current_timer_slot_t *head_entry;
+	expired_ring_t       *expired_ring;
+	uint32_t              head_idx;
+	int                   rc;
+
+	expired_ring = timer_wheels->expired_timers_ring;
+	if (expired_ring->count == 0)
+		return 0;
+
+	head_idx   = expired_ring->head_idx;
+	head_entry = &expired_ring->entries[head_idx];
+	if ((head_entry->user_data & 0x3) == 1) {
+		*user_data_ptr = head_entry->user_data;
+		expired_ring->entries[head_idx].user_data = 0;
+		head_idx++;
+		if (expired_ring->max_idx < head_idx)
+			head_idx = 0;
+
+		expired_ring->head_idx = head_idx;
+		expired_ring->count--;
+		return 1;
+	}
+
+	/* Search timer_blk_list for non-empty user_data values. */
+	rc = timer_blk_list_search(timer_wheels, head_entry, user_data_ptr);
+	if (0 < rc)
+		return rc;
+
+	/* Advance to use the next ring entry. */
+	expired_ring->entries[head_idx].user_data = 0;
+	head_idx++;
+	if (expired_ring->max_idx < head_idx)
+		head_idx = 0;
+
+	expired_ring->head_idx = head_idx;
+	expired_ring->count--;
+
+	head_entry = &expired_ring->entries[head_idx];
+	return timer_blk_list_search(timer_wheels, head_entry, user_data_ptr);
+}
+
+static int timer_current_wheel_update(timer_wheels_t *timer_wheels,
+				      uint32_t        elapsed_ticks)
+{
+	current_timer_slot_t *timer_slot;
+	current_wheel_t      *current_wheel;
+	wheel_desc_t         *wheel_desc;
+	uint64_t              max_ticks;
+	uint32_t              num_slots, slot_idx, max_cnt, cnt;
+	int                   ret_code, rc;
+
+	/* Advance current wheel for each elapsed tick, up to a maximum. */
+	wheel_desc    = &timer_wheels->wheel_descs[0];
+	slot_idx      = wheel_desc->slot_idx;
+	num_slots     = wheel_desc->num_slots;
+	max_ticks     = wheel_desc->max_ticks;
+	max_cnt       = (uint32_t)MIN(elapsed_ticks, 15);
+	current_wheel = timer_wheels->current_wheel;
+	ret_code      = 0;
+
+	for (cnt = 1; cnt <= max_cnt; cnt++) {
+		ret_code  |= (slot_idx & wheel_desc->gear_mask) == 0;
+		timer_slot = &current_wheel->slots[slot_idx];
+		if (timer_slot->user_data != 0) {
+			rc = expired_timers_append(timer_wheels, timer_slot);
+			if (rc < 0)
+				timer_wheels->
+					expired_timers_ring->
+					expired_ring_full_cnt++;
+
+			timer_slot->user_data = 0;
+		}
+
+		timer_wheels->current_ticks++;
+		max_ticks++;
+		slot_idx = timer_wheel_slot_idx_inc(slot_idx, num_slots);
+	}
+
+	wheel_desc->slot_idx  = slot_idx;
+	wheel_desc->max_ticks = max_ticks;
+	return ret_code;
+}
+
+static int odp_int_timer_wheel_promote(timer_wheels_t *timer_wheels,
+				       uint32_t        desc_idx,
+				       uint64_t        wakeup_ticks,
+				       uint64_t        user_data)
+{
+	if (desc_idx == 0)
+		return current_wheel_insert(timer_wheels,
+					    wakeup_ticks, user_data);
+	else
+		return general_wheel_insert(timer_wheels,
+					    desc_idx, wakeup_ticks,
+					    user_data);
+}
+
+static void timer_wheel_slot_promote(timer_wheels_t       *timer_wheels,
+				     uint32_t              desc_idx,
+				     general_timer_slot_t *timer_slot)
+{
+	general_blk_t *general_blk;
+	timer_blk_t   *timer_blk, *next_timer_blk;
+	uint64_t       user_data, current_ticks,
+		current_ticks_msb, wakeup_ticks;
+	uint32_t       idx, wakeup32;
+	int            rc;
+
+	current_ticks     = timer_wheels->current_ticks;
+	current_ticks_msb = (current_ticks >> 32) << 32;
+	if (timer_slot->single_entry.kind == 1) {
+		user_data    = timer_slot->single_entry.user_data;
+		wakeup32     = timer_slot->single_entry.wakeup32;
+		wakeup_ticks = current_ticks_msb | (uint64_t)wakeup32;
+		if (wakeup_ticks < current_ticks)
+			wakeup_ticks += 1ULL << 32;
+
+		rc = odp_int_timer_wheel_promote(timer_wheels, desc_idx,
+						 wakeup_ticks, user_data);
+		if (rc < 0)
+			timer_wheels->promote_fail_cnt++;
+		else
+			timer_wheels->total_promote_cnt++;
+		return;
+	}
+
+	timer_blk = timer_slot->list_entry.timer_blk_list;
+	while (timer_blk) {
+		general_blk = &timer_blk->general_blk;
+		for (idx = 0; idx < 10; idx++) {
+			user_data = general_blk->user_data[idx];
+			if (user_data == 0)
+				break;
+
+			wakeup32     = general_blk->wakeup32[idx];
+			wakeup_ticks = current_ticks_msb | (uint64_t)wakeup32;
+			if (wakeup_ticks < current_ticks)
+				wakeup_ticks += 1ULL << 32;
+
+			rc = odp_int_timer_wheel_promote(timer_wheels,
+							 desc_idx,
+							 wakeup_ticks,
+							 user_data);
+			if (rc < 0)
+				timer_wheels->promote_fail_cnt++;
+			else
+				timer_wheels->total_promote_cnt++;
+		}
+
+		next_timer_blk = timer_blk->next_timer_blk;
+		timer_blk_free(timer_wheels, timer_blk);
+		timer_blk = next_timer_blk;
+	}
+}
+
+static int timer_general_wheel_update(timer_wheels_t *timer_wheels,
+				      uint32_t        desc_idx)
+{
+	general_timer_slot_t *timer_slot;
+	general_wheel_t      *general_wheel;
+	wheel_desc_t         *wheel_desc;
+	uint32_t              num_slots, slot_idx;
+	int                   ret_code;
+
+	wheel_desc    = &timer_wheels->wheel_descs[desc_idx];
+	slot_idx      = wheel_desc->slot_idx;
+	num_slots     = wheel_desc->num_slots;
+	general_wheel = timer_wheels->general_wheels[desc_idx - 1];
+	timer_slot    = &general_wheel->slots[slot_idx];
+	ret_code      = (slot_idx & wheel_desc->gear_mask) == 0;
+
+	if (timer_slot->single_entry.kind != 0) {
+		timer_wheel_slot_promote(timer_wheels,
+					 desc_idx - 1, timer_slot);
+		timer_slot->single_entry.kind = 0;
+	}
+
+	wheel_desc->max_ticks++;
+	wheel_desc->slot_idx = timer_wheel_slot_idx_inc(slot_idx, num_slots);
+	return ret_code;
+}
+
+odp_timer_wheel_t odp_timer_wheel_create(uint32_t max_concurrent_timers,
+					 uint64_t current_time)
+{
+	timer_wheels_t *timer_wheels;
+	uint64_t        current_ticks;
+	int             rc;
+
+	timer_wheels  = malloc(sizeof(timer_wheels_t));
+	current_ticks = current_time >> CYCLES_TO_TICKS_SHIFT;
+	timer_wheels->current_ticks = current_ticks;
+
+	timer_wheels->wheel_descs[0].num_slots = CURRENT_TIMER_SLOTS;
+	timer_wheels->wheel_descs[1].num_slots = LEVEL1_TIMER_SLOTS;
+	timer_wheels->wheel_descs[2].num_slots = LEVEL2_TIMER_SLOTS;
+	timer_wheels->wheel_descs[3].num_slots = LEVEL3_TIMER_SLOTS;
+
+	timer_wheels->wheel_descs[0].gear_ratio = CURRENT_GEAR_RATIO;
+	timer_wheels->wheel_descs[1].gear_ratio = LEVEL1_GEAR_RATIO;
+	timer_wheels->wheel_descs[2].gear_ratio = LEVEL2_GEAR_RATIO;
+	timer_wheels->wheel_descs[3].gear_ratio = 1;
+
+	timer_wheels->wheel_descs[0].ticks_per_slot = TICKS_PER_CURRENT_SLOT;
+	timer_wheels->wheel_descs[1].ticks_per_slot = TICKS_PER_LEVEL1_SLOT;
+	timer_wheels->wheel_descs[2].ticks_per_slot = TICKS_PER_LEVEL2_SLOT;
+	timer_wheels->wheel_descs[3].ticks_per_slot = TICKS_PER_LEVEL3_SLOT;
+
+	timer_wheels->current_wheel     = current_wheel_alloc(timer_wheels, 0);
+	timer_wheels->general_wheels[0] = general_wheel_alloc(timer_wheels, 1);
+	timer_wheels->general_wheels[1] = general_wheel_alloc(timer_wheels, 2);
+	timer_wheels->general_wheels[2] = general_wheel_alloc(timer_wheels, 3);
+
+	rc = expired_ring_create(timer_wheels, EXPIRED_RING_ENTRIES);
+	if (rc < 0)
+		return ODP_INT_TIMER_WHEEL_INVALID;
+
+	free_list_add(timer_wheels, max_concurrent_timers / 4);
+	timer_wheels->min_free_list_size  = timer_wheels->free_list_size;
+	timer_wheels->peak_free_list_size = timer_wheels->free_list_size;
+	return (odp_timer_wheel_t)timer_wheels;
+}
+
+uint32_t odp_timer_wheel_curr_time_update(odp_timer_wheel_t timer_wheel,
+					  uint64_t          current_time)
+{
+	timer_wheels_t *timer_wheels;
+	uint64_t        new_current_ticks, elapsed_ticks;
+	uint32_t        desc_idx;
+	int             rc;
+
+	timer_wheels      = (timer_wheels_t *)timer_wheel;
+	new_current_ticks = current_time >> CYCLES_TO_TICKS_SHIFT;
+	elapsed_ticks     = new_current_ticks - timer_wheels->current_ticks;
+	if (elapsed_ticks == 0)
+		return 0;
+
+	/* Advance current wheel for each elapsed tick, up to a maximum. This
+	 * function returns a value > 0, then the next level's timer wheel
+	 * needs to be updated.
+	 */
+	rc = timer_current_wheel_update(timer_wheels, (uint32_t)elapsed_ticks);
+
+	/* See if we need to do any higher wheel advancing or processing.*/
+	desc_idx = 1;
+	while ((0 < rc) && (desc_idx <= 3))
+		rc = timer_general_wheel_update(timer_wheels, desc_idx++);
+
+	return timer_wheels->expired_timers_ring->count;
+}
+
+int odp_timer_wheel_insert(odp_timer_wheel_t timer_wheel,
+			   uint64_t          wakeup_time,
+			   void             *user_ptr)
+{
+	timer_wheels_t *timer_wheels;
+	uint64_t        user_data, wakeup_ticks;
+	int             rc;
+
+	user_data = (uint64_t)user_ptr;
+	if (user_data == 0)
+		return -4;  /* user_data cannot be 0! */
+	else if ((user_data & 0x3) != 0)
+		return -5;  /* user_data ptr must be at least 4-byte aligned. */
+
+	timer_wheels = (timer_wheels_t *)timer_wheel;
+	wakeup_ticks = (wakeup_time >> CYCLES_TO_TICKS_SHIFT) + 1;
+	if (wakeup_time <= timer_wheels->current_ticks)
+		return -6;
+
+	if (wakeup_ticks < timer_wheels->wheel_descs[0].max_ticks)
+		rc = current_wheel_insert(timer_wheels,
+					  wakeup_ticks, user_data);
+	else if (wakeup_ticks < timer_wheels->wheel_descs[1].max_ticks)
+		rc = general_wheel_insert(timer_wheels, 1,
+					  wakeup_ticks, user_data);
+	else if (wakeup_ticks < timer_wheels->wheel_descs[2].max_ticks)
+		rc = general_wheel_insert(timer_wheels, 2, wakeup_ticks,
+					  user_data);
+	else if (wakeup_ticks < timer_wheels->wheel_descs[3].max_ticks)
+		rc = general_wheel_insert(timer_wheels, 3,
+					  wakeup_ticks, user_data);
+	else
+		return -1;
+
+	if (rc < 0)
+		timer_wheels->insert_fail_cnt++;
+	else
+		timer_wheels->total_timer_inserts++;
+
+	return rc;
+}
+
+void *odp_timer_wheel_next_expired(odp_timer_wheel_t timer_wheel)
+
+{
+	timer_wheels_t *timer_wheels;
+	uint64_t        user_data;
+	int             rc;
+
+	/* Remove the head of the timer wheel. */
+	timer_wheels = (timer_wheels_t *)timer_wheel;
+	rc = expired_timers_remove(timer_wheels, &user_data);
+	if (rc <= 0)
+		return NULL;
+
+	user_data &= ~0x3;
+	timer_wheels->total_timer_removes++;
+	return (void *)user_data;
+}
+
+static void odp_int_timer_wheel_desc_print(wheel_desc_t *wheel_desc,
+					   uint32_t      wheel_idx)
+{
+	ODP_DBG("  wheel=%u num_slots=%u ticks_shift=%u ticks_per_slot=%u"
+		" ticks_per_rev=%lu\n",
+		wheel_idx, wheel_desc->num_slots, wheel_desc->ticks_shift,
+		wheel_desc->ticks_per_slot, wheel_desc->ticks_per_rev);
+}
+
+void odp_timer_wheel_stats_print(odp_timer_wheel_t timer_wheel)
+{
+	timer_wheels_t *timer_wheels;
+	expired_ring_t *expired_ring;
+	uint32_t        wheel_idx;
+
+	timer_wheels = (timer_wheels_t *)timer_wheel;
+	expired_ring = timer_wheels->expired_timers_ring;
+
+	ODP_DBG("odp_int_timer_wheel_stats current_ticks=%lu\n",
+		timer_wheels->current_ticks);
+	for (wheel_idx = 0; wheel_idx < 4; wheel_idx++)
+		odp_int_timer_wheel_desc_print(
+			&timer_wheels->wheel_descs[wheel_idx],
+			wheel_idx);
+
+	ODP_DBG("  total timer_inserts=%lu timer_removes=%lu "
+		"insert_fails=%lu\n",
+		timer_wheels->total_timer_inserts,
+		timer_wheels->total_timer_removes,
+		timer_wheels->insert_fail_cnt);
+	ODP_DBG("  total_promote_cnt=%lu promote_fail_cnt=%lu\n",
+		timer_wheels->total_promote_cnt,
+		timer_wheels->promote_fail_cnt);
+	ODP_DBG("  free_list_size=%u min_size=%u peak_size=%u\n",
+		timer_wheels->free_list_size, timer_wheels->min_free_list_size,
+		timer_wheels->peak_free_list_size);
+	ODP_DBG("  expired_timers_ring size=%u count=%u "
+		"peak_count=%u full_cnt=%u\n",
+		expired_ring->max_idx + 1, expired_ring->count,
+		expired_ring->peak_count,  expired_ring->expired_ring_full_cnt);
+}
+
+void odp_timer_wheel_destroy(odp_timer_wheel_t timer_wheel)
+{
+	timer_wheels_t *timer_wheels;
+	expired_ring_t *expired_ring;
+
+	timer_wheels = (timer_wheels_t *)timer_wheel;
+	expired_ring = timer_wheels->expired_timers_ring;
+
+	/* First free all of the block_of_timer_blks @TODO */
+	free(timer_wheels->current_wheel);
+	free(timer_wheels->general_wheels[0]);
+	free(timer_wheels->general_wheels[1]);
+	free(timer_wheels->general_wheels[2]);
+	free(expired_ring);
+	free(timer_wheels);
+}