@@ -41,6 +41,13 @@ typedef int odp_queue_type_t;
#define ODP_QUEUE_TYPE_PKTIN 2
#define ODP_QUEUE_TYPE_PKTOUT 3
+/**
+ * @def ODP_QUEUE_TYPE_TM
+ * Traffic manager queue
+ * @note Internal to linux-generic implementation--not part of queue type API
+ */
+#define ODP_QUEUE_TYPE_TM 4
+
/** Get printable format of odp_queue_t */
static inline uint64_t odp_queue_to_u64(odp_queue_t hdl)
{
new file mode 100644
@@ -0,0 +1,61 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef ODP_INT_NAME_TABLE_H_
+#define ODP_INT_NAME_TABLE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <odp.h>
+
+typedef enum {
+ ODP_COS_HANDLE,
+ ODP_PKTIO_HANDLE,
+ ODP_POOL_HANDLE,
+ ODP_QUEUE_HANDLE,
+ ODPH_RING_HANDLE,
+ ODP_SHM_HANDLE,
+ ODP_TIMER_POOL_HANDLE,
+ ODP_TM_HANDLE,
+ ODP_TM_SHAPER_PROFILE_HANDLE,
+ ODP_TM_SCHED_PROFILE_HANDLE,
+ ODP_TM_THRESHOLD_PROFILE_HANDLE,
+ ODP_TM_WRED_PROFILE_HANDLE,
+ ODP_TM_NODE_HANDLE
+} _odp_int_name_kind_t;
+
+typedef uint32_t _odp_int_name_t;
+#define ODP_INVALID_NAME 0
+
+#define _ODP_INT_NAME_LEN 32
+
+_odp_int_name_t _odp_int_name_tbl_add(const char *name,
+ uint8_t name_kind,
+ uint64_t user_data);
+
+_odp_int_name_t _odp_int_name_tbl_lookup(const char *name,
+ uint8_t name_kind);
+
+int _odp_int_name_tbl_delete(_odp_int_name_t odp_name);
+
+const char *_odp_int_name_tbl_name(_odp_int_name_t odp_name);
+
+uint64_t _odp_int_name_tbl_user_data(_odp_int_name_t odp_name);
+
+void _odp_int_name_tbl_stats_print(void);
+
+void _odp_int_name_tbl_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
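A minimal usage sketch of the internal name-table API, inferred purely from the declarations above; the "my_queue" name, the ODP_QUEUE_HANDLE kind, and the user_data value are illustrative, and _odp_int_name_tbl_init() is assumed to have already run during global init:

#include <odp_name_table_internal.h>

static void name_tbl_example(void)
{
	_odp_int_name_t name_id;
	uint64_t user_data = 0x1234; /* e.g. an odp_queue_t handle value */

	/* Register a <name, kind> pair along with an opaque user_data. */
	name_id = _odp_int_name_tbl_add("my_queue", ODP_QUEUE_HANDLE,
					user_data);
	if (name_id == ODP_INVALID_NAME)
		return;

	/* Later, recover the user_data via a <name, kind> lookup. */
	name_id = _odp_int_name_tbl_lookup("my_queue", ODP_QUEUE_HANDLE);
	user_data = _odp_int_name_tbl_user_data(name_id);

	_odp_int_name_tbl_delete(name_id);
}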
new file mode 100644
@@ -0,0 +1,62 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef _ODP_INT_PKT_QUEUE_H_
+#define _ODP_INT_PKT_QUEUE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <odp.h>
+
+typedef uint64_t _odp_int_queue_pool_t;
+typedef uint32_t _odp_int_pkt_queue_t;
+
+#define _ODP_INT_QUEUE_POOL_INVALID 0
+#define _ODP_INT_PKT_QUEUE_INVALID 0
+
+/* None of the functions in this file do any locking. That's because the
+ * expected usage model is that each TM system will create its own
+ * _odp_int_queue_pool, and then only call _odp_pkt_queue_append and
+ * _odp_pkt_queue_remove from a single thread associated/dedicated to this
+ * same TM system/_odp_int_queue_pool. The main difficulty that this file
+ * tries to deal with is the possibility of a huge number of queues (e.g. 16
+ * million), where each such queue could have a huge range in the number of
+ * pkts queued (say 0 to > 1,000) - yet the total "peak" number of pkts queued
+ * is many orders of magnitude smaller than the product of max_num_queues
+ * times max_queue_cnt. In particular, it is assumed that even at peak usage,
+ * only a small fraction of max_num_queues will be "active" - i.e. have any
+ * pkts queued, yet over time it is expected that almost every queue will have
+ * some sort of backlog.
+ */
+
+/* max_num_queues must be <= 16 * 1024 * 1024. */
+_odp_int_queue_pool_t _odp_queue_pool_create(uint32_t max_num_queues,
+ uint32_t max_queued_pkts);
+
+_odp_int_pkt_queue_t _odp_pkt_queue_create(_odp_int_queue_pool_t queue_pool);
+
+int _odp_pkt_queue_append(_odp_int_queue_pool_t queue_pool,
+ _odp_int_pkt_queue_t pkt_queue,
+ odp_packet_t pkt);
+
+int _odp_pkt_queue_remove(_odp_int_queue_pool_t queue_pool,
+ _odp_int_pkt_queue_t pkt_queue,
+ odp_packet_t *pkt);
+
+void _odp_pkt_queue_stats_print(_odp_int_queue_pool_t queue_pool);
+
+void _odp_queue_pool_destroy(_odp_int_queue_pool_t queue_pool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
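A usage sketch matching the locking note above: every append/remove call comes from the single thread that services this pool. The pool dimensions and the pkt argument are illustrative, and error handling is elided.

#include <odp_pkt_queue_internal.h>

static void pkt_queue_example(odp_packet_t pkt)
{
	_odp_int_queue_pool_t pool;
	_odp_int_pkt_queue_t pkt_queue;
	odp_packet_t out_pkt;

	/* One pool per TM system: up to 4K queues, 64K pkts queued total. */
	pool = _odp_queue_pool_create(4 * 1024, 64 * 1024);
	if (pool == _ODP_INT_QUEUE_POOL_INVALID)
		return;

	pkt_queue = _odp_pkt_queue_create(pool);

	/* Both calls below must come from this pool's single service
	 * thread, since no locking is done internally.
	 */
	(void)_odp_pkt_queue_append(pool, pkt_queue, pkt);
	(void)_odp_pkt_queue_remove(pool, pkt_queue, &out_pkt);

	_odp_queue_pool_destroy(pool);
}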
@@ -109,6 +109,12 @@ int queue_pktout_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr,
int queue_pktout_enq_multi(queue_entry_t *queue,
odp_buffer_hdr_t *buf_hdr[], int num, int sustain);
+int queue_tm_reenq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr,
+ int sustain);
+int queue_tm_reenq_multi(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr[],
+ int num, int sustain);
+int queue_tm_reorder(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr);
+
void queue_lock(queue_entry_t *queue);
void queue_unlock(queue_entry_t *queue);
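These three declarations suggest that TM queues plug into the generic queue machinery the same way pktout queues do. A sketch of the presumed wiring follows; the s.enqueue/s.enqueue_multi field names mirror the existing pktout pattern and are illustrative, not confirmed by this hunk:

static void tm_queue_hooks_example(queue_entry_t *queue)
{
	/* Assumed: route enqueues on this queue into the TM system. */
	queue->s.enqueue       = queue_tm_reenq;
	queue->s.enqueue_multi = queue_tm_reenq_multi;
}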
new file mode 100644
@@ -0,0 +1,78 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef _ODP_INT_SORTED_LIST_H_
+#define _ODP_INT_SORTED_LIST_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+typedef uint64_t _odp_int_sorted_pool_t;
+typedef uint32_t _odp_int_sorted_list_t;
+
+#define _ODP_INT_SORTED_POOL_INVALID 0
+#define _ODP_INT_SORTED_LIST_INVALID 0
+
+_odp_int_sorted_pool_t _odp_sorted_pool_create(uint32_t max_sorted_lists);
+
+_odp_int_sorted_list_t
+_odp_sorted_list_create(_odp_int_sorted_pool_t sorted_pool,
+ uint32_t max_entries);
+
+/* Enters the pair <sort_key, user_data> into a list of such entries, all
+ * sorted by sort_key (lowest value first with ties going to the oldest
+ * entry). The user_data is an arbitrary/opaque value. It is returned later
+ * when an _odp_sorted_list_remove() call is made.
+ */
+int _odp_sorted_list_insert(_odp_int_sorted_pool_t sorted_pool,
+ _odp_int_sorted_list_t sorted_list,
+ uint64_t sort_key,
+ uint64_t user_data);
+
+/* The _odp_sorted_list_find function returns 1 iff a <sort_key, user_data>
+ * pair exists in the linked list whose user_data field matches the given
+ * user_data. If no such entry exists in this list, 0 is returned, and if
+ * the sorted_pool or sorted_list arguments are invalid, -1 is returned.
+ * If the optional sort_key_ptr argument is supplied and the matching entry
+ * is found, its sort_key is returned via this pointer.
+ */
+int _odp_sorted_list_find(_odp_int_sorted_pool_t sorted_pool,
+ _odp_int_sorted_list_t sorted_list,
+ uint64_t user_data,
+ uint64_t *sort_key_ptr);
+
+/* Deletes a <sort_key, user_data> pair from the given sorted list. Returns 0
+ * if the pair is found, otherwise returns -1.
+ */
+int _odp_sorted_list_delete(_odp_int_sorted_pool_t sorted_pool,
+ _odp_int_sorted_list_t sorted_list,
+ uint64_t user_data);
+
+/* Removes the list entry with the smallest sort_key. The sort_key is
+ * returned via the out ptr sort_key_ptr, and the opaque user_data is
+ * returned via the out ptr user_data_ptr. Returns -1 if the sorted_list
+ * is empty (or upon an error), in which case the values pointed to by
+ * sort_key_ptr and user_data_ptr remain unchanged.
+ */
+int _odp_sorted_list_remove(_odp_int_sorted_pool_t sorted_pool,
+ _odp_int_sorted_list_t sorted_list,
+ uint64_t *sort_key_ptr,
+ uint64_t *user_data_ptr);
+
+void _odp_sorted_list_stats_print(_odp_int_sorted_pool_t sorted_pool);
+
+void _odp_sorted_pool_destroy(_odp_int_sorted_pool_t sorted_pool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
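A minimal sketch of the sorted-list API used as a priority queue. The keys, user_data values, and pool sizes are illustrative, and a non-negative return from _odp_sorted_list_remove() is assumed to mean success:

#include <odp_sorted_list_internal.h>

static void sorted_list_example(void)
{
	_odp_int_sorted_pool_t pool;
	_odp_int_sorted_list_t list;
	uint64_t sort_key, user_data;

	pool = _odp_sorted_pool_create(16);         /* up to 16 lists    */
	list = _odp_sorted_list_create(pool, 1024); /* up to 1K entries  */

	/* Insert out of order... */
	_odp_sorted_list_insert(pool, list, 200, 0xB);
	_odp_sorted_list_insert(pool, list, 100, 0xA);

	/* ...and drain smallest sort_key first: <100, 0xA>, <200, 0xB>.
	 * A non-negative return is assumed to indicate success.
	 */
	while (_odp_sorted_list_remove(pool, list, &sort_key,
				       &user_data) >= 0)
		;

	_odp_sorted_pool_destroy(pool);
}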
new file mode 100644
@@ -0,0 +1,68 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef _ODP_INT_TIMER_WHEEL_H_
+#define _ODP_INT_TIMER_WHEEL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <odp.h>
+
+/* Note that ALL times in this API are in units of processor/cpu clock
+ * cycles!
+ */
+typedef uint64_t _odp_timer_wheel_t;
+
+#define _ODP_INT_TIMER_WHEEL_INVALID 0
+
+_odp_timer_wheel_t _odp_timer_wheel_create(uint32_t max_concurrent_timers,
+ uint64_t current_time);
+
+/* _odp_timer_wheel_curr_time_update should be called before the first
+ * call to _odp_timer_wheel_insert, _odp_timer_wheel_next_expired, etc.
+ * It returns a count > 0 if any timers have expired.
+ */
+uint32_t _odp_timer_wheel_curr_time_update(_odp_timer_wheel_t timer_wheel,
+ uint64_t current_time);
+
+/* The maximum wakeup_time is 100 seconds in the future (though a wakeup
+ * time more than a dozen or so seconds out is of questionable value), and
+ * in addition the wakeup_time MUST represent a time in the future. Note
+ * that user_ptr cannot be NULL and must be aligned to a 4-byte boundary
+ * (i.e. the bottom 2 bits of this ptr must be 0). To stress the point:
+ * user_ptr is not an arbitrary 64-bit pointer, but MUST be non-zero with
+ * its bottom two bits clear.
+ */
+int _odp_timer_wheel_insert(_odp_timer_wheel_t timer_wheel,
+ uint64_t wakeup_time,
+ void *user_ptr);
+
+/* Returns the exact same user_ptr value as was passed to
+ * _odp_timer_wheel_insert().
+ */
+void *_odp_timer_wheel_next_expired(_odp_timer_wheel_t timer_wheel);
+
+/* Returns the number of timers that have been inserted but not yet passed
+ * back to the user. This number includes the number of timers that have
+ * internally expired and are in the expired list, but have not yet been
+ * retrieved via an _odp_timer_wheel_next_expired() call.
+ */
+uint32_t _odp_timer_wheel_count(_odp_timer_wheel_t timer_wheel);
+
+void _odp_timer_wheel_stats_print(_odp_timer_wheel_t timer_wheel);
+
+void _odp_timer_wheel_destroy(_odp_timer_wheel_t timer_wheel);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
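A sketch of the expected call sequence, per the notes above: all times are in CPU cycles, and user_ptr must be non-NULL and 4-byte aligned. The cycle deltas and the tm_queue argument are illustrative:

#include <odp_timer_wheel_internal.h>

static void timer_wheel_example(void *tm_queue, uint64_t now)
{
	_odp_timer_wheel_t wheel;
	void *user_ptr;

	wheel = _odp_timer_wheel_create(64 * 1024, now);

	/* tm_queue is assumed non-NULL with its bottom 2 bits clear. */
	(void)_odp_timer_wheel_insert(wheel, now + 1000000, tm_queue);

	/* Advance the wheel's notion of time, then drain expirations. */
	if (_odp_timer_wheel_curr_time_update(wheel, now + 2000000) > 0)
		while ((user_ptr = _odp_timer_wheel_next_expired(wheel)))
			; /* user_ptr == tm_queue here. */

	_odp_timer_wheel_destroy(wheel);
}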
new file mode 100644
@@ -0,0 +1,324 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/**
+ * @file
+ *
+ * ODP Traffic Manager - implementation internal
+ */
+
+#ifndef ODP_TRAFFIC_MNGR_INTERNAL_H_
+#define ODP_TRAFFIC_MNGR_INTERNAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <odp/traffic_mngr.h>
+#include <odp/packet_io.h>
+#include <odp_name_table_internal.h>
+#include <odp_timer_wheel_internal.h>
+#include <odp_pkt_queue_internal.h>
+#include <odp_sorted_list_internal.h>
+#include <odp_internal.h>
+#include <odp_debug_internal.h>
+#include <odp_buffer_internal.h>
+#include <odp_queue_internal.h>
+#include <odp_packet_internal.h>
+
+typedef struct stat file_stat_t;
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+#define INPUT_WORK_RING_SIZE (16 * 1024)
+
+#define INVALID_PKT 0
+
+#define TM_QUEUE_MAGIC_NUM 0xBABEBABE
+#define TM_NODE_MAGIC_NUM 0xBEEFBEEF
+
+/** @todo Fill this in with what it's supposed to be */
+#define ODP_CYCLES_PER_SEC 1000000000
+
+/* Macros to convert handles to internal pointers and vice versa. */
+
+#define MAKE_ODP_TM_HANDLE(tm_system) ((odp_tm_t)tm_system)
+#define GET_TM_SYSTEM(odp_tm) ((tm_system_t *)odp_tm)
+
+#define MAKE_PROFILE_HANDLE(profile_kind, tbl_idx) \
+ (((profile_kind & 0xF) << 28) | ((tbl_idx + 1) & 0xFFFFFFF))
+
+#define GET_PROFILE_KIND(profile_handle) \
+ ((profile_kind_t)((profile_handle >> 28) & 0xF))
+
+#define GET_TBL_IDX(profile_handle) ((profile_handle & 0xFFFFFFF) - 1)
+
+#define MAKE_ODP_TM_NODE(tm_node_obj) ((odp_tm_node_t)(tm_node_obj))
+#define GET_TM_NODE_OBJ(odp_tm_node) ((tm_node_obj_t *)(odp_tm_node))
+
+#define MAKE_ODP_TM_QUEUE(tm_queue_obj) ((odp_tm_queue_t)(tm_queue_obj))
+#define GET_TM_QUEUE_OBJ(odp_tm_queue) ((tm_queue_obj_t *)(odp_tm_queue))
+
+typedef uint64_t tm_handle_t;
+
+#define PF_RM_CURRENT_BEST 0x01
+#define PF_NEW_PKT_IN 0x02
+#define PF_SHAPER_DELAYED 0x10
+#define PF_CHANGED_OUT_PKT 0x20
+#define PF_REACHED_EGRESS 0x40
+#define PF_ERROR 0x80
+
+typedef struct {
+ uint32_t num_allocd;
+ uint32_t num_used;
+ void **array_ptrs; /* Ptr to an array of num_allocd void * ptrs. */
+} dynamic_tbl_t;
+
+#define ODP_TM_NUM_PROFILES 4
+
+typedef enum {
+ TM_SHAPER_PROFILE,
+ TM_SCHED_PROFILE,
+ TM_THRESHOLD_PROFILE,
+ TM_WRED_PROFILE
+} profile_kind_t;
+
+typedef struct tm_queue_obj_s tm_queue_obj_t;
+typedef struct tm_node_obj_s tm_node_obj_t;
+
+typedef struct {
+ /* A zero value for max_bytes or max_pkts indicates that this quantity
+ * is not limited and has no RED threshold.
+ */
+ uint64_t max_pkts;
+ uint64_t max_bytes;
+ _odp_int_name_t name_tbl_id;
+} tm_queue_thresholds_t;
+
+typedef struct {
+ odp_atomic_u64_t pkt_cnt;
+ odp_atomic_u64_t byte_cnt;
+} tm_queue_cnts_t;
+
+typedef struct tm_wred_node_s tm_wred_node_t;
+
+struct tm_wred_node_s {
+ tm_wred_node_t *next_tm_wred_node;
+ odp_tm_wred_params_t *wred_params[ODP_NUM_PACKET_COLORS];
+ tm_queue_thresholds_t *threshold_params;
+ tm_queue_cnts_t queue_cnts;
+ odp_ticketlock_t tm_wred_node_lock;
+};
+
+typedef struct { /* 64-bits long. */
+ union {
+ uint64_t word;
+ struct {
+ uint32_t queue_num;
+ uint16_t pkt_len;
+ int8_t shaper_len_adjust;
+ uint8_t drop_eligible :1;
+ uint8_t pkt_color :2;
+ uint8_t unused:1;
+ uint8_t epoch :4;
+ };
+ };
+} pkt_desc_t;
+
+typedef struct {
+ odp_tm_sched_mode_t sched_modes[ODP_TM_MAX_PRIORITIES];
+ uint16_t inverted_weights[ODP_TM_MAX_PRIORITIES];
+ _odp_int_name_t name_tbl_id;
+} tm_sched_params_t;
+
+typedef enum {
+ DELAY_PKT, DECR_NOTHING, DECR_COMMIT, DECR_PEAK, DECR_BOTH
+} tm_shaper_action_t;
+
+typedef struct {
+ uint8_t output_priority;
+ tm_shaper_action_t action;
+} tm_prop_t;
+
+typedef struct {
+ uint64_t commit_rate;
+ uint64_t peak_rate;
+ int64_t max_commit; /* Byte cnt as a fp integer with 26 bits. */
+ int64_t max_peak;
+ uint64_t max_commit_time_delta;
+ uint64_t max_peak_time_delta;
+ uint32_t min_time_delta;
+ _odp_int_name_t name_tbl_id;
+ int8_t len_adjust;
+ odp_bool_t dual_rate;
+ odp_bool_t enabled;
+} tm_shaper_params_t;
+
+typedef enum { NO_CALLBACK, UNDELAY_PKT } tm_shaper_callback_reason_t;
+
+typedef struct {
+ tm_node_obj_t *next_tm_node; /* NULL if connected to egress. */
+ void *enclosing_entity;
+ tm_shaper_params_t *shaper_params;
+ tm_sched_params_t *sched_params;
+
+ uint64_t last_update_time; /* In clock cycles. */
+ uint64_t callback_time;
+
+ /* The shaper token bucket counters are represented as a number of
+ * bytes in a 64-bit fixed point format where the decimal point is at
+ * bit 24. (aka int64_24). In other words, the number of bytes that
+ * commit_cnt represents is "commit_cnt / 2**24". Hence the
+ * commit_rate and peak_rate are in units of bytes per cycle = "8 *
+ * bits per sec / cycles per sec"
+ */
+ int64_t commit_cnt; /* Note token counters can go slightly negative */
+ int64_t peak_cnt; /* Note token counters can go slightly negative */
+
+ uint64_t virtual_finish_time;
+ pkt_desc_t in_pkt_desc;
+ pkt_desc_t out_pkt_desc;
+ tm_queue_obj_t *timer_tm_queue;
+ uint8_t callback_reason;
+ tm_prop_t propagation_result;
+ uint8_t input_priority;
+ uint8_t out_priority;
+ uint8_t valid_finish_time;
+ uint8_t timer_outstanding;
+ uint8_t in_tm_node_obj;
+ uint8_t initialized;
+} tm_shaper_obj_t;
+
+typedef struct {
+ /* Note that the priority is implicit. */
+ pkt_desc_t smallest_pkt_desc;
+ uint64_t base_virtual_time;
+ uint64_t smallest_finish_time;
+ _odp_int_sorted_list_t sorted_list;
+ uint32_t sorted_list_cnt; /* Debugging use only. */
+} tm_sched_state_t;
+
+typedef struct {
+ void *enclosing_entity;
+ pkt_desc_t out_pkt_desc; /* highest priority pkt desc. */
+ uint32_t priority_bit_mask; /* bit set if priority has pkt. */
+ uint8_t num_priorities;
+ uint8_t highest_priority;
+ uint8_t locked;
+ tm_sched_state_t sched_states[0];
+} tm_schedulers_obj_t;
+
+struct tm_queue_obj_s {
+ uint32_t magic_num;
+ uint32_t pkts_rcvd_cnt;
+ uint32_t pkts_enqueued_cnt;
+ uint32_t pkts_dequeued_cnt;
+ uint32_t pkts_consumed_cnt;
+ _odp_int_pkt_queue_t _odp_int_pkt_queue;
+ tm_wred_node_t *tm_wred_node;
+ odp_packet_t pkt;
+ odp_packet_t sent_pkt;
+ uint32_t timer_seq;
+ uint8_t timer_reason;
+ uint8_t timer_cancels_outstanding;
+ tm_shaper_obj_t *timer_shaper;
+ tm_schedulers_obj_t *blocked_scheduler;
+ pkt_desc_t in_pkt_desc;
+ pkt_desc_t sent_pkt_desc;
+ tm_shaper_obj_t shaper_obj;
+ uint32_t queue_num;
+ uint16_t epoch;
+ uint8_t priority;
+ uint8_t blocked_priority;
+ uint8_t tm_idx;
+ uint8_t delayed_cnt;
+ uint8_t blocked_cnt;
+ queue_entry_t tm_qentry;
+};
+
+struct tm_node_obj_s {
+ uint32_t magic_num;
+ tm_wred_node_t *tm_wred_node;
+ tm_shaper_obj_t shaper_obj;
+ tm_schedulers_obj_t *schedulers_obj;
+ _odp_int_name_t name_tbl_id;
+ uint32_t max_fanin;
+ uint8_t level; /* Primarily for debugging */
+ uint8_t tm_idx;
+ uint8_t marked;
+};
+
+typedef struct {
+ tm_queue_obj_t *tm_queue_obj;
+ odp_packet_t pkt;
+} input_work_item_t;
+
+typedef struct {
+ uint64_t total_enqueues;
+ uint64_t enqueue_fail_cnt;
+ uint64_t total_dequeues;
+ odp_atomic_u32_t queue_cnt;
+ uint32_t peak_cnt;
+ uint32_t head_idx;
+ uint32_t tail_idx;
+ odp_ticketlock_t lock;
+ input_work_item_t work_ring[INPUT_WORK_RING_SIZE];
+} input_work_queue_t;
+
+typedef struct {
+ uint32_t next_random_byte;
+ uint8_t buf[256];
+} tm_random_data_t;
+
+typedef struct {
+ tm_queue_thresholds_t *threshold_params;
+ tm_queue_cnts_t queue_cnts;
+} tm_queue_info_t;
+
+typedef struct {
+ odp_ticketlock_t tm_system_lock;
+ odp_barrier_t tm_system_barrier;
+ odp_barrier_t tm_system_destroy_barrier;
+ odp_atomic_u32_t destroying;
+ _odp_int_name_t name_tbl_id;
+
+ uint32_t next_queue_num;
+ tm_queue_obj_t **queue_num_tbl;
+ input_work_queue_t *input_work_queue;
+ tm_queue_cnts_t priority_queue_cnts;
+ tm_queue_cnts_t total_queue_cnts;
+ pkt_desc_t egress_pkt_desc;
+
+ _odp_int_queue_pool_t _odp_int_queue_pool;
+ _odp_timer_wheel_t _odp_int_timer_wheel;
+ _odp_int_sorted_pool_t _odp_int_sorted_pool;
+
+ odp_tm_egress_t egress;
+ odp_tm_capability_t capability;
+
+ tm_queue_info_t total_info;
+ tm_queue_info_t priority_info[ODP_TM_MAX_PRIORITIES];
+
+ tm_random_data_t tm_random_data;
+
+ uint64_t current_cycles;
+ uint8_t tm_idx;
+ uint8_t first_enq;
+ odp_bool_t is_idle;
+
+ uint64_t shaper_green_cnt;
+ uint64_t shaper_yellow_cnt;
+ uint64_t shaper_red_cnt;
+} tm_system_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
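A round-trip sketch for the profile-handle macros defined above. The table index is stored biased by +1 so that an all-zero handle can never alias a valid index-0 entry, while the kind occupies bits 31..28. The assert-based checks are illustrative only:

#include <assert.h>

static void profile_handle_example(void)
{
	uint32_t tbl_idx = 7;
	uint32_t handle;

	handle = MAKE_PROFILE_HANDLE(TM_WRED_PROFILE, tbl_idx);

	/* Kind in bits 31..28, biased index in bits 27..0. */
	assert(GET_PROFILE_KIND(handle) == TM_WRED_PROFILE);
	assert(GET_TBL_IDX(handle) == tbl_idx);
}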
new file mode 100644
@@ -0,0 +1,1365 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <odp_name_table_internal.h>
+#include <odp_debug_internal.h>
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+ /* The following constants define some tunable parameters of this module.
+ * They are set to fairly reasonable values (perhaps somewhat biased toward
+ * handling a number of names in the range of one thousand to one million).
+ * Change these values ONLY if your needs are outside of this range AND you
+ * have a complete understanding of how this code works.
+ *
+ * The primary hash table size should be a power of 2 in the range 256 to 64K.
+ * The size of the secondary hash tables should be a power of 2 in the range
+ * 64 to 256.
+ */
+#define PRIMARY_HASH_TBL_SIZE (16 * 1024)
+#define SECONDARY_HASH_TBL_SIZE 128
+
+ /* The following thresholds set the number of primary table hash collisions
+ * at which, when adding, a name_tbl_entry_t linked list is replaced with a
+ * secondary hash table (MAX_PRIMARY_LIST_SIZE), or at which, when deleting,
+ * a secondary hash table is replaced with a linked list
+ * (MIN_SECONDARY_TBL_SIZE). It is important that these values differ
+ * sufficiently so as to exhibit meaningful hysteresis.
+ */
+#define MAX_PRIMARY_LIST_SIZE 12
+#define MAX_SECONDARY_LIST_SIZE 12
+#define MIN_SECONDARY_TBL_SIZE 4
+
+ /* Still to be documented. */
+#define INITIAL_NAME_TBL_SIZE 1024
+
+ /* The number of name tables should not be changed. */
+#define NUM_NAME_TBLS 16
+
+#define SECONDARY_HASH_HISTO_PRINT 1
+
+ /* #define USE_AES */
+
+#if defined __x86_64__ || defined __i386__
+
+#ifdef USE_AES
+
+typedef long long int v2di __attribute__((vector_size(16)));
+
+static const v2di HASH_CONST1 = { 0x123456, 0xFEBCDA383 };
+static const v2di HASH_CONST2 = { 0x493BA3F689, 0x102F5D73A8C };
+
+#define PLATFORM_HASH_STATE v2di
+
+#define PLATFORM_HASH32_INIT(hash_state, name_len) \
+ ({ \
+ hash_state = HASH_CONST1; \
+ hash_state[0] ^= name_len; \
+ })
+
+#define PLATFORM_HASH32(hash_state, name_word) \
+ ({ \
+ v2di data; \
+ \
+ data[0] = name_word; \
+ data[1] = name_word << 1; \
+ hash_state = __builtin_ia32_aesenc128(hash_state, data); \
+ })
+
+#define PLATFORM_HASH32_FINISH(hash_state, kind) \
+ ({ \
+ uint64_t result; \
+ v2di data; \
+ \
+ data[0] = name_kind; \
+ data[1] = name_kind << 7; \
+ hash_state = __builtin_ia32_aesenc128(hash_state, data); \
+ hash_state = __builtin_ia32_aesenc128(hash_state, \
+ HASH_CONST2); \
+ hash_state = __builtin_ia32_aesenc128(hash_state, \
+ HASH_CONST1); \
+ result = (uint64_t)hash_state[0] ^ hash_state[1]; \
+ result = result ^ result >> 32; \
+ (uint32_t)result; \
+ })
+
+#else
+
+#define PLATFORM_HASH_STATE uint64_t
+
+#define PLATFORM_HASH32_INIT(hash_state, name_len) \
+ ({ \
+ hash_state = (uint64_t)name_len; \
+ hash_state |= hash_state << 8; \
+ hash_state |= hash_state << 16; \
+ hash_state |= hash_state << 32; \
+ })
+
+#define PLATFORM_HASH32(hash_state, name_word) \
+ ({ \
+ uint64_t temp; \
+ \
+ temp = ((uint64_t)name_word) * 0xFEFDFCF5; \
+ hash_state = hash_state * 0xFF; \
+ hash_state ^= temp ^ (uint64_t)name_word; \
+ })
+
+#define PLATFORM_HASH32_FINISH(hash_state, kind) \
+ ({ \
+ hash_state ^= (((uint32_t)kind) << 13); \
+ hash_state = hash_state * 0xFEFDFCF5; \
+ hash_state = hash_state ^ hash_state >> 32; \
+ hash_state = hash_state % 0xFEEDDCCBBAA1; \
+ hash_state = hash_state ^ hash_state >> 32; \
+ (uint32_t)hash_state; \
+ })
+
+#endif
+
+#elif defined(__tile_gx__)
+
+#define PLATFORM_HASH_STATE uint32_t
+
+#define PLATFORM_HASH32_INIT(hash_state, name_len) \
+ ({ \
+ hash_state = 0xFEFDFCF5; \
+ hash_state = __insn_crc_32_32(hash_state, name_len); \
+ })
+
+#define PLATFORM_HASH32(hash_state, name_word) \
+ ({ \
+ hash_state = __insn_crc_32_32(hash_state, name_word); \
+ })
+
+#define PLATFORM_HASH32_FINISH(hash_state, kind) \
+ ({ \
+ hash_state = __insn_crc_32_32(hash_state, kind); \
+ hash_state = __insn_crc_32_32(hash_state, 0xFFFFFFFF); \
+ hash_state = __insn_crc_32_32(hash_state, 0xFEFDFCF5); \
+ (uint32_t)hash_state; \
+ })
+
+#elif defined(__arm__) || defined(__aarch64__)
+
+#define PLATFORM_HASH_STATE uint32_t
+
+#define PLATFORM_HASH32_INIT(hash_state, name_len) \
+ ({ \
+ hash_state = 0xFEFDFCF5; \
+ hash_state = __crc32w(hash_state, name_len); \
+ })
+
+#define PLATFORM_HASH32(hash_state, name_word) \
+ ({ \
+ __crc32w(hash_state, name_word); \
+ })
+
+#define PLATFORM_HASH32_FINISH(hash_state, kind) \
+ ({ \
+ hash_state = __crc32w(hash_state, kind); \
+ hash_state = __crc32w(hash_state, 0xFFFFFFFF); \
+ hash_state = __crc32w(hash_state, 0xFEFDFCF5); \
+ (uint32_t)hash_state; \
+ })
+
+#else
+#error "Need to define PLATFORM_DEPENDENT_HASH32 macro"
+#endif
+
+typedef struct name_tbl_entry_s name_tbl_entry_t;
+
+ /* It is important for most platforms that the following struct fit within
+ * one cacheline.
+ */
+struct name_tbl_entry_s {
+ name_tbl_entry_t *next_entry;
+ uint64_t user_data;
+ _odp_int_name_t name_tbl_id;
+ uint32_t hash_value;
+ uint8_t name_kind;
+ uint8_t name_len;
+ char name[_ODP_INT_NAME_LEN + 1];
+} ODP_ALIGNED_CACHE;
+
+typedef struct {
+ uint32_t num_allocd;
+ uint32_t num_used;
+ uint32_t num_added_to_free_list;
+ uint32_t num_avail_to_add;
+ uint32_t base_id;
+ name_tbl_entry_t *free_list_head;
+ name_tbl_entry_t entries[0];
+} ODP_ALIGNED_CACHE name_tbl_t;
+
+typedef struct {
+ name_tbl_t *tbls[NUM_NAME_TBLS];
+ uint64_t avail_space_bit_mask;
+ uint64_t num_adds;
+ uint64_t num_deletes;
+ uint32_t current_num_names;
+ uint8_t num_name_tbls;
+} name_tbls_t;
+
+ /* A hash table entry is LOGICALLY either empty, a pointer to a 64-byte
+ * aligned name_tbl_entry_t OR a pointer to a 64-byte aligned secondary hash
+ * table. Since the bottom 6 bits of this value are not needed to hold the
+ * address, these 6 bits are used to indicate what type of object this
+ * address refers to AND, in one case, the length of a linked list.
+ * Specifically, if the entire value is 0 then this entry is empty; else if
+ * the bottom 6 bits are 0, then this hash_tbl_entry_t value is a pointer
+ * to a secondary hash table. Otherwise, if the bottom 6 bits are NOT zero,
+ * then this value points to a (linked list of) name_tbl_entry_t records
+ * AND the bottom 6 bits are the length of this list (saturating at 0x3F).
+ */
+typedef uintptr_t hash_tbl_entry_t;
+
+typedef struct {
+ hash_tbl_entry_t hash_entries[SECONDARY_HASH_TBL_SIZE];
+} secondary_hash_tbl_t;
+
+typedef struct {
+ hash_tbl_entry_t hash_entries[PRIMARY_HASH_TBL_SIZE];
+ uint32_t hash_collisions[PRIMARY_HASH_TBL_SIZE];
+ uint32_t num_secondary_tbls[2];
+} primary_hash_tbl_t;
+
+static uint8_t name_tbls_initialized;
+static name_tbls_t name_tbls;
+static odp_ticketlock_t name_table_lock;
+static primary_hash_tbl_t name_hash_tbl;
+
+static void *aligned_malloc(uint32_t length, uint32_t align)
+{
+ uintptr_t malloc_addr, mem_addr, alignment, total_length;
+ uint32_t pad_len, *pad_len_ptr;
+
+ /* This code assumes that malloc always uses at least 4-byte
+ * alignment.
+ */
+ alignment = (uintptr_t)align;
+ total_length = ((uintptr_t)length) + alignment;
+ malloc_addr = (uintptr_t)malloc(total_length);
+ if (malloc_addr == 0)
+ return NULL;
+
+ mem_addr = (malloc_addr + alignment) & ~(alignment - 1);
+ pad_len = (uint32_t)(mem_addr - malloc_addr);
+ pad_len_ptr = (uint32_t *)(mem_addr - 4);
+ *pad_len_ptr = pad_len;
+ return (void *)mem_addr;
+}
+
+static void aligned_free(void *mem_ptr)
+{
+ uintptr_t mem_addr, malloc_addr;
+ uint32_t *pad_len_ptr;
+
+ mem_addr = (uintptr_t)mem_ptr;
+ pad_len_ptr = (uint32_t *)(mem_addr - 4);
+ malloc_addr = mem_addr - *pad_len_ptr;
+ free((void *)malloc_addr);
+}
+
+static uint32_t hash_name_and_kind(const char *name, uint8_t name_kind)
+{
+ PLATFORM_HASH_STATE hash_state;
+ uint32_t name_len, name_word, hash_value;
+ uint32_t bytes[4];
+
+ name_len = strlen(name);
+ PLATFORM_HASH32_INIT(hash_state, name_len);
+
+ while (4 <= name_len) {
+ /* Get the next four characters. Note that endianness doesn't
+ * matter here! Also note that this assumes that there is
+ * either no alignment loading restrictions OR that name is
+ * 32-bit aligned. Shouldn't be too hard to add code to deal
+ * with the case when this assumption is false.
+ */
+ /* name_word = *((uint32_t *)name); */
+ bytes[0] = name[0];
+ bytes[1] = name[1];
+ bytes[2] = name[2];
+ bytes[3] = name[3];
+ name_word = (bytes[3] << 24) | (bytes[2] << 16) |
+ (bytes[1] << 8) | bytes[0];
+ PLATFORM_HASH32(hash_state, name_word);
+
+ name_len -= 4;
+ name += 4;
+ }
+
+ if (name_len != 0) {
+ name_word = 0;
+
+ if (2 <= name_len) {
+ /* name_word = (uint32_t)*((uint16_t *)name); */
+ bytes[0] = name[0];
+ bytes[1] = name[1];
+ name_word |= (bytes[1] << 8) | bytes[0];
+ name_len -= 2;
+ name += 2;
+ }
+
+ if (name_len == 1)
+ name_word |= ((uint32_t)*name) << 16;
+
+ PLATFORM_HASH32(hash_state, name_word);
+ }
+
+ hash_value = PLATFORM_HASH32_FINISH(hash_state, name_kind);
+ return hash_value;
+}
+
+static uint32_t linked_list_len(name_tbl_entry_t *name_tbl_entry)
+{
+ uint32_t count;
+
+ count = 0;
+ while (name_tbl_entry) {
+ count++;
+ name_tbl_entry = name_tbl_entry->next_entry;
+ }
+
+ return count;
+}
+
+static secondary_hash_tbl_t *secondary_hash_tbl_alloc(void)
+{
+ secondary_hash_tbl_t *secondary_hash_tbl;
+
+ secondary_hash_tbl = aligned_malloc(sizeof(secondary_hash_tbl_t),
+ ODP_CACHE_LINE_SIZE);
+ memset(secondary_hash_tbl, 0, sizeof(secondary_hash_tbl_t));
+ return secondary_hash_tbl;
+}
+
+static void secondary_hash_tbl_free(secondary_hash_tbl_t *secondary_hash_tbl)
+{
+ aligned_free(secondary_hash_tbl);
+}
+
+static void check_secondary_hash(secondary_hash_tbl_t *secondary_hash_tbl)
+{
+ hash_tbl_entry_t hash_tbl_entry, hash_tbl_entry2;
+ uint64_t tbn1, tbn2;
+ uint32_t idx, idx2;
+
+ for (idx = 0; idx < SECONDARY_HASH_TBL_SIZE; idx++) {
+ hash_tbl_entry = secondary_hash_tbl->hash_entries[idx];
+ tbn1 = hash_tbl_entry & ~0x3F;
+
+ if (hash_tbl_entry != 0) {
+ /* Sanity check: the upper 16 bits of a non-empty
+ * entry must be 0 or 0x7FFF, else the pointer bits
+ * are corrupt.
+ */
+ if (((hash_tbl_entry >> 48) != 0x7FFF) &&
+ ((hash_tbl_entry >> 48) != 0))
+ abort();
+
+ for (idx2 = 0; idx2 < idx; idx2++) {
+ hash_tbl_entry2 =
+ secondary_hash_tbl->hash_entries[idx2];
+ if (hash_tbl_entry2 != 0) {
+ tbn2 = hash_tbl_entry2 & ~0x3F;
+ if (tbn1 == tbn2)
+ abort();
+ }
+ }
+ }
+ }
+}
+
+#ifdef NOT_USED /* @todo */
+static void secondary_hash_dump(secondary_hash_tbl_t *secondary_hash_tbl)
+{
+ name_tbl_entry_t *name_tbl_entry;
+ hash_tbl_entry_t hash_tbl_entry;
+ uint32_t count, idx, entry_cnt, list_cnt;
+
+ count = 0;
+ for (idx = 0; idx < SECONDARY_HASH_TBL_SIZE; idx++) {
+ hash_tbl_entry = secondary_hash_tbl->hash_entries[idx];
+ if (hash_tbl_entry != 0) {
+ if ((hash_tbl_entry & 0x3F) != 0) {
+ name_tbl_entry = (name_tbl_entry_t *)
+ (hash_tbl_entry & ~0x3F);
+ entry_cnt = hash_tbl_entry & 0x3F;
+ list_cnt = linked_list_len(name_tbl_entry);
+ if (entry_cnt != list_cnt)
+ ODP_DBG("%s idx=%u entry_cnt=%u "
+ "list_cnt=%u\n",
+ __func__,
+ idx, entry_cnt, list_cnt);
+
+ count += entry_cnt;
+ } else {
+ ODP_DBG("%s inner secondary tbl\n",
+ __func__);
+ }
+ }
+ }
+
+ ODP_DBG("%s count=%u\n", __func__, count);
+}
+#endif
+
+static uint32_t name_tbl_free_list_add(name_tbl_t *name_tbl,
+ uint32_t num_to_add)
+{
+ uint32_t first_idx, name_tbl_id, entry_idx, num_added, cnt;
+
+ first_idx = name_tbl->num_added_to_free_list;
+ name_tbl_id = name_tbl->base_id | first_idx;
+ entry_idx = first_idx;
+
+ num_added = MIN(num_to_add, name_tbl->num_avail_to_add);
+ if (num_added == 0)
+ return 0;
+
+ for (cnt = 1; cnt < num_added; cnt++) {
+ name_tbl->entries[entry_idx].name_tbl_id = name_tbl_id;
+ name_tbl->entries[entry_idx].next_entry =
+ &name_tbl->entries[entry_idx + 1];
+ name_tbl_id++;
+ entry_idx++;
+ }
+
+ name_tbl->entries[entry_idx].name_tbl_id = name_tbl_id;
+ name_tbl->entries[entry_idx].next_entry = name_tbl->free_list_head;
+
+ name_tbl->free_list_head = &name_tbl->entries[first_idx];
+ name_tbl->num_added_to_free_list += num_added;
+ name_tbl->num_avail_to_add -= num_added;
+ return num_added;
+}
+
+static name_tbl_t *name_tbl_alloc(uint32_t name_tbls_idx, uint32_t num_entries)
+{
+ name_tbl_t *name_tbl;
+ uint32_t name_tbl_size;
+
+ name_tbl_size = sizeof(name_tbl_t) +
+ num_entries * sizeof(name_tbl_entry_t);
+ name_tbl = aligned_malloc(name_tbl_size, ODP_CACHE_LINE_SIZE);
+ memset(name_tbl, 0, name_tbl_size);
+
+ name_tbl->num_allocd = num_entries;
+ name_tbl->num_used = 0;
+ name_tbl->num_added_to_free_list = 0;
+ name_tbl->num_avail_to_add = num_entries;
+ name_tbl->free_list_head = NULL;
+ name_tbl->base_id = (name_tbls_idx + 1) << 26;
+ return name_tbl;
+}
+
+static int new_name_tbl_add(void)
+{
+ name_tbl_t *new_name_tbl;
+ uint32_t name_tbls_idx, num_entries;
+
+ if (NUM_NAME_TBLS <= name_tbls.num_name_tbls)
+ return -1;
+
+ name_tbls_idx = name_tbls.num_name_tbls;
+ num_entries = INITIAL_NAME_TBL_SIZE << name_tbls_idx;
+ new_name_tbl = name_tbl_alloc(name_tbls_idx, num_entries);
+ name_tbl_free_list_add(new_name_tbl, MIN(num_entries, 256));
+
+ name_tbls.tbls[name_tbls_idx] = new_name_tbl;
+ name_tbls.avail_space_bit_mask |= 1 << name_tbls_idx;
+ name_tbls.num_name_tbls++;
+ return 0;
+}
+
+static name_tbl_entry_t *name_tbl_entry_alloc(void)
+{
+ name_tbl_entry_t *name_tbl_entry;
+ name_tbl_t *name_tbl;
+ uint32_t name_tbls_idx, num_added;
+ int rc;
+
+ /* If avail_space_bit_mask == 0 then we need to make a new name_tbl. */
+ if (name_tbls.avail_space_bit_mask == 0) {
+ rc = new_name_tbl_add();
+ if (rc < 0)
+ return NULL;
+ }
+
+ /* Find first bit set in avail_space_bit_mask. */
+ name_tbls_idx = __builtin_ctzl(name_tbls.avail_space_bit_mask);
+ name_tbl = name_tbls.tbls[name_tbls_idx];
+
+ name_tbl_entry = name_tbl->free_list_head;
+ name_tbl->free_list_head = name_tbl_entry->next_entry;
+ name_tbl->num_used++;
+
+ if (!name_tbl->free_list_head) {
+ num_added = name_tbl_free_list_add(name_tbl, 256);
+ if (num_added == 0)
+ name_tbls.avail_space_bit_mask ^= 1 << name_tbls_idx;
+ }
+
+ return name_tbl_entry;
+}
+
+static name_tbl_entry_t *name_tbl_id_parse(_odp_int_name_t name_tbl_id,
+ name_tbl_t **name_tbl_ptr)
+{
+ name_tbl_t *name_tbl;
+ uint32_t name_tbls_idx, name_tbl_idx;
+
+ /* Convert the name_tbl_id into a name_tbls_idx and name_tbl_idx */
+ if (name_tbl_id == ODP_INVALID_NAME)
+ return NULL;
+
+ name_tbls_idx = (((uint32_t)name_tbl_id) >> 26) - 1;
+ name_tbl_idx = ((uint32_t)name_tbl_id) & 0x03FFFFFF;
+ if (name_tbls.num_name_tbls < name_tbls_idx)
+ return NULL;
+
+ name_tbl = name_tbls.tbls[name_tbls_idx];
+ if (!name_tbl)
+ return NULL;
+
+ if (name_tbl->num_used < name_tbl_idx)
+ return NULL;
+
+ if (name_tbl_ptr)
+ *name_tbl_ptr = name_tbl;
+
+ return &name_tbl->entries[name_tbl_idx];
+}
+
+static void name_tbl_entry_free(name_tbl_entry_t *name_tbl_entry)
+{
+ name_tbl_entry_t *entry;
+ _odp_int_name_t name_tbl_id;
+ name_tbl_t *name_tbl;
+
+ name_tbl_id = name_tbl_entry->name_tbl_id;
+ entry = name_tbl_id_parse(name_tbl_id, &name_tbl);
+ if (!entry)
+ return;
+
+ memset(name_tbl_entry, 0, sizeof(name_tbl_entry_t));
+ name_tbl_entry->next_entry = name_tbl->free_list_head;
+ name_tbl->free_list_head = name_tbl_entry;
+}
+
+static hash_tbl_entry_t make_hash_tbl_entry(name_tbl_entry_t *name_tbl_entry,
+ uint32_t entry_cnt)
+{
+ hash_tbl_entry_t hash_tbl_entry;
+ uint32_t new_entry_cnt;
+
+ new_entry_cnt = MIN(entry_cnt + 1, 0x3F);
+ hash_tbl_entry = (hash_tbl_entry_t)name_tbl_entry;
+ hash_tbl_entry &= ~0x3F;
+ hash_tbl_entry |= new_entry_cnt;
+ return hash_tbl_entry;
+}
+
+static name_tbl_entry_t *name_hash_tbl_lookup(uint32_t hash_value)
+{
+ secondary_hash_tbl_t *secondary_hash;
+ hash_tbl_entry_t hash_tbl_entry;
+ uint32_t hash_idx;
+
+ hash_idx = hash_value & (PRIMARY_HASH_TBL_SIZE - 1);
+ hash_tbl_entry = name_hash_tbl.hash_entries[hash_idx];
+ if (hash_tbl_entry == 0)
+ return NULL;
+ else if ((hash_tbl_entry & 0x3F) != 0)
+ return (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+
+ /* This hash_tbl_entry references a secondary hash table, so get
+ * some more hash_value bits and index that table.
+ */
+ hash_idx = (hash_value >> 16) & (SECONDARY_HASH_TBL_SIZE - 1);
+ secondary_hash = (secondary_hash_tbl_t *)hash_tbl_entry;
+ hash_tbl_entry = secondary_hash->hash_entries[hash_idx];
+ if (hash_tbl_entry == 0)
+ return NULL;
+ else if ((hash_tbl_entry & 0x3F) != 0)
+ return (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+
+ /* Yet again, this hash_tbl_entry references a secondary hash table,
+ * so get some more hash_value bits and index that table. We only
+ * allow two secondary tables in the path, so if this hash_tbl_entry
+ * doesn't point to a name_tbl_entry then we signal failure by
+ * returning NULL.
+ */
+ hash_idx = (hash_value >> 24) & (SECONDARY_HASH_TBL_SIZE - 1);
+ secondary_hash = (secondary_hash_tbl_t *)hash_tbl_entry;
+ hash_tbl_entry = secondary_hash->hash_entries[hash_idx];
+ if (hash_tbl_entry == 0)
+ return NULL;
+ else if ((hash_tbl_entry & 0x3F) != 0)
+ return (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+
+ return NULL;
+}
+
+static name_tbl_entry_t *internal_name_lookup(const char *name,
+ uint8_t name_kind)
+{
+ name_tbl_entry_t *name_tbl_entry;
+ uint32_t hash_value, name_len;
+
+ hash_value = hash_name_and_kind(name, name_kind);
+ name_len = strlen(name);
+
+ name_tbl_entry = name_hash_tbl_lookup(hash_value);
+ while (name_tbl_entry) {
+ if ((name_tbl_entry->name_kind == name_kind) &&
+ (name_tbl_entry->name_len == name_len) &&
+ (memcmp(name_tbl_entry->name, name, name_len) == 0))
+ return name_tbl_entry;
+
+ name_tbl_entry = name_tbl_entry->next_entry;
+ }
+
+ return NULL;
+}
+
+static hash_tbl_entry_t secondary_hash_add(name_tbl_entry_t *name_tbl_entry,
+ uint32_t level,
+ uint32_t hash_shift)
+{
+ secondary_hash_tbl_t *secondary_hash;
+ name_tbl_entry_t *next_entry, *first_entry;
+ hash_tbl_entry_t hash_tbl_entry, new_hash_tbl_entry;
+ uint32_t shifted_hash_value, hash_idx, entry_cnt;
+
+ secondary_hash = secondary_hash_tbl_alloc();
+ name_hash_tbl.num_secondary_tbls[level]++;
+ while (name_tbl_entry) {
+ next_entry = name_tbl_entry->next_entry;
+ shifted_hash_value = name_tbl_entry->hash_value >> hash_shift;
+ hash_idx = shifted_hash_value &
+ (SECONDARY_HASH_TBL_SIZE - 1);
+
+ hash_tbl_entry = secondary_hash->hash_entries[hash_idx];
+ entry_cnt = hash_tbl_entry & 0x3F;
+ first_entry = (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+
+ name_tbl_entry->next_entry = first_entry;
+ new_hash_tbl_entry =
+ make_hash_tbl_entry(name_tbl_entry, entry_cnt);
+
+ secondary_hash->hash_entries[hash_idx] = new_hash_tbl_entry;
+ name_tbl_entry = next_entry;
+ }
+
+ /* secondary_hash_dump(secondary_hash); */
+ return (hash_tbl_entry_t)secondary_hash;
+}
+
+static hash_tbl_entry_t hash_tbl_remove(secondary_hash_tbl_t *hash_tbl,
+ uint32_t level, /* 0 or 1 */
+ name_tbl_entry_t **list_head_ptr,
+ name_tbl_entry_t **list_tail_ptr)
+{
+ secondary_hash_tbl_t *secondary_hash;
+ name_tbl_entry_t *linked_list_head, *linked_list_tail;
+ name_tbl_entry_t *head_entry, *tail_entry;
+ hash_tbl_entry_t hash_tbl_entry;
+ uint32_t idx, entry_cnt;
+
+ check_secondary_hash(hash_tbl);
+ linked_list_head = NULL;
+ linked_list_tail = NULL;
+
+ for (idx = 0; idx < SECONDARY_HASH_TBL_SIZE; idx++) {
+ hash_tbl_entry = hash_tbl->hash_entries[idx];
+ if (hash_tbl_entry != 0) {
+ if ((hash_tbl_entry & 0x3F) != 0) {
+ /* This secondary hash table entry points to
+ * a name_tbl_entry_t linked list, so collect
+ * the whole list (head and true tail) for the
+ * concatenation step below.
+ */
+ head_entry = (name_tbl_entry_t *)
+ (hash_tbl_entry & ~0x3F);
+ tail_entry = head_entry;
+ while (tail_entry->next_entry)
+ tail_entry = tail_entry->next_entry;
+ } else {
+ secondary_hash = (secondary_hash_tbl_t *)
+ hash_tbl_entry;
+ check_secondary_hash(secondary_hash);
+ if (level == 1)
+ break;
+
+ hash_tbl_remove(secondary_hash, level + 1,
+ &head_entry, &tail_entry);
+ }
+
+ /* Now concatenate the lists. */
+ if (!linked_list_tail) {
+ linked_list_head = head_entry;
+ linked_list_tail = tail_entry;
+ } else {
+ linked_list_tail->next_entry = head_entry;
+ linked_list_tail = tail_entry;
+ }
+ }
+ }
+
+ if (list_head_ptr)
+ *list_head_ptr = linked_list_head;
+
+ if (list_tail_ptr)
+ *list_tail_ptr = linked_list_tail;
+
+ secondary_hash_tbl_free(hash_tbl);
+ if (name_hash_tbl.num_secondary_tbls[level] != 0)
+ name_hash_tbl.num_secondary_tbls[level]--;
+
+ entry_cnt = linked_list_len(linked_list_head);
+ if ((!linked_list_head) || (entry_cnt == 0))
+ return 0;
+
+ hash_tbl_entry = make_hash_tbl_entry(linked_list_head, entry_cnt - 1);
+ return hash_tbl_entry;
+}
+
+static int name_hash_tbl_add(name_tbl_entry_t *entry_to_add,
+ uint32_t hash_value)
+{
+ secondary_hash_tbl_t *secondary_hash;
+ name_tbl_entry_t *name_tbl_entry;
+ hash_tbl_entry_t hash_tbl_entry;
+ uint32_t primary_hash_idx, hash_idx, collisions, entry_cnt;
+
+ primary_hash_idx = hash_value & (PRIMARY_HASH_TBL_SIZE - 1);
+ hash_tbl_entry = name_hash_tbl.hash_entries[primary_hash_idx];
+ entry_cnt = hash_tbl_entry & 0x3F;
+ name_hash_tbl.hash_collisions[primary_hash_idx]++;
+ if (hash_tbl_entry == 0) {
+ /* This primary hash table entry points to an empty bucket, so
+ * start a new name_tbl_entry_t linked list.
+ */
+ hash_tbl_entry = make_hash_tbl_entry(entry_to_add, 0);
+ name_hash_tbl.hash_entries[primary_hash_idx] = hash_tbl_entry;
+ return 0;
+ } else if (entry_cnt != 0) {
+ /* This primary hash table entry points to a name_tbl_entry_t
+ * linked list, so add this new entry onto the front of it.
+ */
+ name_tbl_entry = (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+ entry_to_add->next_entry = name_tbl_entry;
+ hash_tbl_entry = make_hash_tbl_entry(entry_to_add, entry_cnt);
+ name_hash_tbl.hash_entries[primary_hash_idx] = hash_tbl_entry;
+
+ /* See if there are enough hash collisions within this hash
+ * bucket to justify replacing the linked list with a
+ * secondary hash table.
+ */
+ collisions = name_hash_tbl.hash_collisions[primary_hash_idx];
+ if (collisions <= MAX_PRIMARY_LIST_SIZE)
+ return 0;
+
+ /* Replace the current linked list with a secondary hash
+ * table.
+ */
+ hash_tbl_entry = secondary_hash_add(entry_to_add, 0, 16);
+ name_hash_tbl.hash_entries[primary_hash_idx] = hash_tbl_entry;
+ return 0;
+ }
+
+ /* This hash_tbl_entry references a secondary hash table, so get
+ * some more hash_value bits and index that table.
+ */
+ hash_idx = (hash_value >> 16) & (SECONDARY_HASH_TBL_SIZE - 1);
+ secondary_hash = (secondary_hash_tbl_t *)hash_tbl_entry;
+ check_secondary_hash(secondary_hash);
+ hash_tbl_entry = secondary_hash->hash_entries[hash_idx];
+ entry_cnt = hash_tbl_entry & 0x3F;
+ if (hash_tbl_entry == 0) {
+ /* This secondary hash table entry points to an empty bucket,
+ * so start a new name_tbl_entry_t linked list.
+ */
+ hash_tbl_entry = make_hash_tbl_entry(entry_to_add, 0);
+ secondary_hash->hash_entries[hash_idx] = hash_tbl_entry;
+ return 0;
+ } else if (entry_cnt != 0) {
+ /* This secondary hash table entry points to a
+ * name_tbl_entry_t linked list, so add this new entry onto
+ * the front of it.
+ */
+ name_tbl_entry = (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+ entry_to_add->next_entry = name_tbl_entry;
+ hash_tbl_entry = make_hash_tbl_entry(entry_to_add, entry_cnt);
+ secondary_hash->hash_entries[hash_idx] = hash_tbl_entry;
+
+ /* See if there are enough hash collisions within this
+ * secondary hash bucket to justify replacing the linked list
+ * with yet another secondary hash table.
+ */
+ if (entry_cnt < MAX_SECONDARY_LIST_SIZE)
+ return 0;
+
+ /* Replace the current linked list with a secondary hash
+ * table.
+ */
+ hash_tbl_entry = secondary_hash_add(entry_to_add, 1, 24);
+ secondary_hash->hash_entries[hash_idx] = hash_tbl_entry;
+ check_secondary_hash(secondary_hash);
+ return 0;
+ }
+
+ /* Yet again, this (secondary) hash_tbl_entry references a level 2
+ * secondary hash table, so get some more hash_value bits and index
+ * that table. We only allow two secondary tables in the path, so if
+ * this hash_tbl_entry doesn't point to a name_tbl_entry then we
+ * signal failure by returning -1.
+ */
+ hash_idx = (hash_value >> 24) & (SECONDARY_HASH_TBL_SIZE - 1);
+ secondary_hash = (secondary_hash_tbl_t *)hash_tbl_entry;
+ check_secondary_hash(secondary_hash);
+ hash_tbl_entry = secondary_hash->hash_entries[hash_idx];
+ entry_cnt = hash_tbl_entry & 0x3F;
+ if (hash_tbl_entry == 0) {
+ /* This secondary hash table entry points to an empty bucket,
+ * so start a new name_tbl_entry_t linked list.
+ */
+ hash_tbl_entry = make_hash_tbl_entry(entry_to_add, 0);
+ secondary_hash->hash_entries[hash_idx] = hash_tbl_entry;
+ check_secondary_hash(secondary_hash);
+ return 0;
+ } else if (entry_cnt != 0) {
+ /* This secondary hash table entry points to a
+ * name_tbl_entry_t linked list, so add this new entry onto
+ * the front of it. Note that regardless of the size of this
+ * linked list, we never add another hash table, so we don't
+ * need to update any secondary table counts.
+ */
+ name_tbl_entry = (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+ entry_to_add->next_entry = name_tbl_entry;
+ hash_tbl_entry = make_hash_tbl_entry(entry_to_add, entry_cnt);
+ secondary_hash->hash_entries[hash_idx] = hash_tbl_entry;
+ check_secondary_hash(secondary_hash);
+ return 0;
+ }
+
+ name_hash_tbl.hash_collisions[primary_hash_idx]--;
+ return -1;
+}
+
+static int name_tbl_entry_list_remove(hash_tbl_entry_t *hash_entry_ptr,
+ name_tbl_entry_t *linked_list,
+ name_tbl_entry_t *entry_to_delete,
+ uint32_t entry_cnt)
+{
+ name_tbl_entry_t *name_tbl_entry, *prev_entry, *next_entry;
+ hash_tbl_entry_t hash_tbl_entry;
+
+ name_tbl_entry = linked_list;
+ prev_entry = NULL;
+ while (name_tbl_entry) {
+ next_entry = name_tbl_entry->next_entry;
+ if (name_tbl_entry == entry_to_delete) {
+ /* We have found the name_tbl_entry that is to be
+ * deleted.
+ */
+ if (!prev_entry) {
+ hash_tbl_entry = (hash_tbl_entry_t)next_entry;
+ hash_tbl_entry &= ~0x3F;
+ hash_tbl_entry |= entry_cnt;
+ *hash_entry_ptr = hash_tbl_entry;
+ } else {
+ prev_entry->next_entry = next_entry;
+ }
+
+ /* Now decrement the entry_cnt field, but only if
+ * it is in the range 1 to 0x3E (a saturated count
+ * of 0x3F is left unchanged).
+ */
+ if ((entry_cnt != 0) && (entry_cnt < 0x3F))
+ *hash_entry_ptr = (*hash_entry_ptr) - 1;
+
+ return 0;
+ }
+
+ prev_entry = name_tbl_entry;
+ name_tbl_entry = next_entry;
+ }
+
+ return -2;
+}
+
+static int name_hash_tbl_delete(name_tbl_entry_t *entry_to_delete,
+ uint32_t hash_value)
+{
+ secondary_hash_tbl_t *secondary_hash;
+ hash_tbl_entry_t *hash_entry_ptr, hash_tbl_entry;
+ name_tbl_entry_t *name_tbl_entry;
+ uint64_t tbn;
+ uint32_t primary_hash_idx, hash_idx, collisions, entry_cnt;
+ int rc;
+
+ primary_hash_idx = hash_value & (PRIMARY_HASH_TBL_SIZE - 1);
+ hash_entry_ptr = &name_hash_tbl.hash_entries[primary_hash_idx];
+ hash_tbl_entry = *hash_entry_ptr;
+ entry_cnt = hash_tbl_entry & 0x3F;
+ if (hash_tbl_entry == 0) {
+ /* This primary hash table entry points to an empty bucket, so
+ * we have failed to find the matching entry.
+ */
+ return -1;
+ } else if (entry_cnt != 0) {
+ /* This primary hash table entry points to a name_tbl_entry_t
+ * linked list, so remove entry from this linked list.
+ */
+ name_tbl_entry = (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+ rc = name_tbl_entry_list_remove(hash_entry_ptr, name_tbl_entry,
+ entry_to_delete, entry_cnt);
+ tbn = (*hash_entry_ptr) & ~0x3F;
+ if (tbn == 0xFFFFFFFFFFFFFFC0)
+ abort();
+
+ if (rc < 0)
+ return rc;
+
+ name_hash_tbl.hash_collisions[primary_hash_idx]--;
+ return 0;
+ }
+
+ /* This hash_tbl_entry references a secondary hash table, so get
+ * some more hash_value bits and index that table.
+ */
+ hash_idx = (hash_value >> 16) & (SECONDARY_HASH_TBL_SIZE - 1);
+ secondary_hash = (secondary_hash_tbl_t *)hash_tbl_entry;
+ check_secondary_hash(secondary_hash);
+ hash_entry_ptr = &secondary_hash->hash_entries[hash_idx];
+ hash_tbl_entry = *hash_entry_ptr;
+ entry_cnt = hash_tbl_entry & 0x3F;
+ if (hash_tbl_entry == 0) {
+ /* This secondary hash table entry points to an empty bucket,
+ * so we have failed to find the matching entry.
+ */
+ return -1;
+ } else if (entry_cnt != 0) {
+ /* This secondary hash table entry points to a
+ * name_tbl_entry_t linked list, so try to remove
+ * entry_to_delete from this linked list.
+ */
+ name_tbl_entry = (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+ rc = name_tbl_entry_list_remove(hash_entry_ptr, name_tbl_entry,
+ entry_to_delete, entry_cnt);
+ tbn = (*hash_entry_ptr) & ~0x3F;
+ if (tbn == 0xFFFFFFFFFFFFFFC0)
+ abort();
+
+ check_secondary_hash(secondary_hash);
+ if (rc < 0)
+ return rc;
+
+ name_hash_tbl.hash_collisions[primary_hash_idx]--;
+
+ /* See if we should replace this secondary hash table with a
+ * linked list.
+ */
+ collisions = name_hash_tbl.hash_collisions[primary_hash_idx];
+ if (MIN_SECONDARY_TBL_SIZE < collisions)
+ return 0;
+
+ /* Replace the secondary hash table with a linked list. */
+ hash_tbl_entry = hash_tbl_remove(secondary_hash, 0, NULL, NULL);
+ name_hash_tbl.hash_entries[primary_hash_idx] = hash_tbl_entry;
+ return 0;
+ }
+
+ /* Yet again, this (secondary) hash_tbl_entry references a level 2
+ * secondary hash table, so get some more hash_value bits and index
+ * that table. We only allow two secondary tables in the path, so if
+ * this hash_tbl_entry doesn't point to a name_tbl_entry then we
+ * signal failure by returning -1.
+ */
+ hash_idx = (hash_value >> 24) & (SECONDARY_HASH_TBL_SIZE - 1);
+ secondary_hash = (secondary_hash_tbl_t *)hash_tbl_entry;
+ check_secondary_hash(secondary_hash);
+ hash_entry_ptr = &secondary_hash->hash_entries[hash_idx];
+ hash_tbl_entry = *hash_entry_ptr;
+ entry_cnt = hash_tbl_entry & 0x3F;
+ if (hash_tbl_entry == 0) {
+ /* This secondary hash table entry points to an empty bucket,
+ * so we have failed to find the matching entry.
+ */
+ return -1;
+ } else if (entry_cnt != 0) {
+ /* This secondary hash table entry points to a
+ * name_tbl_entry_t linked list, so try to remove
+ * entry_to_delete from this linked list.
+ */
+ name_tbl_entry = (name_tbl_entry_t *)(hash_tbl_entry & ~0x3F);
+ rc = name_tbl_entry_list_remove(hash_entry_ptr, name_tbl_entry,
+ entry_to_delete, entry_cnt);
+ tbn = (*hash_entry_ptr) & ~0x3F;
+ if (tbn == 0xFFFFFFFFFFFFFFC0)
+ abort();
+
+ check_secondary_hash(secondary_hash);
+ if (rc < 0)
+ return rc;
+
+ name_hash_tbl.hash_collisions[primary_hash_idx]--;
+ check_secondary_hash(secondary_hash);
+ return 0;
+ }
+
+ return -1;
+}
+
+_odp_int_name_t _odp_int_name_tbl_add(const char *name,
+ uint8_t name_kind,
+ uint64_t user_data)
+{
+ name_tbl_entry_t *name_tbl_entry;
+ uint32_t hash_value, name_len;
+ int rc;
+
+ /* Check for name_tbls_initialized. */
+ if (name_tbls_initialized == 0)
+ return ODP_INVALID_NAME;
+
+ /* Check for NULL names or zero length names. */
+ if ((!name) || (name[0] == '\0'))
+ return ODP_INVALID_NAME;
+
+ /* Check for names that are too long. */
+ name_len = strlen(name);
+ if (_ODP_INT_NAME_LEN < name_len)
+ return ODP_INVALID_NAME;
+
+ /* Next lookup the <name, name_kind> pair to make sure it doesn't
+ * already exist.
+ */
+ odp_ticketlock_lock(&name_table_lock);
+ name_tbl_entry = internal_name_lookup(name, name_kind);
+ if (name_tbl_entry) {
+ odp_ticketlock_unlock(&name_table_lock);
+ return ODP_INVALID_NAME;
+ }
+
+ /* Allocate a name_tbl_entry record. */
+ name_tbl_entry = name_tbl_entry_alloc();
+ if (!name_tbl_entry) {
+ odp_ticketlock_unlock(&name_table_lock);
+ return ODP_INVALID_NAME;
+ }
+
+ hash_value = hash_name_and_kind(name, name_kind);
+ name_tbl_entry->next_entry = NULL;
+ name_tbl_entry->user_data = user_data;
+ name_tbl_entry->hash_value = hash_value;
+ name_tbl_entry->name_kind = name_kind;
+ name_tbl_entry->name_len = name_len;
+ memcpy(name_tbl_entry->name, name, name_len);
+ name_tbl_entry->name[name_len] = '\0';
+
+ rc = name_hash_tbl_add(name_tbl_entry, hash_value);
+ if (rc < 0) {
+ name_tbl_entry_free(name_tbl_entry);
+ odp_ticketlock_unlock(&name_table_lock);
+ return ODP_INVALID_NAME;
+ }
+
+ name_tbls.num_adds++;
+ name_tbls.current_num_names++;
+ odp_ticketlock_unlock(&name_table_lock);
+ return name_tbl_entry->name_tbl_id;
+}
+
+int _odp_int_name_tbl_delete(_odp_int_name_t odp_name)
+{
+ name_tbl_entry_t *entry_to_delete;
+ int rc;
+
+ /* Check for name_tbls_initialized. */
+ if (name_tbls_initialized == 0)
+ return -3;
+
+ entry_to_delete = name_tbl_id_parse(odp_name, NULL);
+ if (!entry_to_delete)
+ return -1;
+
+ /* First disconnect this entry from its hash bucket linked list. */
+ odp_ticketlock_lock(&name_table_lock);
+ rc = name_hash_tbl_delete(entry_to_delete, entry_to_delete->hash_value);
+ if (0 <= rc) {
+ name_tbls.num_deletes++;
+ if (name_tbls.current_num_names != 0)
+ name_tbls.current_num_names--;
+
+ name_tbl_entry_free(entry_to_delete);
+ }
+
+ odp_ticketlock_unlock(&name_table_lock);
+ return rc;
+}
+
+const char *_odp_int_name_tbl_name(_odp_int_name_t odp_name)
+{
+ name_tbl_entry_t *name_tbl_entry;
+
+ name_tbl_entry = name_tbl_id_parse(odp_name, NULL);
+ if (!name_tbl_entry)
+ return NULL;
+ else
+ return name_tbl_entry->name;
+}
+
+uint64_t _odp_int_name_tbl_user_data(_odp_int_name_t odp_name)
+{
+ name_tbl_entry_t *name_tbl_entry;
+
+ name_tbl_entry = name_tbl_id_parse(odp_name, NULL);
+ if (!name_tbl_entry)
+ return 0; /* @todo */
+ else
+ return name_tbl_entry->user_data;
+}
+
+_odp_int_name_t _odp_int_name_tbl_lookup(const char *name, uint8_t name_kind)
+{
+ name_tbl_entry_t *name_tbl_entry;
+ _odp_int_name_t name_tbl_id;
+
+ /* Check for name_tbls_initialized. */
+ if (name_tbls_initialized == 0)
+ return ODP_INVALID_NAME;
+
+ /* Check for NULL names or zero length names. */
+ name_tbl_id = ODP_INVALID_NAME;
+ if ((!name) || (name[0] == '\0'))
+ return name_tbl_id;
+
+ odp_ticketlock_lock(&name_table_lock);
+ name_tbl_entry = internal_name_lookup(name, name_kind);
+ if (name_tbl_entry)
+ name_tbl_id = name_tbl_entry->name_tbl_id;
+ odp_ticketlock_unlock(&name_table_lock);
+
+ return name_tbl_id;
+}
+
+#ifdef SECONDARY_HASH_HISTO_PRINT
+
+static uint32_t level2_hash_histo(secondary_hash_tbl_t *hash_tbl,
+ uint32_t level2_histo[])
+{
+ name_tbl_entry_t *name_tbl_entry;
+ hash_tbl_entry_t hash_tbl_entry;
+ uint32_t idx, collisions, total_collisions;
+
+ total_collisions = 0;
+ for (idx = 0; idx < SECONDARY_HASH_TBL_SIZE; idx++) {
+ hash_tbl_entry = hash_tbl->hash_entries[idx];
+ if (hash_tbl_entry == 0) {
+ collisions = 0;
+ } else {
+ name_tbl_entry = (name_tbl_entry_t *)
+ (hash_tbl_entry & ~0x3F);
+ collisions = linked_list_len(name_tbl_entry);
+ }
+
+ level2_histo[MIN(collisions, 256)]++;
+ total_collisions += collisions;
+ }
+
+ return total_collisions;
+}
+
+static uint32_t level1_hash_histo(secondary_hash_tbl_t *hash_tbl,
+ uint32_t level1_histo[],
+ uint32_t level2_histo[])
+{
+ secondary_hash_tbl_t *secondary_hash;
+ name_tbl_entry_t *name_tbl_entry;
+ hash_tbl_entry_t hash_tbl_entry;
+ uint32_t idx, collisions, total_collisions;
+
+ total_collisions = 0;
+ for (idx = 0; idx < SECONDARY_HASH_TBL_SIZE; idx++) {
+ hash_tbl_entry = hash_tbl->hash_entries[idx];
+ if (hash_tbl_entry == 0) {
+ collisions = 0;
+ } else if ((hash_tbl_entry & 0x3F) != 0) {
+ name_tbl_entry = (name_tbl_entry_t *)
+ (hash_tbl_entry & ~0x3F);
+ collisions = linked_list_len(name_tbl_entry);
+ } else {
+ secondary_hash = (secondary_hash_tbl_t *)
+ hash_tbl_entry;
+ collisions = level2_hash_histo(secondary_hash,
+ level2_histo);
+ }
+
+ level1_histo[MIN(collisions, 256)]++;
+ total_collisions += collisions;
+ }
+
+ return total_collisions;
+}
+
+static void secondary_hash_histo_print(void)
+{
+ secondary_hash_tbl_t *secondary_hash;
+ hash_tbl_entry_t hash_tbl_entry;
+ uint32_t level1_histo[257], level2_histo[257];
+ uint32_t avg, idx, count, total_count;
+
+ memset(level1_histo, 0, sizeof(level1_histo));
+ memset(level2_histo, 0, sizeof(level2_histo));
+
+ for (idx = 0; idx < PRIMARY_HASH_TBL_SIZE; idx++) {
+ hash_tbl_entry = name_hash_tbl.hash_entries[idx];
+ if ((hash_tbl_entry != 0) && ((hash_tbl_entry & 0x3F) == 0)) {
+ /* This hash_tbl_entry references a level 0 secondary
+ * hash table
+ */
+ secondary_hash = (secondary_hash_tbl_t *)
+ hash_tbl_entry;
+ level1_hash_histo(secondary_hash, level1_histo,
+ level2_histo);
+ }
+ }
+
+ if (name_hash_tbl.num_secondary_tbls[0] == 0)
+ return;
+
+ ODP_DBG(" level1 secondary hash histogram:\n");
+ total_count = 0;
+ for (idx = 0; idx < 256; idx++) {
+ count = level1_histo[idx];
+ if (idx != 0)
+ total_count += count * idx;
+
+ if (count != 0)
+ ODP_DBG(" num collisions=%02u count=%u\n",
+ idx, count);
+ }
+
+ count = level1_histo[256];
+ total_count += count;
+ if (count != 0)
+ ODP_DBG(" num collisions >=256 count=%u\n", count);
+
+ avg = (100 * total_count) / name_hash_tbl.num_secondary_tbls[0];
+ avg = avg / SECONDARY_HASH_TBL_SIZE;
+ ODP_DBG(" avg collisions=%02u.%02u total=%u\n\n",
+ avg / 100, avg % 100, total_count);
+
+ if (name_hash_tbl.num_secondary_tbls[1] == 0)
+ return;
+
+ ODP_DBG(" level2 secondary hash histogram:\n");
+ total_count = 0;
+ for (idx = 0; idx < 256; idx++) {
+ count = level2_histo[idx];
+ if (idx != 0)
+ total_count += count * idx;
+
+ if (count != 0)
+ ODP_DBG(" num collisions=%02u count=%u\n",
+ idx, count);
+ }
+
+ count = level2_histo[256];
+ total_count += count;
+ if (count != 0)
+ ODP_DBG(" num collisions >=256 count=%u\n", count);
+
+ avg = (100 * total_count) / name_hash_tbl.num_secondary_tbls[1];
+ avg = avg / SECONDARY_HASH_TBL_SIZE;
+ ODP_DBG(" avg collisions=%02u.%02u total=%u\n\n",
+ avg / 100, avg % 100, total_count);
+}
+
+#endif
+
+void _odp_int_name_tbl_stats_print(void)
+{
+ name_tbl_t *name_tbl;
+ uint32_t primary_hash_histo[257], idx, collisions,
+ count, total_count;
+ uint32_t avg;
+
+ ODP_DBG("\nname table stats:\n");
+ ODP_DBG(" num_names=%u num_adds=%lu "
+ "num_deletes=%lu num_name_tbls=%u\n",
+ name_tbls.current_num_names, name_tbls.num_adds,
+ name_tbls.num_deletes, name_tbls.num_name_tbls);
+ for (idx = 0; idx < NUM_NAME_TBLS; idx++) {
+ name_tbl = name_tbls.tbls[idx];
+ if ((name_tbl) && (name_tbl->num_used != 0))
+ ODP_DBG(" name_tbl %u num_allocd=%7u "
+ "num_added_to_free_list=%7u "
+ "num_used=%7u num_avail_to_add=%7u\n", idx,
+ name_tbl->num_allocd,
+ name_tbl->num_added_to_free_list,
+ name_tbl->num_used,
+ name_tbl->num_avail_to_add);
+ }
+
+ memset(primary_hash_histo, 0, sizeof(primary_hash_histo));
+ for (idx = 0; idx < PRIMARY_HASH_TBL_SIZE; idx++) {
+ collisions = MIN(name_hash_tbl.hash_collisions[idx], 256);
+ primary_hash_histo[collisions]++;
+ }
+
+ ODP_DBG(" name_tbl primary hash histogram:\n");
+ total_count = 0;
+ for (idx = 0; idx < 256; idx++) {
+ count = primary_hash_histo[idx];
+ if (idx != 0)
+ total_count += count * idx;
+
+ if (count != 0)
+ ODP_DBG(" num collisions=%02u count=%u\n",
+ idx, count);
+ }
+
+ count = primary_hash_histo[256];
+ total_count += count;
+ if (count != 0)
+ ODP_DBG(" num collisions >=256 count=%u\n", count);
+
+ avg = (100 * total_count) / PRIMARY_HASH_TBL_SIZE;
+ ODP_DBG(" avg collisions=%02u.%02u total=%u\n\n",
+ avg / 100, avg % 100, total_count);
+
+ ODP_DBG(" num of first level secondary hash tbls=%u "
+ "second level tbls=%u\n",
+ name_hash_tbl.num_secondary_tbls[0],
+ name_hash_tbl.num_secondary_tbls[1]);
+
+#ifdef SECONDARY_HASH_HISTO_PRINT
+ if (name_hash_tbl.num_secondary_tbls[0] != 0)
+ secondary_hash_histo_print();
+#endif
+}
+
+void _odp_int_name_tbl_init(void)
+{
+ name_tbl_t *new_name_tbl;
+
+ memset(&name_hash_tbl, 0, sizeof(name_hash_tbl));
+ odp_ticketlock_init(&name_table_lock);
+
+ memset(&name_tbls, 0, sizeof(name_tbls));
+ new_name_tbl = name_tbl_alloc(0, INITIAL_NAME_TBL_SIZE);
+ name_tbl_free_list_add(new_name_tbl, INITIAL_NAME_TBL_SIZE);
+
+ name_tbls.tbls[0] = new_name_tbl;
+ name_tbls.avail_space_bit_mask |= 1;
+ name_tbls.num_name_tbls = 1;
+ name_tbls_initialized = 1;
+}
new file mode 100644
@@ -0,0 +1,379 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <odp.h>
+#include <odp_pkt_queue_internal.h>
+#include <odp_debug_internal.h>
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+#define INVALID_PKT 0
+
+typedef struct /* Must be exactly 64 bytes long AND cacheline aligned! */ {
+ uint32_t next_queue_blk_idx;
+ uint32_t tail_queue_blk_idx;
+ odp_packet_t pkts[7];
+} ODP_ALIGNED_CACHE queue_blk_t;
+
+typedef struct {
+ queue_blk_t blks[0];
+} ODP_ALIGNED_CACHE queue_blks_t;
+
+/* The queue_num_tbl is used to map from a queue_num to a queue_num_desc.
+ * This indirection is based on the assumption that usually only a small
+ * fraction of the max_num_queues will have more than one pkt associated
+ * with them at any given time. This way the active queue_desc's can be
+ * dynamically allocated and freed according to the actual usage pattern.
+ */
+typedef struct {
+ uint32_t queue_num_to_blk_idx[0];
+} queue_num_tbl_t;
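+
+/* As an illustrative sketch of what _odp_pkt_queue_append() and
+ * _odp_pkt_queue_remove() do below, the lookup path from a queue_num to
+ * its pkts is:
+ *
+ *   first_blk_idx = pool->queue_num_tbl[queue_num];
+ *   first_blk     = blk_idx_to_queue_blk(pool, first_blk_idx);
+ *
+ * where a first_blk_idx of 0 means that this queue currently has no
+ * queue_blk (and hence no pkts) allocated to it.
+ */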
+
+typedef struct {
+ uint32_t num_blks;
+ uint32_t next_blk_idx; /* blk_idx of queue_blks not yet added. */
+ queue_blks_t *queue_blks;
+} queue_region_desc_t;
+
+typedef struct {
+ uint64_t total_pkt_appends;
+ uint64_t total_pkt_removes;
+ uint64_t total_bad_removes;
+ uint32_t free_list_size;
+ uint32_t min_free_list_size;
+ uint32_t peak_free_list_size;
+ uint32_t free_list_head_idx;
+ uint32_t max_queue_num;
+ uint32_t max_queued_pkts;
+ uint32_t next_queue_num;
+ queue_region_desc_t queue_region_descs[16];
+ uint32_t *queue_num_tbl;
+ uint8_t current_region;
+ uint8_t all_regions_used;
+} queue_pool_t;
+
+static queue_blk_t *blk_idx_to_queue_blk(queue_pool_t *queue_pool,
+ uint32_t queue_blk_idx)
+{
+ queue_region_desc_t *queue_region_desc;
+ uint32_t which_region, blk_tbl_idx;
+
+ which_region = queue_blk_idx >> 28;
+ blk_tbl_idx = queue_blk_idx & ((1 << 28) - 1);
+ queue_region_desc = &queue_pool->queue_region_descs[which_region];
+ return &queue_region_desc->queue_blks->blks[blk_tbl_idx];
+}
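+
+/* A queue_blk_idx thus encodes both a region and an offset. As an
+ * illustrative example, queue_blk_idx 0x30000005 selects region 3 (the
+ * top 4 bits) and blk_tbl_idx 5 (the low 28 bits) within that region's
+ * queue_blks array.
+ */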
+
+static int pkt_queue_free_list_add(queue_pool_t *pool,
+ uint32_t num_queue_blks)
+{
+ queue_region_desc_t *region_desc;
+ queue_blks_t *queue_blks;
+ queue_blk_t *queue_blk;
+ uint32_t which_region, blks_added, num_blks, start_idx;
+ uint32_t malloc_len, blks_to_add, cnt;
+
+ which_region = pool->current_region;
+ blks_added = 0;
+ while ((blks_added < num_queue_blks) && (pool->all_regions_used == 0)) {
+ region_desc = &pool->queue_region_descs[which_region];
+ start_idx = region_desc->next_blk_idx;
+ num_blks = region_desc->num_blks;
+ queue_blks = region_desc->queue_blks;
+ if (!queue_blks) {
+ malloc_len = num_blks * sizeof(queue_blk_t);
+			queue_blks = malloc(malloc_len);
+			if (!queue_blks)
+				return blks_added;
+
+			memset(queue_blks, 0, malloc_len);
+ region_desc->queue_blks = queue_blks;
+ }
+
+		/* Now add as many queue_blks to the free list as requested,
+		 * up to the number still unallocated in this region.
+		 */
+ blks_to_add = MIN(num_blks - start_idx, num_queue_blks);
+ queue_blk = &queue_blks->blks[start_idx];
+ for (cnt = 1; cnt <= blks_to_add; cnt++) {
+ queue_blk->next_queue_blk_idx = start_idx + cnt;
+ queue_blk++;
+ }
+
+ blks_added += blks_to_add;
+ pool->free_list_size += blks_to_add;
+ region_desc->next_blk_idx += blks_to_add;
+ if (blks_to_add == (num_blks - start_idx)) {
+ /* Advance to the next region */
+ pool->current_region++;
+ if (16 <= pool->current_region) {
+ pool->all_regions_used = 1;
+ return blks_added;
+ }
+
+ which_region = pool->current_region;
+ }
+ }
+
+ return blks_added;
+}
+
+static queue_blk_t *queue_blk_alloc(queue_pool_t *pool,
+ uint32_t *queue_blk_idx)
+{
+ queue_blk_t *head_queue_blk;
+ uint32_t head_queue_blk_idx;
+ int rc;
+
+ if (pool->free_list_size <= 1) {
+ /* Replenish the queue_blk_t free list. */
+ pool->min_free_list_size = pool->free_list_size;
+ rc = pkt_queue_free_list_add(pool, 64);
+ if (rc <= 0)
+ return NULL;
+ }
+
+ head_queue_blk_idx = pool->free_list_head_idx;
+ head_queue_blk = blk_idx_to_queue_blk(pool, head_queue_blk_idx);
+ pool->free_list_size--;
+ pool->free_list_head_idx = head_queue_blk->next_queue_blk_idx;
+ *queue_blk_idx = head_queue_blk_idx;
+ if (pool->free_list_size < pool->min_free_list_size)
+ pool->min_free_list_size = pool->free_list_size;
+
+ memset(head_queue_blk, 0, sizeof(queue_blk_t));
+ return head_queue_blk;
+}
+
+static void queue_blk_free(queue_pool_t *pool, queue_blk_t *queue_blk,
+ uint32_t queue_blk_idx)
+{
+ if ((!queue_blk) || (queue_blk_idx == 0))
+ return;
+
+ queue_blk->next_queue_blk_idx = pool->free_list_head_idx;
+ pool->free_list_head_idx = queue_blk_idx;
+ pool->free_list_size++;
+ if (pool->peak_free_list_size < pool->free_list_size)
+ pool->peak_free_list_size = pool->free_list_size;
+}
+
+static void queue_region_desc_init(queue_pool_t *pool, uint32_t which_region,
+ uint32_t num_blks)
+{
+ queue_region_desc_t *queue_region_desc;
+
+ queue_region_desc = &pool->queue_region_descs[which_region];
+ queue_region_desc->num_blks = num_blks;
+}
+
+_odp_int_queue_pool_t _odp_queue_pool_create(uint32_t max_num_queues,
+ uint32_t max_queued_pkts)
+{
+ queue_pool_t *pool;
+ uint32_t idx, initial_free_list_size, malloc_len, first_queue_blk_idx;
+ int rc;
+
+	pool = malloc(sizeof(queue_pool_t));
+	if (!pool)
+		return _ODP_INT_QUEUE_POOL_INVALID;
+
+	memset(pool, 0, sizeof(queue_pool_t));
+
+ /* Initialize the queue_blk_tbl_sizes array based upon the
+ * max_queued_pkts.
+ */
+ max_queued_pkts = MAX(max_queued_pkts, 64 * 1024);
+ queue_region_desc_init(pool, 0, max_queued_pkts / 4);
+ queue_region_desc_init(pool, 1, max_queued_pkts / 64);
+ queue_region_desc_init(pool, 2, max_queued_pkts / 64);
+ queue_region_desc_init(pool, 3, max_queued_pkts / 64);
+ queue_region_desc_init(pool, 4, max_queued_pkts / 64);
+ for (idx = 5; idx < 16; idx++)
+ queue_region_desc_init(pool, idx, max_queued_pkts / 16);
+
+	/* Now allocate the first set of queue_blks and add them to the free
+	 * list.
+	 */
+ initial_free_list_size = MIN(64 * 1024, max_queued_pkts / 4);
+ rc = pkt_queue_free_list_add(pool, initial_free_list_size);
+ if (rc < 0)
+ return _ODP_INT_QUEUE_POOL_INVALID;
+
+ /* Discard the first queue blk with idx 0 */
+ queue_blk_alloc(pool, &first_queue_blk_idx);
+
+ pool->max_queue_num = max_num_queues;
+ pool->max_queued_pkts = max_queued_pkts;
+ pool->next_queue_num = 1;
+
+ malloc_len = max_num_queues * sizeof(uint32_t);
+ pool->queue_num_tbl = malloc(malloc_len);
+ memset(pool->queue_num_tbl, 0, malloc_len);
+
+ pool->min_free_list_size = pool->free_list_size;
+ pool->peak_free_list_size = pool->free_list_size;
+ return (_odp_int_queue_pool_t)pool;
+}
+
+_odp_int_pkt_queue_t _odp_pkt_queue_create(_odp_int_queue_pool_t queue_pool)
+{
+ queue_pool_t *pool;
+ uint32_t queue_num;
+
+ pool = (queue_pool_t *)queue_pool;
+ queue_num = pool->next_queue_num++;
+ if (pool->max_queue_num < queue_num)
+ return _ODP_INT_PKT_QUEUE_INVALID;
+
+ return (_odp_int_pkt_queue_t)queue_num;
+}
+
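+/* A minimal usage sketch of this API (assuming, per the header file
+ * comments, that append/remove for a given pool are called from a single
+ * TM thread):
+ *
+ *   _odp_int_queue_pool_t pool = _odp_queue_pool_create(1024, 64 * 1024);
+ *   _odp_int_pkt_queue_t  pq   = _odp_pkt_queue_create(pool);
+ *   _odp_pkt_queue_append(pool, pq, pkt);
+ *   rc = _odp_pkt_queue_remove(pool, pq, &pkt);
+ *
+ * where pkt is an odp_packet_t and _odp_pkt_queue_remove() returns 1 when
+ * a pkt was removed, 0 when the queue is empty and < 0 on error.
+ */
+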
+int _odp_pkt_queue_append(_odp_int_queue_pool_t queue_pool,
+ _odp_int_pkt_queue_t pkt_queue, odp_packet_t pkt)
+{
+ queue_pool_t *pool;
+ queue_blk_t *first_blk, *tail_blk, *new_tail_blk;
+ uint32_t queue_num, first_blk_idx, tail_blk_idx, new_tail_blk_idx;
+ uint32_t idx;
+
+ pool = (queue_pool_t *)queue_pool;
+ queue_num = (uint32_t)pkt_queue;
+ if ((queue_num == 0) || (pool->max_queue_num < queue_num))
+ return -2;
+
+ if (pkt == INVALID_PKT)
+ return -3;
+
+ pool->total_pkt_appends++;
+ first_blk_idx = pool->queue_num_tbl[queue_num];
+ if (first_blk_idx == 0) {
+ first_blk = queue_blk_alloc(pool, &first_blk_idx);
+ if (!first_blk)
+ return -1;
+
+ pool->queue_num_tbl[queue_num] = first_blk_idx;
+ memset(first_blk, 0, sizeof(queue_blk_t));
+ first_blk->pkts[0] = pkt;
+ return 0;
+ }
+
+ first_blk = blk_idx_to_queue_blk(pool, first_blk_idx);
+ tail_blk_idx = first_blk->tail_queue_blk_idx;
+ if (tail_blk_idx == 0)
+ tail_blk = first_blk;
+ else
+ tail_blk = blk_idx_to_queue_blk(pool, tail_blk_idx);
+
+ /* Find first empty slot and insert pkt there. */
+ for (idx = 0; idx < 7; idx++) {
+ if (tail_blk->pkts[idx] == INVALID_PKT) {
+ tail_blk->pkts[idx] = pkt;
+ return 0;
+ }
+ }
+
+	/* If we reach here, the tail_blk was full, so we need to allocate a
+	 * new one and link it in.
+	 */
+ new_tail_blk = queue_blk_alloc(pool, &new_tail_blk_idx);
+ if (!new_tail_blk)
+ return -1;
+
+ memset(new_tail_blk, 0, sizeof(queue_blk_t));
+ new_tail_blk->pkts[0] = pkt;
+ tail_blk->next_queue_blk_idx = new_tail_blk_idx;
+ first_blk->tail_queue_blk_idx = new_tail_blk_idx;
+ return 0;
+}
+
+int _odp_pkt_queue_remove(_odp_int_queue_pool_t queue_pool,
+ _odp_int_pkt_queue_t pkt_queue, odp_packet_t *pkt)
+{
+ queue_pool_t *pool;
+ queue_blk_t *first_blk, *second_blk;
+ uint32_t queue_num, first_blk_idx, next_blk_idx, idx;
+
+ pool = (queue_pool_t *)queue_pool;
+ queue_num = (uint32_t)pkt_queue;
+ if ((queue_num == 0) || (pool->max_queue_num < queue_num))
+ return -2;
+
+ first_blk_idx = pool->queue_num_tbl[queue_num];
+ if (first_blk_idx == 0)
+ return 0; /* pkt queue is empty. */
+
+ /* Now remove the first valid odp_packet_t handle value we find. */
+ first_blk = blk_idx_to_queue_blk(pool, first_blk_idx);
+ for (idx = 0; idx < 7; idx++) {
+ if (first_blk->pkts[idx] != INVALID_PKT) {
+ *pkt = first_blk->pkts[idx];
+ first_blk->pkts[idx] = INVALID_PKT;
+
+ /* Now see if there are any more pkts in this queue. */
+ if ((idx == 6) ||
+ (first_blk->pkts[idx + 1] == INVALID_PKT)) {
+ /* We have reached the end of this queue_blk.
+ * Check to see if there is a following block
+ * or not
+ */
+ next_blk_idx = first_blk->next_queue_blk_idx;
+ if (next_blk_idx != 0) {
+					second_blk = blk_idx_to_queue_blk(
+						pool, next_blk_idx);
+ second_blk->tail_queue_blk_idx =
+ first_blk->tail_queue_blk_idx;
+ }
+
+ pool->queue_num_tbl[queue_num] = next_blk_idx;
+ queue_blk_free(pool, first_blk, first_blk_idx);
+ }
+
+ pool->total_pkt_removes++;
+ return 1;
+ }
+ }
+
+ /* It is an error to not find at least one pkt in the first_blk! */
+ pool->total_bad_removes++;
+ return -1;
+}
+
+void _odp_pkt_queue_stats_print(_odp_int_queue_pool_t queue_pool)
+{
+ queue_pool_t *pool;
+
+ pool = (queue_pool_t *)queue_pool;
+ ODP_DBG("pkt_queue_stats - queue_pool=0x%lX\n", queue_pool);
+ ODP_DBG(" max_queue_num=%u max_queued_pkts=%u next_queue_num=%u\n",
+ pool->max_queue_num, pool->max_queued_pkts,
+ pool->next_queue_num);
+ ODP_DBG(" total pkt appends=%lu total pkt removes=%lu "
+ "bad removes=%lu\n",
+ pool->total_pkt_appends, pool->total_pkt_removes,
+ pool->total_bad_removes);
+ ODP_DBG(" free_list size=%u min size=%u peak size=%u\n",
+ pool->free_list_size, pool->min_free_list_size,
+ pool->peak_free_list_size);
+}
+
+void _odp_queue_pool_destroy(_odp_int_queue_pool_t queue_pool)
+{
+ queue_region_desc_t *queue_region_desc;
+ queue_pool_t *pool;
+ uint32_t idx;
+
+ pool = (queue_pool_t *)queue_pool;
+ for (idx = 0; idx < 16; idx++) {
+ queue_region_desc = &pool->queue_region_descs[idx];
+ if (queue_region_desc->queue_blks)
+ free(queue_region_desc->queue_blks);
+ }
+
+ free(pool->queue_num_tbl);
+ free(pool);
+}
@@ -23,6 +23,8 @@
#include <odp/hints.h>
#include <odp/sync.h>
#include <odp_spin_internal.h>
+#include <odp/traffic_mngr.h>
+#include <odp_traffic_mngr_internal.h>
#ifdef USE_TICKETLOCK
#include <odp/ticketlock.h>
@@ -380,6 +382,63 @@ odp_queue_t odp_queue_lookup(const char *name)
return ODP_QUEUE_INVALID;
}
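+/* queue_tm_reenq() recovers the tm_queue_obj_t that contains this
+ * queue_entry_t via the usual "container of" technique - subtracting the
+ * offset of the embedded tm_qentry field from the queue pointer - and then
+ * hands the pkt back to odp_tm_enq() for traffic mngr processing.
+ */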
+int queue_tm_reenq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr,
+ int sustain ODP_UNUSED)
+{
+ odp_tm_queue_t tm_queue = MAKE_ODP_TM_QUEUE((uint8_t *)queue -
+ offsetof(tm_queue_obj_t,
+ tm_qentry));
+ odp_packet_t pkt = (odp_packet_t)buf_hdr->handle.handle;
+
+ return odp_tm_enq(tm_queue, pkt);
+}
+
+int queue_tm_reenq_multi(queue_entry_t *queue ODP_UNUSED,
+ odp_buffer_hdr_t *buf[] ODP_UNUSED,
+ int num ODP_UNUSED,
+ int sustain ODP_UNUSED)
+{
+ ODP_ABORT("Invalid call to queue_tm_reenq_multi()\n");
+ return 0;
+}
+
+int queue_tm_reorder(queue_entry_t *queue,
+ odp_buffer_hdr_t *buf_hdr)
+{
+ queue_entry_t *origin_qe;
+ uint64_t order;
+
+ get_queue_order(&origin_qe, &order, buf_hdr);
+
+ if (!origin_qe)
+ return 0;
+
+	/* If we came from an ordered queue, check whether we're in order */
+ LOCK(&origin_qe->s.lock);
+ if (odp_unlikely(origin_qe->s.status < QUEUE_STATUS_READY)) {
+ UNLOCK(&origin_qe->s.lock);
+ ODP_ERR("Bad origin queue status\n");
+ return 0;
+ }
+
+ sched_enq_called();
+
+ /* Wait if it's not our turn */
+ if (order > origin_qe->s.order_out) {
+ reorder_enq(queue, order, origin_qe, buf_hdr, 1);
+ UNLOCK(&origin_qe->s.lock);
+ return 1;
+ }
+
+ /* Back to TM to handle enqueue
+ *
+ * Note: Order will be resolved by a subsequent call to
+ * odp_schedule_release_ordered() or odp_schedule() as odp_tm_enq()
+ * calls never resolve order by themselves.
+ */
+ UNLOCK(&origin_qe->s.lock);
+ return 0;
+}
int queue_enq(queue_entry_t *queue, odp_buffer_hdr_t *buf_hdr, int sustain)
{
new file mode 100644
@@ -0,0 +1,271 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <odp.h>
+#include <odp_debug_internal.h>
+#include <odp_sorted_list_internal.h>
+
+typedef struct sorted_list_item_s sorted_list_item_t;
+
+struct sorted_list_item_s {
+ sorted_list_item_t *next_item;
+ uint64_t sort_key;
+ uint64_t user_data;
+};
+
+typedef struct {
+ sorted_list_item_t *first_item;
+ uint32_t sorted_list_len;
+ uint32_t pad;
+} sorted_list_desc_t;
+
+typedef struct {
+ sorted_list_desc_t descs[0];
+} sorted_list_descs_t;
+
+typedef struct {
+ uint64_t total_inserts;
+ uint64_t total_deletes;
+ uint64_t total_removes;
+ uint32_t max_sorted_lists;
+ uint32_t next_list_idx;
+ sorted_list_descs_t *list_descs;
+} sorted_pool_t;
+
+_odp_int_sorted_pool_t _odp_sorted_pool_create(uint32_t max_sorted_lists)
+{
+ sorted_list_descs_t *list_descs;
+ sorted_pool_t *pool;
+ uint32_t malloc_len;
+
+ pool = malloc(sizeof(sorted_pool_t));
+ memset(pool, 0, sizeof(sorted_pool_t));
+ pool->max_sorted_lists = max_sorted_lists;
+ pool->next_list_idx = 1;
+
+ malloc_len = max_sorted_lists * sizeof(sorted_list_desc_t);
+ list_descs = malloc(malloc_len);
+ memset(list_descs, 0, malloc_len);
+ pool->list_descs = list_descs;
+ return (_odp_int_sorted_pool_t)pool;
+}
+
+_odp_int_sorted_list_t
+_odp_sorted_list_create(_odp_int_sorted_pool_t sorted_pool,
+ uint32_t max_entries ODP_UNUSED)
+{
+ sorted_pool_t *pool;
+ uint32_t list_idx;
+
+ pool = (sorted_pool_t *)sorted_pool;
+ list_idx = pool->next_list_idx++;
+ return (_odp_int_sorted_list_t)list_idx;
+}
+
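+/* A brief usage sketch (illustrative keys only): items inserted with sort
+ * keys 30, 10 and 20 come back out in the order 10, 20, 30, since
+ * _odp_sorted_list_remove() always pops the head (lowest sort_key) of the
+ * list:
+ *
+ *   _odp_sorted_list_insert(pool, list, 30, data1);
+ *   _odp_sorted_list_insert(pool, list, 10, data2);
+ *   _odp_sorted_list_insert(pool, list, 20, data3);
+ *   _odp_sorted_list_remove(pool, list, &key, &data);   now key == 10
+ */
+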
+int _odp_sorted_list_insert(_odp_int_sorted_pool_t sorted_pool,
+ _odp_int_sorted_list_t sorted_list,
+ uint64_t sort_key,
+ uint64_t user_data)
+{
+ sorted_list_desc_t *list_desc;
+ sorted_list_item_t *new_list_item, *list_item, *prev_list_item;
+ sorted_pool_t *pool;
+ uint32_t list_idx;
+
+ pool = (sorted_pool_t *)sorted_pool;
+ list_idx = (uint32_t)sorted_list;
+ if ((pool->next_list_idx <= list_idx) ||
+ (pool->max_sorted_lists <= list_idx))
+ return -1;
+
+ list_desc = &pool->list_descs->descs[list_idx];
+ new_list_item = malloc(sizeof(sorted_list_item_t));
+ memset(new_list_item, 0, sizeof(sorted_list_item_t));
+ new_list_item->next_item = NULL;
+ new_list_item->sort_key = sort_key;
+ new_list_item->user_data = user_data;
+
+ /* Now insert the new_list_item according to the sort_key (lowest
+ * value first).
+ */
+ list_item = list_desc->first_item;
+ prev_list_item = NULL;
+ while ((list_item) && (list_item->sort_key <= sort_key)) {
+ prev_list_item = list_item;
+ list_item = list_item->next_item;
+ }
+
+ new_list_item->next_item = list_item;
+ if (!prev_list_item)
+ list_desc->first_item = new_list_item;
+ else
+ prev_list_item->next_item = new_list_item;
+
+ list_desc->sorted_list_len++;
+ pool->total_inserts++;
+ return 0;
+}
+
+int _odp_sorted_list_find(_odp_int_sorted_pool_t sorted_pool,
+ _odp_int_sorted_list_t sorted_list,
+ uint64_t user_data,
+ uint64_t *sort_key_ptr)
+{
+ sorted_list_desc_t *list_desc;
+ sorted_list_item_t *list_item;
+ sorted_pool_t *pool;
+ uint32_t list_idx;
+
+ pool = (sorted_pool_t *)sorted_pool;
+ list_idx = (uint32_t)sorted_list;
+ if ((pool->next_list_idx <= list_idx) ||
+ (pool->max_sorted_lists <= list_idx))
+ return -1;
+
+ list_desc = &pool->list_descs->descs[list_idx];
+
+ /* Now search the sorted linked list - as described by list_desc -
+ * until an entry is found whose user_data field matches the supplied
+ * user_data or the end of the list is reached.
+ */
+ list_item = list_desc->first_item;
+ while (list_item) {
+ if (list_item->user_data == user_data) {
+ if (sort_key_ptr)
+ *sort_key_ptr = list_item->sort_key;
+
+ return 1;
+ }
+
+ list_item = list_item->next_item;
+ }
+
+ return 0;
+}
+
+int _odp_sorted_list_delete(_odp_int_sorted_pool_t sorted_pool,
+ _odp_int_sorted_list_t sorted_list,
+ uint64_t user_data)
+{
+ sorted_list_desc_t *list_desc;
+ sorted_list_item_t *next_list_item, *list_item, *prev_list_item;
+ sorted_pool_t *pool;
+ uint32_t list_idx;
+
+ pool = (sorted_pool_t *)sorted_pool;
+ list_idx = (uint32_t)sorted_list;
+ if ((pool->next_list_idx <= list_idx) ||
+ (pool->max_sorted_lists <= list_idx))
+ return -1;
+
+ list_desc = &pool->list_descs->descs[list_idx];
+
+ /* Now search the sorted linked list - as described by list_desc -
+ * until an entry is found whose user_data field matches the supplied
+ * user_data or the end of the list is reached.
+ */
+ list_item = list_desc->first_item;
+ prev_list_item = NULL;
+ while (list_item) {
+ next_list_item = list_item->next_item;
+
+ if (list_item->user_data == user_data) {
+ if (!prev_list_item)
+ list_desc->first_item = next_list_item;
+ else
+ prev_list_item->next_item = next_list_item;
+
+ list_desc->sorted_list_len--;
+ free(list_item);
+ pool->total_deletes++;
+ return 0;
+ }
+
+ prev_list_item = list_item;
+ list_item = next_list_item;
+ }
+
+ return -1;
+}
+
+int _odp_sorted_list_remove(_odp_int_sorted_pool_t sorted_pool,
+ _odp_int_sorted_list_t sorted_list,
+ uint64_t *sort_key_ptr,
+ uint64_t *user_data_ptr)
+{
+ sorted_list_desc_t *list_desc;
+ sorted_list_item_t *list_item;
+ sorted_pool_t *pool;
+ uint32_t list_idx;
+
+ pool = (sorted_pool_t *)sorted_pool;
+ list_idx = (uint32_t)sorted_list;
+ if ((pool->next_list_idx <= list_idx) ||
+ (pool->max_sorted_lists <= list_idx))
+ return -1;
+
+ list_desc = &pool->list_descs->descs[list_idx];
+ if ((list_desc->sorted_list_len == 0) ||
+ (!list_desc->first_item))
+ return -1;
+
+ list_item = list_desc->first_item;
+ list_desc->first_item = list_item->next_item;
+ list_desc->sorted_list_len--;
+
+ if (sort_key_ptr)
+ *sort_key_ptr = list_item->sort_key;
+
+ if (user_data_ptr)
+ *user_data_ptr = list_item->user_data;
+
+ free(list_item);
+ pool->total_removes++;
+ return 1;
+}
+
+void _odp_sorted_list_stats_print(_odp_int_sorted_pool_t sorted_pool)
+{
+ sorted_pool_t *pool;
+
+ pool = (sorted_pool_t *)sorted_pool;
+ ODP_DBG("sorted_pool=0x%lX\n", sorted_pool);
+ ODP_DBG(" max_sorted_lists=%u next_list_idx=%u\n",
+ pool->max_sorted_lists, pool->next_list_idx);
+ ODP_DBG(" total_inserts=%lu total_deletes=%lu total_removes=%lu\n",
+ pool->total_inserts, pool->total_deletes, pool->total_removes);
+}
+
+void _odp_sorted_pool_destroy(_odp_int_sorted_pool_t sorted_pool)
+{
+ sorted_list_descs_t *list_descs;
+ sorted_list_desc_t *list_desc;
+ sorted_list_item_t *list_item, *next_list_item;
+ sorted_pool_t *pool;
+ uint32_t list_idx;
+
+ pool = (sorted_pool_t *)sorted_pool;
+ list_descs = pool->list_descs;
+
+ for (list_idx = 0; list_idx < pool->next_list_idx; list_idx++) {
+ list_desc = &list_descs->descs[list_idx];
+ list_item = list_desc->first_item;
+ while (list_item) {
+ next_list_item = list_item->next_item;
+ free(list_item);
+ list_item = next_list_item;
+ }
+ }
+
+ free(list_descs);
+ free(pool);
+}
new file mode 100644
@@ -0,0 +1,907 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <odp.h>
+#include <odp_timer_wheel_internal.h>
+#include <odp_debug_internal.h>
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+/* The following constants can be changed either at compile time or run time
+ * as long as the constraints described below are met. Note that REV stands
+ * for REVOLUTION, i.e. one complete sweep through a specific timer wheel:
+ */
+#define CYCLES_TO_TICKS_SHIFT 8
+#define CYCLES_PER_TICK (1 << CYCLES_TO_TICKS_SHIFT)
+#define CURRENT_TIMER_SLOTS 8192
+#define LEVEL1_TIMER_SLOTS 2048
+#define LEVEL2_TIMER_SLOTS 1024
+#define LEVEL3_TIMER_SLOTS 1024
+
+/* The following values are derived and should generally not be changed. The
+ * basic idea is that the ticks per current timer wheel slot should always be
+ * 1, since that defines what a tick is - namely the time period of a single
+ * current timer wheel slot. Then for all levels (including current), the
+ * ticks per revolution is clearly equal to the ticks per slot times the
+ * number of slots. Finally the ticks per slot for levels 1 through 3 must
+ * be the ticks per revolution of the previous level divided by a small
+ * power of 2 - e.g. 2, 4, 8, 16 - (but not 1). The idea is that when an
+ * upper level slot is processed, its entries will be spread across this
+ * fraction of the lower level wheel, and we want to make sure that none of
+ * these entries is near the lower level's current index. Currently we use
+ * 4 for all levels.
+ */
+#define CURRENT_GEAR_RATIO 4
+#define LEVEL1_GEAR_RATIO 4
+#define LEVEL2_GEAR_RATIO 4
+#define TICKS_PER_CURRENT_SLOT 1
+#define TICKS_PER_CURRENT_REV (CURRENT_TIMER_SLOTS * TICKS_PER_CURRENT_SLOT)
+#define TICKS_PER_LEVEL1_SLOT (TICKS_PER_CURRENT_REV / CURRENT_GEAR_RATIO)
+#define TICKS_PER_LEVEL1_REV (LEVEL1_TIMER_SLOTS * TICKS_PER_LEVEL1_SLOT)
+#define TICKS_PER_LEVEL2_SLOT (TICKS_PER_LEVEL1_REV / LEVEL1_GEAR_RATIO)
+#define TICKS_PER_LEVEL2_REV (LEVEL2_TIMER_SLOTS * TICKS_PER_LEVEL2_SLOT)
+#define TICKS_PER_LEVEL3_SLOT (TICKS_PER_LEVEL2_REV / LEVEL2_GEAR_RATIO)
+#define TICKS_PER_LEVEL3_REV (LEVEL3_TIMER_SLOTS * TICKS_PER_LEVEL3_SLOT)
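+
+/* With the default values above, one tick is 2^8 = 256 cycles, and the
+ * wheels cover approximately: 8K ticks per current wheel rev, level 1
+ * slots of 2K ticks (4M ticks per rev), level 2 slots of 1M ticks (1G
+ * ticks per rev) and level 3 slots of 256M ticks (256G ticks per rev).
+ */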
+
+#define EXPIRED_RING_ENTRIES 64
+
+typedef struct timer_blk_s timer_blk_t;
+
+typedef struct { /* Should be 120 bytes long */
+ uint64_t user_data[15];
+} current_blk_t;
+
+typedef struct { /* Should be 120 bytes long */
+ uint64_t user_data[10];
+ uint32_t wakeup32[10];
+} general_blk_t;
+
+/* Must be exactly 128 bytes long AND cacheline aligned! */
+struct timer_blk_s {
+ timer_blk_t *next_timer_blk;
+ union {
+ current_blk_t current_blk;
+ general_blk_t general_blk;
+ };
+};
+
+/* Each current_timer_slot is 8 bytes long. */
+typedef union {
+ /* The selection of which union element to use is based on the LSB
+ * bit, under the required assumption that both of these pointers are
+ * at least 8-byte aligned. If the entire 8 bytes is zero, then this
+ * entry is empty, otherwise a LSB bit of 0 indicates a timer_blk_list
+ * and a LSB bit of 1 indicates a single entry with a 64-bit user_data
+ * word.
+ */
+ uint64_t user_data;
+ timer_blk_t *timer_blk_list;
+} current_timer_slot_t;
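+
+/* As a sketch, a reader of a current_timer_slot_t classifies it as:
+ *
+ *   if (slot->user_data == 0)        the slot is empty
+ *   else if (slot->user_data & 0x1)  a single entry whose user_data has
+ *                                    its LSB set
+ *   else                             slot->timer_blk_list points to a
+ *                                    list of timer_blk's
+ */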
+
+typedef struct {
+ current_timer_slot_t slots[0];
+} current_wheel_t;
+
+typedef struct {
+ uint32_t count;
+ uint32_t peak_count;
+ uint32_t expired_ring_full_cnt;
+ uint32_t head_idx;
+ uint32_t tail_idx;
+ uint32_t max_idx;
+ current_timer_slot_t entries[0];
+} expired_ring_t;
+
+typedef struct {
+ uint32_t kind; /* 1 for single_entry_t */
+ uint32_t wakeup32;
+ uint64_t user_data;
+} single_entry_t;
+
+typedef struct {
+ uint32_t kind; /* 2 for list_entry_t */
+ uint32_t num_blks;
+ timer_blk_t *timer_blk_list;
+} list_entry_t;
+
+typedef union { /* Each general_timer_slot is 16 bytes long. */
+ /* The selection of which union element to use is based on the first 4
+ * byte field which is always the "kind". If kind is 0, then this
+ * slot is empty, else if the kind is 1 then it is a single_entry and
+ * if the kind is 2 then it is a list_entry.
+ */
+ single_entry_t single_entry;
+ list_entry_t list_entry;
+} general_timer_slot_t;
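+
+/* Dispatch on a general_timer_slot_t is then (as a sketch):
+ *
+ *   switch (slot->single_entry.kind) {
+ *   case 0: the slot is empty
+ *   case 1: use slot->single_entry directly
+ *   case 2: walk slot->list_entry.timer_blk_list
+ *   }
+ */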
+
+typedef struct {
+ general_timer_slot_t slots[0];
+} general_wheel_t;
+
+typedef struct {
+ /* Note that rev stands for revolution - one complete sweep through
+ * this timer wheel.
+ */
+ uint16_t num_slots; /* Must be a power of 2. */
+ uint16_t slot_idx;
+ uint16_t gear_mask; /* = (num_slots / gear_ratio) - 1 */
+ uint16_t gear_ratio; /* num of next level wheel updates per this
+ * rev
+ */
+ uint32_t ticks_shift;
+ uint32_t ticks_per_slot; /* Must be a power of 2. */
+ uint64_t ticks_per_rev; /* = num_slots * ticks_per_slot */
+ uint64_t max_ticks; /* = current_ticks + ticks_per_rev */
+} wheel_desc_t;
+
+typedef struct {
+ uint32_t last_slot_served;
+ uint32_t free_list_size;
+ uint32_t min_free_list_size;
+ uint32_t peak_free_list_size;
+ timer_blk_t *free_list_head;
+ uint64_t total_timer_inserts;
+ uint64_t insert_fail_cnt;
+ uint64_t total_timer_removes;
+ uint64_t total_promote_cnt;
+ uint64_t promote_fail_cnt;
+ uint64_t current_ticks;
+ wheel_desc_t wheel_descs[4];
+ current_wheel_t *current_wheel;
+ general_wheel_t *general_wheels[3];
+ expired_ring_t *expired_timers_ring;
+} timer_wheels_t;
+
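+/* Returns floor(log2(value)) for value >= 1 - e.g. 1 for values 2 and 3,
+ * and 10 for value 1024.
+ */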
+static uint32_t _odp_internal_ilog2(uint64_t value)
+{
+ uint64_t pwr_of_2;
+ uint32_t bit_shift;
+
+ for (bit_shift = 0; bit_shift < 64; bit_shift++) {
+		pwr_of_2 = 1ULL << bit_shift;
+ if (value == pwr_of_2)
+ return bit_shift;
+ else if (value < pwr_of_2)
+ return bit_shift - 1;
+ }
+
+ return 64;
+}
+
+static inline uint32_t timer_wheel_slot_idx_inc(uint32_t slot_idx,
+ uint32_t num_slots)
+{
+ slot_idx++;
+ if (slot_idx < num_slots)
+ return slot_idx;
+ else
+ return 0;
+}
+
+static current_wheel_t *current_wheel_alloc(timer_wheels_t *timer_wheels,
+ uint32_t desc_idx)
+{
+ current_wheel_t *current_wheel;
+ wheel_desc_t *wheel_desc;
+ uint64_t ticks_per_slot, current_ticks, adjusted_ticks;
+ uint64_t ticks_per_rev;
+ uint32_t num_slots, malloc_len;
+
+ wheel_desc = &timer_wheels->wheel_descs[desc_idx];
+ num_slots = wheel_desc->num_slots;
+ ticks_per_slot = 1;
+ malloc_len = num_slots * sizeof(current_timer_slot_t);
+ current_wheel = malloc(malloc_len);
+ memset(current_wheel, 0, malloc_len);
+
+ current_ticks = timer_wheels->current_ticks;
+ adjusted_ticks = current_ticks & (ticks_per_slot - 1);
+ ticks_per_rev = num_slots;
+
+ wheel_desc->ticks_per_rev = ticks_per_rev;
+ wheel_desc->ticks_shift = 0;
+ wheel_desc->max_ticks = adjusted_ticks + ticks_per_rev;
+ wheel_desc->gear_mask = (num_slots / wheel_desc->gear_ratio) - 1;
+ return current_wheel;
+}
+
+static general_wheel_t *general_wheel_alloc(timer_wheels_t *timer_wheels,
+ uint32_t desc_idx)
+{
+ general_wheel_t *general_wheel;
+ wheel_desc_t *wheel_desc;
+ uint64_t ticks_per_slot, current_ticks, adjusted_ticks;
+ uint64_t ticks_per_rev;
+ uint32_t num_slots, malloc_len;
+
+ wheel_desc = &timer_wheels->wheel_descs[desc_idx];
+ num_slots = wheel_desc->num_slots;
+ ticks_per_slot = (uint64_t)wheel_desc->ticks_per_slot;
+ malloc_len = num_slots * sizeof(general_timer_slot_t);
+ general_wheel = malloc(malloc_len);
+ memset(general_wheel, 0, malloc_len);
+
+ current_ticks = timer_wheels->current_ticks;
+ adjusted_ticks = current_ticks & (ticks_per_slot - 1);
+ ticks_per_rev = num_slots * ticks_per_slot;
+
+ wheel_desc->ticks_per_rev = ticks_per_rev;
+ wheel_desc->ticks_shift = _odp_internal_ilog2(ticks_per_slot);
+ wheel_desc->max_ticks = adjusted_ticks + ticks_per_rev;
+ wheel_desc->gear_mask = (num_slots / wheel_desc->gear_ratio) - 1;
+ return general_wheel;
+}
+
+static int expired_ring_create(timer_wheels_t *timer_wheels,
+ uint32_t num_entries)
+{
+ expired_ring_t *expired_ring;
+ uint32_t malloc_len;
+
+ malloc_len = sizeof(expired_ring_t) +
+ (sizeof(current_timer_slot_t) * num_entries);
+ expired_ring = malloc(malloc_len);
+ memset(expired_ring, 0, malloc_len);
+ expired_ring->max_idx = num_entries - 1;
+
+ timer_wheels->expired_timers_ring = expired_ring;
+ return 0;
+}
+
+static int free_list_add(timer_wheels_t *timer_wheels,
+ uint32_t num_timer_blks)
+{
+ timer_blk_t *block_of_timer_blks, *timer_blk, *next_timer_blk;
+ uint32_t malloc_len, idx;
+
+ malloc_len = num_timer_blks * sizeof(timer_blk_t);
+ /* Should be cacheline aligned! */
+ block_of_timer_blks = malloc(malloc_len);
+ if (!block_of_timer_blks)
+ return -1;
+
+ memset(block_of_timer_blks, 0, malloc_len);
+
+ /* Link these timer_blks together. */
+ timer_blk = block_of_timer_blks;
+ next_timer_blk = timer_blk + 1;
+ for (idx = 0; idx < num_timer_blks; idx++) {
+ timer_blk->next_timer_blk = next_timer_blk;
+ timer_blk = next_timer_blk;
+ next_timer_blk++;
+ }
+
+ timer_blk->next_timer_blk = timer_wheels->free_list_head;
+ timer_wheels->free_list_size += num_timer_blks;
+ timer_wheels->free_list_head = block_of_timer_blks;
+ if (timer_wheels->peak_free_list_size < timer_wheels->free_list_size)
+ timer_wheels->peak_free_list_size =
+ timer_wheels->free_list_size;
+ return 0;
+}
+
+static timer_blk_t *timer_blk_alloc(timer_wheels_t *timer_wheels)
+{
+ timer_blk_t *head_timer_blk;
+ int rc;
+
+ if (timer_wheels->free_list_size <= 1) {
+ /* Replenish the timer_blk_t free list. */
+ timer_wheels->min_free_list_size = timer_wheels->free_list_size;
+ rc = free_list_add(timer_wheels, 1024);
+ if (rc < 0)
+ return NULL;
+ }
+
+ head_timer_blk = timer_wheels->free_list_head;
+ timer_wheels->free_list_size--;
+ timer_wheels->free_list_head = head_timer_blk->next_timer_blk;
+ if (timer_wheels->free_list_size < timer_wheels->min_free_list_size)
+ timer_wheels->min_free_list_size = timer_wheels->free_list_size;
+
+ memset(head_timer_blk, 0, sizeof(timer_blk_t));
+ return head_timer_blk;
+}
+
+static void timer_blk_free(timer_wheels_t *timer_wheels,
+ timer_blk_t *timer_blk)
+{
+ timer_blk->next_timer_blk = timer_wheels->free_list_head;
+ timer_wheels->free_list_head = timer_blk;
+ timer_wheels->free_list_size++;
+ if (timer_wheels->peak_free_list_size < timer_wheels->free_list_size)
+ timer_wheels->peak_free_list_size =
+ timer_wheels->free_list_size;
+}
+
+static int current_wheel_insert(timer_wheels_t *timer_wheels,
+ uint32_t wakeup_ticks,
+ uint64_t user_data)
+{
+ current_timer_slot_t *timer_slot;
+ current_wheel_t *current_wheel;
+ wheel_desc_t *wheel_desc;
+ timer_blk_t *new_timer_blk, *timer_blk;
+ uint64_t num_slots;
+ uint32_t wheel_idx, idx;
+
+ /* To reach here it must be the case that (wakeup_ticks -
+ * current_ticks) is < current_wheel->num_slots.
+ */
+ wheel_desc = &timer_wheels->wheel_descs[0];
+ current_wheel = timer_wheels->current_wheel;
+ num_slots = (uint64_t)wheel_desc->num_slots;
+ wheel_idx = (uint32_t)(wakeup_ticks & (num_slots - 1));
+	timer_slot = &current_wheel->slots[wheel_idx];
+
+ /* Three cases: (a) the timer_slot is currently empty, (b) the
+ * timer_slot contains a single user_data word directly inside it or
+ * (c) the timer_slot contains a pointer to a linked list of
+ * timer_blk's.
+ */
+ if (timer_slot->user_data == 0) { /* case (a) */
+ timer_slot->user_data = user_data | 0x01;
+ } else if ((timer_slot->user_data & 0x3) == 0x01) { /* case (b) */
+ /* Need to promote this pre-existing single user_data plus the
+ * new user_data into a timer_blk with two entries.
+ */
+ new_timer_blk = timer_blk_alloc(timer_wheels);
+ if (!new_timer_blk)
+ return -1;
+
+ new_timer_blk->next_timer_blk = NULL;
+ new_timer_blk->current_blk.user_data[0] = timer_slot->user_data;
+ new_timer_blk->current_blk.user_data[1] = user_data;
+ timer_slot->timer_blk_list = new_timer_blk;
+ } else { /* case (c) */
+ /* First try to find an empty slot in the current timer_blk. */
+ timer_blk = timer_slot->timer_blk_list;
+ for (idx = 0; idx < 15; idx++) {
+ if (timer_blk->current_blk.user_data[idx] == 0) {
+ timer_blk->current_blk.user_data[idx] =
+ user_data;
+ return 0;
+ }
+ }
+
+ /* current timer_blk is full, so we need to allocate a new one
+ * and link it in.
+ */
+ new_timer_blk = timer_blk_alloc(timer_wheels);
+ if (!new_timer_blk)
+ return -1;
+
+ new_timer_blk->next_timer_blk = timer_blk;
+ new_timer_blk->current_blk.user_data[0] = user_data;
+ timer_slot->timer_blk_list = new_timer_blk;
+ }
+
+ return 0;
+}
+
+static int general_wheel_insert(timer_wheels_t *timer_wheels,
+ uint32_t desc_idx,
+ uint32_t wakeup_ticks,
+ uint64_t user_data)
+{
+ general_timer_slot_t *timer_slot;
+ general_wheel_t *general_wheel;
+ wheel_desc_t *wheel_desc;
+ timer_blk_t *new_timer_blk, *timer_blk;
+ uint64_t num_slots, old_user_data;
+ uint32_t wheel_idx, wakeup32, kind, old_wakeup32, idx;
+
+ wheel_desc = &timer_wheels->wheel_descs[desc_idx];
+ general_wheel = timer_wheels->general_wheels[desc_idx - 1];
+ num_slots = (uint64_t)wheel_desc->num_slots;
+ wakeup32 = (uint32_t)(wakeup_ticks & 0xFFFFFFFF);
+ wakeup_ticks = wakeup_ticks >> wheel_desc->ticks_shift;
+ wheel_idx = (uint32_t)(wakeup_ticks & (num_slots - 1));
+ timer_slot = &general_wheel->slots[wheel_idx];
+ kind = timer_slot->single_entry.kind;
+
+ /* Three cases: (a) the timer_slot is currently empty, (b) the
+ * timer_slot contains a single user_data word directly inside it or
+ * (c) the timer_slot contains a pointer to a linked list of
+ * timer_blk's.
+ */
+ if (kind == 0) { /* case (a) */
+ timer_slot->single_entry.kind = 1;
+ timer_slot->single_entry.wakeup32 = wakeup32;
+ timer_slot->single_entry.user_data = user_data;
+ } else if (kind == 1) { /* case (b) */
+ /* Need to promote this single entry plus the new user_data
+ * into a timer_blk with two entries.
+ */
+ new_timer_blk = timer_blk_alloc(timer_wheels);
+ if (!new_timer_blk)
+ return -1;
+
+ old_user_data = timer_slot->single_entry.user_data;
+ old_wakeup32 = timer_slot->single_entry.wakeup32;
+
+ new_timer_blk->next_timer_blk = NULL;
+ new_timer_blk->general_blk.user_data[0] = old_user_data;
+ new_timer_blk->general_blk.wakeup32[0] = old_wakeup32;
+ new_timer_blk->general_blk.user_data[1] = user_data;
+ new_timer_blk->general_blk.wakeup32[1] = wakeup32;
+ timer_slot->list_entry.kind = 2;
+ timer_slot->list_entry.num_blks = 1;
+ timer_slot->list_entry.timer_blk_list = new_timer_blk;
+ } else { /* case (c) */
+ /* First try to find an empty slot in the first timer_blk. */
+ timer_blk = timer_slot->list_entry.timer_blk_list;
+ for (idx = 0; idx < 10; idx++) {
+ if (timer_blk->general_blk.user_data[idx] == 0) {
+ timer_blk->general_blk.user_data[idx] =
+ user_data;
+ timer_blk->general_blk.wakeup32[idx] =
+ wakeup32;
+ return 0;
+ }
+ }
+
+ /* The first timer_blk is full, so we need to allocate a new
+ * one and link it in.
+ */
+ new_timer_blk = timer_blk_alloc(timer_wheels);
+ if (!new_timer_blk)
+ return -1;
+
+ new_timer_blk->next_timer_blk = timer_blk;
+ new_timer_blk->general_blk.user_data[0] = user_data;
+ new_timer_blk->general_blk.wakeup32[0] = wakeup32;
+ timer_slot->list_entry.kind = 2;
+ timer_slot->list_entry.num_blks++;
+ timer_slot->list_entry.timer_blk_list = new_timer_blk;
+ }
+
+ return 0;
+}
+
+static int expired_timers_append(timer_wheels_t *timer_wheels,
+ current_timer_slot_t *timer_slot)
+{
+ expired_ring_t *expired_ring;
+ uint32_t tail_idx;
+
+ /* Append either this single entry or the entire timer_blk_list to the
+ * ring of expired_timers.
+ */
+ expired_ring = timer_wheels->expired_timers_ring;
+ if (expired_ring->max_idx <= expired_ring->count)
+ return -1;
+
+ tail_idx = expired_ring->tail_idx;
+ expired_ring->entries[tail_idx] = *timer_slot;
+ tail_idx++;
+ if (expired_ring->max_idx < tail_idx)
+ tail_idx = 0;
+
+ expired_ring->tail_idx = tail_idx;
+ expired_ring->count++;
+ if (expired_ring->peak_count < expired_ring->count)
+ expired_ring->peak_count = expired_ring->count;
+ return 0;
+}
+
+static int timer_blk_list_search(timer_wheels_t *timer_wheels,
+ current_timer_slot_t *head_entry,
+ uint64_t *user_data_ptr)
+{
+ timer_blk_t *timer_blk, *next_timer_blk;
+ uint64_t user_data;
+ uint32_t idx;
+
+ timer_blk = head_entry->timer_blk_list;
+ while (timer_blk) {
+ for (idx = 0; idx < 15; idx++) {
+ user_data = timer_blk->current_blk.user_data[idx];
+ if (user_data != 0) {
+ *user_data_ptr =
+ timer_blk->current_blk.user_data[idx];
+ timer_blk->current_blk.user_data[idx] = 0;
+ return 1;
+ }
+ }
+
+ next_timer_blk = timer_blk->next_timer_blk;
+ timer_blk_free(timer_wheels, timer_blk);
+ timer_blk = next_timer_blk;
+ }
+
+ return 0;
+}
+
+static int expired_timers_remove(timer_wheels_t *timer_wheels,
+ uint64_t *user_data_ptr)
+{
+ current_timer_slot_t *head_entry;
+ expired_ring_t *expired_ring;
+ uint32_t count, head_idx;
+ int rc;
+
+ expired_ring = timer_wheels->expired_timers_ring;
+ for (count = 1; count <= 2; count++) {
+ if (expired_ring->count == 0)
+ return 0;
+
+ head_idx = expired_ring->head_idx;
+ head_entry = &expired_ring->entries[head_idx];
+ if ((head_entry->user_data & 0x3) == 1) {
+ *user_data_ptr = head_entry->user_data;
+ expired_ring->entries[head_idx].user_data = 0;
+ head_idx++;
+ if (expired_ring->max_idx < head_idx)
+ head_idx = 0;
+
+ expired_ring->head_idx = head_idx;
+ expired_ring->count--;
+ return 1;
+ }
+
+ /* Search timer_blk_list for non-empty user_data values. */
+ rc = timer_blk_list_search(timer_wheels, head_entry,
+ user_data_ptr);
+ if (0 < rc)
+ return 1;
+
+ /* Advance to use the next ring entry. */
+ expired_ring->entries[head_idx].user_data = 0;
+ head_idx++;
+ if (expired_ring->max_idx < head_idx)
+ head_idx = 0;
+
+ expired_ring->head_idx = head_idx;
+ expired_ring->count--;
+ }
+
+ return 0;
+}
+
+static int timer_current_wheel_update(timer_wheels_t *timer_wheels,
+ uint32_t elapsed_ticks)
+{
+ current_timer_slot_t *timer_slot;
+ current_wheel_t *current_wheel;
+ wheel_desc_t *wheel_desc;
+ uint64_t max_ticks;
+ uint32_t num_slots, slot_idx, max_cnt, cnt;
+ int ret_code, rc;
+
+ /* Advance current wheel for each elapsed tick, up to a maximum. */
+ wheel_desc = &timer_wheels->wheel_descs[0];
+ slot_idx = wheel_desc->slot_idx;
+ num_slots = wheel_desc->num_slots;
+ max_ticks = wheel_desc->max_ticks;
+ max_cnt = (uint32_t)MIN(elapsed_ticks, 15);
+ current_wheel = timer_wheels->current_wheel;
+ ret_code = 0;
+
+ for (cnt = 1; cnt <= max_cnt; cnt++) {
+ ret_code |= (slot_idx & wheel_desc->gear_mask) == 0;
+		timer_slot = &current_wheel->slots[slot_idx];
+ if (timer_slot->user_data != 0) {
+ rc = expired_timers_append(timer_wheels, timer_slot);
+ if (rc < 0)
+ timer_wheels->
+ expired_timers_ring->
+ expired_ring_full_cnt++;
+
+ timer_slot->user_data = 0;
+ }
+
+ timer_wheels->current_ticks++;
+ max_ticks++;
+ slot_idx = timer_wheel_slot_idx_inc(slot_idx, num_slots);
+ }
+
+ wheel_desc->slot_idx = slot_idx;
+ wheel_desc->max_ticks = max_ticks;
+ return ret_code;
+}
+
+static int _odp_int_timer_wheel_promote(timer_wheels_t *timer_wheels,
+ uint32_t desc_idx,
+ uint64_t wakeup_ticks,
+ uint64_t user_data)
+{
+ if (desc_idx == 0)
+ return current_wheel_insert(timer_wheels,
+ wakeup_ticks, user_data);
+ else
+ return general_wheel_insert(timer_wheels,
+ desc_idx, wakeup_ticks,
+ user_data);
+}
+
+static void timer_wheel_slot_promote(timer_wheels_t *timer_wheels,
+ uint32_t desc_idx,
+ general_timer_slot_t *timer_slot)
+{
+ general_blk_t *general_blk;
+ timer_blk_t *timer_blk, *next_timer_blk;
+ uint64_t user_data, current_ticks,
+ current_ticks_msb, wakeup_ticks;
+ uint32_t idx, wakeup32;
+ int rc;
+
+ current_ticks = timer_wheels->current_ticks;
+ current_ticks_msb = (current_ticks >> 32) << 32;
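+	/* The general wheels store only the low 32 bits of the wakeup time
+	 * (in ticks), so the full 64-bit value is reconstructed by
+	 * prepending the upper 32 bits of current_ticks, adding 2^32
+	 * whenever the result is less than current_ticks (i.e. the low half
+	 * has wrapped).
+	 */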
+ if (timer_slot->single_entry.kind == 1) {
+ user_data = timer_slot->single_entry.user_data;
+ wakeup32 = timer_slot->single_entry.wakeup32;
+ wakeup_ticks = current_ticks_msb | (uint64_t)wakeup32;
+ if (wakeup_ticks < current_ticks)
+ wakeup_ticks += 1ULL << 32;
+
+ rc = _odp_int_timer_wheel_promote(timer_wheels, desc_idx,
+ wakeup_ticks, user_data);
+ if (rc < 0)
+ timer_wheels->promote_fail_cnt++;
+ else
+ timer_wheels->total_promote_cnt++;
+ return;
+ }
+
+ timer_blk = timer_slot->list_entry.timer_blk_list;
+ while (timer_blk) {
+ general_blk = &timer_blk->general_blk;
+ for (idx = 0; idx < 10; idx++) {
+ user_data = general_blk->user_data[idx];
+ if (user_data == 0)
+ break;
+
+ wakeup32 = general_blk->wakeup32[idx];
+ wakeup_ticks = current_ticks_msb | (uint64_t)wakeup32;
+ if (wakeup_ticks < current_ticks)
+ wakeup_ticks += 1ULL << 32;
+
+ rc = _odp_int_timer_wheel_promote(timer_wheels,
+ desc_idx,
+ wakeup_ticks,
+ user_data);
+ if (rc < 0)
+ timer_wheels->promote_fail_cnt++;
+ else
+ timer_wheels->total_promote_cnt++;
+ }
+
+ next_timer_blk = timer_blk->next_timer_blk;
+ timer_blk_free(timer_wheels, timer_blk);
+ timer_blk = next_timer_blk;
+ }
+}
+
+static int timer_general_wheel_update(timer_wheels_t *timer_wheels,
+ uint32_t desc_idx)
+{
+ general_timer_slot_t *timer_slot;
+ general_wheel_t *general_wheel;
+ wheel_desc_t *wheel_desc;
+ uint32_t num_slots, slot_idx;
+ int ret_code;
+
+ wheel_desc = &timer_wheels->wheel_descs[desc_idx];
+ slot_idx = wheel_desc->slot_idx;
+ num_slots = wheel_desc->num_slots;
+ general_wheel = timer_wheels->general_wheels[desc_idx - 1];
+ timer_slot = &general_wheel->slots[slot_idx];
+ ret_code = (slot_idx & wheel_desc->gear_mask) == 0;
+
+ if (timer_slot->single_entry.kind != 0) {
+ timer_wheel_slot_promote(timer_wheels,
+ desc_idx - 1, timer_slot);
+ timer_slot->single_entry.kind = 0;
+ }
+
+ wheel_desc->max_ticks++;
+ wheel_desc->slot_idx = timer_wheel_slot_idx_inc(slot_idx, num_slots);
+ return ret_code;
+}
+
+_odp_timer_wheel_t _odp_timer_wheel_create(uint32_t max_concurrent_timers,
+ uint64_t current_time)
+{
+ timer_wheels_t *timer_wheels;
+ uint64_t current_ticks;
+ int rc;
+
+	timer_wheels = malloc(sizeof(timer_wheels_t));
+	memset(timer_wheels, 0, sizeof(timer_wheels_t));
+ current_ticks = current_time >> CYCLES_TO_TICKS_SHIFT;
+ timer_wheels->current_ticks = current_ticks;
+
+ timer_wheels->wheel_descs[0].num_slots = CURRENT_TIMER_SLOTS;
+ timer_wheels->wheel_descs[1].num_slots = LEVEL1_TIMER_SLOTS;
+ timer_wheels->wheel_descs[2].num_slots = LEVEL2_TIMER_SLOTS;
+ timer_wheels->wheel_descs[3].num_slots = LEVEL3_TIMER_SLOTS;
+
+ timer_wheels->wheel_descs[0].gear_ratio = CURRENT_GEAR_RATIO;
+ timer_wheels->wheel_descs[1].gear_ratio = LEVEL1_GEAR_RATIO;
+ timer_wheels->wheel_descs[2].gear_ratio = LEVEL2_GEAR_RATIO;
+ timer_wheels->wheel_descs[3].gear_ratio = 1;
+
+ timer_wheels->wheel_descs[0].ticks_per_slot = TICKS_PER_CURRENT_SLOT;
+ timer_wheels->wheel_descs[1].ticks_per_slot = TICKS_PER_LEVEL1_SLOT;
+ timer_wheels->wheel_descs[2].ticks_per_slot = TICKS_PER_LEVEL2_SLOT;
+ timer_wheels->wheel_descs[3].ticks_per_slot = TICKS_PER_LEVEL3_SLOT;
+
+ timer_wheels->current_wheel = current_wheel_alloc(timer_wheels, 0);
+ timer_wheels->general_wheels[0] = general_wheel_alloc(timer_wheels, 1);
+ timer_wheels->general_wheels[1] = general_wheel_alloc(timer_wheels, 2);
+ timer_wheels->general_wheels[2] = general_wheel_alloc(timer_wheels, 3);
+
+ rc = expired_ring_create(timer_wheels, EXPIRED_RING_ENTRIES);
+ if (rc < 0)
+ return _ODP_INT_TIMER_WHEEL_INVALID;
+
+ free_list_add(timer_wheels, max_concurrent_timers / 4);
+ timer_wheels->min_free_list_size = timer_wheels->free_list_size;
+ timer_wheels->peak_free_list_size = timer_wheels->free_list_size;
+ return (_odp_timer_wheel_t)timer_wheels;
+}
+
+uint32_t _odp_timer_wheel_curr_time_update(_odp_timer_wheel_t timer_wheel,
+ uint64_t current_time)
+{
+ timer_wheels_t *timer_wheels;
+ uint64_t new_current_ticks, elapsed_ticks;
+ uint32_t desc_idx;
+ int rc;
+
+ timer_wheels = (timer_wheels_t *)timer_wheel;
+ new_current_ticks = current_time >> CYCLES_TO_TICKS_SHIFT;
+ elapsed_ticks = new_current_ticks - timer_wheels->current_ticks;
+ if (elapsed_ticks == 0)
+ return 0;
+
+	/* Advance current wheel for each elapsed tick, up to a maximum. If
+	 * this function returns a value > 0, then the next level's timer
+	 * wheel needs to be updated.
+	 */
+ rc = timer_current_wheel_update(timer_wheels, (uint32_t)elapsed_ticks);
+
+ /* See if we need to do any higher wheel advancing or processing.*/
+ desc_idx = 1;
+ while ((0 < rc) && (desc_idx <= 3))
+ rc = timer_general_wheel_update(timer_wheels, desc_idx++);
+
+ return timer_wheels->expired_timers_ring->count;
+}
+
+int _odp_timer_wheel_insert(_odp_timer_wheel_t timer_wheel,
+ uint64_t wakeup_time,
+ void *user_ptr)
+{
+ timer_wheels_t *timer_wheels;
+ uint64_t user_data, wakeup_ticks;
+ int rc;
+
+ user_data = (uint64_t)user_ptr;
+ if (user_data == 0)
+ return -4; /* user_data cannot be 0! */
+ else if ((user_data & 0x3) != 0)
+ return -5; /* user_data ptr must be at least 4-byte aligned. */
+
+ timer_wheels = (timer_wheels_t *)timer_wheel;
+ wakeup_ticks = (wakeup_time >> CYCLES_TO_TICKS_SHIFT) + 1;
+	if (wakeup_ticks <= timer_wheels->current_ticks)
+ return -6;
+
+ if (wakeup_ticks < timer_wheels->wheel_descs[0].max_ticks)
+ rc = current_wheel_insert(timer_wheels,
+ wakeup_ticks, user_data);
+ else if (wakeup_ticks < timer_wheels->wheel_descs[1].max_ticks)
+ rc = general_wheel_insert(timer_wheels, 1,
+ wakeup_ticks, user_data);
+ else if (wakeup_ticks < timer_wheels->wheel_descs[2].max_ticks)
+ rc = general_wheel_insert(timer_wheels, 2, wakeup_ticks,
+ user_data);
+ else if (wakeup_ticks < timer_wheels->wheel_descs[3].max_ticks)
+ rc = general_wheel_insert(timer_wheels, 3,
+ wakeup_ticks, user_data);
+ else
+ return -1;
+
+ if (rc < 0)
+ timer_wheels->insert_fail_cnt++;
+ else
+ timer_wheels->total_timer_inserts++;
+
+ return rc;
+}
+
+void *_odp_timer_wheel_next_expired(_odp_timer_wheel_t timer_wheel)
+{
+ timer_wheels_t *timer_wheels;
+ uint64_t user_data;
+ int rc;
+
+ /* Remove the head of the timer wheel. */
+ timer_wheels = (timer_wheels_t *)timer_wheel;
+ rc = expired_timers_remove(timer_wheels, &user_data);
+ if (rc <= 0)
+ return NULL;
+
+ user_data &= ~0x3;
+ timer_wheels->total_timer_removes++;
+ return (void *)user_data;
+}
+
+uint32_t _odp_timer_wheel_count(_odp_timer_wheel_t timer_wheel)
+{
+ timer_wheels_t *timer_wheels;
+
+ timer_wheels = (timer_wheels_t *)timer_wheel;
+ return timer_wheels->total_timer_inserts -
+ timer_wheels->total_timer_removes;
+}
+
+static void _odp_int_timer_wheel_desc_print(wheel_desc_t *wheel_desc,
+ uint32_t wheel_idx)
+{
+ ODP_DBG(" wheel=%u num_slots=%u ticks_shift=%u ticks_per_slot=%u"
+ " ticks_per_rev=%lu\n",
+ wheel_idx, wheel_desc->num_slots, wheel_desc->ticks_shift,
+ wheel_desc->ticks_per_slot, wheel_desc->ticks_per_rev);
+}
+
+void _odp_timer_wheel_stats_print(_odp_timer_wheel_t timer_wheel)
+{
+ timer_wheels_t *timer_wheels;
+ expired_ring_t *expired_ring;
+ uint32_t wheel_idx;
+
+ timer_wheels = (timer_wheels_t *)timer_wheel;
+ expired_ring = timer_wheels->expired_timers_ring;
+
+ ODP_DBG("_odp_int_timer_wheel_stats current_ticks=%lu\n",
+ timer_wheels->current_ticks);
+ for (wheel_idx = 0; wheel_idx < 4; wheel_idx++)
+ _odp_int_timer_wheel_desc_print(
+ &timer_wheels->wheel_descs[wheel_idx],
+ wheel_idx);
+
+ ODP_DBG(" total timer_inserts=%lu timer_removes=%lu "
+ "insert_fails=%lu\n",
+ timer_wheels->total_timer_inserts,
+ timer_wheels->total_timer_removes,
+ timer_wheels->insert_fail_cnt);
+ ODP_DBG(" total_promote_cnt=%lu promote_fail_cnt=%lu\n",
+ timer_wheels->total_promote_cnt,
+ timer_wheels->promote_fail_cnt);
+ ODP_DBG(" free_list_size=%u min_size=%u peak_size=%u\n",
+ timer_wheels->free_list_size, timer_wheels->min_free_list_size,
+ timer_wheels->peak_free_list_size);
+ ODP_DBG(" expired_timers_ring size=%u count=%u "
+ "peak_count=%u full_cnt=%u\n",
+ expired_ring->max_idx + 1, expired_ring->count,
+ expired_ring->peak_count, expired_ring->expired_ring_full_cnt);
+}
+
+void _odp_timer_wheel_destroy(_odp_timer_wheel_t timer_wheel)
+{
+ timer_wheels_t *timer_wheels;
+ expired_ring_t *expired_ring;
+
+ timer_wheels = (timer_wheels_t *)timer_wheel;
+ expired_ring = timer_wheels->expired_timers_ring;
+
+ /* First free all of the block_of_timer_blks @TODO */
+ free(timer_wheels->current_wheel);
+ free(timer_wheels->general_wheels[0]);
+ free(timer_wheels->general_wheels[1]);
+ free(timer_wheels->general_wheels[2]);
+ free(expired_ring);
+ free(timer_wheels);
+}
new file mode 100644
@@ -0,0 +1,2799 @@
+/* Copyright 2015 EZchip Semiconductor Ltd. All Rights Reserved.
+ *
+ * Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <odp/std_types.h>
+#include <odp_traffic_mngr_internal.h>
+
+/* Local vars */
+static const
+_odp_int_name_kind_t PROFILE_TO_HANDLE_KIND[ODP_TM_NUM_PROFILES] = {
+ [TM_SHAPER_PROFILE] = ODP_TM_SHAPER_PROFILE_HANDLE,
+ [TM_SCHED_PROFILE] = ODP_TM_SCHED_PROFILE_HANDLE,
+ [TM_THRESHOLD_PROFILE] = ODP_TM_THRESHOLD_PROFILE_HANDLE,
+ [TM_WRED_PROFILE] = ODP_TM_WRED_PROFILE_HANDLE
+};
+
+static const pkt_desc_t EMPTY_PKT_DESC = { .word = 0 };
+
+#define MAX_PRIORITIES ODP_TM_MAX_PRIORITIES
+#define NUM_SHAPER_COLORS ODP_NUM_SHAPER_COLORS
+
+static tm_prop_t basic_prop_tbl[MAX_PRIORITIES][NUM_SHAPER_COLORS] = {
+ [0] = {
+ [ODP_TM_SHAPER_GREEN] = { 0, DECR_BOTH },
+ [ODP_TM_SHAPER_YELLOW] = { 0, DECR_BOTH },
+ [ODP_TM_SHAPER_RED] = { 0, DELAY_PKT } },
+ [1] = {
+ [ODP_TM_SHAPER_GREEN] = { 1, DECR_BOTH },
+ [ODP_TM_SHAPER_YELLOW] = { 1, DECR_BOTH },
+ [ODP_TM_SHAPER_RED] = { 1, DELAY_PKT } },
+ [2] = {
+ [ODP_TM_SHAPER_GREEN] = { 2, DECR_BOTH },
+ [ODP_TM_SHAPER_YELLOW] = { 2, DECR_BOTH },
+ [ODP_TM_SHAPER_RED] = { 2, DELAY_PKT } },
+ [3] = {
+ [ODP_TM_SHAPER_GREEN] = { 3, DECR_BOTH },
+ [ODP_TM_SHAPER_YELLOW] = { 3, DECR_BOTH },
+ [ODP_TM_SHAPER_RED] = { 3, DELAY_PKT } },
+ [4] = {
+ [ODP_TM_SHAPER_GREEN] = { 4, DECR_BOTH },
+ [ODP_TM_SHAPER_YELLOW] = { 4, DECR_BOTH },
+ [ODP_TM_SHAPER_RED] = { 4, DELAY_PKT } },
+ [5] = {
+ [ODP_TM_SHAPER_GREEN] = { 5, DECR_BOTH },
+ [ODP_TM_SHAPER_YELLOW] = { 5, DECR_BOTH },
+ [ODP_TM_SHAPER_RED] = { 5, DELAY_PKT } },
+ [6] = {
+ [ODP_TM_SHAPER_GREEN] = { 6, DECR_BOTH },
+ [ODP_TM_SHAPER_YELLOW] = { 6, DECR_BOTH },
+ [ODP_TM_SHAPER_RED] = { 6, DELAY_PKT } },
+ [7] = {
+ [ODP_TM_SHAPER_GREEN] = { 7, DECR_BOTH },
+ [ODP_TM_SHAPER_YELLOW] = { 7, DECR_BOTH },
+ [ODP_TM_SHAPER_RED] = { 7, DELAY_PKT } }
+};
+
+/* Profile tables. */
+static dynamic_tbl_t odp_tm_profile_tbls[ODP_TM_NUM_PROFILES];
+
+/* TM systems table. */
+static tm_system_t *odp_tm_systems[ODP_TM_MAX_NUM_SYSTEMS];
+
+static uint64_t odp_tm_cycles_per_sec = ODP_CYCLES_PER_SEC;
+
+static odp_ticketlock_t tm_create_lock;
+static odp_ticketlock_t tm_profile_lock;
+static odp_barrier_t tm_first_enq;
+
+/* Forward function declarations. */
+static void tm_queue_cnts_decrement(tm_system_t *tm_system,
+ tm_wred_node_t *tm_wred_node,
+ uint32_t priority,
+ uint32_t frame_len);
+
+static int tm_demote_pkt_desc(tm_system_t *tm_system,
+ tm_node_obj_t *tm_node_obj,
+ tm_schedulers_obj_t *blocked_scheduler,
+ tm_shaper_obj_t *timer_shaper,
+ pkt_desc_t *demoted_pkt_desc);
+
+static tm_queue_obj_t *get_tm_queue_obj(tm_system_t *tm_system,
+ pkt_desc_t *pkt_desc)
+{
+ tm_queue_obj_t *tm_queue_obj;
+ uint32_t queue_num;
+
+ if ((!pkt_desc) || (pkt_desc->queue_num == 0))
+ return NULL;
+
+ queue_num = pkt_desc->queue_num;
+ /* Assert(queue_num < tm_system->next_queue_num); */
+
+ tm_queue_obj = tm_system->queue_num_tbl[queue_num];
+ return tm_queue_obj;
+}
+
+static odp_bool_t pkt_descs_equal(pkt_desc_t *pkt_desc1, pkt_desc_t *pkt_desc2)
+{
+ if ((!pkt_desc1) || (pkt_desc1->queue_num == 0))
+ return 0;
+ else if ((!pkt_desc2) || (pkt_desc2->queue_num == 0))
+ return 0;
+ else if ((pkt_desc1->queue_num == pkt_desc2->queue_num) &&
+ (pkt_desc1->epoch == pkt_desc2->epoch) &&
+ (pkt_desc1->pkt_len == pkt_desc2->pkt_len))
+ return 1;
+ else
+ return 0;
+}
+
+static odp_bool_t pkt_descs_not_equal(pkt_desc_t *pkt_desc1,
+				      pkt_desc_t *pkt_desc2)
+{
+	return !pkt_descs_equal(pkt_desc1, pkt_desc2);
+}
+
+static void tm_init_random_data(tm_random_data_t *tm_random_data)
+{
+ uint32_t byte_cnt;
+
+ /* Keep calling odp_random_data until we have filled the entire random
+ * data buffer. Hopefully we don't need to call odp_random_data too
+ * many times to get 256 bytes worth of random data.
+ */
+ byte_cnt = 0;
+ while (byte_cnt < 256)
+ byte_cnt += odp_random_data(&tm_random_data->buf[byte_cnt],
+ 256 - byte_cnt, 1);
+
+ tm_random_data->next_random_byte = 0;
+}
+
+static uint16_t tm_random16(tm_random_data_t *tm_random_data)
+{
+ uint32_t buf_idx;
+ uint16_t rand8a, rand8b;
+
+ if (256 <= tm_random_data->next_random_byte)
+ tm_init_random_data(tm_random_data);
+
+ /* Collect 2 random bytes and return them. Endianness does NOT matter
+ * here. This code does assume that next_random_byte is a multiple of
+ * 2.
+ */
+ buf_idx = tm_random_data->next_random_byte;
+ rand8a = tm_random_data->buf[buf_idx++];
+ rand8b = tm_random_data->buf[buf_idx++];
+
+ tm_random_data->next_random_byte = buf_idx;
+ return (rand8b << 8) | rand8a;
+}
+
+static odp_bool_t tm_random_drop(tm_random_data_t *tm_random_data,
+ odp_tm_percent_t drop_prob)
+{
+ odp_bool_t drop;
+ uint32_t random_int, scaled_random_int;
+
+	/* Draw a 16-bit random value and scale it to the range 0..9999. */
+ random_int = tm_random16(tm_random_data);
+ scaled_random_int = (random_int * 10000) >> 16;
+ drop = scaled_random_int < (uint32_t)drop_prob;
+ return drop;
+}
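+
+/* As an illustrative example, a drop_prob of 2500 represents a drop
+ * probability of 25%: a random_int of 32768 scales to 5000, which is not
+ * less than 2500, so that particular pkt would not be dropped.
+ */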
+
+static void *alloc_entry_in_dynamic_tbl(dynamic_tbl_t *dynamic_tbl,
+ uint32_t record_size,
+ uint32_t *dynamic_idx_ptr)
+{
+ uint32_t num_allocd, new_num_allocd, idx;
+ void **new_array_ptrs, *new_record;
+
+ num_allocd = dynamic_tbl->num_allocd;
+ if (num_allocd <= dynamic_tbl->num_used) {
+ /* Need to alloc or realloc the array of ptrs. */
+ if (num_allocd <= 32)
+ new_num_allocd = 64;
+ else
+ new_num_allocd = 4 * num_allocd;
+
+ new_array_ptrs = malloc(new_num_allocd * sizeof(void *));
+ memset(new_array_ptrs, 0, new_num_allocd * sizeof(void *));
+
+ if (dynamic_tbl->num_used != 0)
+ memcpy(new_array_ptrs, dynamic_tbl->array_ptrs,
+ dynamic_tbl->num_used * sizeof(void *));
+
+ if (dynamic_tbl->array_ptrs)
+ free(dynamic_tbl->array_ptrs);
+
+ dynamic_tbl->num_allocd = new_num_allocd;
+ dynamic_tbl->array_ptrs = new_array_ptrs;
+ }
+
+ idx = dynamic_tbl->num_used;
+ new_record = malloc(record_size);
+ memset(new_record, 0, record_size);
+
+ dynamic_tbl->array_ptrs[idx] = new_record;
+ dynamic_tbl->num_used++;
+ if (dynamic_idx_ptr)
+ *dynamic_idx_ptr = idx;
+
+ return new_record;
+}
+
+static void free_dynamic_tbl_entry(dynamic_tbl_t *dynamic_tbl ODP_UNUSED,
+ uint32_t record_size ODP_UNUSED,
+ uint32_t dynamic_idx ODP_UNUSED)
+{
+	/* @todo Currently we don't bother with freeing. */
+}
+
+static input_work_queue_t *input_work_queue_create(void)
+{
+ input_work_queue_t *input_work_queue;
+
+ input_work_queue = malloc(sizeof(input_work_queue_t));
+ memset(input_work_queue, 0, sizeof(input_work_queue_t));
+ odp_atomic_init_u32(&input_work_queue->queue_cnt, 0);
+ odp_ticketlock_init(&input_work_queue->lock);
+ return input_work_queue;
+}
+
+static void input_work_queue_destroy(input_work_queue_t *input_work_queue)
+{
+ /* We first need to "flush/drain" this input_work_queue before
+ * freeing it. Of course, elsewhere it is essential to have first
+ * stopped new tm_enq() (et al) calls from succeeding.
+ */
+ odp_ticketlock_lock(&input_work_queue->lock);
+ free(input_work_queue);
+}
+
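+/* The input work ring is protected by a combination of an atomic
+ * queue_cnt and a ticketlock: callers first test queue_cnt so that a full
+ * ring (on append) or an empty ring (on remove) can be detected without
+ * taking the lock, which is then held only long enough to copy one work
+ * item and advance the head or tail index.
+ */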
+static int input_work_queue_append(tm_system_t *tm_system,
+ input_work_item_t *work_item)
+{
+ input_work_queue_t *input_work_queue;
+ input_work_item_t *entry_ptr;
+ uint32_t queue_cnt, tail_idx;
+
+ input_work_queue = tm_system->input_work_queue;
+ queue_cnt = odp_atomic_load_u32(&input_work_queue->queue_cnt);
+ if (INPUT_WORK_RING_SIZE <= queue_cnt) {
+ input_work_queue->enqueue_fail_cnt++;
+ return -1;
+ }
+
+ odp_ticketlock_lock(&input_work_queue->lock);
+ tail_idx = input_work_queue->tail_idx;
+ entry_ptr = &input_work_queue->work_ring[tail_idx];
+
+ *entry_ptr = *work_item;
+ tail_idx++;
+ if (INPUT_WORK_RING_SIZE <= tail_idx)
+ tail_idx = 0;
+
+ input_work_queue->total_enqueues++;
+ input_work_queue->tail_idx = tail_idx;
+ odp_ticketlock_unlock(&input_work_queue->lock);
+ odp_atomic_inc_u32(&input_work_queue->queue_cnt);
+ if (input_work_queue->peak_cnt <= queue_cnt)
+ input_work_queue->peak_cnt = queue_cnt + 1;
+ return 0;
+}
+
+static int input_work_queue_remove(input_work_queue_t *input_work_queue,
+ input_work_item_t *work_item)
+{
+ input_work_item_t *entry_ptr;
+ uint32_t queue_cnt, head_idx;
+
+ queue_cnt = odp_atomic_load_u32(&input_work_queue->queue_cnt);
+ if (queue_cnt == 0)
+ return -1;
+
+ odp_ticketlock_lock(&input_work_queue->lock);
+ head_idx = input_work_queue->head_idx;
+ entry_ptr = &input_work_queue->work_ring[head_idx];
+
+ *work_item = *entry_ptr;
+ head_idx++;
+ if (INPUT_WORK_RING_SIZE <= head_idx)
+ head_idx = 0;
+
+ input_work_queue->total_dequeues++;
+ input_work_queue->head_idx = head_idx;
+ odp_ticketlock_unlock(&input_work_queue->lock);
+ odp_atomic_dec_u32(&input_work_queue->queue_cnt);
+ return 0;
+}
+
+static tm_system_t *tm_system_alloc(void)
+{
+ tm_system_t *tm_system;
+ uint32_t tm_idx;
+
+ /* Find an open slot in the odp_tm_systems array. */
+ for (tm_idx = 0; tm_idx < ODP_TM_MAX_NUM_SYSTEMS; tm_idx++) {
+ if (!odp_tm_systems[tm_idx]) {
+ tm_system = malloc(sizeof(tm_system_t));
+ if (!tm_system)
+ return NULL;
+ memset(tm_system, 0, sizeof(tm_system_t));
+ odp_tm_systems[tm_idx] = tm_system;
+ tm_system->tm_idx = tm_idx;
+ return tm_system;
+ }
+ }
+
+ return NULL;
+}
+
+static void tm_system_free(tm_system_t *tm_system)
+{
+ /* @todo Free any internally malloc'd blocks. */
+ odp_tm_systems[tm_system->tm_idx] = NULL;
+ free(tm_system);
+}
+
+static void *tm_common_profile_create(const char *name,
+ profile_kind_t profile_kind,
+ uint32_t object_size,
+ tm_handle_t *profile_handle_ptr,
+ _odp_int_name_t *name_tbl_id_ptr)
+{
+ _odp_int_name_kind_t handle_kind;
+ _odp_int_name_t name_tbl_id;
+ dynamic_tbl_t *dynamic_tbl;
+ tm_handle_t profile_handle;
+ uint32_t dynamic_tbl_idx;
+ void *object_ptr;
+
+ dynamic_tbl = &odp_tm_profile_tbls[profile_kind];
+ object_ptr = alloc_entry_in_dynamic_tbl(dynamic_tbl, object_size,
+ &dynamic_tbl_idx);
+ if (!object_ptr)
+ return NULL;
+
+ handle_kind = PROFILE_TO_HANDLE_KIND[profile_kind];
+ profile_handle = MAKE_PROFILE_HANDLE(profile_kind, dynamic_tbl_idx);
+ name_tbl_id = ODP_INVALID_NAME;
+
+ if ((name) && (name[0] != '\0')) {
+ name_tbl_id = _odp_int_name_tbl_add(name, handle_kind,
+ profile_handle);
+ if (name_tbl_id == ODP_INVALID_NAME) {
+ free_dynamic_tbl_entry(dynamic_tbl, object_size,
+ dynamic_tbl_idx);
+ return NULL;
+ }
+ }
+
+ *profile_handle_ptr = profile_handle;
+ if (name_tbl_id_ptr)
+ *name_tbl_id_ptr = name_tbl_id;
+ return object_ptr;
+}
+
+static void *tm_get_profile_params(tm_handle_t profile_handle,
+ profile_kind_t expected_profile_kind)
+{
+ profile_kind_t profile_kind;
+ dynamic_tbl_t *dynamic_tbl;
+ uint32_t dynamic_tbl_idx;
+
+ profile_kind = GET_PROFILE_KIND(profile_handle);
+ if (profile_kind != expected_profile_kind)
+ return NULL; /* @todo Call some odp error function */
+
+ dynamic_tbl = &odp_tm_profile_tbls[profile_kind];
+ dynamic_tbl_idx = GET_TBL_IDX(profile_handle);
+ return dynamic_tbl->array_ptrs[dynamic_tbl_idx];
+}
+
+static uint64_t tm_bps_to_rate(uint64_t bps)
+{
+ /* This code assumes that bps is in the range 1 kbps .. 1 tbps. */
+ if ((bps >> 32) == 0)
+ return (bps << 29) / odp_tm_cycles_per_sec;
+ else
+ return ((bps << 21) / odp_tm_cycles_per_sec) << 8;
+}
+
+static uint64_t tm_rate_to_bps(uint64_t rate)
+{
+ /* This code assumes the resulting bps is in the range
+ * 1 kbps .. 1 tbps.
+ */
+ return (rate * odp_tm_cycles_per_sec) >> 29;
+}
+
+static uint64_t tm_max_time_delta(uint64_t rate)
+{
+ if (rate == 0)
+ return 0;
+ else
+ return (1ULL << (26 + 30)) / rate;
+}
+
+static void tm_shaper_params_cvt_to(odp_tm_shaper_params_t *odp_shaper_params,
+ tm_shaper_params_t *tm_shaper_params)
+{
+ uint64_t commit_rate, peak_rate, max_commit_time_delta, highest_rate;
+ uint64_t max_peak_time_delta;
+ uint32_t min_time_delta;
+ int64_t commit_burst, peak_burst;
+
+ commit_rate = tm_bps_to_rate(odp_shaper_params->commit_bps);
+ peak_rate = tm_bps_to_rate(odp_shaper_params->peak_bps);
+ max_commit_time_delta = tm_max_time_delta(commit_rate);
+ max_peak_time_delta = tm_max_time_delta(peak_rate);
+ highest_rate = MAX(commit_rate, peak_rate);
+ min_time_delta = (uint32_t)((1 << 26) / highest_rate);
+ commit_burst = (int64_t)odp_shaper_params->commit_burst;
+ peak_burst = (int64_t)odp_shaper_params->peak_burst;
+
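+ /* The burst sizes (presumably specified in bits, per the odp_tm
+ * API) are converted here to byte credits in the same 2^26 fixed
+ * point scale used by the token bucket counters, hence the shift
+ * by (26 - 3).
+ */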
+ tm_shaper_params->max_commit_time_delta = max_commit_time_delta;
+ tm_shaper_params->max_peak_time_delta = max_peak_time_delta;
+
+ tm_shaper_params->commit_rate = commit_rate;
+ tm_shaper_params->peak_rate = peak_rate;
+ tm_shaper_params->max_commit = commit_burst << (26 - 3);
+ tm_shaper_params->max_peak = peak_burst << (26 - 3);
+ tm_shaper_params->min_time_delta = min_time_delta;
+ tm_shaper_params->len_adjust = odp_shaper_params->shaper_len_adjust;
+ tm_shaper_params->dual_rate = odp_shaper_params->dual_rate;
+ tm_shaper_params->enabled = commit_rate != 0;
+}
+
+static void
+tm_shaper_params_cvt_from(tm_shaper_params_t *tm_shaper_params,
+ odp_tm_shaper_params_t *odp_shaper_params)
+{
+ uint64_t commit_bps, peak_bps, commit_burst, peak_burst;
+
+ commit_bps = tm_rate_to_bps(tm_shaper_params->commit_rate);
+ peak_bps = tm_rate_to_bps(tm_shaper_params->peak_rate);
+ commit_burst = tm_shaper_params->max_commit >> (26 - 3);
+ peak_burst = tm_shaper_params->max_peak >> (26 - 3);
+
+ odp_shaper_params->commit_bps = commit_bps;
+ odp_shaper_params->peak_bps = peak_bps;
+ odp_shaper_params->commit_burst = (uint32_t)commit_burst;
+ odp_shaper_params->peak_burst = (uint32_t)peak_burst;
+ odp_shaper_params->shaper_len_adjust = tm_shaper_params->len_adjust;
+ odp_shaper_params->dual_rate = tm_shaper_params->dual_rate;
+}
+
+static void tm_shaper_obj_init(tm_system_t *tm_system,
+ tm_shaper_params_t *shaper_params,
+ tm_shaper_obj_t *shaper_obj)
+{
+ shaper_obj->shaper_params = shaper_params;
+ shaper_obj->last_update_time = tm_system->current_cycles;
+ shaper_obj->callback_time = 0;
+ shaper_obj->commit_cnt = shaper_params->max_commit;
+ shaper_obj->peak_cnt = shaper_params->max_peak;
+ shaper_obj->callback_reason = NO_CALLBACK;
+ shaper_obj->initialized = 1;
+}
+
+/* Any locking required must be done by the caller! */
+static void tm_shaper_config_set(tm_system_t *tm_system,
+ odp_tm_shaper_t shaper_profile,
+ tm_shaper_obj_t *shaper_obj)
+{
+ tm_shaper_params_t *shaper_params;
+
+ if (shaper_profile == ODP_TM_INVALID)
+ return;
+
+ shaper_params = tm_get_profile_params(shaper_profile,
+ TM_SHAPER_PROFILE);
+ if (shaper_obj->initialized == 0)
+ tm_shaper_obj_init(tm_system, shaper_params, shaper_obj);
+ else
+ shaper_obj->shaper_params = shaper_params;
+}
+
+static void update_shaper_elapsed_time(tm_system_t *tm_system,
+ tm_shaper_params_t *shaper_params,
+ tm_shaper_obj_t *shaper_obj)
+{
+ uint64_t time_delta;
+ int64_t commit, peak, commit_inc, peak_inc, max_commit, max_peak;
+
+ /* If the time_delta is "too small" then we just exit without making
+ * any changes. Too small is defined such that
+ * time_delta/cycles_per_sec * MAX(commit_rate, peak_rate) is less
+ * than a byte.
+ */
+ time_delta = tm_system->current_cycles - shaper_obj->last_update_time;
+ if (time_delta < (uint64_t)shaper_params->min_time_delta)
+ return;
+
+ commit = shaper_obj->commit_cnt;
+ max_commit = shaper_params->max_commit;
+
+ /* If the time_delta is "too large" then we need to prevent overflow
+ * of the multiplications of time_delta and either commit_rate or
+ * peak_rate.
+ */
+ if (shaper_params->max_commit_time_delta <= time_delta)
+ commit_inc = max_commit;
+ else
+ commit_inc = time_delta * shaper_params->commit_rate;
+
+ shaper_obj->commit_cnt = (int64_t)MIN(max_commit, commit + commit_inc);
+
+ if (shaper_params->peak_rate != 0) {
+ peak = shaper_obj->peak_cnt;
+ max_peak = shaper_params->max_peak;
+ if (shaper_params->max_peak_time_delta <= time_delta)
+ peak_inc = max_peak;
+ else
+ peak_inc = time_delta * shaper_params->peak_rate;
+
+ shaper_obj->peak_cnt = (int64_t)MIN(max_peak, peak + peak_inc);
+ }
+
+ shaper_obj->last_update_time = tm_system->current_cycles;
+}
+
+static uint64_t cycles_till_not_red(tm_shaper_params_t *shaper_params,
+ tm_shaper_obj_t *shaper_obj)
+{
+ uint64_t min_time_delay, commit_delay, peak_delay;
+
+ /* In the normal case peak_cnt <= commit_cnt and peak_rate >=
+ * commit_rate, but just in case, this code also handles the other
+ * (possibly invalid) orderings.
+ */
+ commit_delay = 0;
+ if (shaper_obj->commit_cnt < 0)
+ commit_delay = (-shaper_obj->commit_cnt)
+ / shaper_params->commit_rate;
+
+ min_time_delay = MAX(shaper_obj->shaper_params->min_time_delta, 256);
+ commit_delay = MAX(commit_delay, min_time_delay);
+ if (shaper_params->peak_rate == 0)
+ return commit_delay;
+
+ peak_delay = 0;
+ if (shaper_obj->peak_cnt < 0)
+ peak_delay = (-shaper_obj->peak_cnt) / shaper_params->peak_rate;
+
+ peak_delay = MAX(peak_delay, min_time_delay);
+ if (0 < shaper_obj->commit_cnt)
+ return peak_delay;
+ else if (0 < shaper_obj->peak_cnt)
+ return commit_delay;
+ else
+ return MIN(commit_delay, peak_delay);
+}
+
+static int delete_timer(tm_system_t *tm_system ODP_UNUSED,
+ tm_queue_obj_t *tm_queue_obj,
+ uint8_t cancel_timer)
+{
+ tm_shaper_obj_t *shaper_obj;
+
+ shaper_obj = tm_queue_obj->timer_shaper;
+ if (cancel_timer)
+ tm_queue_obj->timer_cancels_outstanding++;
+
+ if ((tm_queue_obj->timer_reason == NO_CALLBACK) ||
+ (!shaper_obj))
+ return -1;
+
+ tm_queue_obj->timer_reason = NO_CALLBACK;
+ tm_queue_obj->timer_seq++;
+ tm_queue_obj->timer_shaper = NULL;
+
+ if (tm_queue_obj->delayed_cnt != 0)
+ tm_queue_obj->delayed_cnt--;
+
+ if (shaper_obj->timer_outstanding != 0)
+ shaper_obj->timer_outstanding--;
+
+ shaper_obj->timer_tm_queue = NULL;
+ shaper_obj->callback_reason = NO_CALLBACK;
+ return 0;
+}
+
+static void tm_unblock_pkt(tm_system_t *tm_system, pkt_desc_t *pkt_desc)
+{
+ tm_queue_obj_t *tm_queue_obj;
+
+ tm_queue_obj = get_tm_queue_obj(tm_system, pkt_desc);
+ tm_queue_obj->blocked_scheduler = NULL;
+ if (tm_queue_obj->blocked_cnt != 0)
+ tm_queue_obj->blocked_cnt--;
+}
+
+static void tm_block_pkt(tm_system_t *tm_system,
+ tm_node_obj_t *tm_node_obj,
+ tm_schedulers_obj_t *schedulers_obj,
+ pkt_desc_t *pkt_desc,
+ uint8_t priority)
+{
+ tm_queue_obj_t *tm_queue_obj;
+
+ /* Remove the blocked pkt from all downstream entities.
+ * This only happens if the propagation of another pkt caused this pkt
+ * to become blocked, since if the propagation of a pkt causes itself
+ * to be blocked then there can't be any downstream copies to remove.
+ * The caller signals us which case it is by whether the tm_node_obj
+ * is NULL or not.
+ */
+ tm_queue_obj = get_tm_queue_obj(tm_system, pkt_desc);
+ if (tm_node_obj)
+ tm_demote_pkt_desc(tm_system, tm_node_obj,
+ tm_queue_obj->blocked_scheduler,
+ tm_queue_obj->timer_shaper, pkt_desc);
+
+ tm_queue_obj->blocked_cnt = 1;
+ tm_queue_obj->blocked_scheduler = schedulers_obj;
+ tm_queue_obj->blocked_priority = priority;
+}
+
+static int tm_delay_pkt(tm_system_t *tm_system, tm_shaper_obj_t *shaper_obj,
+ pkt_desc_t *pkt_desc)
+{
+ tm_queue_obj_t *tm_queue_obj;
+ uint64_t delay_cycles, wakeup_time, timer_context;
+
+ /* Calculate elapsed time before this pkt will be
+ * green or yellow.
+ */
+ delay_cycles = cycles_till_not_red(shaper_obj->shaper_params,
+ shaper_obj);
+ wakeup_time = tm_system->current_cycles + delay_cycles;
+
+ tm_queue_obj = get_tm_queue_obj(tm_system, pkt_desc);
+ tm_queue_obj->delayed_cnt++;
+
+ /* Insert into timer wheel. */
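+ /* The timer context packs both identifiers needed to validate an
+ * expiration into one word: bits 63..32 hold timer_seq (to detect
+ * stale timers) and bits 31..4 hold queue_num.
+ */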
+ tm_queue_obj->timer_seq++;
+ timer_context = (((uint64_t)tm_queue_obj->timer_seq) << 32) |
+ (((uint64_t)tm_queue_obj->queue_num) << 4);
+ _odp_timer_wheel_insert(tm_system->_odp_int_timer_wheel,
+ wakeup_time, (void *)timer_context);
+
+ tm_queue_obj->timer_reason = UNDELAY_PKT;
+ tm_queue_obj->timer_shaper = shaper_obj;
+
+ shaper_obj->timer_outstanding++;
+ shaper_obj->callback_reason = UNDELAY_PKT;
+ shaper_obj->callback_time = wakeup_time;
+ shaper_obj->timer_tm_queue = tm_queue_obj;
+ shaper_obj->in_pkt_desc = *pkt_desc;
+ shaper_obj->out_pkt_desc = EMPTY_PKT_DESC;
+ shaper_obj->valid_finish_time = 0;
+ return 1;
+}
+
+/* We call remove_pkt_from_shaper for pkts sent AND for pkts demoted. */
+
+static int remove_pkt_from_shaper(tm_system_t *tm_system,
+ tm_shaper_obj_t *shaper_obj,
+ pkt_desc_t *pkt_desc_to_remove,
+ uint8_t is_sent_pkt)
+{
+ tm_shaper_params_t *shaper_params;
+ tm_shaper_action_t shaper_action;
+ tm_queue_obj_t *tm_queue_obj;
+ uint32_t frame_len;
+ int64_t tkn_count;
+
+ tm_queue_obj = get_tm_queue_obj(tm_system, pkt_desc_to_remove);
+ if (shaper_obj->in_pkt_desc.queue_num != pkt_desc_to_remove->queue_num)
+ return -1;
+
+ shaper_action = shaper_obj->propagation_result.action;
+ if ((tm_queue_obj->timer_shaper == shaper_obj) ||
+ (shaper_obj->callback_reason == UNDELAY_PKT)) {
+ /* Need to delete the timer - which is either a cancel or just
+ * a delete of a sent_pkt.
+ */
+ if (tm_queue_obj->timer_reason == NO_CALLBACK)
+ return -1;
+
+ if (!is_sent_pkt)
+ tm_queue_obj->timer_cancels_outstanding++;
+
+ tm_queue_obj->timer_reason = NO_CALLBACK;
+ tm_queue_obj->timer_seq++;
+ tm_queue_obj->timer_shaper = NULL;
+ if (tm_queue_obj->delayed_cnt != 0)
+ tm_queue_obj->delayed_cnt--;
+
+ if (shaper_obj->timer_outstanding != 0)
+ shaper_obj->timer_outstanding--;
+
+ shaper_obj->callback_time = 0;
+ shaper_obj->callback_reason = NO_CALLBACK;
+ shaper_obj->timer_tm_queue = NULL;
+ }
+
+ shaper_obj->propagation_result.action = DECR_NOTHING;
+ shaper_obj->in_pkt_desc = EMPTY_PKT_DESC;
+ shaper_obj->out_pkt_desc = EMPTY_PKT_DESC;
+ shaper_obj->out_priority = 0;
+
+ if ((!is_sent_pkt) || (shaper_action == DECR_NOTHING) ||
+ (shaper_action == DELAY_PKT))
+ return 0;
+
+ shaper_params = shaper_obj->shaper_params;
+ frame_len = pkt_desc_to_remove->pkt_len +
+ pkt_desc_to_remove->shaper_len_adjust +
+ shaper_params->len_adjust;
+
+ tkn_count = ((int64_t)frame_len) << 26;
+ if ((shaper_action == DECR_BOTH) || (shaper_action == DECR_COMMIT))
+ shaper_obj->commit_cnt -= tkn_count;
+
+ if (shaper_params->peak_rate != 0)
+ if ((shaper_action == DECR_BOTH) ||
+ (shaper_action == DECR_PEAK))
+ shaper_obj->peak_cnt -= tkn_count;
+ return 0;
+}
+
+static int tm_run_shaper(tm_system_t *tm_system, tm_shaper_obj_t *shaper_obj,
+ pkt_desc_t *pkt_desc, uint8_t priority)
+{
+ odp_tm_shaper_color_t shaper_color;
+ tm_shaper_params_t *shaper_params;
+ odp_bool_t output_change;
+ tm_prop_t propagation;
+
+ shaper_params = shaper_obj->shaper_params;
+
+ if ((!shaper_params) || (shaper_params->enabled == 0)) {
+ output_change =
+ pkt_descs_not_equal(&shaper_obj->out_pkt_desc,
+ pkt_desc) ||
+ (shaper_obj->out_priority != priority);
+
+ shaper_obj->in_pkt_desc = *pkt_desc;
+ shaper_obj->input_priority = priority;
+ shaper_obj->out_pkt_desc = *pkt_desc;
+ shaper_obj->out_priority = priority;
+ if (output_change)
+ shaper_obj->valid_finish_time = 0;
+
+ return output_change;
+ }
+
+ update_shaper_elapsed_time(tm_system, shaper_params, shaper_obj);
+ if (0 < shaper_obj->commit_cnt)
+ shaper_color = ODP_TM_SHAPER_GREEN;
+ else if (shaper_params->peak_rate == 0)
+ shaper_color = ODP_TM_SHAPER_RED;
+ else if (shaper_obj->peak_cnt <= 0)
+ shaper_color = ODP_TM_SHAPER_RED;
+ else
+ shaper_color = ODP_TM_SHAPER_YELLOW;
+
+ if (shaper_color == ODP_TM_SHAPER_GREEN)
+ tm_system->shaper_green_cnt++;
+ else if (shaper_color == ODP_TM_SHAPER_YELLOW)
+ tm_system->shaper_yellow_cnt++;
+ else
+ tm_system->shaper_red_cnt++;
+
+ /* Run thru propagation tbl to get shaper_action and out_priority */
+ propagation = basic_prop_tbl[priority][shaper_color];
+
+ /* See if this shaper had a previous timer associated with it. If so
+ * we need to cancel it.
+ */
+ if ((shaper_obj->timer_outstanding != 0) &&
+ (shaper_obj->in_pkt_desc.queue_num != 0))
+ remove_pkt_from_shaper(tm_system, shaper_obj,
+ &shaper_obj->in_pkt_desc, 0);
+
+ shaper_obj->propagation_result = propagation;
+ if (propagation.action == DELAY_PKT)
+ return tm_delay_pkt(tm_system, shaper_obj, pkt_desc);
+
+ shaper_obj->callback_time = 0;
+ shaper_obj->callback_reason = NO_CALLBACK;
+
+ /* Propagate pkt_desc to the next level */
+ priority = propagation.output_priority;
+ output_change =
+ pkt_descs_not_equal(&shaper_obj->out_pkt_desc, pkt_desc) ||
+ (shaper_obj->out_priority != priority);
+
+ shaper_obj->in_pkt_desc = *pkt_desc;
+ shaper_obj->input_priority = priority;
+ shaper_obj->out_pkt_desc = *pkt_desc;
+ shaper_obj->out_priority = priority;
+ if (output_change)
+ shaper_obj->valid_finish_time = 0;
+
+ return output_change;
+}
+
+static int tm_set_finish_time(tm_schedulers_obj_t *schedulers_obj,
+ tm_shaper_obj_t *prod_shaper_obj,
+ pkt_desc_t *new_pkt_desc,
+ uint8_t new_priority)
+{
+ odp_tm_sched_mode_t mode;
+ tm_sched_params_t *sched_params;
+ tm_sched_state_t *sched_state;
+ uint64_t base_virtual_time, virtual_finish_time;
+ uint32_t inverted_weight, frame_len, frame_weight;
+
+ /* Check to see if this pkt_desc is "new" or not */
+ if (prod_shaper_obj->valid_finish_time) {
+ /* Not new - the virtual finish time for this pkt_desc has
+ * already been computed, so leave it unchanged.
+ */
+ return 0;
+ }
+
+ /* First determine the virtual finish_time of this new pkt. */
+ sched_params = prod_shaper_obj->sched_params;
+ if (!sched_params) {
+ mode = ODP_TM_BYTE_BASED_WEIGHTS;
+ inverted_weight = 4096; /* Same as a weight of 16. */
+ } else {
+ mode = sched_params->sched_modes[new_priority];
+ inverted_weight = sched_params->inverted_weights[new_priority];
+ }
+
+ frame_len = new_pkt_desc->pkt_len;
+ if (mode == ODP_TM_FRAME_BASED_WEIGHTS)
+ frame_len = 256;
+
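+ /* frame_weight approximates frame_len / weight in fixed point:
+ * inverted_weight is 0x10000 / weight, so the product carries a
+ * 2^16 scale factor and the (1 << 15) term rounds to nearest.
+ */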
+ frame_weight = ((inverted_weight * frame_len) + (1 << 15)) >> 16;
+
+ sched_state = &schedulers_obj->sched_states[new_priority];
+ base_virtual_time = MAX(prod_shaper_obj->virtual_finish_time,
+ sched_state->base_virtual_time);
+
+ virtual_finish_time = base_virtual_time + frame_weight;
+ prod_shaper_obj->virtual_finish_time = virtual_finish_time;
+ prod_shaper_obj->valid_finish_time = 1;
+ return 1;
+}
+
+static int tm_run_scheduler(tm_system_t *tm_system,
+ tm_shaper_obj_t *prod_shaper_obj,
+ tm_schedulers_obj_t *schedulers_obj,
+ pkt_desc_t *new_pkt_desc,
+ uint8_t new_priority)
+{
+ tm_sched_state_t *new_sched_state;
+ tm_node_obj_t *tm_node_obj;
+ pkt_desc_t prev_best_pkt_desc;
+ uint64_t new_finish_time, prev_best_time;
+ uint64_t new_priority_mask;
+ int rc;
+
+ /* Determine the virtual finish_time of this new pkt. */
+ tm_set_finish_time(schedulers_obj, prod_shaper_obj, new_pkt_desc,
+ new_priority);
+ new_finish_time = prod_shaper_obj->virtual_finish_time;
+ new_sched_state = &schedulers_obj->sched_states[new_priority];
+
+ prev_best_pkt_desc = new_sched_state->smallest_pkt_desc;
+ prev_best_time = new_sched_state->smallest_finish_time;
+ if (prev_best_pkt_desc.queue_num) {
+ /* See if this new pkt has the smallest virtual finish time
+ * for all fanin at the given input priority level, as
+ * represented by the new_sched_state.
+ */
+ if (prev_best_time <= new_finish_time) {
+ /* Since this new pkt doesn't have the smallest
+ * virtual finish time, just insert it into this
+ * sched_state's list sorted by virtual finish times.
+ */
+ rc = _odp_sorted_list_insert(
+ tm_system->_odp_int_sorted_pool,
+ new_sched_state->sorted_list,
+ new_finish_time, new_pkt_desc->word);
+
+ if (0 <= rc) {
+ new_sched_state->sorted_list_cnt++;
+ tm_block_pkt(tm_system, NULL, schedulers_obj,
+ new_pkt_desc, new_priority);
+ }
+
+ return 0;
+ }
+
+ /* Since this new pkt does have the smallest virtual finish
+ * time, insert the previous best into this sched_state's list
+ * sorted by virtual finish times (though it should always be
+ * inserted at the front), and continue processing.
+ */
+ rc = _odp_sorted_list_insert
+ (tm_system->_odp_int_sorted_pool,
+ new_sched_state->sorted_list, prev_best_time,
+ prev_best_pkt_desc.word);
+ if (rc < 0)
+ return rc;
+
+ new_sched_state->sorted_list_cnt++;
+ tm_node_obj = schedulers_obj->enclosing_entity;
+ tm_block_pkt(tm_system, tm_node_obj, schedulers_obj,
+ &prev_best_pkt_desc, new_priority);
+ }
+
+ /* Record the fact that this new pkt is now the best choice of this
+ * sched_state.
+ */
+ new_sched_state->smallest_finish_time = new_finish_time;
+ new_sched_state->smallest_pkt_desc = *new_pkt_desc;
+ schedulers_obj->priority_bit_mask |= 1ULL << new_priority;
+
+ /* Next see if this new pkt is the highest priority pkt of all of the
+ * schedulers within this tm_node (recalling that the highest priority
+ * has the lowest priority value).
+ */
+ new_priority_mask = (1ULL << new_priority) - 1;
+ if ((schedulers_obj->priority_bit_mask & new_priority_mask) != 0) {
+ tm_block_pkt(tm_system, NULL, schedulers_obj, new_pkt_desc,
+ new_priority);
+ return 0;
+ } else if (schedulers_obj->out_pkt_desc.queue_num != 0) {
+ tm_node_obj = schedulers_obj->enclosing_entity;
+ tm_block_pkt(tm_system, tm_node_obj, schedulers_obj,
+ &schedulers_obj->out_pkt_desc,
+ schedulers_obj->highest_priority);
+ }
+
+ schedulers_obj->highest_priority = new_priority;
+ schedulers_obj->out_pkt_desc = *new_pkt_desc;
+ return 1;
+}
+
+/* We call remove_pkt_from_scheduler both for pkts sent AND for pkts demoted. */
+
+static int remove_pkt_from_scheduler(tm_system_t *tm_system,
+ tm_schedulers_obj_t *schedulers_obj,
+ pkt_desc_t *pkt_desc_to_remove,
+ uint8_t pkt_desc_priority,
+ uint8_t is_sent_pkt)
+{
+ _odp_int_sorted_pool_t sorted_pool;
+ _odp_int_sorted_list_t sorted_list;
+ tm_sched_state_t *sched_state, *best_sched_state;
+ pkt_desc_t best_pkt_desc, pkt_desc;
+ uint64_t finish_time;
+ uint32_t priority, best_priority;
+ uint8_t found;
+ int rc;
+
+ if (is_sent_pkt)
+ priority = schedulers_obj->highest_priority;
+ else
+ priority = pkt_desc_priority;
+
+ sched_state = &schedulers_obj->sched_states[priority];
+ sorted_pool = tm_system->_odp_int_sorted_pool;
+ sorted_list = sched_state->sorted_list;
+ found = 0;
+ if (pkt_descs_equal(&sched_state->smallest_pkt_desc,
+ pkt_desc_to_remove))
+ found = 1;
+ else if (!is_sent_pkt) {
+ rc = _odp_sorted_list_delete(sorted_pool, sorted_list,
+ pkt_desc_to_remove->word);
+ if (0 <= rc) {
+ sched_state->sorted_list_cnt--;
+ return 0;
+ }
+ }
+
+ if (!found)
+ return -1;
+
+ /* This is the case where the pkt_desc_to_remove is NOT in a sorted
+ * list but is instead the best/smallest pkt_desc for "some" priority
+ * level. If is_sent_pkt is TRUE, then this priority level MUST be
+ * the highest_priority level, but if FALSE then this priority level
+ * can be at any priority.
+ */
+ if (is_sent_pkt) {
+ finish_time = sched_state->smallest_finish_time;
+ sched_state->base_virtual_time = finish_time;
+ }
+
+ sched_state->smallest_finish_time = 0;
+ sched_state->smallest_pkt_desc = EMPTY_PKT_DESC;
+ schedulers_obj->priority_bit_mask &= ~(1ULL << priority);
+ if (pkt_descs_equal(&schedulers_obj->out_pkt_desc,
+ pkt_desc_to_remove)) {
+ schedulers_obj->out_pkt_desc = EMPTY_PKT_DESC;
+ schedulers_obj->highest_priority = 0;
+ }
+
+ /* Now that we have removed the pkt_desc_to_remove, we need to see if
+ * there is a new pkt_desc available to become the best pkt_desc for
+ * this priority level/sched_state.
+ */
+ if (sched_state->sorted_list_cnt != 0) {
+ rc = _odp_sorted_list_remove(sorted_pool, sorted_list,
+ &finish_time, &pkt_desc.word);
+ if (rc <= 0)
+ return -1;
+
+ sched_state->sorted_list_cnt--;
+ sched_state->smallest_finish_time = finish_time;
+ sched_state->smallest_pkt_desc = pkt_desc;
+ schedulers_obj->priority_bit_mask |= 1ULL << priority;
+ }
+
+ /* Finally we must see if there is a new_pkt desc that is now the
+ * best/smallest for this entire schedulers_obj - which could even be
+ * the pkt_desc we just removed from the sorted_list, or not.
+ */
+ if (schedulers_obj->priority_bit_mask == 0) {
+ schedulers_obj->out_pkt_desc = EMPTY_PKT_DESC;
+ schedulers_obj->highest_priority = 0;
+ } else {
+ /* Set the highest_priority and out_pkt_desc of the
+ * schedulers_obj. Such a pkt_desc must exist because
+ * otherwise priority_bit_mask would be 0.
+ */
+ best_priority =
+ __builtin_ctz(schedulers_obj->priority_bit_mask);
+ best_sched_state = &schedulers_obj->sched_states[best_priority];
+ best_pkt_desc = best_sched_state->smallest_pkt_desc;
+
+ schedulers_obj->highest_priority = best_priority;
+ schedulers_obj->out_pkt_desc = best_pkt_desc;
+ tm_unblock_pkt(tm_system, &best_pkt_desc);
+ }
+
+ return 0;
+}
+
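+/* Propagate a new pkt_desc up the tree of tm_nodes towards egress,
+ * alternately running the shaper and then the scheduler at each level,
+ * and stopping early if the pkt becomes delayed or blocked.  Returns 1
+ * if the pkt_desc made it all the way to become the egress pkt, 0 if
+ * it stopped along the way, and a negative code on error.
+ */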
+static int tm_propagate_pkt_desc(tm_system_t *tm_system,
+ tm_shaper_obj_t *shaper_obj,
+ pkt_desc_t *new_pkt_desc,
+ uint8_t new_priority)
+{
+ tm_schedulers_obj_t *schedulers_obj;
+ tm_node_obj_t *tm_node_obj;
+ int rc, ret_code;
+
+ /* Run shaper. */
+ tm_run_shaper(tm_system, shaper_obj, new_pkt_desc, new_priority);
+
+ tm_node_obj = shaper_obj->next_tm_node;
+ while (tm_node_obj) { /* not at egress */
+ /* Run scheduler, including priority multiplexor. */
+ new_pkt_desc = &shaper_obj->out_pkt_desc;
+ new_priority = shaper_obj->out_priority;
+ if ((!new_pkt_desc) || (new_pkt_desc->queue_num == 0))
+ return 0;
+
+ schedulers_obj = tm_node_obj->schedulers_obj;
+ rc = tm_run_scheduler(tm_system, shaper_obj, schedulers_obj,
+ new_pkt_desc, new_priority);
+
+ if (rc <= 0)
+ return rc;
+
+ /* Run shaper. */
+ new_pkt_desc = &schedulers_obj->out_pkt_desc;
+ new_priority = schedulers_obj->highest_priority;
+ shaper_obj = &tm_node_obj->shaper_obj;
+ if ((!new_pkt_desc) || (new_pkt_desc->queue_num == 0))
+ return 0;
+
+ tm_run_shaper(tm_system, shaper_obj, new_pkt_desc,
+ new_priority);
+
+ tm_node_obj = shaper_obj->next_tm_node;
+ }
+
+ new_pkt_desc = &shaper_obj->out_pkt_desc;
+ new_priority = shaper_obj->out_priority;
+ ret_code = 0;
+ if ((new_pkt_desc) && (new_pkt_desc->queue_num != 0)) {
+ tm_system->egress_pkt_desc = *new_pkt_desc;
+ ret_code = 1;
+ }
+
+ return ret_code;
+}
+
+static void tm_pkt_desc_init(pkt_desc_t *pkt_desc, odp_packet_t pkt,
+ tm_queue_obj_t *tm_queue_obj)
+{
+ tm_queue_obj->epoch++;
+ pkt_desc->queue_num = tm_queue_obj->queue_num;
+ pkt_desc->pkt_len = odp_packet_len(pkt);
+ pkt_desc->shaper_len_adjust = odp_packet_shaper_len_adjust(pkt);
+ pkt_desc->drop_eligible = odp_packet_drop_eligible(pkt);
+ pkt_desc->pkt_color = odp_packet_color(pkt);
+ pkt_desc->epoch = tm_queue_obj->epoch & 0x0F;
+}
+
+static int tm_demote_pkt_desc(tm_system_t *tm_system,
+ tm_node_obj_t *tm_node_obj,
+ tm_schedulers_obj_t *blocked_scheduler,
+ tm_shaper_obj_t *timer_shaper,
+ pkt_desc_t *demoted_pkt_desc)
+{
+ tm_schedulers_obj_t *schedulers_obj;
+ tm_shaper_obj_t *shaper_obj;
+ pkt_desc_t *new_pkt_desc;
+ uint8_t new_priority, demoted_priority;
+ int ret_code;
+
+ shaper_obj = &tm_node_obj->shaper_obj;
+ if ((!blocked_scheduler) && (!timer_shaper))
+ return 0;
+
+ if (tm_node_obj->schedulers_obj == blocked_scheduler)
+ return 0;
+
+ /* See if this first shaper_obj is delaying the demoted_pkt_desc */
+ demoted_priority = 3;
+ if (pkt_descs_equal(&shaper_obj->out_pkt_desc, demoted_pkt_desc))
+ demoted_priority = shaper_obj->out_priority;
+
+ remove_pkt_from_shaper(tm_system, shaper_obj, demoted_pkt_desc, 0);
+ if (shaper_obj == timer_shaper)
+ return 0;
+
+ tm_node_obj = shaper_obj->next_tm_node;
+
+ while (tm_node_obj) { /* not at egress */
+ schedulers_obj = tm_node_obj->schedulers_obj;
+ if (demoted_pkt_desc) {
+ remove_pkt_from_scheduler(tm_system, schedulers_obj,
+ demoted_pkt_desc,
+ demoted_priority, 0);
+ if (schedulers_obj == blocked_scheduler)
+ demoted_pkt_desc = NULL;
+ }
+
+ new_pkt_desc = &shaper_obj->out_pkt_desc;
+ new_priority = shaper_obj->out_priority;
+ if ((new_pkt_desc) && (new_pkt_desc->queue_num != 0))
+ tm_run_scheduler(tm_system, shaper_obj, schedulers_obj,
+ new_pkt_desc, new_priority);
+ else if (!demoted_pkt_desc)
+ return 0;
+
+ new_pkt_desc = &schedulers_obj->out_pkt_desc;
+ new_priority = schedulers_obj->highest_priority;
+ shaper_obj = &tm_node_obj->shaper_obj;
+
+ if (demoted_pkt_desc) {
+ if (pkt_descs_equal(&shaper_obj->out_pkt_desc,
+ demoted_pkt_desc))
+ demoted_priority = shaper_obj->out_priority;
+
+ remove_pkt_from_shaper(tm_system, shaper_obj,
+ demoted_pkt_desc, 0);
+ if (shaper_obj == timer_shaper)
+ demoted_pkt_desc = NULL;
+ }
+
+ if ((new_pkt_desc) && (new_pkt_desc->queue_num != 0))
+ tm_run_shaper(tm_system, shaper_obj, new_pkt_desc,
+ new_priority);
+ else if (!demoted_pkt_desc)
+ return 0;
+
+ tm_node_obj = shaper_obj->next_tm_node;
+ }
+
+ new_pkt_desc = &shaper_obj->out_pkt_desc;
+ new_priority = shaper_obj->out_priority;
+ ret_code = 0;
+ if ((new_pkt_desc) && (new_pkt_desc->queue_num != 0)) {
+ tm_system->egress_pkt_desc = *new_pkt_desc;
+ ret_code = 1;
+ }
+
+ return ret_code;
+}
+
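+/* After sent_pkt_desc has been transmitted, walk the path towards
+ * egress again, removing the sent pkt from each shaper and scheduler
+ * and, wherever a new pkt_desc is available, re-running them to choose
+ * a successor.
+ */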
+static int tm_consume_pkt_desc(tm_system_t *tm_system,
+ tm_shaper_obj_t *shaper_obj,
+ pkt_desc_t *new_pkt_desc,
+ uint8_t new_priority,
+ pkt_desc_t *sent_pkt_desc)
+{
+ tm_schedulers_obj_t *schedulers_obj;
+ tm_node_obj_t *tm_node_obj;
+ uint8_t sent_priority;
+ int rc, ret_code;
+
+ remove_pkt_from_shaper(tm_system, shaper_obj, sent_pkt_desc, 1);
+ if ((new_pkt_desc) && (new_pkt_desc->queue_num != 0))
+ tm_run_shaper(tm_system, shaper_obj, new_pkt_desc,
+ new_priority);
+
+ tm_node_obj = shaper_obj->next_tm_node;
+ while (tm_node_obj) { /* not at egress */
+ schedulers_obj = tm_node_obj->schedulers_obj;
+ sent_priority = schedulers_obj->highest_priority;
+ remove_pkt_from_scheduler(tm_system, schedulers_obj,
+ sent_pkt_desc, sent_priority, 1);
+
+ new_pkt_desc = &shaper_obj->out_pkt_desc;
+ new_priority = shaper_obj->out_priority;
+
+ if ((new_pkt_desc) && (new_pkt_desc->queue_num != 0)) {
+ rc = tm_run_scheduler(tm_system, shaper_obj,
+ schedulers_obj,
+ new_pkt_desc, new_priority);
+ if (rc < 0)
+ return rc;
+ }
+
+ new_pkt_desc = &schedulers_obj->out_pkt_desc;
+ new_priority = schedulers_obj->highest_priority;
+
+ shaper_obj = &tm_node_obj->shaper_obj;
+ remove_pkt_from_shaper(tm_system, shaper_obj, sent_pkt_desc, 1);
+ if ((new_pkt_desc) && (new_pkt_desc->queue_num != 0))
+ tm_run_shaper(tm_system, shaper_obj, new_pkt_desc,
+ new_priority);
+
+ tm_node_obj = shaper_obj->next_tm_node;
+ }
+
+ new_pkt_desc = &shaper_obj->out_pkt_desc;
+ new_priority = shaper_obj->out_priority;
+
+ ret_code = 0;
+ if ((new_pkt_desc) && (new_pkt_desc->queue_num != 0)) {
+ tm_system->egress_pkt_desc = *new_pkt_desc;
+ ret_code = 1;
+ }
+
+ return ret_code;
+}
+
+static int tm_consume_sent_pkt(tm_system_t *tm_system,
+ pkt_desc_t *sent_pkt_desc)
+{
+ _odp_int_pkt_queue_t _odp_int_pkt_queue;
+ tm_queue_obj_t *tm_queue_obj;
+ odp_packet_t pkt;
+ pkt_desc_t *new_pkt_desc;
+ uint32_t queue_num, pkt_len;
+ int rc;
+
+ queue_num = sent_pkt_desc->queue_num;
+ tm_queue_obj = tm_system->queue_num_tbl[queue_num];
+ pkt_len = sent_pkt_desc->pkt_len;
+ tm_queue_obj->pkts_consumed_cnt++;
+ tm_queue_cnts_decrement(tm_system, tm_queue_obj->tm_wred_node,
+ tm_queue_obj->priority, pkt_len);
+
+ /* Get the next pkt in the tm_queue, if there is one. */
+ _odp_int_pkt_queue = tm_queue_obj->_odp_int_pkt_queue;
+ rc = _odp_pkt_queue_remove(tm_system->_odp_int_queue_pool,
+ _odp_int_pkt_queue, &pkt);
+ if (rc < 0)
+ return rc;
+
+ new_pkt_desc = NULL;
+ if (0 < rc) {
+ tm_queue_obj->pkt = pkt;
+ tm_pkt_desc_init(&tm_queue_obj->in_pkt_desc, pkt, tm_queue_obj);
+ new_pkt_desc = &tm_queue_obj->in_pkt_desc;
+ tm_queue_obj->pkts_dequeued_cnt++;
+ }
+
+ rc = tm_consume_pkt_desc(tm_system, &tm_queue_obj->shaper_obj,
+ new_pkt_desc, tm_queue_obj->priority,
+ sent_pkt_desc);
+
+ return rc > 0;
+}
+
+static odp_tm_percent_t tm_queue_fullness(odp_tm_wred_params_t *wred_params,
+ tm_queue_thresholds_t *thresholds,
+ tm_queue_cnts_t *queue_cnts)
+{
+ uint64_t current_cnt, max_cnt, fullness;
+
+ if (wred_params->use_byte_fullness) {
+ current_cnt = odp_atomic_load_u64(&queue_cnts->byte_cnt);
+ max_cnt = thresholds->max_bytes;
+ } else {
+ current_cnt = odp_atomic_load_u64(&queue_cnts->pkt_cnt);
+ max_cnt = thresholds->max_pkts;
+ }
+
+ if (max_cnt == 0)
+ return 0;
+
+ fullness = (10000 * current_cnt) / max_cnt;
+ return (odp_tm_percent_t)MIN(fullness, 50000);
+}
+
+static odp_bool_t tm_local_random_drop(tm_system_t *tm_system,
+ odp_tm_wred_params_t *wred_params,
+ odp_tm_percent_t queue_fullness)
+{
+ odp_tm_percent_t min_threshold, med_threshold, first_threshold,
+ drop_prob;
+ odp_tm_percent_t med_drop_prob, max_drop_prob;
+ uint32_t denom, numer;
+
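+ /* All odp_tm_percent_t values used here are expressed in
+ * hundredths of a percent, so 10000 represents 100% fullness.
+ */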
+ if (wred_params->enable_wred == 0)
+ return 0;
+
+ min_threshold = wred_params->min_threshold;
+ med_threshold = wred_params->med_threshold;
+ first_threshold = (min_threshold != 0) ? min_threshold : med_threshold;
+ if (10000 <= queue_fullness)
+ return 1;
+ else if (queue_fullness <= first_threshold)
+ return 0;
+
+ /* Determine if we have two active thresholds, min_threshold and
+ * med_threshold or just med_threshold.
+ */
+ med_drop_prob = wred_params->med_drop_prob;
+ max_drop_prob = wred_params->max_drop_prob;
+ if (min_threshold == 0) {
+ denom = (uint32_t)(10000 - med_threshold);
+ numer = (uint32_t)max_drop_prob;
+ drop_prob =
+ (numer * (uint32_t)(queue_fullness - med_threshold)) /
+ denom;
+ } else if ((min_threshold < queue_fullness) &&
+ (queue_fullness < med_threshold)) {
+ denom = (uint32_t)(med_threshold - min_threshold);
+ numer = (uint32_t)med_drop_prob;
+ drop_prob =
+ (numer * (uint32_t)(queue_fullness - min_threshold)) /
+ denom;
+ } else { /* med_threshold <= queue_fullness. */
+ denom = (uint32_t)(10000 - med_threshold);
+ numer = (uint32_t)(max_drop_prob - med_drop_prob);
+ drop_prob = max_drop_prob -
+ ((numer * (10000 - queue_fullness)) / denom);
+ }
+
+ if (drop_prob == 0)
+ return 0;
+ else if (10000 <= drop_prob)
+ return 1;
+ else
+ return tm_random_drop(&tm_system->tm_random_data, drop_prob);
+}
+
+static odp_bool_t tm_queue_is_full(tm_queue_thresholds_t *thresholds,
+ tm_queue_cnts_t *queue_cnts)
+{
+ odp_bool_t queue_is_full;
+ uint64_t max_bytes;
+ uint32_t max_pkts;
+
+ max_bytes = thresholds->max_bytes;
+ max_pkts = thresholds->max_pkts;
+ queue_is_full = 0;
+ if (max_pkts != 0)
+ queue_is_full =
+ max_pkts <= odp_atomic_load_u64(&queue_cnts->pkt_cnt);
+
+ if (max_bytes != 0)
+ queue_is_full |=
+ max_bytes <= odp_atomic_load_u64(&queue_cnts->byte_cnt);
+
+ return queue_is_full;
+}
+
+static odp_bool_t tm_random_early_discard(tm_system_t *tm_system,
+ tm_queue_obj_t *tm_queue_obj
+ ODP_UNUSED,
+ tm_wred_node_t *tm_wred_node,
+ odp_packet_color_t pkt_color)
+{
+ tm_queue_thresholds_t *thresholds;
+ odp_tm_wred_params_t *wred_params;
+ odp_tm_percent_t fullness;
+ tm_queue_cnts_t *queue_cnts;
+
+ thresholds = tm_wred_node->threshold_params;
+ if (thresholds) {
+ wred_params = tm_wred_node->wred_params[pkt_color];
+ queue_cnts = &tm_wred_node->queue_cnts;
+ if ((!wred_params) || (wred_params->enable_wred == 0)) {
+ if (tm_queue_is_full(thresholds, queue_cnts))
+ return 1;
+ } else {
+ fullness = tm_queue_fullness(wred_params, thresholds,
+ queue_cnts);
+ if (tm_local_random_drop(tm_system, wred_params,
+ fullness))
+ return 1;
+ }
+ }
+
+ /* For each tm_wred_node between the initial and a TM egress, do a
+ * Random Early Discard check, if enabled.
+ */
+ tm_wred_node = tm_wred_node->next_tm_wred_node;
+ while (tm_wred_node) {
+ thresholds = tm_wred_node->threshold_params;
+ if (thresholds) {
+ wred_params = tm_wred_node->wred_params[pkt_color];
+ queue_cnts = &tm_wred_node->queue_cnts;
+ if ((wred_params) && wred_params->enable_wred) {
+ fullness = tm_queue_fullness(wred_params,
+ thresholds,
+ queue_cnts);
+ if (tm_local_random_drop(tm_system, wred_params,
+ fullness))
+ return 1;
+ }
+ }
+
+ tm_wred_node = tm_wred_node->next_tm_wred_node;
+ }
+
+ return 0;
+}
+
+/* Returns the current queue pkt_cnt. */
+static uint32_t tm_queue_cnts_increment(tm_system_t *tm_system,
+ tm_wred_node_t *tm_wred_node,
+ uint32_t priority,
+ uint32_t frame_len)
+{
+ tm_queue_cnts_t *queue_cnts;
+ uint32_t tm_queue_pkt_cnt;
+
+ odp_ticketlock_lock(&tm_wred_node->tm_wred_node_lock);
+ queue_cnts = &tm_wred_node->queue_cnts;
+ odp_atomic_inc_u64(&queue_cnts->pkt_cnt);
+ odp_atomic_add_u64(&queue_cnts->byte_cnt, frame_len);
+
+ tm_queue_pkt_cnt = (uint32_t)odp_atomic_load_u64(&queue_cnts->pkt_cnt);
+ odp_ticketlock_unlock(&tm_wred_node->tm_wred_node_lock);
+
+ /* For each tm_wred_node between the initial one and a TM egress,
+ * increment its queue_cnts, if enabled.
+ */
+ tm_wred_node = tm_wred_node->next_tm_wred_node;
+ while (tm_wred_node) {
+ odp_ticketlock_lock(&tm_wred_node->tm_wred_node_lock);
+ queue_cnts = &tm_wred_node->queue_cnts;
+ odp_atomic_inc_u64(&queue_cnts->pkt_cnt);
+ odp_atomic_add_u64(&queue_cnts->byte_cnt, frame_len);
+ odp_ticketlock_unlock(&tm_wred_node->tm_wred_node_lock);
+
+ tm_wred_node = tm_wred_node->next_tm_wred_node;
+ }
+
+ queue_cnts = &tm_system->total_info.queue_cnts;
+ odp_atomic_inc_u64(&queue_cnts->pkt_cnt);
+ odp_atomic_add_u64(&queue_cnts->byte_cnt, frame_len);
+
+ queue_cnts = &tm_system->priority_info[priority].queue_cnts;
+ odp_atomic_inc_u64(&queue_cnts->pkt_cnt);
+ odp_atomic_add_u64(&queue_cnts->byte_cnt, frame_len);
+
+ return tm_queue_pkt_cnt;
+}
+
+static void tm_queue_cnts_decrement(tm_system_t *tm_system,
+ tm_wred_node_t *tm_wred_node,
+ uint32_t priority,
+ uint32_t frame_len)
+{
+ tm_queue_cnts_t *queue_cnts;
+
+ odp_ticketlock_lock(&tm_wred_node->tm_wred_node_lock);
+ queue_cnts = &tm_wred_node->queue_cnts;
+ odp_atomic_dec_u64(&queue_cnts->pkt_cnt);
+ odp_atomic_sub_u64(&queue_cnts->byte_cnt, frame_len);
+ odp_ticketlock_unlock(&tm_wred_node->tm_wred_node_lock);
+
+ /* For each tm_wred_node between the initial one and a TM egress,
+ * decrement its queue_cnts, if enabled.
+ */
+ tm_wred_node = tm_wred_node->next_tm_wred_node;
+ while (tm_wred_node) {
+ odp_ticketlock_lock(&tm_wred_node->tm_wred_node_lock);
+ queue_cnts = &tm_wred_node->queue_cnts;
+ odp_atomic_dec_u64(&queue_cnts->pkt_cnt);
+ odp_atomic_sub_u64(&queue_cnts->byte_cnt, frame_len);
+ odp_ticketlock_unlock(&tm_wred_node->tm_wred_node_lock);
+ tm_wred_node = tm_wred_node->next_tm_wred_node;
+ }
+
+ queue_cnts = &tm_system->total_info.queue_cnts;
+ odp_atomic_dec_u64(&queue_cnts->pkt_cnt);
+ odp_atomic_sub_u64(&queue_cnts->byte_cnt, frame_len);
+
+ queue_cnts = &tm_system->priority_info[priority].queue_cnts;
+ odp_atomic_dec_u64(&queue_cnts->pkt_cnt);
+ odp_atomic_sub_u64(&queue_cnts->byte_cnt, frame_len);
+}
+
+static int tm_enqueue(tm_system_t *tm_system,
+ tm_queue_obj_t *tm_queue_obj,
+ odp_packet_t pkt)
+{
+ input_work_item_t work_item;
+ odp_packet_color_t pkt_color;
+ tm_wred_node_t *initial_tm_wred_node;
+ odp_bool_t drop_eligible, drop;
+ uint32_t frame_len, pkt_depth;
+ int rc;
+ odp_packet_hdr_t *pkt_hdr = odp_packet_hdr(pkt);
+
+ /* If we're coming from an ordered queue and this pkt is not yet in
+ * order, record the event and wait until the order is resolved.
+ */
+ if (queue_tm_reorder(&tm_queue_obj->tm_qentry,
+ &pkt_hdr->buf_hdr))
+ return 0;
+
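+ /* The first enqueue releases the TM system thread, which waits in
+ * odp_barrier_wait until traffic actually arrives.
+ */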
+ if (tm_system->first_enq == 0) {
+ odp_barrier_wait(&tm_system->tm_system_barrier);
+ tm_system->first_enq = 1;
+ }
+
+ pkt_color = odp_packet_color(pkt);
+ drop_eligible = odp_packet_drop_eligible(pkt);
+
+ initial_tm_wred_node = tm_queue_obj->tm_wred_node;
+ if (drop_eligible) {
+ drop = tm_random_early_discard(tm_system, tm_queue_obj,
+ initial_tm_wred_node, pkt_color);
+ if (drop)
+ return -1;
+ }
+
+ work_item.tm_queue_obj = tm_queue_obj;
+ work_item.pkt = pkt;
+ rc = input_work_queue_append(tm_system, &work_item);
+ if (rc < 0) {
+ ODP_DBG("%s work queue full\n", __func__);
+ return rc;
+ }
+
+ frame_len = odp_packet_len(pkt);
+ pkt_depth = tm_queue_cnts_increment(tm_system, initial_tm_wred_node,
+ tm_queue_obj->priority, frame_len);
+ return pkt_depth;
+}
+
+static void tm_send_pkt(tm_system_t *tm_system,
+ uint32_t max_consume_sends ODP_UNUSED)
+{
+ tm_queue_obj_t *tm_queue_obj;
+ odp_packet_t odp_pkt;
+ pkt_desc_t *pkt_desc;
+ uint32_t cnt, queue_num;
+
+ /* for (cnt = 1; cnt < max_consume_sends; cnt++) @todo */
+ for (cnt = 1; cnt < 1000; cnt++) {
+ pkt_desc = &tm_system->egress_pkt_desc;
+ queue_num = pkt_desc->queue_num;
+ if (queue_num == 0)
+ return;
+
+ tm_system->egress_pkt_desc = EMPTY_PKT_DESC;
+ tm_queue_obj = tm_system->queue_num_tbl[queue_num];
+ odp_pkt = tm_queue_obj->pkt;
+ if (odp_pkt == INVALID_PKT)
+ return;
+
+ tm_system->egress.egress_fcn(odp_pkt);
+ tm_queue_obj->sent_pkt = tm_queue_obj->pkt;
+ tm_queue_obj->sent_pkt_desc = tm_queue_obj->in_pkt_desc;
+ tm_queue_obj->pkt = INVALID_PKT;
+ tm_queue_obj->in_pkt_desc = EMPTY_PKT_DESC;
+ tm_consume_sent_pkt(tm_system, &tm_queue_obj->sent_pkt_desc);
+ tm_queue_obj->sent_pkt = INVALID_PKT;
+ tm_queue_obj->sent_pkt_desc = EMPTY_PKT_DESC;
+ if (tm_system->egress_pkt_desc.queue_num == 0)
+ return;
+ }
+}
+
+static int tm_process_input_work_queue(tm_system_t *tm_system,
+ input_work_queue_t *input_work_queue,
+ uint32_t pkts_to_process)
+{
+ input_work_item_t work_item;
+ tm_queue_obj_t *tm_queue_obj;
+ tm_shaper_obj_t *shaper_obj;
+ odp_packet_t pkt;
+ pkt_desc_t *pkt_desc;
+ uint32_t cnt;
+ int rc;
+
+ for (cnt = 1; cnt <= pkts_to_process; cnt++) {
+ rc = input_work_queue_remove(input_work_queue, &work_item);
+ if (rc < 0)
+ return rc;
+
+ tm_queue_obj = work_item.tm_queue_obj;
+ pkt = work_item.pkt;
+ tm_queue_obj->pkts_rcvd_cnt++;
+ if (tm_queue_obj->pkt != INVALID_PKT) {
+ /* If the tm_queue_obj already has a pkt to work with,
+ * then just add this new pkt to the associated
+ * _odp_int_pkt_queue.
+ */
+ rc = _odp_pkt_queue_append(
+ tm_system->_odp_int_queue_pool,
+ tm_queue_obj->_odp_int_pkt_queue, pkt);
+ if (rc < 0)
+ return rc;
+
+ tm_queue_obj->pkts_enqueued_cnt++;
+ } else {
+ /* If the tm_queue_obj doesn't have a pkt to work
+ * with, then make this one the head pkt.
+ */
+ tm_queue_obj->pkt = pkt;
+ tm_pkt_desc_init(&tm_queue_obj->in_pkt_desc, pkt,
+ tm_queue_obj);
+ pkt_desc = &tm_queue_obj->in_pkt_desc;
+ shaper_obj = &tm_queue_obj->shaper_obj;
+ rc = tm_propagate_pkt_desc(tm_system, shaper_obj,
+ pkt_desc,
+ tm_queue_obj->priority);
+ if (0 < rc) {
+ /* A pkt_desc reached the egress spigot, so return
+ * and let the caller send it.
+ */
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int tm_process_expired_timers(tm_system_t *tm_system,
+ _odp_timer_wheel_t _odp_int_timer_wheel,
+ uint64_t current_cycles ODP_UNUSED)
+{
+ tm_shaper_obj_t *shaper_obj;
+ tm_queue_obj_t *tm_queue_obj;
+ pkt_desc_t *pkt_desc;
+ uint64_t timer_context;
+ uint32_t work_done, cnt, queue_num, timer_seq;
+ uint8_t priority;
+ void *ptr;
+
+ work_done = 0;
+ for (cnt = 1; cnt <= 4; cnt++) {
+ ptr = _odp_timer_wheel_next_expired(_odp_int_timer_wheel);
+ if (!ptr)
+ return work_done;
+
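+ /* Unpack the context that tm_delay_pkt encoded into this timer
+ * wheel entry: bits 63..32 are timer_seq and bits 31..4 are
+ * queue_num.
+ */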
+ timer_context = (uint64_t)ptr;
+ queue_num = (timer_context & 0xFFFFFFFF) >> 4;
+ timer_seq = timer_context >> 32;
+ tm_queue_obj = tm_system->queue_num_tbl[queue_num];
+
+ if ((!tm_queue_obj) ||
+ (tm_queue_obj->timer_reason == NO_CALLBACK) ||
+ (!tm_queue_obj->timer_shaper) ||
+ (tm_queue_obj->timer_seq != timer_seq)) {
+ if (tm_queue_obj &&
+ (tm_queue_obj->timer_cancels_outstanding != 0))
+ tm_queue_obj->timer_cancels_outstanding--;
+ return work_done;
+ }
+
+ shaper_obj = tm_queue_obj->timer_shaper;
+ pkt_desc = &shaper_obj->in_pkt_desc;
+ priority = shaper_obj->input_priority;
+
+ delete_timer(tm_system, tm_queue_obj, 0);
+
+ tm_propagate_pkt_desc(tm_system, shaper_obj,
+ pkt_desc, priority);
+ work_done++;
+ if (tm_system->egress_pkt_desc.queue_num != 0)
+ tm_send_pkt(tm_system, 4);
+ }
+
+ return work_done;
+}
+
+static void *tm_system_thread(void *arg)
+{
+ _odp_timer_wheel_t _odp_int_timer_wheel;
+ input_work_queue_t *input_work_queue;
+ tm_system_t *tm_system;
+ uint64_t current_cycles;
+ uint32_t destroying, work_queue_cnt, timer_cnt;
+ int rc;
+
+ odp_init_local(ODP_THREAD_WORKER);
+ tm_system = arg;
+ _odp_int_timer_wheel = tm_system->_odp_int_timer_wheel;
+ input_work_queue = tm_system->input_work_queue;
+
+ /* Wait here until we have seen the first enqueue operation. */
+ odp_barrier_wait(&tm_system->tm_system_barrier);
+
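+ /* Rather than reading a real clock, this implementation advances a
+ * synthetic cycle counter by a fixed amount per unit of work done,
+ * and feeds it to the timer wheel and the shapers.
+ */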
+ current_cycles = 100;
+ destroying = odp_atomic_load_u32(&tm_system->destroying);
+ while (destroying == 0) {
+ tm_system->current_cycles = current_cycles;
+ rc = _odp_timer_wheel_curr_time_update(_odp_int_timer_wheel,
+ current_cycles);
+ if (0 < rc) {
+ /* Process a batch of expired timers - each of which
+ * could cause a pkt to egress the tm system.
+ */
+ timer_cnt = 1;
+ rc = tm_process_expired_timers(tm_system,
+ _odp_int_timer_wheel,
+ current_cycles);
+ current_cycles += 16;
+ } else {
+ timer_cnt =
+ _odp_timer_wheel_count(_odp_int_timer_wheel);
+ }
+
+ work_queue_cnt =
+ odp_atomic_load_u32(&input_work_queue->queue_cnt);
+ if (work_queue_cnt != 0) {
+ rc = tm_process_input_work_queue(tm_system,
+ input_work_queue, 1);
+ current_cycles += 8;
+ if (tm_system->egress_pkt_desc.queue_num != 0) {
+ tm_send_pkt(tm_system, 4);
+ current_cycles += 8;
+ }
+ }
+
+ current_cycles += 16;
+ tm_system->is_idle = (timer_cnt == 0) &&
+ (work_queue_cnt == 0);
+ destroying = odp_atomic_load_u32(&tm_system->destroying);
+ }
+
+ odp_barrier_wait(&tm_system->tm_system_destroy_barrier);
+ return NULL;
+}
+
+odp_bool_t odp_tm_is_idle(odp_tm_t odp_tm)
+{
+ tm_system_t *tm_system;
+
+ tm_system = GET_TM_SYSTEM(odp_tm);
+ return tm_system->is_idle;
+}
+
+void odp_tm_capability_init(odp_tm_capability_t *capability)
+{
+ memset(capability, 0, sizeof(odp_tm_capability_t));
+}
+
+void odp_tm_params_init(odp_tm_params_t *params)
+{
+ memset(params, 0, sizeof(odp_tm_params_t));
+}
+
+odp_tm_t odp_tm_create(const char *name, odp_tm_params_t *params)
+{
+ _odp_int_name_t name_tbl_id;
+ tm_system_t *tm_system;
+ odp_bool_t create_fail;
+ pthread_t pthread;
+ odp_tm_t odp_tm;
+ uint64_t current_cycles;
+ uint32_t malloc_len, max_num_queues, max_queued_pkts, max_timers;
+ uint32_t max_sorted_lists;
+ int rc;
+
+ /* Allocate tm_system_t record. */
+ odp_ticketlock_lock(&tm_create_lock);
+ tm_system = tm_system_alloc();
+ if (!tm_system) {
+ odp_ticketlock_unlock(&tm_create_lock);
+ return ODP_TM_INVALID;
+ }
+
+ odp_tm = MAKE_ODP_TM_HANDLE(tm_system);
+ name_tbl_id = _odp_int_name_tbl_add(name, ODP_TM_HANDLE, odp_tm);
+ if (name_tbl_id == ODP_INVALID_NAME) {
+ tm_system_free(tm_system);
+ odp_ticketlock_unlock(&tm_create_lock);
+ return ODP_TM_INVALID;
+ }
+
+ tm_system->name_tbl_id = name_tbl_id;
+ memcpy(&tm_system->egress, &params->egress, sizeof(odp_tm_egress_t));
+ memcpy(&tm_system->capability, &params->capability,
+ sizeof(odp_tm_capability_t));
+
+ malloc_len = params->capability.max_tm_queues
+ * sizeof(tm_queue_obj_t *);
+ tm_system->queue_num_tbl = malloc(malloc_len);
+ if (!tm_system->queue_num_tbl) {
+ _odp_int_name_tbl_delete(name_tbl_id);
+ tm_system_free(tm_system);
+ odp_ticketlock_unlock(&tm_create_lock);
+ return ODP_TM_INVALID;
+ }
+ memset(tm_system->queue_num_tbl, 0, malloc_len);
+ tm_system->next_queue_num = 1;
+
+ tm_init_random_data(&tm_system->tm_random_data);
+
+ max_sorted_lists = 2 * params->capability.max_tm_queues;
+ max_num_queues = params->capability.max_tm_queues;
+ max_queued_pkts = 16 * params->capability.max_tm_queues;
+ max_timers = 2 * params->capability.max_tm_queues;
+ current_cycles = 10;
+ create_fail = 0;
+
+ tm_system->_odp_int_sorted_pool = _ODP_INT_SORTED_POOL_INVALID;
+ tm_system->_odp_int_queue_pool = _ODP_INT_QUEUE_POOL_INVALID;
+ tm_system->_odp_int_timer_wheel = _ODP_INT_TIMER_WHEEL_INVALID;
+
+ odp_ticketlock_init(&tm_system->tm_system_lock);
+ odp_barrier_init(&tm_system->tm_system_barrier, 2);
+ odp_atomic_init_u32(&tm_system->destroying, 0);
+
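+ /* Create each of the underlying subsystems in turn, accumulating
+ * any failure in create_fail so that a single cleanup path below
+ * can undo whatever was successfully created.
+ */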
+ tm_system->_odp_int_sorted_pool = _odp_sorted_pool_create(
+ max_sorted_lists);
+ create_fail |= tm_system->_odp_int_sorted_pool
+ == _ODP_INT_SORTED_POOL_INVALID;
+
+ if (create_fail == 0) {
+ tm_system->_odp_int_queue_pool = _odp_queue_pool_create(
+ max_num_queues, max_queued_pkts);
+ create_fail |= tm_system->_odp_int_queue_pool
+ == _ODP_INT_QUEUE_POOL_INVALID;
+ }
+
+ if (create_fail == 0) {
+ tm_system->_odp_int_timer_wheel = _odp_timer_wheel_create(
+ max_timers, current_cycles);
+ create_fail |= tm_system->_odp_int_timer_wheel
+ == _ODP_INT_TIMER_WHEEL_INVALID;
+ }
+
+ if (create_fail == 0) {
+ tm_system->input_work_queue = input_work_queue_create();
+ create_fail |= !tm_system->input_work_queue;
+ }
+
+ if (create_fail == 0) {
+ rc = pthread_create(&pthread, NULL, tm_system_thread,
+ tm_system);
+ create_fail |= rc != 0;
+ }
+
+ if (create_fail) {
+ _odp_int_name_tbl_delete(name_tbl_id);
+ if (tm_system->input_work_queue)
+ input_work_queue_destroy(tm_system->input_work_queue);
+
+ if (tm_system->_odp_int_sorted_pool
+ != _ODP_INT_SORTED_POOL_INVALID)
+ _odp_sorted_pool_destroy(
+ tm_system->_odp_int_sorted_pool);
+
+ if (tm_system->_odp_int_queue_pool !=
+ _ODP_INT_QUEUE_POOL_INVALID)
+ _odp_queue_pool_destroy(
+ tm_system->_odp_int_queue_pool);
+
+ if (tm_system->_odp_int_timer_wheel
+ != _ODP_INT_TIMER_WHEEL_INVALID)
+ _odp_timer_wheel_destroy(
+ tm_system->_odp_int_timer_wheel);
+
+ tm_system_free(tm_system);
+ odp_ticketlock_unlock(&tm_create_lock);
+ return ODP_TM_INVALID;
+ }
+
+ odp_ticketlock_unlock(&tm_create_lock);
+ return odp_tm;
+}
+
+odp_tm_t odp_tm_find(const char *name ODP_UNUSED,
+ odp_tm_capability_t *capability ODP_UNUSED)
+{
+ return ODP_TM_INVALID; /* @todo Not yet implemented. */
+}
+
+int odp_tm_capability(odp_tm_t odp_tm, odp_tm_capability_t *capability)
+{
+ tm_system_t *tm_system;
+
+ tm_system = GET_TM_SYSTEM(odp_tm);
+ memcpy(capability, &tm_system->capability, sizeof(odp_tm_capability_t));
+ return 0;
+}
+
+int odp_tm_destroy(odp_tm_t odp_tm)
+{
+ tm_system_t *tm_system;
+
+ tm_system = GET_TM_SYSTEM(odp_tm);
+
+ /* First mark the tm_system as being in the destroying state so that
+ * all new pkts are prevented from coming in.
+ */
+ odp_barrier_init(&tm_system->tm_system_destroy_barrier, 2);
+ odp_atomic_inc_u32(&tm_system->destroying);
+ odp_barrier_wait(&tm_system->tm_system_destroy_barrier);
+
+ input_work_queue_destroy(tm_system->input_work_queue);
+ _odp_sorted_pool_destroy(tm_system->_odp_int_sorted_pool);
+ _odp_queue_pool_destroy(tm_system->_odp_int_queue_pool);
+ _odp_timer_wheel_destroy(tm_system->_odp_int_timer_wheel);
+
+ tm_system_free(tm_system);
+ return 0;
+}
+
+void odp_tm_shaper_params_init(odp_tm_shaper_params_t *params)
+{
+ memset(params, 0, sizeof(odp_tm_shaper_params_t));
+}
+
+odp_tm_shaper_t odp_tm_shaper_create(const char *name,
+ odp_tm_shaper_params_t *params)
+{
+ tm_shaper_params_t *profile_obj;
+ odp_tm_shaper_t shaper_handle;
+ _odp_int_name_t name_tbl_id;
+
+ profile_obj = tm_common_profile_create(name, TM_SHAPER_PROFILE,
+ sizeof(tm_shaper_params_t),
+ &shaper_handle,
+ &name_tbl_id);
+ if (!profile_obj)
+ return ODP_TM_INVALID;
+
+ tm_shaper_params_cvt_to(params, profile_obj);
+ profile_obj->name_tbl_id = name_tbl_id;
+ return shaper_handle;
+}
+
+int odp_tm_shaper_params_read(odp_tm_shaper_t shaper_profile,
+ odp_tm_shaper_params_t *params)
+{
+ tm_shaper_params_t *profile_obj;
+
+ profile_obj = tm_get_profile_params(shaper_profile, TM_SHAPER_PROFILE);
+ if (!profile_obj)
+ return -1;
+
+ tm_shaper_params_cvt_from(profile_obj, params);
+ return 0;
+}
+
+int odp_tm_shaper_params_update(odp_tm_shaper_t shaper_profile,
+ odp_tm_shaper_params_t *params)
+{
+ tm_shaper_params_t *profile_obj;
+
+ profile_obj = tm_get_profile_params(shaper_profile, TM_SHAPER_PROFILE);
+ if (!profile_obj)
+ return -1;
+
+ tm_shaper_params_cvt_to(params, profile_obj);
+ return 0;
+}
+
+odp_tm_shaper_t odp_tm_shaper_lookup(const char *name)
+{
+ return _odp_int_name_tbl_lookup(name, ODP_TM_SHAPER_PROFILE_HANDLE);
+}
+
+void odp_tm_sched_params_init(odp_tm_sched_params_t *params)
+{
+ memset(params, 0, sizeof(odp_tm_sched_params_t));
+}
+
+odp_tm_sched_t odp_tm_sched_create(const char *name,
+ odp_tm_sched_params_t *params)
+{
+ odp_tm_sched_mode_t sched_mode;
+ tm_sched_params_t *profile_obj;
+ _odp_int_name_t name_tbl_id;
+ odp_tm_sched_t sched_handle;
+ uint32_t priority, weight;
+
+ profile_obj = tm_common_profile_create(name, TM_SCHED_PROFILE,
+ sizeof(tm_sched_params_t),
+ &sched_handle, &name_tbl_id);
+ if (!profile_obj)
+ return ODP_TM_INVALID;
+
+ profile_obj->name_tbl_id = name_tbl_id;
+
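+ /* Note: each sched_weights entry is assumed to be nonzero here,
+ * since a zero weight would divide by zero below.
+ */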
+ for (priority = 0; priority < ODP_TM_MAX_PRIORITIES; priority++) {
+ sched_mode = params->sched_modes[priority];
+ weight = params->sched_weights[priority];
+
+ profile_obj->sched_modes[priority] = sched_mode;
+ profile_obj->inverted_weights[priority] = 0x10000 / weight;
+ }
+
+ return sched_handle;
+}
+
+int odp_tm_sched_params_read(odp_tm_sched_t sched_profile,
+ odp_tm_sched_params_t *params)
+{
+ odp_tm_sched_params_t *sched_params;
+
+ sched_params = tm_get_profile_params(sched_profile, TM_SCHED_PROFILE);
+ if (!sched_params)
+ return -1;
+
+ *params = *sched_params;
+ return 0;
+}
+
+int odp_tm_sched_params_update(odp_tm_sched_t sched_profile,
+ odp_tm_sched_params_t *params)
+{
+ odp_tm_sched_params_t *sched_params;
+
+ sched_params = tm_get_profile_params(sched_profile, TM_SCHED_PROFILE);
+ if (!sched_params)
+ return -1;
+
+ *sched_params = *params;
+ return 0;
+}
+
+odp_tm_sched_t odp_tm_sched_lookup(const char *name)
+{
+ return _odp_int_name_tbl_lookup(name, ODP_TM_SCHED_PROFILE_HANDLE);
+}
+
+void odp_tm_threshold_params_init(odp_tm_threshold_params_t *params)
+{
+ memset(params, 0, sizeof(odp_tm_threshold_params_t));
+}
+
+odp_tm_threshold_t odp_tm_threshold_create(const char *name,
+ odp_tm_threshold_params_t *params)
+{
+ tm_queue_thresholds_t *profile_obj;
+ odp_tm_threshold_t threshold_handle;
+ _odp_int_name_t name_tbl_id;
+
+ profile_obj =
+ tm_common_profile_create(name, TM_THRESHOLD_PROFILE,
+ sizeof(odp_tm_threshold_params_t),
+ &threshold_handle,
+ &name_tbl_id);
+ if (!profile_obj)
+ return ODP_TM_INVALID;
+
+ profile_obj->max_pkts = params->enable_max_pkts ? params->max_pkts : 0;
+ profile_obj->max_bytes =
+ params->enable_max_bytes ? params->max_bytes : 0;
+ profile_obj->name_tbl_id = name_tbl_id;
+ return threshold_handle;
+}
+
+int odp_tm_thresholds_params_read(odp_tm_threshold_t threshold_profile,
+ odp_tm_threshold_params_t *params)
+{
+ tm_queue_thresholds_t *threshold_params;
+
+ threshold_params = tm_get_profile_params(threshold_profile,
+ TM_THRESHOLD_PROFILE);
+ if (!threshold_params)
+ return -1;
+
+ params->max_pkts = threshold_params->max_pkts;
+ params->max_bytes = threshold_params->max_bytes;
+ params->enable_max_pkts = threshold_params->max_pkts != 0;
+ params->enable_max_bytes = threshold_params->max_bytes != 0;
+ return 0;
+}
+
+int odp_tm_thresholds_params_update(odp_tm_threshold_t threshold_profile,
+ odp_tm_threshold_params_t *params)
+{
+ tm_queue_thresholds_t *profile_obj;
+
+ profile_obj = tm_get_profile_params(threshold_profile,
+ TM_THRESHOLD_PROFILE);
+ if (!profile_obj)
+ return -1;
+
+ profile_obj->max_pkts = params->enable_max_pkts ? params->max_pkts : 0;
+ profile_obj->max_bytes =
+ params->enable_max_bytes ? params->max_bytes : 0;
+ return 0;
+}
+
+odp_tm_threshold_t odp_tm_thresholds_lookup(const char *name)
+{
+ return _odp_int_name_tbl_lookup(name, ODP_TM_THRESHOLD_PROFILE_HANDLE);
+}
+
+void odp_tm_wred_params_init(odp_tm_wred_params_t *params)
+{
+ memset(params, 0, sizeof(odp_tm_wred_params_t));
+}
+
+odp_tm_wred_t odp_tm_wred_create(const char *name, odp_tm_wred_params_t *params)
+{
+ odp_tm_wred_params_t *profile_obj;
+ odp_tm_wred_t wred_handle;
+ _odp_int_name_t name_tbl_id;
+
+ profile_obj = tm_common_profile_create(name, TM_WRED_PROFILE,
+ sizeof(odp_tm_wred_params_t),
+ &wred_handle,
+ &name_tbl_id);
+ if (!profile_obj)
+ return ODP_TM_INVALID;
+
+ *profile_obj = *params;
+ return wred_handle;
+}
+
+int odp_tm_wred_params_read(odp_tm_wred_t wred_profile,
+ odp_tm_wred_params_t *params)
+{
+ odp_tm_wred_params_t *wred_params;
+
+ wred_params = tm_get_profile_params(wred_profile, TM_WRED_PROFILE);
+ if (!wred_params)
+ return -1;
+
+ *params = *wred_params;
+ return 0;
+}
+
+int odp_tm_wred_params_update(odp_tm_wred_t wred_profile,
+ odp_tm_wred_params_t *params)
+{
+ odp_tm_wred_params_t *wred_params;
+
+ wred_params = tm_get_profile_params(wred_profile, TM_WRED_PROFILE);
+ if (!wred_params)
+ return -1;
+
+ *wred_params = *params;
+ return 0;
+}
+
+odp_tm_wred_t odp_tm_wred_lookup(const char *name)
+{
+ return _odp_int_name_tbl_lookup(name, ODP_TM_WRED_PROFILE_HANDLE);
+}
+
+void odp_tm_node_params_init(odp_tm_node_params_t *params)
+{
+ memset(params, 0, sizeof(odp_tm_node_params_t));
+}
+
+odp_tm_node_t odp_tm_node_create(odp_tm_t odp_tm, const char *name,
+ odp_tm_node_params_t *params)
+{
+ _odp_int_sorted_list_t sorted_list;
+ tm_schedulers_obj_t *schedulers_obj;
+ _odp_int_name_t name_tbl_id;
+ tm_wred_node_t *tm_wred_node;
+ tm_node_obj_t *tm_node_obj;
+ odp_tm_node_t odp_tm_node;
+ odp_tm_wred_t wred_profile;
+ tm_system_t *tm_system;
+ uint32_t num_priorities, priority, schedulers_obj_len, color;
+
+ /* Allocate a tm_node_obj_t record. */
+ tm_system = GET_TM_SYSTEM(odp_tm);
+ tm_node_obj = malloc(sizeof(tm_node_obj_t));
+ if (!tm_node_obj)
+ return ODP_TM_INVALID;
+
+ tm_wred_node = malloc(sizeof(tm_wred_node_t));
+ if (!tm_wred_node) {
+ free(tm_node_obj);
+ return ODP_TM_INVALID;
+ }
+
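+	/* The scheduler object carries one tm_sched_state_t per priority
+	 * level, laid out as a trailing array after the base struct, hence
+	 * the combined length computed here. */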
+ num_priorities = tm_system->capability.max_priority + 1;
+ schedulers_obj_len = sizeof(tm_schedulers_obj_t)
+ + (sizeof(tm_sched_state_t) * num_priorities);
+ schedulers_obj = malloc(schedulers_obj_len);
+ if (!schedulers_obj) {
+ free(tm_wred_node);
+ free(tm_node_obj);
+ return ODP_TM_INVALID;
+ }
+
+ odp_tm_node = MAKE_ODP_TM_NODE(tm_node_obj);
+ name_tbl_id = ODP_INVALID_NAME;
+ if ((name) && (name[0] != '\0')) {
+ name_tbl_id = _odp_int_name_tbl_add(name, ODP_TM_NODE_HANDLE,
+ odp_tm_node);
+ if (name_tbl_id == ODP_INVALID_NAME) {
+ free(schedulers_obj);
+ free(tm_wred_node);
+ free(tm_node_obj);
+ return ODP_TM_INVALID;
+ }
+ }
+
+ memset(tm_node_obj, 0, sizeof(tm_node_obj_t));
+ memset(tm_wred_node, 0, sizeof(tm_wred_node_t));
+ memset(schedulers_obj, 0, schedulers_obj_len);
+ tm_node_obj->name_tbl_id = name_tbl_id;
+ tm_node_obj->max_fanin = params->max_fanin;
+ tm_node_obj->level = params->level;
+ tm_node_obj->tm_idx = tm_system->tm_idx;
+ tm_node_obj->tm_wred_node = tm_wred_node;
+ tm_node_obj->schedulers_obj = schedulers_obj;
+ odp_ticketlock_init(&tm_wred_node->tm_wred_node_lock);
+
+ schedulers_obj->num_priorities = num_priorities;
+ for (priority = 0; priority < num_priorities; priority++) {
+ sorted_list = _odp_sorted_list_create(
+ tm_system->_odp_int_sorted_pool,
+ params->max_fanin);
+ schedulers_obj->sched_states[priority].sorted_list =
+ sorted_list;
+ }
+
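+	/* Attach any referenced profiles while holding the system lock so
+	 * that concurrent profile updates cannot race with this node's
+	 * setup. */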
+ odp_ticketlock_lock(&tm_system->tm_system_lock);
+ if (params->shaper_profile != ODP_TM_INVALID)
+ tm_shaper_config_set(tm_system, params->shaper_profile,
+ &tm_node_obj->shaper_obj);
+
+ if (params->threshold_profile != ODP_TM_INVALID)
+ tm_wred_node->threshold_params = tm_get_profile_params(
+ params->threshold_profile,
+ TM_THRESHOLD_PROFILE);
+
+ for (color = 0; color < ODP_NUM_PACKET_COLORS; color++) {
+ wred_profile = params->wred_profile[color];
+ if (wred_profile != ODP_TM_INVALID)
+ tm_wred_node->wred_params[color] =
+ tm_get_profile_params(wred_profile,
+ TM_WRED_PROFILE);
+ }
+
+ tm_node_obj->magic_num = TM_NODE_MAGIC_NUM;
+ tm_node_obj->shaper_obj.enclosing_entity = tm_node_obj;
+ tm_node_obj->shaper_obj.in_tm_node_obj = 1;
+ tm_node_obj->schedulers_obj->enclosing_entity = tm_node_obj;
+
+ odp_ticketlock_unlock(&tm_system->tm_system_lock);
+ return odp_tm_node;
+}
+
+int odp_tm_node_shaper_config(odp_tm_node_t tm_node,
+ odp_tm_shaper_t shaper_profile)
+{
+ tm_node_obj_t *tm_node_obj;
+ tm_system_t *tm_system;
+
+ tm_node_obj = GET_TM_NODE_OBJ(tm_node);
+ if (!tm_node_obj)
+ return -1;
+
+ tm_system = odp_tm_systems[tm_node_obj->tm_idx];
+ if (!tm_system)
+ return -2;
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ tm_shaper_config_set(tm_system, shaper_profile,
+ &tm_node_obj->shaper_obj);
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return 0;
+}
+
+int odp_tm_node_sched_config(odp_tm_node_t tm_node,
+ odp_tm_node_t tm_fan_in_node,
+ odp_tm_sched_t sched_profile)
+{
+ tm_shaper_obj_t *child_shaper_obj;
+ tm_node_obj_t *tm_node_obj, *child_tm_node_obj;
+
+ tm_node_obj = GET_TM_NODE_OBJ(tm_node);
+ if (!tm_node_obj)
+ return -1;
+
+ child_tm_node_obj = GET_TM_NODE_OBJ(tm_fan_in_node);
+ if (!child_tm_node_obj)
+ return -1;
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ child_shaper_obj = &child_tm_node_obj->shaper_obj;
+ child_shaper_obj->sched_params =
+ tm_get_profile_params(sched_profile,
+ TM_SCHED_PROFILE);
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return 0;
+}
+
+int odp_tm_node_threshold_config(odp_tm_node_t tm_node,
+ odp_tm_threshold_t thresholds_profile)
+{
+ tm_node_obj_t *tm_node_obj;
+
+ tm_node_obj = GET_TM_NODE_OBJ(tm_node);
+ if (!tm_node_obj)
+ return -1;
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ tm_node_obj->tm_wred_node->threshold_params = tm_get_profile_params(
+ thresholds_profile, TM_THRESHOLD_PROFILE);
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return 0;
+}
+
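+/* ODP_PACKET_ALL_COLORS fans the same WRED profile out to every per-color
+ * slot; otherwise only the addressed color is updated. */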
+int odp_tm_node_wred_config(odp_tm_node_t tm_node, odp_packet_color_t pkt_color,
+ odp_tm_wred_t wred_profile)
+{
+ tm_node_obj_t *tm_node_obj;
+ uint32_t color;
+ int rc;
+
+ tm_node_obj = GET_TM_NODE_OBJ(tm_node);
+ if (!tm_node_obj)
+ return -1;
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ rc = 0;
+ if (pkt_color == ODP_PACKET_ALL_COLORS) {
+ for (color = 0; color < ODP_NUM_PACKET_COLORS; color++)
+ tm_node_obj->tm_wred_node->wred_params[color] =
+ tm_get_profile_params(wred_profile,
+ TM_WRED_PROFILE);
+ } else if (pkt_color < ODP_NUM_PACKET_COLORS) {
+ tm_node_obj->tm_wred_node->wred_params[pkt_color] =
+ tm_get_profile_params(wred_profile,
+ TM_WRED_PROFILE);
+ } else {
+ rc = -1;
+ }
+
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return rc;
+}
+
+odp_tm_node_t odp_tm_node_lookup(odp_tm_t odp_tm ODP_UNUSED, const char *name)
+{
+ return _odp_int_name_tbl_lookup(name, ODP_TM_NODE_HANDLE);
+}
+
+void odp_tm_queue_params_init(odp_tm_queue_params_t *params)
+{
+ memset(params, 0, sizeof(odp_tm_queue_params_t));
+}
+
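+/* A minimal queue-creation sketch (handle names hypothetical):
+ *
+ *   odp_tm_queue_params_t qp;
+ *   odp_tm_queue_params_init(&qp);
+ *   qp.priority = 0;
+ *   odp_tm_queue_t tmq = odp_tm_queue_create(odp_tm, &qp);
+ *   odp_tm_queue_connect(tmq, node);
+ */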
+odp_tm_queue_t odp_tm_queue_create(odp_tm_t odp_tm,
+ odp_tm_queue_params_t *params)
+{
+ _odp_int_pkt_queue_t _odp_int_pkt_queue;
+ tm_queue_obj_t *tm_queue_obj;
+ tm_wred_node_t *tm_wred_node;
+ odp_tm_queue_t odp_tm_queue;
+ odp_tm_wred_t wred_profile;
+ tm_system_t *tm_system;
+ uint32_t color;
+
+ /* Allocate a tm_queue_obj_t record. */
+ tm_system = GET_TM_SYSTEM(odp_tm);
+ tm_queue_obj = malloc(sizeof(tm_queue_obj_t));
+ if (!tm_queue_obj)
+ return ODP_TM_INVALID;
+
+ tm_wred_node = malloc(sizeof(tm_wred_node_t));
+ if (!tm_wred_node) {
+ free(tm_queue_obj);
+ return ODP_TM_INVALID;
+ }
+
+ _odp_int_pkt_queue = _odp_pkt_queue_create(
+ tm_system->_odp_int_queue_pool);
+ if (_odp_int_pkt_queue == _ODP_INT_PKT_QUEUE_INVALID) {
+ free(tm_wred_node);
+ free(tm_queue_obj);
+ return ODP_TM_INVALID;
+ }
+
+ odp_tm_queue = MAKE_ODP_TM_QUEUE(tm_queue_obj);
+ memset(tm_queue_obj, 0, sizeof(tm_queue_obj_t));
+ memset(tm_wred_node, 0, sizeof(tm_wred_node_t));
+ tm_queue_obj->priority = params->priority;
+ tm_queue_obj->tm_idx = tm_system->tm_idx;
+ tm_queue_obj->queue_num = tm_system->next_queue_num++;
+ tm_queue_obj->tm_wred_node = tm_wred_node;
+ tm_queue_obj->_odp_int_pkt_queue = _odp_int_pkt_queue;
+ tm_queue_obj->pkt = INVALID_PKT;
+ odp_ticketlock_init(&tm_wred_node->tm_wred_node_lock);
+
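+	/* Mark the internal queue entry with the implementation-private
+	 * ODP_QUEUE_TYPE_TM type and install the re-enqueue callbacks, so
+	 * packets sent to this queue are routed back into the TM system. */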
+ tm_queue_obj->tm_qentry.s.type = ODP_QUEUE_TYPE_TM;
+ tm_queue_obj->tm_qentry.s.enqueue = queue_tm_reenq;
+ tm_queue_obj->tm_qentry.s.enqueue_multi = queue_tm_reenq_multi;
+
+ tm_system->queue_num_tbl[tm_queue_obj->queue_num] = tm_queue_obj;
+ odp_ticketlock_lock(&tm_system->tm_system_lock);
+ if (params->shaper_profile != ODP_TM_INVALID)
+ tm_shaper_config_set(tm_system, params->shaper_profile,
+ &tm_queue_obj->shaper_obj);
+
+ if (params->threshold_profile != ODP_TM_INVALID)
+ tm_wred_node->threshold_params = tm_get_profile_params(
+ params->threshold_profile,
+ TM_THRESHOLD_PROFILE);
+
+ for (color = 0; color < ODP_NUM_PACKET_COLORS; color++) {
+ wred_profile = params->wred_profile[color];
+ if (wred_profile != ODP_TM_INVALID)
+ tm_wred_node->wred_params[color] =
+ tm_get_profile_params(wred_profile,
+ TM_WRED_PROFILE);
+ }
+
+ tm_queue_obj->magic_num = TM_QUEUE_MAGIC_NUM;
+ tm_queue_obj->shaper_obj.enclosing_entity = tm_queue_obj;
+ tm_queue_obj->shaper_obj.in_tm_node_obj = 0;
+
+ odp_ticketlock_unlock(&tm_system->tm_system_lock);
+ return odp_tm_queue;
+}
+
+int odp_tm_queue_shaper_config(odp_tm_queue_t tm_queue,
+ odp_tm_shaper_t shaper_profile)
+{
+ tm_queue_obj_t *tm_queue_obj;
+ tm_system_t *tm_system;
+
+ tm_queue_obj = GET_TM_QUEUE_OBJ(tm_queue);
+ if (!tm_queue_obj)
+ return -1;
+
+ tm_system = odp_tm_systems[tm_queue_obj->tm_idx];
+ if (!tm_system)
+ return -2;
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ tm_shaper_config_set(tm_system, shaper_profile,
+ &tm_queue_obj->shaper_obj);
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return 0;
+}
+
+int odp_tm_queue_sched_config(odp_tm_node_t tm_node,
+ odp_tm_queue_t tm_fan_in_queue,
+ odp_tm_sched_t sched_profile)
+{
+ tm_shaper_obj_t *child_shaper_obj;
+ tm_queue_obj_t *child_tm_queue_obj;
+ tm_node_obj_t *tm_node_obj;
+
+ tm_node_obj = GET_TM_NODE_OBJ(tm_node);
+ if (!tm_node_obj)
+ return -1;
+
+ child_tm_queue_obj = GET_TM_QUEUE_OBJ(tm_fan_in_queue);
+ if (!child_tm_queue_obj)
+ return -1;
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ child_shaper_obj = &child_tm_queue_obj->shaper_obj;
+ child_shaper_obj->sched_params =
+ tm_get_profile_params(sched_profile,
+ TM_SCHED_PROFILE);
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return 0;
+}
+
+int odp_tm_queue_threshold_config(odp_tm_queue_t tm_queue,
+ odp_tm_threshold_t thresholds_profile)
+{
+ tm_queue_obj_t *tm_queue_obj;
+
+ tm_queue_obj = GET_TM_QUEUE_OBJ(tm_queue);
+ if (!tm_queue_obj)
+ return -1;
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ tm_queue_obj->tm_wred_node->threshold_params = tm_get_profile_params(
+ thresholds_profile, TM_THRESHOLD_PROFILE);
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return 0;
+}
+
+int odp_tm_queue_wred_config(odp_tm_queue_t tm_queue,
+ odp_packet_color_t pkt_color,
+ odp_tm_wred_t wred_profile)
+{
+ tm_queue_obj_t *tm_queue_obj;
+ uint32_t color;
+ int rc;
+
+ tm_queue_obj = GET_TM_QUEUE_OBJ(tm_queue);
+ if (!tm_queue_obj)
+ return -1;
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ rc = 0;
+ if (pkt_color == ODP_PACKET_ALL_COLORS) {
+ for (color = 0; color < ODP_NUM_PACKET_COLORS; color++)
+ tm_queue_obj->tm_wred_node->wred_params[color] =
+ tm_get_profile_params(wred_profile,
+ TM_WRED_PROFILE);
+ } else if (pkt_color < ODP_NUM_PACKET_COLORS) {
+ tm_queue_obj->tm_wred_node->wred_params[pkt_color] =
+ tm_get_profile_params(wred_profile,
+ TM_WRED_PROFILE);
+ } else {
+ rc = -1;
+ }
+
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return rc;
+}
+
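+/* Note that dst_tm_node may resolve to a NULL object (e.g. when connecting
+ * toward the egress/root), in which case this node simply terminates the
+ * chain. */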
+int odp_tm_node_connect(odp_tm_node_t src_tm_node, odp_tm_node_t dst_tm_node)
+{
+ tm_wred_node_t *src_tm_wred_node, *dst_tm_wred_node;
+ tm_node_obj_t *src_tm_node_obj, *dst_tm_node_obj;
+
+ src_tm_node_obj = GET_TM_NODE_OBJ(src_tm_node);
+ dst_tm_node_obj = GET_TM_NODE_OBJ(dst_tm_node);
+ if (!src_tm_node_obj)
+ return -1;
+
+ if (dst_tm_node_obj) {
+ src_tm_wred_node = src_tm_node_obj->tm_wred_node;
+ dst_tm_wred_node = dst_tm_node_obj->tm_wred_node;
+ src_tm_wred_node->next_tm_wred_node = dst_tm_wred_node;
+ }
+
+ src_tm_node_obj->shaper_obj.next_tm_node = dst_tm_node_obj;
+ return 0;
+}
+
+int odp_tm_queue_connect(odp_tm_queue_t tm_queue, odp_tm_node_t dst_tm_node)
+{
+ tm_wred_node_t *src_tm_wred_node, *dst_tm_wred_node;
+ tm_queue_obj_t *src_tm_queue_obj;
+ tm_node_obj_t *dst_tm_node_obj;
+
+ src_tm_queue_obj = GET_TM_QUEUE_OBJ(tm_queue);
+ dst_tm_node_obj = GET_TM_NODE_OBJ(dst_tm_node);
+ if ((!src_tm_queue_obj) || (!dst_tm_node_obj))
+ return -1;
+
+ src_tm_wred_node = src_tm_queue_obj->tm_wred_node;
+ dst_tm_wred_node = dst_tm_node_obj->tm_wred_node;
+ src_tm_wred_node->next_tm_wred_node = dst_tm_wred_node;
+
+ src_tm_queue_obj->shaper_obj.next_tm_node = dst_tm_node_obj;
+ return 0;
+}
+
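+/* The distinct negative return values below are placeholder error codes
+ * (see the @todo notes). Assuming the usual ODP enqueue convention that a
+ * failed enqueue does not consume the packet, a typical send path is:
+ *
+ *   if (odp_tm_enq(tmq, pkt) < 0)
+ *       odp_packet_free(pkt);
+ */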
+int odp_tm_enq(odp_tm_queue_t tm_queue, odp_packet_t pkt)
+{
+ tm_queue_obj_t *tm_queue_obj;
+ tm_system_t *tm_system;
+
+ tm_queue_obj = GET_TM_QUEUE_OBJ(tm_queue);
+ if (!tm_queue_obj)
+ return -1; /* @todo fix magic number */
+
+ tm_system = odp_tm_systems[tm_queue_obj->tm_idx];
+ if (!tm_system)
+ return -2; /* @todo fix magic number */
+
+ if (odp_atomic_load_u32(&tm_system->destroying))
+ return -6; /* @todo fix magic number */
+
+ return tm_enqueue(tm_system, tm_queue_obj, pkt);
+}
+
+int odp_tm_enq_with_cnt(odp_tm_queue_t tm_queue, odp_packet_t pkt)
+{
+ tm_queue_obj_t *tm_queue_obj;
+ tm_system_t *tm_system;
+
+ tm_queue_obj = GET_TM_QUEUE_OBJ(tm_queue);
+ if (!tm_queue_obj)
+ return -1;
+
+ tm_system = odp_tm_systems[tm_queue_obj->tm_idx];
+ if (!tm_system)
+ return -2;
+
+ if (odp_atomic_load_u32(&tm_system->destroying))
+ return -6;
+
+	/* tm_enqueue() returns a negative error code on failure, otherwise
+	 * the resulting packet count for this queue. */
+	return tm_enqueue(tm_system, tm_queue_obj, pkt);
+}
+
+#ifdef NOT_USED /* @todo use or delete */
+static uint32_t odp_tm_input_work_queue_fullness(odp_tm_t odp_tm ODP_UNUSED)
+{
+ input_work_queue_t *input_work_queue;
+ tm_system_t *tm_system;
+ uint32_t queue_cnt, fullness;
+
+ tm_system = GET_TM_SYSTEM(odp_tm);
+ input_work_queue = tm_system->input_work_queue;
+ queue_cnt = odp_atomic_load_u32(&input_work_queue->queue_cnt);
+ fullness = (100 * queue_cnt) / INPUT_WORK_RING_SIZE;
+ return fullness;
+}
+#endif
+
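+/* Copies a snapshot of the queue counters into the user-visible info
+ * struct. Threshold limits are reported only when requested via
+ * ODP_TM_QUERY_THRESHOLDS and a threshold profile is actually attached
+ * (otherwise -1 is returned). */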
+static int tm_queue_info_copy(tm_queue_info_t *queue_info, uint32_t query_flags,
+ odp_tm_queue_info_t *info)
+{
+ tm_queue_thresholds_t *threshold_params;
+
+ memset(info, 0, sizeof(odp_tm_queue_info_t));
+ info->total_pkt_cnt =
+ odp_atomic_load_u64(&queue_info->queue_cnts.pkt_cnt);
+ info->total_byte_cnt =
+ odp_atomic_load_u64(&queue_info->queue_cnts.byte_cnt);
+ info->total_pkt_cnt_valid = 1;
+ info->total_byte_cnt_valid = 1;
+ info->approx_byte_cnt = 0;
+
+ if (query_flags & ODP_TM_QUERY_THRESHOLDS) {
+ threshold_params = queue_info->threshold_params;
+ if (!threshold_params)
+ return -1;
+
+ info->max_pkt_cnt = threshold_params->max_pkts;
+ info->max_byte_cnt = threshold_params->max_bytes;
+ info->max_pkt_cnt_valid = info->max_pkt_cnt != 0;
+ info->max_byte_cnt_valid = info->max_byte_cnt != 0;
+ }
+
+ return 0;
+}
+
+int odp_tm_queue_query(odp_tm_queue_t tm_queue, uint32_t query_flags,
+ odp_tm_queue_info_t *info)
+{
+ tm_queue_info_t queue_info;
+ tm_queue_obj_t *tm_queue_obj;
+ tm_wred_node_t *tm_wred_node;
+
+ tm_queue_obj = GET_TM_QUEUE_OBJ(tm_queue);
+ if (!tm_queue_obj)
+ return -1;
+
+ tm_wred_node = tm_queue_obj->tm_wred_node;
+ if (!tm_wred_node)
+ return -2;
+
+ queue_info.threshold_params = tm_wred_node->threshold_params;
+ queue_info.queue_cnts = tm_wred_node->queue_cnts;
+ return tm_queue_info_copy(&queue_info, query_flags, info);
+}
+
+int odp_tm_priority_query(odp_tm_t odp_tm, uint8_t priority,
+ uint32_t query_flags, odp_tm_queue_info_t *info)
+{
+ tm_queue_info_t queue_info;
+ tm_system_t *tm_system;
+
+	tm_system = GET_TM_SYSTEM(odp_tm);
+
+	/* Guard against an out-of-range priority before indexing the
+	 * per-priority info table. */
+	if (tm_system->capability.max_priority < priority)
+		return -1;
+
+	queue_info = tm_system->priority_info[priority];
+ return tm_queue_info_copy(&queue_info, query_flags, info);
+}
+
+int odp_tm_total_query(odp_tm_t odp_tm, uint32_t query_flags,
+ odp_tm_queue_info_t *info)
+{
+ tm_queue_info_t queue_info;
+ tm_system_t *tm_system;
+
+ tm_system = GET_TM_SYSTEM(odp_tm);
+ queue_info = tm_system->total_info;
+ return tm_queue_info_copy(&queue_info, query_flags, info);
+}
+
+int odp_tm_priority_threshold_config(odp_tm_t odp_tm, uint8_t priority,
+ odp_tm_threshold_t thresholds_profile)
+{
+ tm_system_t *tm_system;
+
+	tm_system = GET_TM_SYSTEM(odp_tm);
+	if (tm_system->capability.max_priority < priority)
+		return -1;
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ tm_system->priority_info[priority].threshold_params =
+ tm_get_profile_params(thresholds_profile,
+ TM_THRESHOLD_PROFILE);
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return 0;
+}
+
+int odp_tm_total_threshold_config(odp_tm_t odp_tm,
+ odp_tm_threshold_t thresholds_profile)
+{
+ tm_system_t *tm_system;
+
+ tm_system = GET_TM_SYSTEM(odp_tm);
+
+ odp_ticketlock_lock(&tm_profile_lock);
+ tm_system->total_info.threshold_params = tm_get_profile_params(
+ thresholds_profile, TM_THRESHOLD_PROFILE);
+ odp_ticketlock_unlock(&tm_profile_lock);
+ return 0;
+}
+
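+/* Dumps TM system and per-queue statistics via ODP_DBG. The per-queue loop
+ * starts at 1 since queue number 0 is not handed out by
+ * odp_tm_queue_create() in this implementation. */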
+void odp_tm_stats_print(odp_tm_t odp_tm)
+{
+ input_work_queue_t *input_work_queue;
+ tm_queue_obj_t *tm_queue_obj;
+ tm_system_t *tm_system;
+ uint32_t queue_num, max_queue_num;
+
+ tm_system = GET_TM_SYSTEM(odp_tm);
+ input_work_queue = tm_system->input_work_queue;
+
+ ODP_DBG("odp_tm_stats_print - tm_system=0x%lX tm_idx=%u\n", odp_tm,
+ tm_system->tm_idx);
+ ODP_DBG(" input_work_queue size=%u current cnt=%u peak cnt=%u\n",
+ INPUT_WORK_RING_SIZE, input_work_queue->queue_cnt,
+ input_work_queue->peak_cnt);
+ ODP_DBG(" input_work_queue enqueues=%lu dequeues=%lu fail_cnt=%lu\n",
+ input_work_queue->total_enqueues,
+ input_work_queue->total_dequeues,
+ input_work_queue->enqueue_fail_cnt);
+ ODP_DBG(" green_cnt=%lu yellow_cnt=%lu red_cnt=%lu\n",
+ tm_system->shaper_green_cnt,
+ tm_system->shaper_yellow_cnt,
+ tm_system->shaper_red_cnt);
+
+ _odp_pkt_queue_stats_print(tm_system->_odp_int_queue_pool);
+ _odp_timer_wheel_stats_print(tm_system->_odp_int_timer_wheel);
+ _odp_sorted_list_stats_print(tm_system->_odp_int_sorted_pool);
+
+ max_queue_num = tm_system->next_queue_num;
+ for (queue_num = 1; queue_num < max_queue_num; queue_num++) {
+		tm_queue_obj = tm_system->queue_num_tbl[queue_num];
+		if (!tm_queue_obj)
+			continue;
+
+ ODP_DBG("queue_num=%u priority=%u rcvd=%u enqueued=%u "
+ "dequeued=%u consumed=%u\n",
+ queue_num,
+ tm_queue_obj->priority,
+ tm_queue_obj->pkts_rcvd_cnt,
+ tm_queue_obj->pkts_enqueued_cnt,
+ tm_queue_obj->pkts_dequeued_cnt,
+ tm_queue_obj->pkts_consumed_cnt);
+ }
+}
+
+void odp_tm_periodic_update(void)
+{
+ return; /* Nothing to be done here for this implementation. */
+}
+
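+/* One-time global initialization. The tm_first_enq barrier is sized for
+ * two participants - presumably the thread issuing the first enqueue and
+ * the TM system's service thread. */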
+int odp_tm_init_global(void)
+{
+ odp_ticketlock_init(&tm_create_lock);
+ odp_ticketlock_init(&tm_profile_lock);
+ odp_barrier_init(&tm_first_enq, 2);
+
+ return 0;
+}