Message ID | 1472608455-31488-3-git-send-email-forrest.shi@linaro.org
---|---
State | New
Hi Matias, can you please review this flow lookup patch?

Maxim.

On 08/31/16 04:54, forrest.shi@linaro.org wrote:
> From: Xuelin Shi <forrest.shi@linaro.org>
>
> increase the hash size to accomodate more entries.
> pre-compute the hash keys of possible ip destinations in a subnet.
>
> Signed-off-by: Xuelin Shi <forrest.shi@linaro.org>
> ---
>  example/l3fwd/odp_l3fwd.c    |   3 -
>  example/l3fwd/odp_l3fwd_db.c | 160 ++++++++++++++++++++++++++++++-------------
>  example/l3fwd/odp_l3fwd_db.h |  35 ++++++----
>  3 files changed, 131 insertions(+), 67 deletions(-)
>
> diff --git a/example/l3fwd/odp_l3fwd.c b/example/l3fwd/odp_l3fwd.c
> index 0998614..44778b0 100644
> --- a/example/l3fwd/odp_l3fwd.c
> +++ b/example/l3fwd/odp_l3fwd.c
> @@ -1061,9 +1061,6 @@ int main(int argc, char **argv)
>          for (i = 0; i < nb_worker; i++)
>                  odph_odpthreads_join(&thread_tbl[i]);
>
> -        /* release resource on exit */
> -        destroy_fwd_db();
> -
>          /* if_names share a single buffer, so only one free */
>          free(args->if_names[0]);
>
> diff --git a/example/l3fwd/odp_l3fwd_db.c b/example/l3fwd/odp_l3fwd_db.c
> index b3a237f..4731237 100644
> --- a/example/l3fwd/odp_l3fwd_db.c
> +++ b/example/l3fwd/odp_l3fwd_db.c
> @@ -150,7 +150,7 @@ char *mac_addr_str(char *b, odph_ethaddr_t *mac)
>   */
>  typedef struct flow_entry_s {
>          ipv4_tuple5_t key;              /**< match key */
> -        struct flow_entry_s *next;      /**< next entry on the list */
> +        struct flow_entry_s *next;      /**< next entry in the bucket */
>          fwd_db_entry_t *fwd_entry;      /**< entry info in db */
>  } flow_entry_t;
>
> @@ -158,59 +158,96 @@ typedef struct flow_entry_s {
>   * Flow cache table bucket
>   */
>  typedef struct flow_bucket_s {
> -        odp_spinlock_t  lock;   /**< Bucket lock*/
> -        flow_entry_t    *next;  /**< Pointer to first flow entry in bucket*/
> +        odp_rwlock_t    lock;   /**< Bucket lock*/
> +        flow_entry_t    *next;  /**< First flow entry in bucket*/
>  } flow_bucket_t;
>
>  /**
>   * Flow hash table, fast lookup cache
>   */
>  typedef struct flow_table_s {
> -        flow_bucket_t *bucket;
> -        uint32_t count;
> +        odp_rwlock_t flow_lock; /**< flow table lock*/
> +        flow_entry_t *flows;    /**< flow store */
> +        flow_bucket_t *bucket;  /**< bucket store */
> +        uint32_t bkt_cnt;
> +        uint32_t flow_cnt;
> +        uint32_t next_flow;     /**< next available flow in the store */
>  } flow_table_t;
>
>  static flow_table_t fwd_lookup_cache;
>
> -void init_fwd_hash_cache(void)
> +static void create_fwd_hash_cache(void)
>  {
>          odp_shm_t hash_shm;
>          flow_bucket_t *bucket;
> -        uint32_t bucket_count;
> +        flow_entry_t *flows;
> +        uint32_t bucket_count, flow_count, size;
>          uint32_t i;
>
> -        bucket_count = FWD_DEF_BUCKET_COUNT;
> +        flow_count = FWD_MAX_FLOW_COUNT;
> +        bucket_count = flow_count / FWD_DEF_BUCKET_ENTRIES;
>
>          /*Reserve memory for Routing hash table*/
> -        hash_shm = odp_shm_reserve("route_table",
> -                                   sizeof(flow_bucket_t) * bucket_count,
> -                                   ODP_CACHE_LINE_SIZE, 0);
> +        size = sizeof(flow_bucket_t) * bucket_count +
> +                sizeof(flow_entry_t) * flow_count;
> +        hash_shm = odp_shm_reserve("flow_table", size, ODP_CACHE_LINE_SIZE, 0);
>
>          bucket = odp_shm_addr(hash_shm);
>          if (!bucket) {
> -                EXAMPLE_ERR("Error: shared mem alloc failed.\n");
> -                exit(-1);
> +                /* Try the second time with small request */
> +                flow_count /= 4;
> +                bucket_count = flow_count / FWD_DEF_BUCKET_ENTRIES;
> +                size = sizeof(flow_bucket_t) * bucket_count +
> +                        sizeof(flow_entry_t) * flow_count;
> +                hash_shm = odp_shm_reserve("flow_table", size,
> +                                           ODP_CACHE_LINE_SIZE, 0);
> +                bucket = odp_shm_addr(hash_shm);
> +                if (!bucket) {
> +                        EXAMPLE_ERR("Error: shared mem alloc failed.\n");
> +                        exit(-1);
> +                }
>          }
>
> +        size = sizeof(flow_bucket_t) * bucket_count;
> +        flows = (flow_entry_t *)((char *)bucket + size);
> +
>          fwd_lookup_cache.bucket = bucket;
> -        fwd_lookup_cache.count = bucket_count;
> +        fwd_lookup_cache.bkt_cnt = bucket_count;
> +        fwd_lookup_cache.flows = flows;
> +        fwd_lookup_cache.flow_cnt = flow_count;
>
> -        /*Initialize Locks*/
> +        /*Initialize bucket locks*/
>          for (i = 0; i < bucket_count; i++) {
>                  bucket = &fwd_lookup_cache.bucket[i];
> -                odp_spinlock_init(&bucket->lock);
> +                odp_rwlock_init(&bucket->lock);
>                  bucket->next = NULL;
>          }
> +
> +        memset(flows, 0, sizeof(flow_entry_t) * flow_count);
> +        odp_rwlock_init(&fwd_lookup_cache.flow_lock);
> +        fwd_lookup_cache.next_flow = 0;
> +}
> +
> +static inline flow_entry_t *get_new_flow(void)
> +{
> +        uint32_t next;
> +        flow_entry_t *flow = NULL;
> +
> +        odp_rwlock_write_lock(&fwd_lookup_cache.flow_lock);
> +        next = fwd_lookup_cache.next_flow;
> +        if (next < fwd_lookup_cache.flow_cnt) {
> +                flow = &fwd_lookup_cache.flows[next];
> +                fwd_lookup_cache.next_flow++;
> +        }
> +        odp_rwlock_write_unlock(&fwd_lookup_cache.flow_lock);
> +
> +        return flow;
>  }
>
>  static inline
>  int match_key_flow(ipv4_tuple5_t *key, flow_entry_t *flow)
>  {
> -        if (key->src_ip == flow->key.src_ip &&
> -            key->dst_ip == flow->key.dst_ip &&
> -            key->src_port == flow->key.src_port &&
> -            key->dst_port == flow->key.dst_port &&
> -            key->proto == flow->key.proto)
> +        if (key->hi64 == flow->key.hi64 && key->lo64 == flow->key.lo64)
>                  return 1;
>
>          return 0;
> @@ -221,10 +258,12 @@ flow_entry_t *lookup_fwd_cache(ipv4_tuple5_t *key,
>                                 flow_bucket_t *bucket)
>  {
>          flow_entry_t *rst;
>
> +        odp_rwlock_read_lock(&bucket->lock);
>          for (rst = bucket->next; rst != NULL; rst = rst->next) {
>                  if (match_key_flow(key, rst))
>                          break;
>          }
> +        odp_rwlock_read_unlock(&bucket->lock);
>
>          return rst;
>  }
> @@ -239,22 +278,58 @@ flow_entry_t *insert_fwd_cache(ipv4_tuple5_t *key,
>          if (!entry)
>                  return NULL;
>
> -        flow = malloc(sizeof(flow_entry_t));
> +        flow = get_new_flow();
> +        if (!flow)
> +                return NULL;
> +
>          flow->key = *key;
>          flow->fwd_entry = entry;
>
> -        odp_spinlock_lock(&bucket->lock);
> -        if (!bucket->next) {
> -                bucket->next = flow;
> -        } else {
> +        odp_rwlock_write_lock(&bucket->lock);
> +        if (bucket->next)
>                  flow->next = bucket->next;
> -                bucket->next = flow;
> -        }
> -        odp_spinlock_unlock(&bucket->lock);
> +        bucket->next = flow;
> +        odp_rwlock_write_unlock(&bucket->lock);
>
>          return flow;
>  }
>
> +void init_fwd_hash_cache(void)
> +{
> +        fwd_db_entry_t *entry;
> +        flow_entry_t *flow;
> +        flow_bucket_t *bucket;
> +        uint64_t hash;
> +        uint32_t i, nb_hosts;
> +        ipv4_tuple5_t key;
> +
> +        create_fwd_hash_cache();
> +
> +        /**
> +         * warm up the lookup cache with possible hosts.
> +         * with millions flows, save significant time during runtime.
> +         */
> +        memset(&key, 0, sizeof(key));
> +        for (entry = fwd_db->list; NULL != entry; entry = entry->next) {
> +                nb_hosts = 1 << (32 - entry->subnet.depth);
> +                for (i = 0; i < nb_hosts; i++) {
> +                        key.dst_ip = entry->subnet.addr + i;
> +                        hash = l3fwd_calc_hash(&key);
> +                        hash &= fwd_lookup_cache.bkt_cnt - 1;
> +                        bucket = &fwd_lookup_cache.bucket[hash];
> +                        flow = lookup_fwd_cache(&key, bucket);
> +                        if (flow)
> +                                return;
> +
> +                        flow = insert_fwd_cache(&key, bucket, entry);
> +                        if (!flow)
> +                                goto out;
> +                }
> +        }
> +out:
> +        return;
> +}
> +
>  /** Global pointer to fwd db */
>  fwd_db_t *fwd_db;
>
> @@ -382,35 +457,22 @@ void dump_fwd_db(void)
>          printf("\n");
>  }
>
> -void destroy_fwd_db(void)
> -{
> -        flow_bucket_t *bucket;
> -        flow_entry_t *flow, *tmp;
> -        uint32_t i;
> -
> -        for (i = 0; i < fwd_lookup_cache.count; i++) {
> -                bucket = &fwd_lookup_cache.bucket[i];
> -                flow = bucket->next;
> -                odp_spinlock_lock(&bucket->lock);
> -                while (flow) {
> -                        tmp = flow->next;
> -                        free(flow);
> -                        flow = tmp;
> -                }
> -                odp_spinlock_unlock(&bucket->lock);
> -        }
> -}
> -
>  fwd_db_entry_t *find_fwd_db_entry(ipv4_tuple5_t *key)
>  {
>          fwd_db_entry_t *entry;
>          flow_entry_t *flow;
>          flow_bucket_t *bucket;
>          uint64_t hash;
> +        ipv4_tuple5_t newkey;
> +
> +        newkey.hi64 = 0;
> +        newkey.lo64 = 0;
> +        newkey.dst_ip = key->dst_ip;
> +        key = &newkey;
>
>          /* first find in cache */
>          hash = l3fwd_calc_hash(key);
> -        hash &= fwd_lookup_cache.count - 1;
> +        hash &= fwd_lookup_cache.bkt_cnt - 1;
>          bucket = &fwd_lookup_cache.bucket[hash];
>          flow = lookup_fwd_cache(key, bucket);
>          if (flow)
> diff --git a/example/l3fwd/odp_l3fwd_db.h b/example/l3fwd/odp_l3fwd_db.h
> index d9f6d61..1b24f1a 100644
> --- a/example/l3fwd/odp_l3fwd_db.h
> +++ b/example/l3fwd/odp_l3fwd_db.h
> @@ -19,14 +19,14 @@ extern "C" {
>  #define MAX_STRING  32
>
>  /**
> - * Default number of flows
> + * Max number of flows
>   */
> -#define FWD_DEF_FLOW_COUNT  100000
> +#define FWD_MAX_FLOW_COUNT  (1 << 22)
>
>  /**
> - * Default Hash bucket number
> + * Default hash entries in a bucket
>   */
> -#define FWD_DEF_BUCKET_COUNT  (FWD_DEF_FLOW_COUNT / 8)
> +#define FWD_DEF_BUCKET_ENTRIES  4
>
>  /**
>   * IP address range (subnet)
> @@ -40,12 +40,22 @@ typedef struct ip_addr_range_s {
>   * TCP/UDP flow
>   */
>  typedef struct ipv4_tuple5_s {
> -        uint32_t src_ip;
> -        uint32_t dst_ip;
> -        uint16_t src_port;
> -        uint16_t dst_port;
> -        uint8_t  proto;
> -} ipv4_tuple5_t;
> +        union {
> +                struct {
> +                        int32_t src_ip;
> +                        int32_t dst_ip;
> +                        int16_t src_port;
> +                        int16_t dst_port;
> +                        int8_t  proto;
> +                        int8_t  pad1;
> +                        int16_t pad2;
> +                };
> +                struct {
> +                        int64_t hi64;
> +                        int64_t lo64;
> +                };
> +        };
> +} ipv4_tuple5_t ODP_ALIGNED_CACHE;
>
>  /**
>   * Forwarding data base entry
> @@ -116,11 +126,6 @@ void dump_fwd_db_entry(fwd_db_entry_t *entry);
>  void dump_fwd_db(void);
>
>  /**
> - * Destroy the forwarding database
> - */
> -void destroy_fwd_db(void);
> -
> -/**
>   * Find a matching forwarding database entry
>   *
>   * @param key  ipv4 tuple
> --
> 2.7.4
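A note on the match_key_flow() change above: the reworked ipv4_tuple5_t packs the 5-tuple plus two explicit pad fields into exactly 16 bytes (4 + 4 + 2 + 2 + 1 + 1 + 2) and overlays them with two 64-bit words, so a flow match becomes two integer compares instead of five field compares. This is only sound if the pad bytes are identical on both sides, which is why the patch zeroes every key before filling it in. A minimal standalone sketch of the idea (illustrative only, not part of the patch; tuple5_t is a hypothetical stand-in name):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors the patch's ipv4_tuple5_t layout (without ODP_ALIGNED_CACHE):
     * 4 + 4 + 2 + 2 + 1 + 1 + 2 = 16 bytes, overlaid by two 64-bit words.
     */
    typedef struct {
            union {
                    struct {
                            int32_t src_ip;
                            int32_t dst_ip;
                            int16_t src_port;
                            int16_t dst_port;
                            int8_t  proto;
                            int8_t  pad1;
                            int16_t pad2;
                    };
                    struct {
                            int64_t hi64;
                            int64_t lo64;
                    };
            };
    } tuple5_t;

    int main(void)
    {
            tuple5_t a, b;

            /* Zeroing is mandatory: pad1/pad2 take part in the compare. */
            memset(&a, 0, sizeof(a));
            memset(&b, 0, sizeof(b));
            a.dst_ip = b.dst_ip = 0x0a000001; /* 10.0.0.1 */
            a.proto = b.proto = 17;           /* UDP */

            /* Same two-word compare as the patch's match_key_flow(). */
            printf("match: %d (sizeof = %zu)\n",
                   a.hi64 == b.hi64 && a.lo64 == b.lo64, sizeof(tuple5_t));
            return 0;
    }

The patch maintains the same invariant: init_fwd_hash_cache() memsets its key once before the loop, and find_fwd_db_entry() clears hi64/lo64 before copying in dst_ip, so pad bytes never differ between two keys for the same flow.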
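The warm-up in init_fwd_hash_cache() is also worth quantifying: a route entry of prefix depth d pre-hashes 2^(32 - d) destination hosts, and the masking step (hash &= bkt_cnt - 1) works only because the bucket count stays a power of two (FWD_MAX_FLOW_COUNT is 1 << 22, FWD_DEF_BUCKET_ENTRIES is 4, and both properties survive the /= 4 fallback). A small back-of-the-envelope sketch (not in the patch):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Back-of-the-envelope warm-up cost: a route of prefix depth d makes
     * init_fwd_hash_cache() pre-hash 2^(32 - d) destination addresses.
     * FWD_MAX_FLOW_COUNT is 1 << 22 (4194304), so any route shorter than
     * /10 cannot be fully pre-populated: insert_fwd_cache() returns NULL
     * once get_new_flow() exhausts the flow store, and the loop bails out.
     */
    int main(void)
    {
            uint32_t depth;

            for (depth = 8; depth <= 24; depth += 8)
                    printf("/%" PRIu32 ": %" PRIu32 " hosts pre-hashed\n",
                           depth, (uint32_t)1 << (32 - depth));
            return 0;
    }

In other words, a single /8 route alone implies 16M hosts, which exceeds the 4M-entry flow store; the warm-up then stops pre-populating, and lookups for the remaining hosts fall back to the regular forwarding database search.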
merged.

On 09/02/16 18:23, Elo, Matias (Nokia - FI/Espoo) wrote:
> Just nitpicking here, but there is an extra ' From: Xuelin Shi <forrest.shi@linaro.org>' in all commit messages.
>
> You should also include the version tag in all patches (e.g. [PATCHv9 3/3]). This way one can apply all the patches in the right order with a single 'git am <patch_dir>/*' call.
>
> There's one typo in the commit message: accomodate -> accommodate.
>
> Reviewed-and-tested-by: Matias Elo <matias.elo@nokia.com>
>
>> [...]
Looks like this was merged too soon. It fails to compile with clang:

Making all in l3fwd
make[2]: Entering directory '/home/bill/linaro/petrisched/example/l3fwd'
  CC       odp_l3fwd-odp_l3fwd.o
  CC       odp_l3fwd-odp_l3fwd_db.o
odp_l3fwd_db.c:212:10: error: cast from 'char *' to 'flow_entry_t *' (aka 'struct flow_entry_s *') increases required alignment from 1 to 64 [-Werror,-Wcast-align]
        flows = (flow_entry_t *)((char *)bucket + size);
                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1 error generated.
Makefile:735: recipe for target 'odp_l3fwd-odp_l3fwd_db.o' failed
make[2]: *** [odp_l3fwd-odp_l3fwd_db.o] Error 1
make[2]: Leaving directory '/home/bill/linaro/petrisched/example/l3fwd'

On Mon, Sep 5, 2016 at 2:37 PM, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
> merged.
>
> [...]
uh.. some previous version had the clang issue and I was sure that it was fixed.
Will send a patch, since it's very trivial.

Maxim.

On 5 September 2016 at 16:34, Bill Fischofer <bill.fischofer@linaro.org> wrote:
> Looks like this was merged too soon. It fails to compile with clang:
>
> odp_l3fwd_db.c:212:10: error: cast from 'char *' to 'flow_entry_t *' (aka 'struct flow_entry_s *') increases required alignment from 1 to 64 [-Werror,-Wcast-align]
>
> [...]
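For reference, the usual way to quiet -Wcast-align in a spot like this is to route the conversion through void *, which clang does not flag. Whether that is what the follow-up patch actually did is an assumption; treat the following as a sketch only. It is justified here only because odp_shm_reserve() was asked for ODP_CACHE_LINE_SIZE alignment and the bucket array that precedes the flow store works out to a whole number of cache lines:

    /* Hypothetical sketch of the trivial fix, not the actual follow-up
     * patch: clang's -Wcast-align warns on char * -> flow_entry_t *,
     * but not on casts from void *. The intermediate void * is valid
     * only because the shm block is cache-line aligned and 'size'
     * (the bucket array) is a multiple of ODP_CACHE_LINE_SIZE.
     */
    size = sizeof(flow_bucket_t) * bucket_count;
    flows = (flow_entry_t *)(void *)((char *)bucket + size);

An equivalent alternative is to compute the address as a uintptr_t and cast that; either way the compiler stops seeing a direct under-aligned-to-over-aligned pointer cast.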
The previous version does not include the "hash fixing" patch. Sorry to confuse you.

On 6 September 2016 at 03:04, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
> uh.. some previous version had the clang issue and I was sure that it was fixed.
> Will send a patch, since it's very trivial.
>
> [...]