Message ID | 1438938017-6663-1-git-send-email-balakrishna.garapati@linaro.org |
---|---|
State | New |
Headers | show |
On 08/07/15 12:00, Balakrishna.Garapati wrote: > Signed-off-by: Balakrishna.Garapati <balakrishna.garapati@linaro.org> > --- > Made updates to print recv stats based on mode > > example/generator/odp_generator.c | 81 ++++++++++++++++++++++++++++++++------- > 1 file changed, 67 insertions(+), 14 deletions(-) > > diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c > index bdee222..5d1c54a 100644 > --- a/example/generator/odp_generator.c > +++ b/example/generator/odp_generator.c > @@ -101,6 +101,7 @@ static void usage(char *progname); > static int scan_ip(char *buf, unsigned int *paddr); > static int scan_mac(char *in, odph_ethaddr_t *des); > static void tv_sub(struct timeval *recvtime, struct timeval *sendtime); > +static void print_global_stats(int num_workers); > > /** > * Sleep for the specified amount of milliseconds > @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool) > static void *gen_send_thread(void *arg) > { > int thr; > - uint64_t start, now, diff; > odp_pktio_t pktio; > thread_args_t *thr_args; > odp_queue_t outq_def; > @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) > return NULL; > } > > - start = odp_time_cycles(); > printf(" [%02i] created mode: SEND\n", thr); > for (;;) { > int err; > @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) > >= (unsigned int)args->appl.number) { > break; > } > - > - now = odp_time_cycles(); > - diff = odp_time_diff_cycles(start, now); > - if (odp_time_cycles_to_ns(diff) > 20 * ODP_TIME_SEC) { > - start = odp_time_cycles(); > - printf(" [%02i] total send: %ju\n", > - thr, odp_atomic_load_u64(&counters.seq)); > - fflush(stdout); > - } > } > > /* receive number of reply pks until timeout */ > @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len) > continue; > > odp_atomic_inc_u64(&counters.ip); > - rlen += sprintf(msg, "receive Packet proto:IP "); > buf = odp_packet_data(pkt); > ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt)); > - rlen += sprintf(msg + rlen, "id %d ", > - odp_be_to_cpu_16(ip->id)); > offset = odp_packet_l4_offset(pkt); > > /* udp */ > if (ip->proto == ODPH_IPPROTO_UDP) { > odp_atomic_inc_u64(&counters.udp); > + rlen += sprintf(msg, "receive Packet proto:IP "); > + rlen += sprintf(msg + rlen, "id %d ", > + odp_be_to_cpu_16(ip->id)); > udp = (odph_udphdr_t *)(buf + offset); > rlen += sprintf(msg + rlen, "UDP payload %d ", > odp_be_to_cpu_16(udp->length) - > @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) > > return arg; > } > + > +/** > + * printing verbose statistics > + * > + */ > +static void print_global_stats(int num_workers) > +{ > + uint64_t start, now, diff; > + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; > + int verbose_interval = 20, worker_count; > + odp_thrmask_t thrd_mask; > + > + start = odp_time_cycles(); > + while (1) { > + now = odp_time_cycles(); > + diff = odp_time_diff_cycles(start, now); > + if (odp_time_cycles_to_ns(diff) < > + verbose_interval * ODP_TIME_SEC) { > + continue; > + } > + > + start = odp_time_cycles(); > + > + worker_count = odp_thrmask_worker(&thrd_mask); try to not call any odp_api in while 1 loops. Api might be itself slow or block something else, depends on implementation. Here you already calculated number of workers and init. Just reuse that number. Also just looked to that calculation and there is bug: /* Default to system CPU count unless user specified */ num_workers = MAX_WORKERS; if (args->appl.cpu_count) num_workers = args->appl.cpu_count; /* ping mode need two worker */ if (args->appl.mode == APPL_MODE_PING) num_workers = 2; /* Get default worker cpumask */ num_workers = odp_cpumask_def_worker(&cpumask, num_workers); (void)odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); MAX_WORKERS is not needed. It has to be just 0. In that case odp_cpumask_def_worker will return all available workers. Maxim. > + if (worker_count < num_workers) > + break; > + if (args->appl.mode == APPL_MODE_PING) { > + if (worker_count == num_workers) > + break; > + } > + > + if (args->appl.mode == APPL_MODE_RCV) { > + pkts = odp_atomic_load_u64(&counters.udp); > + if (pkts != 0) > + printf(" total receive(UDP: %" PRIu64 ")\n", > + pkts); > + continue; > + } > + > + if (args->appl.mode == APPL_MODE_PING) { > + pkts = odp_atomic_load_u64(&counters.icmp); > + if (pkts != 0) > + printf(" total receive(ICMP: %" PRIu64 ")\n", > + pkts); > + } > + > + pkts = odp_atomic_load_u64(&counters.seq); > + printf(" total sent: %" PRIu64 "\n", pkts); > + > + if (args->appl.mode == APPL_MODE_UDP) { > + pps = (pkts - pkts_prev) / verbose_interval; > + if (pps > maximum_pps) > + maximum_pps = pps; > + printf(" %" PRIu64 " pps, %" PRIu64 " max pps\n", > + pps, maximum_pps); > + } > + > + pkts_prev = pkts; > + }; > +} > + > /** > * ODP packet example main function > */ > @@ -796,6 +847,8 @@ int main(int argc, char *argv[]) > } > } > > + print_global_stats(num_workers); > + > /* Master thread waits for other threads to exit */ > odph_linux_pthread_join(thread_tbl, num_workers); >
I will look into the API replacement. If I understood your comment correctly, I am already reusing the initial calculated num_workers and trying to compare it to the current workers to see if any thread has been exited. So that we can break out from the loop. And this check happens only after each verbose_timeout, does it still effects the performance?. And about the MAX_WORKERS, I kinda fixed the issue in the other patch "[PATCHv4] example:generator:option to supply core mask" ( http://patches.opendataplane.org/patch/2561/). Can you look at that and give me comments which might be relevant patch for this comment?. /Krishna Ma to supply core mask On 7 August 2015 at 13:36, Maxim Uvarov <maxim.uvarov@linaro.org> wrote: > On 08/07/15 12:00, Balakrishna.Garapati wrote: > >> Signed-off-by: Balakrishna.Garapati <balakrishna.garapati@linaro.org> >> --- >> Made updates to print recv stats based on mode >> >> example/generator/odp_generator.c | 81 >> ++++++++++++++++++++++++++++++++------- >> 1 file changed, 67 insertions(+), 14 deletions(-) >> >> diff --git a/example/generator/odp_generator.c >> b/example/generator/odp_generator.c >> index bdee222..5d1c54a 100644 >> --- a/example/generator/odp_generator.c >> +++ b/example/generator/odp_generator.c >> @@ -101,6 +101,7 @@ static void usage(char *progname); >> static int scan_ip(char *buf, unsigned int *paddr); >> static int scan_mac(char *in, odph_ethaddr_t *des); >> static void tv_sub(struct timeval *recvtime, struct timeval *sendtime); >> +static void print_global_stats(int num_workers); >> /** >> * Sleep for the specified amount of milliseconds >> @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, >> odp_pool_t pool) >> static void *gen_send_thread(void *arg) >> { >> int thr; >> - uint64_t start, now, diff; >> odp_pktio_t pktio; >> thread_args_t *thr_args; >> odp_queue_t outq_def; >> @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) >> return NULL; >> } >> - start = odp_time_cycles(); >> printf(" [%02i] created mode: SEND\n", thr); >> for (;;) { >> int err; >> @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) >> >= (unsigned int)args->appl.number) { >> break; >> } >> - >> - now = odp_time_cycles(); >> - diff = odp_time_diff_cycles(start, now); >> - if (odp_time_cycles_to_ns(diff) > 20 * ODP_TIME_SEC) { >> - start = odp_time_cycles(); >> - printf(" [%02i] total send: %ju\n", >> - thr, odp_atomic_load_u64(&counters.seq)); >> - fflush(stdout); >> - } >> } >> /* receive number of reply pks until timeout */ >> @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t >> pkt_tbl[], unsigned len) >> continue; >> odp_atomic_inc_u64(&counters.ip); >> - rlen += sprintf(msg, "receive Packet proto:IP "); >> buf = odp_packet_data(pkt); >> ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt)); >> - rlen += sprintf(msg + rlen, "id %d ", >> - odp_be_to_cpu_16(ip->id)); >> offset = odp_packet_l4_offset(pkt); >> /* udp */ >> if (ip->proto == ODPH_IPPROTO_UDP) { >> odp_atomic_inc_u64(&counters.udp); >> + rlen += sprintf(msg, "receive Packet proto:IP "); >> + rlen += sprintf(msg + rlen, "id %d ", >> + odp_be_to_cpu_16(ip->id)); >> udp = (odph_udphdr_t *)(buf + offset); >> rlen += sprintf(msg + rlen, "UDP payload %d ", >> odp_be_to_cpu_16(udp->length) - >> @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) >> return arg; >> } >> + >> +/** >> + * printing verbose statistics >> + * >> + */ >> +static void print_global_stats(int num_workers) >> +{ >> + uint64_t start, now, diff; >> + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; >> + int verbose_interval = 20, worker_count; >> + odp_thrmask_t thrd_mask; >> + >> + start = odp_time_cycles(); >> + while (1) { >> + now = odp_time_cycles(); >> + diff = odp_time_diff_cycles(start, now); >> + if (odp_time_cycles_to_ns(diff) < >> + verbose_interval * ODP_TIME_SEC) { >> + continue; >> + } >> + >> + start = odp_time_cycles(); >> + >> + worker_count = odp_thrmask_worker(&thrd_mask); >> > > try to not call any odp_api in while 1 loops. Api might be itself slow or > block > something else, depends on implementation. Here you already calculated > number of workers > and init. Just reuse that number. > Also just looked to that calculation and there is bug: > > > /* Default to system CPU count unless user specified */ > num_workers = MAX_WORKERS; > if (args->appl.cpu_count) > num_workers = args->appl.cpu_count; > > /* ping mode need two worker */ > if (args->appl.mode == APPL_MODE_PING) > num_workers = 2; > > /* Get default worker cpumask */ > num_workers = odp_cpumask_def_worker(&cpumask, num_workers); > (void)odp_cpumask_to_str(&cpumask, cpumaskstr, sizeof(cpumaskstr)); > > MAX_WORKERS is not needed. It has to be just 0. In that case > odp_cpumask_def_worker will return all available workers. > > Maxim. > > > > + if (worker_count < num_workers) >> + break; >> + if (args->appl.mode == APPL_MODE_PING) { >> + if (worker_count == num_workers) >> + break; >> + } >> + >> + if (args->appl.mode == APPL_MODE_RCV) { >> + pkts = odp_atomic_load_u64(&counters.udp); >> + if (pkts != 0) >> + printf(" total receive(UDP: %" PRIu64 >> ")\n", >> + pkts); >> + continue; >> + } >> + >> + if (args->appl.mode == APPL_MODE_PING) { >> + pkts = odp_atomic_load_u64(&counters.icmp); >> + if (pkts != 0) >> + printf(" total receive(ICMP: %" PRIu64 >> ")\n", >> + pkts); >> + } >> + >> + pkts = odp_atomic_load_u64(&counters.seq); >> + printf(" total sent: %" PRIu64 "\n", pkts); >> + >> + if (args->appl.mode == APPL_MODE_UDP) { >> + pps = (pkts - pkts_prev) / verbose_interval; >> + if (pps > maximum_pps) >> + maximum_pps = pps; >> + printf(" %" PRIu64 " pps, %" PRIu64 " max pps\n", >> + pps, maximum_pps); >> + } >> + >> + pkts_prev = pkts; >> + }; >> +} >> + >> /** >> * ODP packet example main function >> */ >> @@ -796,6 +847,8 @@ int main(int argc, char *argv[]) >> } >> } >> + print_global_stats(num_workers); >> + >> /* Master thread waits for other threads to exit */ >> odph_linux_pthread_join(thread_tbl, num_workers); >> >> > >
On 08/07/15 15:23, Krishna Garapati wrote: > I will look into the API replacement. If I understood your comment > correctly, I am already reusing the initial calculated num_workers and > trying to compare it to the current workers to see if any thread has > been exited. So that we can break out from the loop. And this check > happens only after each verbose_timeout, does it still effects the > performance?. Ok, I did not release first that you use odp_thrmask_worker() to check how many worker threads run now. I double checked with Bill and Stuart that we support dynamic workers and that function can be used in that way. > > And about the MAX_WORKERS, I kinda fixed the issue in the other patch > "[PATCHv4] example:generator:option to supply core mask" > (http://patches.opendataplane.org/patch/2561/). > > Can you look at that and give me comments which might be relevant > patch for this comment?. > Ok. Maxim. > /Krishna > > Ma to supply core mask > > On 7 August 2015 at 13:36, Maxim Uvarov <maxim.uvarov@linaro.org > <mailto:maxim.uvarov@linaro.org>> wrote: > > On 08/07/15 12:00, Balakrishna.Garapati wrote: > > Signed-off-by: Balakrishna.Garapati > <balakrishna.garapati@linaro.org > <mailto:balakrishna.garapati@linaro.org>> > --- > Made updates to print recv stats based on mode > > example/generator/odp_generator.c | 81 > ++++++++++++++++++++++++++++++++------- > 1 file changed, 67 insertions(+), 14 deletions(-) > > diff --git a/example/generator/odp_generator.c > b/example/generator/odp_generator.c > index bdee222..5d1c54a 100644 > --- a/example/generator/odp_generator.c > +++ b/example/generator/odp_generator.c > @@ -101,6 +101,7 @@ static void usage(char *progname); > static int scan_ip(char *buf, unsigned int *paddr); > static int scan_mac(char *in, odph_ethaddr_t *des); > static void tv_sub(struct timeval *recvtime, struct timeval > *sendtime); > +static void print_global_stats(int num_workers); > /** > * Sleep for the specified amount of milliseconds > @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char > *dev, odp_pool_t pool) > static void *gen_send_thread(void *arg) > { > int thr; > - uint64_t start, now, diff; > odp_pktio_t pktio; > thread_args_t *thr_args; > odp_queue_t outq_def; > @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) > return NULL; > } > - start = odp_time_cycles(); > printf(" [%02i] created mode: SEND\n", thr); > for (;;) { > int err; > @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) > >= (unsigned int)args->appl.number) { > break; > } > - > - now = odp_time_cycles(); > - diff = odp_time_diff_cycles(start, now); > - if (odp_time_cycles_to_ns(diff) > 20 * > ODP_TIME_SEC) { > - start = odp_time_cycles(); > - printf(" [%02i] total send: %ju\n", > - thr, > odp_atomic_load_u64(&counters.seq)); > - fflush(stdout); > - } > } > /* receive number of reply pks until timeout */ > @@ -502,16 +492,16 @@ static void print_pkts(int thr, > odp_packet_t pkt_tbl[], unsigned len) > continue; > odp_atomic_inc_u64(&counters.ip); > - rlen += sprintf(msg, "receive Packet proto:IP "); > buf = odp_packet_data(pkt); > ip = (odph_ipv4hdr_t *)(buf + > odp_packet_l3_offset(pkt)); > - rlen += sprintf(msg + rlen, "id %d ", > - odp_be_to_cpu_16(ip->id)); > offset = odp_packet_l4_offset(pkt); > /* udp */ > if (ip->proto == ODPH_IPPROTO_UDP) { > odp_atomic_inc_u64(&counters.udp); > + rlen += sprintf(msg, "receive Packet > proto:IP "); > + rlen += sprintf(msg + rlen, "id %d ", > + odp_be_to_cpu_16(ip->id)); > udp = (odph_udphdr_t *)(buf + offset); > rlen += sprintf(msg + rlen, "UDP > payload %d ", > odp_be_to_cpu_16(udp->length) - > @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) > return arg; > } > + > +/** > + * printing verbose statistics > + * > + */ > +static void print_global_stats(int num_workers) > +{ > + uint64_t start, now, diff; > + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; > + int verbose_interval = 20, worker_count; > + odp_thrmask_t thrd_mask; > + > + start = odp_time_cycles(); > + while (1) { > + now = odp_time_cycles(); > + diff = odp_time_diff_cycles(start, now); > + if (odp_time_cycles_to_ns(diff) < > + verbose_interval * ODP_TIME_SEC) { > + continue; > + } > + > + start = odp_time_cycles(); > + > + worker_count = odp_thrmask_worker(&thrd_mask); > > > try to not call any odp_api in while 1 loops. Api might be itself > slow or block > something else, depends on implementation. Here you already > calculated number of workers > and init. Just reuse that number. > Also just looked to that calculation and there is bug: > > > /* Default to system CPU count unless user specified */ > num_workers = MAX_WORKERS; > if (args->appl.cpu_count) > num_workers = args->appl.cpu_count; > > /* ping mode need two worker */ > if (args->appl.mode == APPL_MODE_PING) > num_workers = 2; > > /* Get default worker cpumask */ > num_workers = odp_cpumask_def_worker(&cpumask, num_workers); > (void)odp_cpumask_to_str(&cpumask, cpumaskstr, > sizeof(cpumaskstr)); > > MAX_WORKERS is not needed. It has to be just 0. In that case > odp_cpumask_def_worker will return all available workers. > > Maxim. > > > > + if (worker_count < num_workers) > + break; > + if (args->appl.mode == APPL_MODE_PING) { > + if (worker_count == num_workers) > + break; > + } > + > + if (args->appl.mode == APPL_MODE_RCV) { > + pkts = odp_atomic_load_u64(&counters.udp); > + if (pkts != 0) > + printf(" total receive(UDP: %" > PRIu64 ")\n", > + pkts); > + continue; > + } > + > + if (args->appl.mode == APPL_MODE_PING) { > + pkts = > odp_atomic_load_u64(&counters.icmp); > + if (pkts != 0) > + printf(" total receive(ICMP: > %" PRIu64 ")\n", > + pkts); > + } > + > + pkts = odp_atomic_load_u64(&counters.seq); > + printf(" total sent: %" PRIu64 "\n", pkts); > + > + if (args->appl.mode == APPL_MODE_UDP) { > + pps = (pkts - pkts_prev) / > verbose_interval; > + if (pps > maximum_pps) > + maximum_pps = pps; > + printf(" %" PRIu64 " pps, %" PRIu64 " > max pps\n", > + pps, maximum_pps); > + } > + > + pkts_prev = pkts; > + }; > +} > + > /** > * ODP packet example main function > */ > @@ -796,6 +847,8 @@ int main(int argc, char *argv[]) > } > } > + print_global_stats(num_workers); > + > /* Master thread waits for other threads to exit */ > odph_linux_pthread_join(thread_tbl, num_workers); > > >
Stuart, does this patch look ok ? On 7 August 2015 at 15:37, Maxim Uvarov <maxim.uvarov@linaro.org> wrote: > On 08/07/15 15:23, Krishna Garapati wrote: > >> I will look into the API replacement. If I understood your comment >> correctly, I am already reusing the initial calculated num_workers and >> trying to compare it to the current workers to see if any thread has been >> exited. So that we can break out from the loop. And this check happens >> only after each verbose_timeout, does it still effects the performance?. >> > > Ok, I did not release first that you use odp_thrmask_worker() to check > how many worker threads run now. I double checked with Bill and Stuart that > we support dynamic workers and that function can be used in that way. > > > > >> And about the MAX_WORKERS, I kinda fixed the issue in the other patch >> "[PATCHv4] example:generator:option to supply core mask" ( >> http://patches.opendataplane.org/patch/2561/). >> >> Can you look at that and give me comments which might be relevant patch >> for this comment?. >> >> Ok. > > Maxim. > >> /Krishna >> >> Ma to supply core mask >> >> On 7 August 2015 at 13:36, Maxim Uvarov <maxim.uvarov@linaro.org <mailto: >> maxim.uvarov@linaro.org>> wrote: >> >> On 08/07/15 12:00, Balakrishna.Garapati wrote: >> >> Signed-off-by: Balakrishna.Garapati >> <balakrishna.garapati@linaro.org >> <mailto:balakrishna.garapati@linaro.org>> >> >> --- >> Made updates to print recv stats based on mode >> >> example/generator/odp_generator.c | 81 >> ++++++++++++++++++++++++++++++++------- >> 1 file changed, 67 insertions(+), 14 deletions(-) >> >> diff --git a/example/generator/odp_generator.c >> b/example/generator/odp_generator.c >> index bdee222..5d1c54a 100644 >> --- a/example/generator/odp_generator.c >> +++ b/example/generator/odp_generator.c >> @@ -101,6 +101,7 @@ static void usage(char *progname); >> static int scan_ip(char *buf, unsigned int *paddr); >> static int scan_mac(char *in, odph_ethaddr_t *des); >> static void tv_sub(struct timeval *recvtime, struct timeval >> *sendtime); >> +static void print_global_stats(int num_workers); >> /** >> * Sleep for the specified amount of milliseconds >> @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char >> *dev, odp_pool_t pool) >> static void *gen_send_thread(void *arg) >> { >> int thr; >> - uint64_t start, now, diff; >> odp_pktio_t pktio; >> thread_args_t *thr_args; >> odp_queue_t outq_def; >> @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) >> return NULL; >> } >> - start = odp_time_cycles(); >> printf(" [%02i] created mode: SEND\n", thr); >> for (;;) { >> int err; >> @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) >> >= (unsigned int)args->appl.number) { >> break; >> } >> - >> - now = odp_time_cycles(); >> - diff = odp_time_diff_cycles(start, now); >> - if (odp_time_cycles_to_ns(diff) > 20 * >> ODP_TIME_SEC) { >> - start = odp_time_cycles(); >> - printf(" [%02i] total send: %ju\n", >> - thr, >> odp_atomic_load_u64(&counters.seq)); >> - fflush(stdout); >> - } >> } >> /* receive number of reply pks until timeout */ >> @@ -502,16 +492,16 @@ static void print_pkts(int thr, >> odp_packet_t pkt_tbl[], unsigned len) >> continue; >> odp_atomic_inc_u64(&counters.ip); >> - rlen += sprintf(msg, "receive Packet proto:IP "); >> buf = odp_packet_data(pkt); >> ip = (odph_ipv4hdr_t *)(buf + >> odp_packet_l3_offset(pkt)); >> - rlen += sprintf(msg + rlen, "id %d ", >> - odp_be_to_cpu_16(ip->id)); >> offset = odp_packet_l4_offset(pkt); >> /* udp */ >> if (ip->proto == ODPH_IPPROTO_UDP) { >> odp_atomic_inc_u64(&counters.udp); >> + rlen += sprintf(msg, "receive Packet >> proto:IP "); >> + rlen += sprintf(msg + rlen, "id %d ", >> + odp_be_to_cpu_16(ip->id)); >> udp = (odph_udphdr_t *)(buf + offset); >> rlen += sprintf(msg + rlen, "UDP >> payload %d ", >> odp_be_to_cpu_16(udp->length) - >> @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) >> return arg; >> } >> + >> +/** >> + * printing verbose statistics >> + * >> + */ >> +static void print_global_stats(int num_workers) >> +{ >> + uint64_t start, now, diff; >> + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; >> + int verbose_interval = 20, worker_count; >> + odp_thrmask_t thrd_mask; >> + >> + start = odp_time_cycles(); >> + while (1) { >> + now = odp_time_cycles(); >> + diff = odp_time_diff_cycles(start, now); >> + if (odp_time_cycles_to_ns(diff) < >> + verbose_interval * ODP_TIME_SEC) { >> + continue; >> + } >> + >> + start = odp_time_cycles(); >> + >> + worker_count = odp_thrmask_worker(&thrd_mask); >> >> >> try to not call any odp_api in while 1 loops. Api might be itself >> slow or block >> something else, depends on implementation. Here you already >> calculated number of workers >> and init. Just reuse that number. >> Also just looked to that calculation and there is bug: >> >> >> /* Default to system CPU count unless user specified */ >> num_workers = MAX_WORKERS; >> if (args->appl.cpu_count) >> num_workers = args->appl.cpu_count; >> >> /* ping mode need two worker */ >> if (args->appl.mode == APPL_MODE_PING) >> num_workers = 2; >> >> /* Get default worker cpumask */ >> num_workers = odp_cpumask_def_worker(&cpumask, num_workers); >> (void)odp_cpumask_to_str(&cpumask, cpumaskstr, >> sizeof(cpumaskstr)); >> >> MAX_WORKERS is not needed. It has to be just 0. In that case >> odp_cpumask_def_worker will return all available workers. >> >> Maxim. >> >> >> >> + if (worker_count < num_workers) >> + break; >> + if (args->appl.mode == APPL_MODE_PING) { >> + if (worker_count == num_workers) >> + break; >> + } >> + >> + if (args->appl.mode == APPL_MODE_RCV) { >> + pkts = odp_atomic_load_u64(&counters.udp); >> + if (pkts != 0) >> + printf(" total receive(UDP: %" >> PRIu64 ")\n", >> + pkts); >> + continue; >> + } >> + >> + if (args->appl.mode == APPL_MODE_PING) { >> + pkts = >> odp_atomic_load_u64(&counters.icmp); >> + if (pkts != 0) >> + printf(" total receive(ICMP: >> %" PRIu64 ")\n", >> + pkts); >> + } >> + >> + pkts = odp_atomic_load_u64(&counters.seq); >> + printf(" total sent: %" PRIu64 "\n", pkts); >> + >> + if (args->appl.mode == APPL_MODE_UDP) { >> + pps = (pkts - pkts_prev) / >> verbose_interval; >> + if (pps > maximum_pps) >> + maximum_pps = pps; >> + printf(" %" PRIu64 " pps, %" PRIu64 " >> max pps\n", >> + pps, maximum_pps); >> + } >> + >> + pkts_prev = pkts; >> + }; >> +} >> + >> /** >> * ODP packet example main function >> */ >> @@ -796,6 +847,8 @@ int main(int argc, char *argv[]) >> } >> } >> + print_global_stats(num_workers); >> + >> /* Master thread waits for other threads to exit */ >> odph_linux_pthread_join(thread_tbl, num_workers); >> >> >> >> >
On Fri, Aug 07, 2015 at 11:00:17AM +0200, Balakrishna.Garapati wrote: > Signed-off-by: Balakrishna.Garapati <balakrishna.garapati@linaro.org> > --- > Made updates to print recv stats based on mode > > example/generator/odp_generator.c | 81 ++++++++++++++++++++++++++++++++------- > 1 file changed, 67 insertions(+), 14 deletions(-) > > diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c > index bdee222..5d1c54a 100644 > --- a/example/generator/odp_generator.c > +++ b/example/generator/odp_generator.c > @@ -101,6 +101,7 @@ static void usage(char *progname); > static int scan_ip(char *buf, unsigned int *paddr); > static int scan_mac(char *in, odph_ethaddr_t *des); > static void tv_sub(struct timeval *recvtime, struct timeval *sendtime); > +static void print_global_stats(int num_workers); > > /** > * Sleep for the specified amount of milliseconds > @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool) > static void *gen_send_thread(void *arg) > { > int thr; > - uint64_t start, now, diff; > odp_pktio_t pktio; > thread_args_t *thr_args; > odp_queue_t outq_def; > @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) > return NULL; > } > > - start = odp_time_cycles(); > printf(" [%02i] created mode: SEND\n", thr); > for (;;) { > int err; > @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) > >= (unsigned int)args->appl.number) { > break; > } > - > - now = odp_time_cycles(); > - diff = odp_time_diff_cycles(start, now); > - if (odp_time_cycles_to_ns(diff) > 20 * ODP_TIME_SEC) { > - start = odp_time_cycles(); > - printf(" [%02i] total send: %ju\n", > - thr, odp_atomic_load_u64(&counters.seq)); > - fflush(stdout); > - } > } > > /* receive number of reply pks until timeout */ > @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len) > continue; > > odp_atomic_inc_u64(&counters.ip); > - rlen += sprintf(msg, "receive Packet proto:IP "); > buf = odp_packet_data(pkt); > ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt)); > - rlen += sprintf(msg + rlen, "id %d ", > - odp_be_to_cpu_16(ip->id)); > offset = odp_packet_l4_offset(pkt); > > /* udp */ > if (ip->proto == ODPH_IPPROTO_UDP) { > odp_atomic_inc_u64(&counters.udp); > + rlen += sprintf(msg, "receive Packet proto:IP "); > + rlen += sprintf(msg + rlen, "id %d ", > + odp_be_to_cpu_16(ip->id)); > udp = (odph_udphdr_t *)(buf + offset); > rlen += sprintf(msg + rlen, "UDP payload %d ", > odp_be_to_cpu_16(udp->length) - There's still an unconditional print for every received packet. > @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) > > return arg; > } > + > +/** > + * printing verbose statistics > + * > + */ > +static void print_global_stats(int num_workers) > +{ > + uint64_t start, now, diff; > + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; > + int verbose_interval = 20, worker_count; > + odp_thrmask_t thrd_mask; > + > + start = odp_time_cycles(); > + while (1) { > + now = odp_time_cycles(); > + diff = odp_time_diff_cycles(start, now); > + if (odp_time_cycles_to_ns(diff) < > + verbose_interval * ODP_TIME_SEC) { > + continue; > + } There's no exit condition check in this loop above, so you'll wait up to verbose_interval extra seconds unnecessarily, for example when running with "-n 100" the workers will finish quickly. > + > + start = odp_time_cycles(); > + > + worker_count = odp_thrmask_worker(&thrd_mask); There's a potential race here as the worker thread counts get incremented after the thread has started (rather than during the odph_linux_pthread_create() call), and it's pretty likely this code will be reached by the main thread before the workers have started. Should wait until worker count has reached num_workers before entering the loop. > + if (worker_count < num_workers) > + break; > + if (args->appl.mode == APPL_MODE_PING) { > + if (worker_count == num_workers) > + break; > + } > + > + if (args->appl.mode == APPL_MODE_RCV) { > + pkts = odp_atomic_load_u64(&counters.udp); > + if (pkts != 0) This check isn't needed, I want to see how many packets have been received even (especially) if it's 0. > + printf(" total receive(UDP: %" PRIu64 ")\n", > + pkts); > + continue; > + } > + > + if (args->appl.mode == APPL_MODE_PING) { > + pkts = odp_atomic_load_u64(&counters.icmp); > + if (pkts != 0) Same here. > + printf(" total receive(ICMP: %" PRIu64 ")\n", > + pkts); > + } > + > + pkts = odp_atomic_load_u64(&counters.seq); > + printf(" total sent: %" PRIu64 "\n", pkts); > + > + if (args->appl.mode == APPL_MODE_UDP) { Alignment problem here, need another tab. > + pps = (pkts - pkts_prev) / verbose_interval; > + if (pps > maximum_pps) > + maximum_pps = pps; > + printf(" %" PRIu64 " pps, %" PRIu64 " max pps\n", > + pps, maximum_pps); > + } > + > + pkts_prev = pkts; This should go inside the condition above. > + }; Spurious ; > +} > + > /** > * ODP packet example main function > */ > @@ -796,6 +847,8 @@ int main(int argc, char *argv[]) > } > } > > + print_global_stats(num_workers); > + > /* Master thread waits for other threads to exit */ > odph_linux_pthread_join(thread_tbl, num_workers); > > -- > 1.9.1 >
> > > > +static void print_global_stats(int num_workers) > > +{ > > + uint64_t start, now, diff; > > + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; > > + int verbose_interval = 20, worker_count; > > + odp_thrmask_t thrd_mask; > > + > > + start = odp_time_cycles(); > > + while (1) { > > + now = odp_time_cycles(); > > + diff = odp_time_diff_cycles(start, now); > > + if (odp_time_cycles_to_ns(diff) < > > + verbose_interval * ODP_TIME_SEC) { > > + continue; > > + } > > There's no exit condition check in this loop above, so you'll wait up to > verbose_interval extra seconds unnecessarily, for example when running > with "-n 100" the workers will finish quickly. > > + > > + start = odp_time_cycles(); > > + > > + worker_count = odp_thrmask_worker(&thrd_mask); > > There's a potential race here as the worker thread counts get > incremented after the thread has started (rather than during the > odph_linux_pthread_create() call), and it's pretty likely this code > will be reached by the main thread before the workers have started. > > Should wait until worker count has reached num_workers before entering > the loop. > verbose_interval * ODP_TIME_SEC internally a sleep time before reaching this check. Even if we introduce the worker_count == num_workers, we might have to do a default sleep anyway to make sure we have workers up and running otherwise we break the loop. do we still need a separate check for this ? > > + if (worker_count < num_workers) > > + break; > > -- > Stuart. >
On Thu, Aug 20, 2015 at 01:39:29PM +0200, Krishna Garapati wrote: > > > > > > > +static void print_global_stats(int num_workers) > > > +{ > > > + uint64_t start, now, diff; > > > + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; > > > + int verbose_interval = 20, worker_count; > > > + odp_thrmask_t thrd_mask; > > > + > > > + start = odp_time_cycles(); > > > + while (1) { > > > + now = odp_time_cycles(); > > > + diff = odp_time_diff_cycles(start, now); > > > + if (odp_time_cycles_to_ns(diff) < > > > + verbose_interval * ODP_TIME_SEC) { > > > + continue; > > > + } > > > > There's no exit condition check in this loop above, so you'll wait up to > > verbose_interval extra seconds unnecessarily, for example when running > > with "-n 100" the workers will finish quickly. > > > > + > > > + start = odp_time_cycles(); > > > + > > > + worker_count = odp_thrmask_worker(&thrd_mask); > > > > There's a potential race here as the worker thread counts get > > incremented after the thread has started (rather than during the > > odph_linux_pthread_create() call), and it's pretty likely this code > > will be reached by the main thread before the workers have started. > > > > Should wait until worker count has reached num_workers before entering > > the loop. > > > > > verbose_interval * ODP_TIME_SEC internally a sleep time before reaching > this check. Even if we introduce the worker_count == num_workers, we might > have to do a default sleep anyway to make sure we have workers up and > running otherwise we break the loop. do we still need a separate check for > this ? > Sleeps/delays are generally a bad way to resolve race conditions, since the race is still there but you're just avoiding it. If at some point in the future we were to make verbose_interval configurable then it may be exposed again. It's better to have an explicit check that the precondition (workers started) has been met. Also as I said in the previous comment the existing 20 second delay before entering the rest of the loop causes other problems. I was thinking of something like; while (odp_thrmask_workers(&thr_mask) < num_workers) { } while (odp_thrmask_workers(&thr_mask) == num_workers) { .. }
diff --git a/example/generator/odp_generator.c b/example/generator/odp_generator.c index bdee222..5d1c54a 100644 --- a/example/generator/odp_generator.c +++ b/example/generator/odp_generator.c @@ -101,6 +101,7 @@ static void usage(char *progname); static int scan_ip(char *buf, unsigned int *paddr); static int scan_mac(char *in, odph_ethaddr_t *des); static void tv_sub(struct timeval *recvtime, struct timeval *sendtime); +static void print_global_stats(int num_workers); /** * Sleep for the specified amount of milliseconds @@ -371,7 +372,6 @@ static odp_pktio_t create_pktio(const char *dev, odp_pool_t pool) static void *gen_send_thread(void *arg) { int thr; - uint64_t start, now, diff; odp_pktio_t pktio; thread_args_t *thr_args; odp_queue_t outq_def; @@ -393,7 +393,6 @@ static void *gen_send_thread(void *arg) return NULL; } - start = odp_time_cycles(); printf(" [%02i] created mode: SEND\n", thr); for (;;) { int err; @@ -434,15 +433,6 @@ static void *gen_send_thread(void *arg) >= (unsigned int)args->appl.number) { break; } - - now = odp_time_cycles(); - diff = odp_time_diff_cycles(start, now); - if (odp_time_cycles_to_ns(diff) > 20 * ODP_TIME_SEC) { - start = odp_time_cycles(); - printf(" [%02i] total send: %ju\n", - thr, odp_atomic_load_u64(&counters.seq)); - fflush(stdout); - } } /* receive number of reply pks until timeout */ @@ -502,16 +492,16 @@ static void print_pkts(int thr, odp_packet_t pkt_tbl[], unsigned len) continue; odp_atomic_inc_u64(&counters.ip); - rlen += sprintf(msg, "receive Packet proto:IP "); buf = odp_packet_data(pkt); ip = (odph_ipv4hdr_t *)(buf + odp_packet_l3_offset(pkt)); - rlen += sprintf(msg + rlen, "id %d ", - odp_be_to_cpu_16(ip->id)); offset = odp_packet_l4_offset(pkt); /* udp */ if (ip->proto == ODPH_IPPROTO_UDP) { odp_atomic_inc_u64(&counters.udp); + rlen += sprintf(msg, "receive Packet proto:IP "); + rlen += sprintf(msg + rlen, "id %d ", + odp_be_to_cpu_16(ip->id)); udp = (odph_udphdr_t *)(buf + offset); rlen += sprintf(msg + rlen, "UDP payload %d ", odp_be_to_cpu_16(udp->length) - @@ -589,6 +579,67 @@ static void *gen_recv_thread(void *arg) return arg; } + +/** + * printing verbose statistics + * + */ +static void print_global_stats(int num_workers) +{ + uint64_t start, now, diff; + uint64_t pkts, pkts_prev = 0, pps, maximum_pps = 0; + int verbose_interval = 20, worker_count; + odp_thrmask_t thrd_mask; + + start = odp_time_cycles(); + while (1) { + now = odp_time_cycles(); + diff = odp_time_diff_cycles(start, now); + if (odp_time_cycles_to_ns(diff) < + verbose_interval * ODP_TIME_SEC) { + continue; + } + + start = odp_time_cycles(); + + worker_count = odp_thrmask_worker(&thrd_mask); + if (worker_count < num_workers) + break; + if (args->appl.mode == APPL_MODE_PING) { + if (worker_count == num_workers) + break; + } + + if (args->appl.mode == APPL_MODE_RCV) { + pkts = odp_atomic_load_u64(&counters.udp); + if (pkts != 0) + printf(" total receive(UDP: %" PRIu64 ")\n", + pkts); + continue; + } + + if (args->appl.mode == APPL_MODE_PING) { + pkts = odp_atomic_load_u64(&counters.icmp); + if (pkts != 0) + printf(" total receive(ICMP: %" PRIu64 ")\n", + pkts); + } + + pkts = odp_atomic_load_u64(&counters.seq); + printf(" total sent: %" PRIu64 "\n", pkts); + + if (args->appl.mode == APPL_MODE_UDP) { + pps = (pkts - pkts_prev) / verbose_interval; + if (pps > maximum_pps) + maximum_pps = pps; + printf(" %" PRIu64 " pps, %" PRIu64 " max pps\n", + pps, maximum_pps); + } + + pkts_prev = pkts; + }; +} + /** * ODP packet example main function */ @@ -796,6 +847,8 @@ int main(int argc, char *argv[]) } } + print_global_stats(num_workers); + /* Master thread waits for other threads to exit */ odph_linux_pthread_join(thread_tbl, num_workers);
Signed-off-by: Balakrishna.Garapati <balakrishna.garapati@linaro.org> --- Made updates to print recv stats based on mode example/generator/odp_generator.c | 81 ++++++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 14 deletions(-)