Message ID | 20191005091614.11635-6-leo.yan@linaro.org |
---|---|
State | New |
Headers | show |
Series | perf cs-etm: Support thread stack and callchain | expand |
On Sat, Oct 05, 2019 at 05:16:13PM +0800, Leo Yan wrote: > Now CoreSight has supported the thread stack; based on the thread stack > we can synthesize call chain for the instruction sample; the call chain > can be injected by option '--itrace=g'. > > Before: > > # perf script --itrace=g16l64i100 > main 1579 100 instructions: ffff0000102137f0 group_sched_in+0xb0 ([kernel.kallsyms]) > main 1579 100 instructions: ffff000010213b78 flexible_sched_in+0xf0 ([kernel.kallsyms]) > main 1579 100 instructions: ffff0000102135ac event_sched_in.isra.57+0x74 ([kernel.kallsyms]) > main 1579 100 instructions: ffff000010219344 perf_swevent_add+0x6c ([kernel.kallsyms]) > main 1579 100 instructions: ffff000010214854 perf_event_update_userpage+0x4c ([kernel.kallsyms]) > [...] > > After: > > # perf script --itrace=g16l64i100 > > main 1579 100 instructions: > ffff000010213b78 flexible_sched_in+0xf0 ([kernel.kallsyms]) > ffff00001020c0b4 visit_groups_merge+0x12c ([kernel.kallsyms]) > > main 1579 100 instructions: > ffff0000102135ac event_sched_in.isra.57+0x74 ([kernel.kallsyms]) > ffff0000102137a0 group_sched_in+0x60 ([kernel.kallsyms]) > ffff000010213b84 flexible_sched_in+0xfc ([kernel.kallsyms]) > ffff00001020c0b4 visit_groups_merge+0x12c ([kernel.kallsyms]) > > main 1579 100 instructions: > ffff000010219344 perf_swevent_add+0x6c ([kernel.kallsyms]) > ffff0000102135f4 event_sched_in.isra.57+0xbc ([kernel.kallsyms]) > ffff0000102137a0 group_sched_in+0x60 ([kernel.kallsyms]) > ffff000010213b84 flexible_sched_in+0xfc ([kernel.kallsyms]) > ffff00001020c0b4 visit_groups_merge+0x12c ([kernel.kallsyms]) > [...] 
> > Signed-off-by: Leo Yan <leo.yan@linaro.org> > --- > tools/perf/util/cs-etm.c | 35 +++++++++++++++++++++++++++++++++-- > 1 file changed, 33 insertions(+), 2 deletions(-) > > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c > index 4b42f9c9bd34..56e501cd2f5f 100644 > --- a/tools/perf/util/cs-etm.c > +++ b/tools/perf/util/cs-etm.c > @@ -17,6 +17,7 @@ > #include <stdlib.h> > > #include "auxtrace.h" > +#include "callchain.h" > #include "color.h" > #include "cs-etm.h" > #include "cs-etm-decoder/cs-etm-decoder.h" > @@ -74,6 +75,7 @@ struct cs_etm_traceid_queue { > size_t last_branch_pos; > union perf_event *event_buf; > struct thread *thread; > + struct ip_callchain *chain; > struct branch_stack *last_branch; > struct branch_stack *last_branch_rb; > struct cs_etm_packet *prev_packet; > @@ -251,6 +253,16 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, > if (!tidq->prev_packet) > goto out_free; > > + if (etm->synth_opts.callchain) { > + size_t sz = sizeof(struct ip_callchain); > + > + /* Add 1 to callchain_sz for callchain context */ > + sz += (etm->synth_opts.callchain_sz + 1) * sizeof(u64); > + tidq->chain = zalloc(sz); > + if (!tidq->chain) > + goto out_free; > + } > + > if (etm->synth_opts.last_branch) { > size_t sz = sizeof(struct branch_stack); > > @@ -275,6 +287,7 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, > zfree(&tidq->last_branch); > zfree(&tidq->prev_packet); > zfree(&tidq->packet); > + zfree(&tidq->chain); Theoretically this should go two lines up, i.e. just below zfree(&tidq->prev_packet). If you agree with the comment I made on 3/6, then it is worth making the above change; otherwise it can stay that way. > out: > return rc; > } > @@ -546,6 +559,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) > zfree(&tidq->last_branch_rb); > zfree(&tidq->prev_packet); > zfree(&tidq->packet); > + zfree(&tidq->chain); Same comment as above. The rest looks good to me. 
Mathieu > zfree(&tidq); > > /* > @@ -1126,7 +1140,7 @@ static void cs_etm__add_stack_event(struct cs_etm_queue *etmq, > int insn_len; > u64 from_ip, to_ip; > > - if (etm->synth_opts.thread_stack) { > + if (etm->synth_opts.callchain || etm->synth_opts.thread_stack) { > from_ip = cs_etm__last_executed_instr(tidq->prev_packet); > to_ip = cs_etm__first_executed_instr(tidq->packet); > > @@ -1182,6 +1196,14 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, > > cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); > > + if (etm->synth_opts.callchain) { > + thread_stack__sample(tidq->thread, tidq->packet->cpu, > + tidq->chain, > + etm->synth_opts.callchain_sz + 1, > + sample.ip, etm->kernel_start); > + sample.callchain = tidq->chain; > + } > + > if (etm->synth_opts.last_branch) { > cs_etm__copy_last_branch_rb(etmq, tidq); > sample.branch_stack = tidq->last_branch; > @@ -1369,6 +1391,8 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, > attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; > } > > + if (etm->synth_opts.callchain) > + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; > if (etm->synth_opts.last_branch) > attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; > > @@ -2639,7 +2663,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, > } else { > itrace_synth_opts__set_default(&etm->synth_opts, > session->itrace_synth_opts->default_no_sample); > - etm->synth_opts.callchain = false; > etm->synth_opts.thread_stack = > session->itrace_synth_opts->thread_stack; > } > @@ -2651,6 +2674,14 @@ int cs_etm__process_auxtrace_info(union perf_event *event, > etm->branches_filter |= PERF_IP_FLAG_RETURN | > PERF_IP_FLAG_TRACE_BEGIN; > > + if (etm->synth_opts.callchain && !symbol_conf.use_callchain) { > + symbol_conf.use_callchain = true; > + if (callchain_register_param(&callchain_param) < 0) { > + symbol_conf.use_callchain = false; > + etm->synth_opts.callchain = false; > + } > + } > + > err = cs_etm__synth_events(etm, session); > if 
(err) > goto err_delete_thread; > -- > 2.17.1 >
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 4b42f9c9bd34..56e501cd2f5f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -17,6 +17,7 @@ #include <stdlib.h> #include "auxtrace.h" +#include "callchain.h" #include "color.h" #include "cs-etm.h" #include "cs-etm-decoder/cs-etm-decoder.h" @@ -74,6 +75,7 @@ struct cs_etm_traceid_queue { size_t last_branch_pos; union perf_event *event_buf; struct thread *thread; + struct ip_callchain *chain; struct branch_stack *last_branch; struct branch_stack *last_branch_rb; struct cs_etm_packet *prev_packet; @@ -251,6 +253,16 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, if (!tidq->prev_packet) goto out_free; + if (etm->synth_opts.callchain) { + size_t sz = sizeof(struct ip_callchain); + + /* Add 1 to callchain_sz for callchain context */ + sz += (etm->synth_opts.callchain_sz + 1) * sizeof(u64); + tidq->chain = zalloc(sz); + if (!tidq->chain) + goto out_free; + } + if (etm->synth_opts.last_branch) { size_t sz = sizeof(struct branch_stack); @@ -275,6 +287,7 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, zfree(&tidq->last_branch); zfree(&tidq->prev_packet); zfree(&tidq->packet); + zfree(&tidq->chain); out: return rc; } @@ -546,6 +559,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) zfree(&tidq->last_branch_rb); zfree(&tidq->prev_packet); zfree(&tidq->packet); + zfree(&tidq->chain); zfree(&tidq); /* @@ -1126,7 +1140,7 @@ static void cs_etm__add_stack_event(struct cs_etm_queue *etmq, int insn_len; u64 from_ip, to_ip; - if (etm->synth_opts.thread_stack) { + if (etm->synth_opts.callchain || etm->synth_opts.thread_stack) { from_ip = cs_etm__last_executed_instr(tidq->prev_packet); to_ip = cs_etm__first_executed_instr(tidq->packet); @@ -1182,6 +1196,14 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); + if (etm->synth_opts.callchain) { + 
thread_stack__sample(tidq->thread, tidq->packet->cpu, + tidq->chain, + etm->synth_opts.callchain_sz + 1, + sample.ip, etm->kernel_start); + sample.callchain = tidq->chain; + } + if (etm->synth_opts.last_branch) { cs_etm__copy_last_branch_rb(etmq, tidq); sample.branch_stack = tidq->last_branch; @@ -1369,6 +1391,8 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; } + if (etm->synth_opts.callchain) + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; if (etm->synth_opts.last_branch) attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2639,7 +2663,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, } else { itrace_synth_opts__set_default(&etm->synth_opts, session->itrace_synth_opts->default_no_sample); - etm->synth_opts.callchain = false; etm->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; } @@ -2651,6 +2674,14 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->branches_filter |= PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_BEGIN; + if (etm->synth_opts.callchain && !symbol_conf.use_callchain) { + symbol_conf.use_callchain = true; + if (callchain_register_param(&callchain_param) < 0) { + symbol_conf.use_callchain = false; + etm->synth_opts.callchain = false; + } + } + err = cs_etm__synth_events(etm, session); if (err) goto err_delete_thread;
CoreSight now supports the thread stack; based on the thread stack, we can synthesize a call chain for each instruction sample. The call chain can be injected with the option '--itrace=g'. Before: # perf script --itrace=g16l64i100 main 1579 100 instructions: ffff0000102137f0 group_sched_in+0xb0 ([kernel.kallsyms]) main 1579 100 instructions: ffff000010213b78 flexible_sched_in+0xf0 ([kernel.kallsyms]) main 1579 100 instructions: ffff0000102135ac event_sched_in.isra.57+0x74 ([kernel.kallsyms]) main 1579 100 instructions: ffff000010219344 perf_swevent_add+0x6c ([kernel.kallsyms]) main 1579 100 instructions: ffff000010214854 perf_event_update_userpage+0x4c ([kernel.kallsyms]) [...] After: # perf script --itrace=g16l64i100 main 1579 100 instructions: ffff000010213b78 flexible_sched_in+0xf0 ([kernel.kallsyms]) ffff00001020c0b4 visit_groups_merge+0x12c ([kernel.kallsyms]) main 1579 100 instructions: ffff0000102135ac event_sched_in.isra.57+0x74 ([kernel.kallsyms]) ffff0000102137a0 group_sched_in+0x60 ([kernel.kallsyms]) ffff000010213b84 flexible_sched_in+0xfc ([kernel.kallsyms]) ffff00001020c0b4 visit_groups_merge+0x12c ([kernel.kallsyms]) main 1579 100 instructions: ffff000010219344 perf_swevent_add+0x6c ([kernel.kallsyms]) ffff0000102135f4 event_sched_in.isra.57+0xbc ([kernel.kallsyms]) ffff0000102137a0 group_sched_in+0x60 ([kernel.kallsyms]) ffff000010213b84 flexible_sched_in+0xfc ([kernel.kallsyms]) ffff00001020c0b4 visit_groups_merge+0x12c ([kernel.kallsyms]) [...] Signed-off-by: Leo Yan <leo.yan@linaro.org> --- tools/perf/util/cs-etm.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) -- 2.17.1