From 76b10655818c939e257377f83992975a5f55ffb3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 11 Aug 2015 06:36:55 -0700 Subject: perf sort: Check for SRCLINE_UNKNOWN case in "srcfile" processing Handle the SRCLINE_UNKNOWN case correctly when processing "srcfile". Commiter note: We can't just free it, as it was't allocated via malloc, its a guard variable. Reported-by: Namhyung Kim Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20150811133655.GC4524@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index c0c32b050e45..7e3871606df3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -330,6 +330,8 @@ static char *get_srcfile(struct hist_entry *e) sf = get_srcline(map->dso, map__rip_2objdump(map, e->ip), e->ms.sym, true); + if (!strcmp(sf, SRCLINE_UNKNOWN)) + return no_srcfile; p = strchr(sf, ':'); if (p && *sf) { *p = 0; -- cgit v1.2.3 From 75186a9b09e47072f442f43e292cd47180b67b5c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 12 Aug 2015 10:24:07 +0900 Subject: perf probe: Fix to show lines of sys_ functions correctly "perf probe --lines sys_poll" shows only the first line of sys_poll, because the SYSCALL_DEFINE macro: ---- SYSCALL_DEFINE*(foo,...) { body; } ---- is expanded as below (on debuginfo) ---- static inline int SYSC_foo(...) { body; } int SyS_foo(...) <- is an alias of sys_foo. { return SYSC_foo(...); } ---- So, "perf probe --lines sys_foo" decodes SyS_foo function and it also skips inlined functions(SYSC_foo) inside the target function because those functions are usually defined somewhere else. To fix this issue, this fix checks whether the inlined function is defined at the same point of the target function, and if so, it doesn't skip the inline function. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20150812012406.11811.94691.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 57f3ef41c2bc..445f455dd377 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -734,15 +734,18 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) Dwarf_Lines *lines; Dwarf_Line *line; Dwarf_Addr addr; - const char *fname; + const char *fname, *decf = NULL; int lineno, ret = 0; + int decl = 0, inl; Dwarf_Die die_mem, *cu_die; size_t nlines, i; /* Get the CU die */ - if (dwarf_tag(rt_die) != DW_TAG_compile_unit) + if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); - else + dwarf_decl_line(rt_die, &decl); + decf = dwarf_decl_file(rt_die); + } else cu_die = rt_die; if (!cu_die) { pr_debug2("Failed to get CU from given DIE.\n"); @@ -773,9 +776,14 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) * The line is included in given function, and * no inline block includes it. */ - if (!dwarf_haspc(rt_die, addr) || - die_find_inlinefunc(rt_die, addr, &die_mem)) + if (!dwarf_haspc(rt_die, addr)) continue; + if (die_find_inlinefunc(rt_die, addr, &die_mem)) { + dwarf_decl_line(&die_mem, &inl); + if (inl != decl || + decf != dwarf_decl_file(&die_mem)) + continue; + } /* Get source line */ fname = dwarf_linesrc(line, NULL, NULL); -- cgit v1.2.3 From d457c96392bb418bd998f3ccf93e0e4c958fcd0f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 11 Aug 2015 06:30:47 -0400 Subject: perf callchain: Per-event type selection support This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per event. This in term can reduce sampling overhead and the size of the perf.data. Here is an example. perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1 perf evlist -v cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112, config: 0x3c, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1 Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 ++ tools/perf/util/evsel.c | 62 ++++++++++++++++++++++++++++++-- tools/perf/util/evsel.h | 4 +++ tools/perf/util/parse-events.c | 12 +++++++ tools/perf/util/parse-events.h | 2 ++ tools/perf/util/parse-events.l | 2 ++ tools/perf/util/pmu.c | 4 ++- 7 files changed, 86 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index afbe45ef7e3e..7f82dec2b541 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -53,6 +53,9 @@ OPTIONS - 'time': Disable/enable time stamping. Acceptable values are 1 for enabling time stamping. 0 for disabling time stamping. The default is 1. + - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for + FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode. + - 'stack-size': user stack size for dwarf mode Note: If user explicitly sets options which conflict with the params, the value set by the params will be overridden. diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 04fddddc6b6f..6647925d5f28 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -588,11 +588,36 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, } } -static void apply_config_terms(struct perf_evsel *evsel) +static void +perf_evsel__reset_callgraph(struct perf_evsel *evsel, + struct callchain_param *param) +{ + struct perf_event_attr *attr = &evsel->attr; + + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + if (param->record_mode == CALLCHAIN_LBR) { + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK); + } + if (param->record_mode == CALLCHAIN_DWARF) { + perf_evsel__reset_sample_bit(evsel, REGS_USER); + perf_evsel__reset_sample_bit(evsel, STACK_USER); + } +} + +static void apply_config_terms(struct perf_evsel *evsel, + struct record_opts *opts) { struct perf_evsel_config_term *term; struct list_head *config_terms = &evsel->config_terms; struct perf_event_attr *attr = &evsel->attr; + struct callchain_param param; + u32 dump_size = 0; + char *callgraph_buf = NULL; + + /* callgraph default */ + param.record_mode = callchain_param.record_mode; list_for_each_entry(term, config_terms, list) { switch (term->type) { @@ -610,10 +635,43 @@ static void apply_config_terms(struct perf_evsel *evsel) else perf_evsel__reset_sample_bit(evsel, TIME); break; + case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: + callgraph_buf = term->val.callgraph; + break; + case PERF_EVSEL__CONFIG_TERM_STACK_USER: + dump_size = term->val.stack_user; + break; default: break; } } + + /* User explicitly set per-event callgraph, clear the old setting and reset. */ + if ((callgraph_buf != NULL) || (dump_size > 0)) { + + /* parse callgraph parameters */ + if (callgraph_buf != NULL) { + param.enabled = true; + if (parse_callchain_record(callgraph_buf, ¶m)) { + pr_err("per-event callgraph setting for %s failed. " + "Apply callgraph global setting for it\n", + evsel->name); + return; + } + } + if (dump_size > 0) { + dump_size = round_up(dump_size, sizeof(u64)); + param.dump_size = dump_size; + } + + /* If global callgraph set, clear it */ + if (callchain_param.enabled) + perf_evsel__reset_callgraph(evsel, &callchain_param); + + /* set perf-event callgraph */ + if (param.enabled) + perf_evsel__config_callgraph(evsel, opts, ¶m); + } } /* @@ -812,7 +870,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) * Apply event specific term settings, * it overloads any global configuration. */ - apply_config_terms(evsel); + apply_config_terms(evsel, opts); } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index fdf2674ab339..93ac6b128149 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -41,6 +41,8 @@ enum { PERF_EVSEL__CONFIG_TERM_PERIOD, PERF_EVSEL__CONFIG_TERM_FREQ, PERF_EVSEL__CONFIG_TERM_TIME, + PERF_EVSEL__CONFIG_TERM_CALLGRAPH, + PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -51,6 +53,8 @@ struct perf_evsel_config_term { u64 period; u64 freq; bool time; + char *callgraph; + u64 stack_user; } val; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index dbf315df4220..d826e6f515db 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -614,6 +614,12 @@ do { \ return -EINVAL; } break; + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + CHECK_TYPE_VAL(STR); + break; + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + CHECK_TYPE_VAL(NUM); + break; case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; @@ -668,6 +674,12 @@ do { \ case PARSE_EVENTS__TERM_TYPE_TIME: ADD_CONFIG_TERM(TIME, time, term->val.num); break; + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); + break; + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); + break; default: break; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ce2d13a16226..a09b0e210997 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -65,6 +65,8 @@ enum { PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, PARSE_EVENTS__TERM_TYPE_TIME, + PARSE_EVENTS__TERM_TYPE_CALLGRAPH, + PARSE_EVENTS__TERM_TYPE_STACKSIZE, }; struct parse_events_term { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 4306f5ad75c7..936d566f48d8 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -185,6 +185,8 @@ period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); } branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } +call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } +stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d85f11b8cacf..84cad054d6f7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -634,7 +634,9 @@ static char *formats_error_string(struct list_head *formats) { struct perf_pmu_format *format; char *err, *str; - static const char *static_terms = "config,config1,config2,name,period,freq,branch_type,time\n"; + static const char *static_terms = "config,config1,config2,name," + "period,freq,branch_type,time," + "call-graph,stack-size\n"; unsigned i = 0; if (!asprintf(&str, "valid terms:")) -- cgit v1.2.3 From f9db0d0f1b2cf030083c83d3ed3a4bbae6bdc8b7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 11 Aug 2015 06:30:48 -0400 Subject: perf callchain: Allow disabling call graphs per event This patch introduce "call-graph=no" to disable per-event callgraph. Here is an example. perf record -e 'cpu/cpu-cycles,call-graph=fp/,cpu/instructions,call-graph=no/' sleep 1 perf report --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 6 of event 'cpu/cpu-cycles,call-graph=fp/' # Event count (approx.): 774218 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................ ........................................ # 61.94% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath | ---entry_SYSCALL_64_fastpath | |--97.30%-- __brk | --2.70%-- mmap64 _dl_check_map_versions _dl_check_all_versions 61.94% 0.00% sleep [kernel.vmlinux] [k] perf_event_mmap | ---perf_event_mmap | |--97.30%-- do_brk | sys_brk | entry_SYSCALL_64_fastpath | __brk | --2.70%-- mmap_region do_mmap_pgoff vm_mmap_pgoff sys_mmap_pgoff sys_mmap entry_SYSCALL_64_fastpath mmap64 _dl_check_map_versions _dl_check_all_versions ...... # Samples: 6 of event 'cpu/instructions,call-graph=no/' # Event count (approx.): 359692 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................ ................................. # 89.03% 0.00% sleep [unknown] [.] 0xffff6598ffff6598 89.03% 0.00% sleep ld-2.17.so [.] _dl_resolve_conflicts 89.03% 0.00% sleep [kernel.vmlinux] [k] page_fault Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1439289050-40510-2-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 ++- tools/perf/builtin-annotate.c | 2 ++ tools/perf/builtin-diff.c | 3 +++ tools/perf/tests/hists_cumulate.c | 4 ++++ tools/perf/util/evsel.c | 17 +++++++++++------ tools/perf/util/hist.c | 9 ++++++--- 6 files changed, 28 insertions(+), 10 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 7f82dec2b541..347a27322ed8 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -54,7 +54,8 @@ OPTIONS enabling time stamping. 0 for disabling time stamping. The default is 1. - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for - FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode. + FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and + "no" for disable callgraph. - 'stack-size': user stack size for dwarf mode Note: If user explicitly sets options which conflict with the params, the value set by the params will be overridden. diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 467a23b14e2f..a32a64ef08e2 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -239,6 +239,8 @@ static int __cmd_annotate(struct perf_annotate *ann) if (nr_samples > 0) { total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); + /* Don't sort callchain */ + perf_evsel__reset_sample_bit(pos, CALLCHAIN); hists__output_resort(hists, NULL); if (symbol_conf.event_group && diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index daaa7dca9c3b..0b180a885ba3 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -722,6 +722,9 @@ static void data_process(void) if (verbose || data__files_cnt > 2) data__fprintf(); + /* Don't sort callchain for perf diff */ + perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN); + hists__process(hists_base); } } diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 7d82c8be5e36..7ed737019de7 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = false; + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = false; + perf_evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = true; + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = true; + perf_evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6647925d5f28..b096ef7a240c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -651,12 +651,17 @@ static void apply_config_terms(struct perf_evsel *evsel, /* parse callgraph parameters */ if (callgraph_buf != NULL) { - param.enabled = true; - if (parse_callchain_record(callgraph_buf, ¶m)) { - pr_err("per-event callgraph setting for %s failed. " - "Apply callgraph global setting for it\n", - evsel->name); - return; + if (!strcmp(callgraph_buf, "no")) { + param.enabled = false; + param.record_mode = CALLCHAIN_NONE; + } else { + param.enabled = true; + if (parse_callchain_record(callgraph_buf, ¶m)) { + pr_err("per-event callgraph setting for %s failed. " + "Apply callgraph global setting for it\n", + evsel->name); + return; + } } } if (dump_size > 0) { diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6bccfae334b1..1cd785b5b56e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1109,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) static void __hists__insert_output_entry(struct rb_root *entries, struct hist_entry *he, - u64 min_callchain_hits) + u64 min_callchain_hits, + bool use_callchain) { struct rb_node **p = &entries->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; - if (symbol_conf.use_callchain) + if (use_callchain) callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); @@ -1139,6 +1140,8 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; + struct perf_evsel *evsel = hists_to_evsel(hists); + bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain; min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); @@ -1157,7 +1160,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); - __hists__insert_output_entry(&hists->entries, n, min_callchain_hits); + __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); hists__inc_stats(hists, n); if (!n->filtered) -- cgit v1.2.3 From 9e207ddfa20781e56465ce9a537f0a377c9d34fb Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 11 Aug 2015 06:30:49 -0400 Subject: perf report: Show call graph from reference events Introduce --show-ref-call-graph for perf report to print reference callgraph for no callgraph event. Here is an example. perf report --show-ref-call-graph --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 5 of event 'cpu/cpu-cycles,call-graph=fp/' # Event count (approx.): 144985 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................ ........................................ # 72.30% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath | ---entry_SYSCALL_64_fastpath | |--22.62%-- __GI___libc_nanosleep --77.38%-- [...] ...... # Samples: 6 of event 'cpu/instructions,call-graph=no/', show reference callgraph # Event count (approx.): 172780 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................ ........................................ # 73.16% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath | ---entry_SYSCALL_64_fastpath | |--31.44%-- __GI___libc_nanosleep --68.56%-- [...] Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1439289050-40510-3-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 11 +++++++++++ tools/perf/builtin-report.c | 7 +++++++ tools/perf/ui/browsers/hists.c | 9 +++++++-- tools/perf/util/hist.c | 7 ++++++- tools/perf/util/symbol.h | 3 ++- 5 files changed, 33 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 7b07d19e2d54..a18ba757a0ed 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -359,6 +359,17 @@ OPTIONS --full-source-path:: Show the full path for source files for srcline output. +--show-ref-call-graph:: + When multiple events are sampled, it may not be needed to collect + callgraphs for all of them. The sample sites are usually nearby, + and it's enough to collect the callgraphs on a reference event. + So user can use "call-graph=no" event modifier to disable callgraph + for other events to reduce the overhead. + However, perf report cannot show callgraphs for the event which + disable the callgraph. + This option extends the perf report to show reference callgraphs, + which collected by reference event, in no callgraph event. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f301e865001f..62b285e32aa5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -316,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (evname != NULL) ret += fprintf(fp, " of event '%s'", evname); + if (symbol_conf.show_ref_callgraph && + strstr(evname, "call-graph=no")) { + ret += fprintf(fp, ", show reference callgraph"); + } + if (rep->mem_mode) { ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); @@ -740,6 +745,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) itrace_parse_synth_opts), OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, "Show full source file name path for source lines"), + OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph, + "Show callgraph from reference event"), OPT_END() }; struct perf_data_file file = { diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f296b7348449..10c7ec041039 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1267,6 +1267,8 @@ static int hists__browser_title(struct hists *hists, const char *ev_name = perf_evsel__name(evsel); char buf[512]; size_t buflen = sizeof(buf); + char ref[30] = " show reference callgraph, "; + bool enable_ref = false; if (symbol_conf.filter_relative) { nr_samples = hists->stats.nr_non_filtered_samples; @@ -1292,10 +1294,13 @@ static int hists__browser_title(struct hists *hists, } } + if (symbol_conf.show_ref_callgraph && + strstr(ev_name, "call-graph=no")) + enable_ref = true; nr_samples = convert_unit(nr_samples, &unit); printed = scnprintf(bf, size, - "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64, - nr_samples, unit, ev_name, nr_events); + "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64, + nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events); if (hists->uid_filter_str) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1cd785b5b56e..08b6cd945f1e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1141,7 +1141,12 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) struct hist_entry *n; u64 min_callchain_hits; struct perf_evsel *evsel = hists_to_evsel(hists); - bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain; + bool use_callchain; + + if (evsel && !symbol_conf.show_ref_callgraph) + use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + else + use_callchain = symbol_conf.use_callchain; min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index b98ce51af142..a4cde92afbad 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -106,7 +106,8 @@ struct symbol_conf { filter_relative, show_hist_headers, branch_callstack, - has_filter; + has_filter, + show_ref_callgraph; const char *vmlinux_name, *kallsyms_name, *source_prefix, -- cgit v1.2.3