From d85ce830eef6c10d1e9617172dea4681f02b8424 Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Mon, 14 Dec 2015 16:44:40 +0100 Subject: perf pmu: Fix misleadingly indented assignment (whitespace) One line in perf_pmu__parse_unit() is indented wrongly, leading to a warning (=> error) from gcc 6: util/pmu.c:156:3: error: statement is indented as if it were guarded by... [-Werror=misleading-indentation] sret = read(fd, alias->unit, UNIT_MAX_LEN); ^~~~ util/pmu.c:153:2: note: ...this 'if' clause, but it is not if (fd == -1) ^~ Signed-off-by: Markus Trippelsdorf Acked-by: Ingo Molnar Cc: Ben Hutchings Cc: Matt Fleming Cc: Peter Zijlstra Fixes: 410136f5dd96 ("tools/perf/stat: Add event unit and scale support") Link: http://lkml.kernel.org/r/20151214154440.GC1409@x4 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b597bcc8fc78..41a9c875e492 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -153,7 +153,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n if (fd == -1) return -1; - sret = read(fd, alias->unit, UNIT_MAX_LEN); + sret = read(fd, alias->unit, UNIT_MAX_LEN); if (sret < 0) goto error; -- cgit v1.2.3 From 403567217d3fa5d4801f820317ada52e5c5f0e53 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Jan 2016 12:56:32 +0100 Subject: perf symbols: Do not read symbols/data from device files With mem sampling we could get data source within mapped device file. Processing such sample would block during report phase on trying to read the device file. Check for device files and skip the processing if one is detected. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453290995-18485-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 5 +++++ tools/perf/util/symbol.c | 6 +++++- tools/perf/util/util.c | 10 ++++++++++ tools/perf/util/util.h | 1 + 4 files changed, 21 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index e8e9a9dbf5e3..8e6395439ca0 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -52,6 +52,11 @@ int dso__read_binary_type_filename(const struct dso *dso, debuglink--; if (*debuglink == '/') debuglink++; + + ret = -1; + if (!is_regular_file(filename)) + break; + ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ab02209a7cf3..90cedfa30e43 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1466,7 +1466,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) * Read the build id if possible. 
This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) + if (is_regular_file(name) && + filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); /* @@ -1487,6 +1488,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) root_dir, name, PATH_MAX)) continue; + if (!is_regular_file(name)) + continue; + /* Name is now the name of the next image to try */ if (symsrc__init(ss, dso, name, symtab_type) < 0) continue; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index ead9509835d2..7a2da7ef556e 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -691,3 +691,13 @@ out: return tip; } + +bool is_regular_file(const char *file) +{ + struct stat st; + + if (stat(file, &st)) + return false; + + return S_ISREG(st.st_mode); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index fe915e616f9b..61650f05e5c1 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -343,5 +343,6 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) const char *perf_tip(const char *dirpath); +bool is_regular_file(const char *file); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From 86a2cf3123bfec118bfb98728d88be0668779b2b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Jan 2016 12:56:35 +0100 Subject: perf stat: Making several helper functions static There's no need for the following functions to be global: perf_evsel__reset_stat_priv perf_evsel__alloc_stat_priv perf_evsel__free_stat_priv perf_evsel__alloc_prev_raw_counts perf_evsel__free_prev_raw_counts perf_evsel__alloc_stats They all ended up in util/stat.c, and they no longer need to be called from outside this object. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453290995-18485-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 14 +++++++------- tools/perf/util/stat.h | 10 ---------- 2 files changed, 7 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 2b58edccd56f..beeed0bd3bee 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -97,7 +97,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) } } -void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) +static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; struct perf_stat_evsel *ps = evsel->priv; @@ -108,7 +108,7 @@ void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) perf_stat_evsel_id_init(evsel); } -int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) +static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { evsel->priv = zalloc(sizeof(struct perf_stat_evsel)); if (evsel->priv == NULL) @@ -117,13 +117,13 @@ int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) return 0; } -void perf_evsel__free_stat_priv(struct perf_evsel *evsel) +static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) { zfree(&evsel->priv); } -int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, - int ncpus, int nthreads) +static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads) { struct perf_counts *counts; @@ -134,13 +134,13 @@ int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, return counts ? 
0 : -ENOMEM; } -void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) +static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) { perf_counts__delete(evsel->prev_raw_counts); evsel->prev_raw_counts = NULL; } -int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) +static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) { int ncpus = perf_evsel__nr_cpus(evsel); int nthreads = thread_map__nr(evsel->threads); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 086f4e128d63..2af63c9cb59f 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -74,16 +74,6 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu, enum aggr_mode aggr); -void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); -int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); -void perf_evsel__free_stat_priv(struct perf_evsel *evsel); - -int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, - int ncpus, int nthreads); -void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); - -int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); - int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); void perf_evlist__reset_stats(struct perf_evlist *evlist); -- cgit v1.2.3 From c84a5d16711619621f368e84a179790df3377c87 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 20 Jan 2016 10:15:20 +0900 Subject: perf hists: Remove parent filter check in DSO filter function The --exclude-other option sets HIST_FILTER__PARENT bit and it's only set when a hist entry was created. DSO filters don't change this so no need to have the check in hists__filter_by_dso() IMHO. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1453252521-24398-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 68a7612019dc..1d8c8eab9daa 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1266,9 +1266,6 @@ void hists__filter_by_dso(struct hists *hists) for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - if (symbol_conf.exclude_other && !h->parent) - continue; - if (hists__filter_entry_by_dso(hists, h)) continue; -- cgit v1.2.3 From 1f7c254132f098d19ff3fd452ba9f826cd85c4c0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 20 Jan 2016 10:15:21 +0900 Subject: perf hists: Cleanup filtering functions The hists__filter_by_xxx functions share same logic with different filters. Factor out the common code into the hists__filter_by_type. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1453252521-24398-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 89 ++++++++++++++++---------------------------------- 1 file changed, 29 insertions(+), 60 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1d8c8eab9daa..81ce0aff69d1 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1254,25 +1254,6 @@ static bool hists__filter_entry_by_dso(struct hists *hists, return false; } -void hists__filter_by_dso(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (hists__filter_entry_by_dso(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__DSO); - } -} - static bool hists__filter_entry_by_thread(struct hists *hists, struct hist_entry *he) { @@ -1285,25 +1266,6 @@ static bool hists__filter_entry_by_thread(struct hists *hists, return false; } -void hists__filter_by_thread(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (hists__filter_entry_by_thread(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD); - } -} - static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he) { @@ -1317,25 +1279,6 @@ static bool hists__filter_entry_by_symbol(struct hists *hists, return false; } -void 
hists__filter_by_symbol(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (hists__filter_entry_by_symbol(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL); - } -} - static bool hists__filter_entry_by_socket(struct hists *hists, struct hist_entry *he) { @@ -1348,7 +1291,9 @@ static bool hists__filter_entry_by_socket(struct hists *hists, return false; } -void hists__filter_by_socket(struct hists *hists) +typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he); + +static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter) { struct rb_node *nd; @@ -1360,13 +1305,37 @@ void hists__filter_by_socket(struct hists *hists) for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - if (hists__filter_entry_by_socket(hists, h)) + if (filter(hists, h)) continue; - hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET); + hists__remove_entry_filter(hists, h, type); } } +void hists__filter_by_thread(struct hists *hists) +{ + hists__filter_by_type(hists, HIST_FILTER__THREAD, + hists__filter_entry_by_thread); +} + +void hists__filter_by_dso(struct hists *hists) +{ + hists__filter_by_type(hists, HIST_FILTER__DSO, + hists__filter_entry_by_dso); +} + +void hists__filter_by_symbol(struct hists *hists) +{ + hists__filter_by_type(hists, HIST_FILTER__SYMBOL, + hists__filter_entry_by_symbol); +} + +void hists__filter_by_socket(struct hists *hists) +{ + hists__filter_by_type(hists, HIST_FILTER__SOCKET, + hists__filter_entry_by_socket); +} + void events_stats__inc(struct events_stats *stats, u32 type) { ++stats->nr_events[0]; -- cgit v1.2.3 From 3379e0c3effa87d7734fc06277a7023292aadb0c Mon Sep 17 
00:00:00 2001 From: Ben Hutchings Date: Tue, 19 Jan 2016 21:35:15 +0000 Subject: perf tools: Document the perf sysctls perf_event_paranoid was only documented in source code and a perf error message. Copy the documentation from the error message to Documentation/sysctl/kernel.txt. perf_cpu_time_max_percent was already documented but missing from the list at the top, so add it there. Signed-off-by: Ben Hutchings Cc: Peter Zijlstra Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20160119213515.GG2637@decadent.org.uk [ Remove reference to external Documentation file, provide info inline, as before ] Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/sysctl/kernel.txt | 13 +++++++++++++ tools/perf/util/evsel.c | 15 +++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 73c6b1ef0e84..c803e7300f0b 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -58,6 +58,8 @@ show up in /proc/sys/kernel: - panic_on_stackoverflow - panic_on_unrecovered_nmi - panic_on_warn +- perf_cpu_time_max_percent +- perf_event_paranoid - pid_max - powersave-nap [ PPC only ] - printk @@ -639,6 +641,17 @@ allowed to execute. ============================================================== +perf_event_paranoid: + +Controls use of the performance events system by unprivileged +users (without CAP_SYS_ADMIN). The default value is 1. 
+ + -1: Allow use of (almost) all events by all users +>=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK +>=1: Disallow CPU event access by users without CAP_SYS_ADMIN +>=2: Disallow kernel profiling by users without CAP_SYS_ADMIN + +============================================================== pid_max: diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cdbaf9b51e42..467808680ee4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2362,12 +2362,15 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, case EPERM: case EACCES: return scnprintf(msg, size, - "You may not have permission to collect %sstats.\n" - "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n" - " -1 - Not paranoid at all\n" - " 0 - Disallow raw tracepoint access for unpriv\n" - " 1 - Disallow cpu events for unpriv\n" - " 2 - Disallow kernel profiling for unpriv", + "You may not have permission to collect %sstats.\n\n" + "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" + "which controls use of the performance events system by\n" + "unprivileged users (without CAP_SYS_ADMIN).\n\n" + "The default value is 1:\n\n" + " -1: Allow use of (almost) all events by all users\n" + ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n" + ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" + ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN", target->system_wide ? "system-wide " : ""); case ENOENT: return scnprintf(msg, size, "The %s event is not supported.", -- cgit v1.2.3 From 78ce08dfbd180fb85312ee76e607a6c5fe34a06c Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 8 Jan 2016 17:16:11 +0900 Subject: perf annotate: Rename 'colors.code' to 'colors.jump_arrows' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the 'jump_arrows' config name instead of 'code' in the 'colors' section. 
'colors.code' config is only for jump arrows on assembly code listings i.e. │ ┌──jmp 1333 │ │ xchg %ax,%ax │ │ mov %r15,%r10 │ └─→cmp %r15,%r14 But this config name seems unfit. 'jump_arrows' is more descriptive than 'code'. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1452240971-25418-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 2 +- tools/perf/Documentation/perfconfig.example | 2 +- tools/perf/ui/browser.c | 4 ++-- tools/perf/ui/browser.h | 2 +- tools/perf/ui/browsers/annotate.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index b9ca1e304158..1ee488b9aaf7 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -62,7 +62,7 @@ Given a $HOME/.perfconfig like this: medium = green, default normal = lightgray, default selected = white, lightgray - code = blue, default + jump_arrows = blue, default addr = magenta, default root = white, blue diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example index 767ea2436e1c..1d8d5bc4cd2d 100644 --- a/tools/perf/Documentation/perfconfig.example +++ b/tools/perf/Documentation/perfconfig.example @@ -5,7 +5,7 @@ medium = green, lightgray normal = black, lightgray selected = lightgray, magenta - code = blue, lightgray + jump_arrows = blue, lightgray addr = magenta, lightgray [tui] diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index d37202121689..af68a9d488bf 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -531,8 +531,8 @@ static struct ui_browser_colorset { .bg = "yellow", }, { - .colorset = HE_COLORSET_CODE, - .name = "code", + .colorset = HE_COLORSET_JUMP_ARROWS, + .name = "jump_arrows", .fg = "blue", .bg = "default", }, diff --git 
a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 01781de59532..be3b70eb5fca 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -7,7 +7,7 @@ #define HE_COLORSET_MEDIUM 51 #define HE_COLORSET_NORMAL 52 #define HE_COLORSET_SELECTED 53 -#define HE_COLORSET_CODE 54 +#define HE_COLORSET_JUMP_ARROWS 54 #define HE_COLORSET_ADDR 55 #define HE_COLORSET_ROOT 56 diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 718bd46d47fa..4fc208e82c6f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -284,7 +284,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } - ui_browser__set_color(browser, HE_COLORSET_CODE); + ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS); __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, from, to); } -- cgit v1.2.3 From 89debf178708458ac62f5b53dfc97437009d02d3 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 8 Jan 2016 20:39:31 +0900 Subject: perf config: Document variables for 'colors' section in man page Explain the 'colors' section and its variables, which customize the colors used in the output of 'report', 'top' and 'annotate' in the TUI. Those are: 'top', 'medium', 'normal', 'selected', 'jump_arrows', 'addr' and 'root'. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1452253193-30502-2-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 1ee488b9aaf7..80517823b7e5 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -98,6 +98,52 @@ Given a $HOME/.perfconfig like this: order = caller sort-key = function +Variables +~~~~~~~~~ + +colors.*:: + The variables for customizing the colors used in the output for the + 'report', 'top' and 'annotate' in the TUI. They should specify the + foreground and background colors, separated by a comma, for example: + + medium = green, lightgray + + If you want to use the color configured for you terminal, just leave it + as 'default', for example: + + medium = default, lightgray + + Available colors: + red, yellow, green, cyan, gray, black, blue, + white, default, magenta, lightgray + + colors.top:: + 'top' means a overhead percentage which is more than 5%. + And values of this variable specify percentage colors. + Basic key values are foreground-color 'red' and + background-color 'default'. + colors.medium:: + 'medium' means a overhead percentage which has more than 0.5%. + Default values are 'green' and 'default'. + colors.normal:: + 'normal' means the rest of overhead percentages + except 'top', 'medium', 'selected'. + Default values are 'lightgray' and 'default'. + colors.selected:: + This selects the colors for the current entry in a list of entries + from sub-commands (top, report, annotate). + Default values are 'black' and 'lightgray'. + colors.jump_arrows:: + Colors for jump arrows on assembly code listings + such as 'jns', 'jmp', 'jane', etc. + Default values are 'blue', 'default'. 
+ colors.addr:: + This selects colors for addresses from 'annotate'. + Default values are 'magenta', 'default'. + colors.root:: + Colors for headers in the output of a sub-commands (top, report). + Default values are 'white', 'blue'. + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 3fa9f40718a33d27eb2f4bd36c13318a2d58839d Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 8 Jan 2016 20:39:32 +0900 Subject: perf config: Document variables for 'tui' and 'gtk' sections in man page Explain 'tui' and 'gtk' sections and these variables. 'top', 'report' and 'annotate' Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1452253193-30502-3-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 80517823b7e5..ccbdb64696a7 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -144,6 +144,16 @@ colors.*:: Colors for headers in the output of a sub-commands (top, report). Default values are 'white', 'blue'. +tui.*, gtk.*:: + Subcommands that can be configured here are 'top', 'report' and 'annotate'. + These values are booleans, for example: + + [tui] + top = true + + will make the TUI be the default for the 'top' subcommand. Those will be + available if the required libs were detected at tool build time. + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 2733525b8c1a5f9b6e55338d836b835c9c698913 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 8 Jan 2016 20:39:33 +0900 Subject: perf config: Document 'buildid.dir' variable in man page Explain 'buildid.dir' variable. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1452253193-30502-4-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index ccbdb64696a7..a095f0cabf5e 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -154,6 +154,21 @@ tui.*, gtk.*:: will make the TUI be the default for the 'top' subcommand. Those will be available if the required libs were detected at tool build time. +buildid.*:: + buildid.dir:: + Each executable and shared library in modern distributions comes with a + content based identifier that, if available, will be inserted in a + 'perf.data' file header to, at analysis time find what is needed to do + symbol resolution, code annotation, etc. + + The recording tools also stores a hard link or copy in a per-user + directory, $HOME/.debug/, of binaries, shared libraries, /proc/kallsyms + and /proc/kcore files to be used at analysis time. + + The buildid.dir variable can be used to either change this directory + cache location, or to disable it altogether. If you want to disable it, + set buildid.dir to /dev/null. The default is $HOME/.debug + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 3b97629d139b19cbcd15b0b3c128a4d6587d2091 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 8 Jan 2016 20:39:34 +0900 Subject: perf config: Document variables for 'annotate' section in man page Explain 'annotate' section and its variables. 'hide_src_code', 'use_offset', 'jump_arrows', 'show_linenr', 'show_nr_jump' and 'show_total_period'. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1452253193-30502-5-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 110 +++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index a095f0cabf5e..cb7ca507ec5e 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -169,6 +169,116 @@ buildid.*:: cache location, or to disable it altogether. If you want to disable it, set buildid.dir to /dev/null. The default is $HOME/.debug +annotate.*:: + These options work only for TUI. + These are in control of addresses, jump function, source code + in lines of assembly code from a specific program. + + annotate.hide_src_code:: + If a program which is analyzed has source code, + this option lets 'annotate' print a list of assembly code with the source code. + For example, let's see a part of a program. There're four lines. + If this option is 'true', they can be printed + without source code from a program as below. + + │ push %rbp + │ mov %rsp,%rbp + │ sub $0x10,%rsp + │ mov (%rdi),%rdx + + But if this option is 'false', source code of the part + can be also printed as below. Default is 'false'. + + │ struct rb_node *rb_next(const struct rb_node *node) + │ { + │ push %rbp + │ mov %rsp,%rbp + │ sub $0x10,%rsp + │ struct rb_node *parent; + │ + │ if (RB_EMPTY_NODE(node)) + │ mov (%rdi),%rdx + │ return n; + + annotate.use_offset:: + Basing on a first address of a loaded function, offset can be used. + Instead of using original addresses of assembly code, + addresses subtracted from a base address can be printed. + Let's illustrate an example. 
+ If a base address is 0XFFFFFFFF81624d50 as below, + + ffffffff81624d50 + + an address on assembly code has a specific absolute address as below + + ffffffff816250b8:│ mov 0x8(%r14),%rdi + + but if use_offset is 'true', an address subtracted from a base address is printed. + Default is true. This option is only applied to TUI. + + 368:│ mov 0x8(%r14),%rdi + + annotate.jump_arrows:: + There can be jump instruction among assembly code. + Depending on a boolean value of jump_arrows, + arrows can be printed or not which represent + where do the instruction jump into as below. + + │ ┌──jmp 1333 + │ │ xchg %ax,%ax + │1330:│ mov %r15,%r10 + │1333:└─→cmp %r15,%r14 + + If jump_arrow is 'false', the arrows isn't printed as below. + Default is 'false'. + + │ ↓ jmp 1333 + │ xchg %ax,%ax + │1330: mov %r15,%r10 + │1333: cmp %r15,%r14 + + annotate.show_linenr:: + When showing source code if this option is 'true', + line numbers are printed as below. + + │1628 if (type & PERF_SAMPLE_IDENTIFIER) { + │ ↓ jne 508 + │1628 data->id = *array; + │1629 array++; + │1630 } + + However if this option is 'false', they aren't printed as below. + Default is 'false'. + + │ if (type & PERF_SAMPLE_IDENTIFIER) { + │ ↓ jne 508 + │ data->id = *array; + │ array++; + │ } + + annotate.show_nr_jumps:: + Let's see a part of assembly code. + + │1382: movb $0x1,-0x270(%rbp) + + If use this, the number of branches jumping to that address can be printed as below. + Default is 'false'. + + │1 1382: movb $0x1,-0x270(%rbp) + + annotate.show_total_period:: + To compare two records on an instruction base, with this option + provided, display total number of samples that belong to a line + in assembly code. If this option is 'true', total periods are printed + instead of percent values as below. + + 302 │ mov %eax,%eax + + But if this option is 'false', percent values for overhead are printed i.e. + Default is 'false'. 
+ + 99.93 │ mov %eax,%eax + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 485311d97863f2810646e17c8075be2992225f98 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 8 Jan 2016 20:39:36 +0900 Subject: perf config: Document 'hist.percentage' variable in man page Explain 'hist.percentage' variable. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1452253193-30502-7-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index cb7ca507ec5e..74589c68558a 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -279,6 +279,23 @@ annotate.*:: 99.93 │ mov %eax,%eax +hist.*:: + hist.percentage:: + This option control the way to calculate overhead of filtered entries - + that means the value of this option is effective only if there's a + filter (by comm, dso or symbol name). Suppose a following example: + + Overhead Symbols + ........ ....... + 33.33% foo + 33.33% bar + 33.33% baz + + This is an original overhead and we'll filter out the first 'foo' + entry. The value of 'relative' would increase the overhead of 'bar' + and 'baz' to 50.00% for each, while 'absolute' would show their + current overhead (33.33%). + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From cfd92dadc5e830268036efb25ff41618f29c3306 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 21 Jan 2016 19:13:24 -0300 Subject: perf sort: Provide a way to find out if per-thread bucketing is in place Now the UI browsers will be able to offer thread related operations only if the thread is part of the sort order in use, i.e. if hist_entry stats are all for a single thread. 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa , Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1452960197-5323-9-git-send-email-namhyung@kernel.org [ Carved out from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 3 +++ tools/perf/util/sort.h | 1 + 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ec722346e6ff..898e4b0724bf 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -25,6 +25,7 @@ int sort__has_parent = 0; int sort__has_sym = 0; int sort__has_dso = 0; int sort__has_socket = 0; +int sort__has_thread = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; @@ -2136,6 +2137,8 @@ static int sort_dimension__add(const char *tok, sort__has_dso = 1; } else if (sd->entry == &sort_socket) { sort__has_socket = 1; + } else if (sd->entry == &sort_thread) { + sort__has_thread = 1; } return __sort_dimension__add(sd); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 687bbb124428..09616f03d412 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -35,6 +35,7 @@ extern int sort__need_collapse; extern int sort__has_parent; extern int sort__has_sym; extern int sort__has_socket; +extern int sort__has_thread; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; -- cgit v1.2.3 From 2eafd410e669c744208f8110940e42caa7d79447 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 21 Jan 2016 19:13:24 -0300 Subject: perf hists browser: Only 'Zoom into thread' only when sort order has 'pid' We can't offer a zoom into thread when a bucket (struct hist_entry) may have samples for more than one thread, i.e. when 'pid' is not part of the sort order, fix it. 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa , Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1452960197-5323-9-git-send-email-namhyung@kernel.org [ Carved out from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 08c09ad755d2..e66b3a30dd9f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1782,7 +1782,7 @@ static int add_thread_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct thread *thread) { - if (thread == NULL) + if (!sort__has_thread || thread == NULL) return 0; if (asprintf(optstr, "Zoom %s %s(%d) thread", @@ -2307,10 +2307,12 @@ skip_annotation: socked_id); /* perf script support */ if (browser->he_selection) { - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - thread, NULL); + if (sort__has_thread && thread) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + thread, NULL); + } /* * Note that browser->selection != NULL * when browser->he_selection is not NULL, -- cgit v1.2.3 From c221acb0f970d3b80d72c812cda19c121acf5d52 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 21 Jan 2016 19:50:09 -0300 Subject: perf hists browser: Only offer symbol scripting when a symbol is under the cursor When this feature was introduced a check was made if there was a resolved symbol under the cursor, it got lost in commit ea7cd5923309 ("perf hists browser: Split popup menu actions - part 2"), reinstate it. 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa , Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Fixes: ea7cd5923309 ("perf hists browser: Split popup menu actions - part 2") Link: http://lkml.kernel.org/r/1452960197-5323-9-git-send-email-namhyung@kernel.org [ Carved out from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e66b3a30dd9f..2801d80c6903 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2322,10 +2322,12 @@ skip_annotation: * * See hist_browser__show_entry. */ - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - NULL, browser->selection->sym); + if (sort__has_sym && browser->selection->sym) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + NULL, browser->selection->sym); + } } nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], NULL, NULL); -- cgit v1.2.3 From b1447a54f5b41eaf1cc469d9bd3834caa2ff9afb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 22 Jan 2016 11:22:41 -0300 Subject: perf hists browser: Offer 'Zoom into DSO'/'Map details' only when sort order has 'dso' We can't offer a zoom into DSO when a bucket (struct hist_entry) may have samples for more than one DSO, i.e. when 'dso' is not part of the sort order, ditto for 'Map details', fix it. 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa , Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1452960197-5323-9-git-send-email-namhyung@kernel.org [ Carved out from a larger patch, moved check to add_{dso,map}_opt() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 4 ++-- tools/perf/util/sort.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 2801d80c6903..e892106410a7 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1825,7 +1825,7 @@ static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (map == NULL) + if (!sort__has_dso || map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", @@ -1850,7 +1850,7 @@ static int add_map_opt(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, struct map *map) { - if (map == NULL) + if (!sort__has_dso || map == NULL) return 0; if (asprintf(optstr, "Browse map details") < 0) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 09616f03d412..89a1273fd2da 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -32,6 +32,7 @@ extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; extern int sort__need_collapse; +extern int sort__has_dso; extern int sort__has_parent; extern int sort__has_sym; extern int sort__has_socket; -- cgit v1.2.3 From d9695d9f93649ecc00877ec2c847739c54a4cbb3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 22 Jan 2016 12:20:18 -0300 Subject: perf hists browser: Be a bit more strict about presenting CPU socket zoom For consistency with the other sort order checks. 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa , Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1452960197-5323-9-git-send-email-namhyung@kernel.org [ Carved out from a larger patch, moved check to add_socket_opt() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e892106410a7..b919582add0f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1971,7 +1971,7 @@ static int add_socket_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, int socket_id) { - if (socket_id < 0) + if (!sort__has_socket || socket_id < 0) return 0; if (asprintf(optstr, "Zoom %s Processor Socket %d", -- cgit v1.2.3 From 4056132e1072f02bbad77f2071770271cc5b58fc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 22 Jan 2016 12:26:06 -0300 Subject: perf hists browser: Offer non-symbol specific menu options for --sort without 'sym' Now that we check more strictly what each of the menu entries need, we can stop bailing out when 'sym' is not in the --sort order, instead we let each option be added if what it needs is present. This way, for instance, we can run scripts on all samples, see DSO map details when 'dso' is in the --sort provided, etc. 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa , Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1452960197-5323-9-git-send-email-namhyung@kernel.org [ Carved out from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index b919582add0f..d07e6be42ab1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2263,10 +2263,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } - if (!sort__has_sym) - goto add_exit_option; - - if (browser->selection == NULL) + if (!sort__has_sym || browser->selection == NULL) goto skip_annotation; if (sort__mode == SORT_MODE__BRANCH) { @@ -2333,7 +2330,6 @@ skip_annotation: &options[nr_options], NULL, NULL); nr_options += add_switch_opt(browser, &actions[nr_options], &options[nr_options]); -add_exit_option: nr_options += add_exit_opt(browser, &actions[nr_options], &options[nr_options]); -- cgit v1.2.3 From 8acd3da03c3f6e4e31472c5c73402b95a5d0f6cb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 25 Jan 2016 18:01:57 -0300 Subject: perf machine: Introduce machine__find_kernel_symbol_by_name() To be used in the 'vmlinux matches kallsyms' 'perf test' entry. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-m56g1853lz2c6nhnqxibq4jd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 2c2b443df5ba..1a3e45baf97f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -179,6 +179,16 @@ struct symbol *machine__find_kernel_symbol(struct machine *machine, mapp, filter); } +static inline +struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, + enum map_type type, const char *name, + struct map **mapp, + symbol_filter_t filter) +{ + return map_groups__find_symbol_by_name(&machine->kmaps, type, name, + mapp, filter); +} + static inline struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr, struct map **mapp, -- cgit v1.2.3 From ab414dcda8fa307388c40a540b35e3c98a9da5ae Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 25 Jan 2016 18:04:47 -0300 Subject: perf test: Fixup aliases checking in the 'vmlinux matches kallsyms' test There are cases where looking at just the next and prev entries is not enough, like with: $ readelf -sW /usr/lib/debug/lib/modules/4.3.3-301.fc23.x86_64/vmlinux | grep ffffffff81065ec0 4979: ffffffff81065ec0 53 FUNC LOCAL DEFAULT 1 try_to_free_pud_page 4980: ffffffff81065ec0 53 FUNC LOCAL DEFAULT 1 try_to_free_pte_page 4981: ffffffff81065ec0 53 FUNC LOCAL DEFAULT 1 try_to_free_pmd_page So just search by name to see if the symbol is in kallsyms. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-jj1vlljg7ol4i713l60rt5ai@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/vmlinux-kallsyms.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index f0bfc9e8fd9f..630b0b409b97 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -110,7 +110,6 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) */ for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { struct symbol *pair, *first_pair; - bool backwards = true; sym = rb_entry(nd, struct symbol, rb_node); @@ -151,27 +150,14 @@ next_pair: continue; } else { - struct rb_node *nnd; -detour: - nnd = backwards ? rb_prev(&pair->rb_node) : - rb_next(&pair->rb_node); - if (nnd) { - struct symbol *next = rb_entry(nnd, struct symbol, rb_node); - - if (UM(next->start) == mem_start) { - pair = next; + pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL); + if (pair) { + if (UM(pair->start) == mem_start) goto next_pair; - } - } - if (backwards) { - backwards = false; - pair = first_pair; - goto detour; + pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", + mem_start, sym->name, pair->name); } - - pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", - mem_start, sym->name, pair->name); } } else pr_debug("%#" PRIx64 ": %s not on kallsyms\n", -- cgit v1.2.3 From 7b6982ce4b38ecc3f63be46beb7bd079aa290fd7 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 25 Jan 2016 09:55:48 +0000 Subject: perf test: Add libbpf relocation checker There's a bug in LLVM that it can generate unneeded relocation information. See [1] and [2]. Libbpf should check the target section of a relocation symbol. 
This patch adds a testcase which references a global variable (BPF doesn't support global variables). Before fixing libbpf, the new test case can be loaded into kernel, the global variable acts like the first map. It is incorrect. Result: # ~/perf test BPF 37: Test BPF filter : 37.1: Test basic BPF filtering : Ok 37.2: Test BPF prologue generation : Ok 37.3: Test BPF relocation checker : FAILED! # ~/perf test -v BPF ... libbpf: loading object '[bpf_relocation_test]' from buffer libbpf: section .strtab, size 126, link 0, flags 0, type=3 libbpf: section .text, size 0, link 0, flags 6, type=1 libbpf: section .data, size 0, link 0, flags 3, type=1 libbpf: section .bss, size 0, link 0, flags 3, type=8 libbpf: section func=sys_write, size 104, link 0, flags 6, type=1 libbpf: found program func=sys_write libbpf: section .relfunc=sys_write, size 16, link 10, flags 0, type=9 libbpf: section maps, size 16, link 0, flags 3, type=1 libbpf: maps in [bpf_relocation_test]: 16 bytes libbpf: section license, size 4, link 0, flags 3, type=1 libbpf: license of [bpf_relocation_test] is GPL libbpf: section version, size 4, link 0, flags 3, type=1 libbpf: kernel version of [bpf_relocation_test] is 40400 libbpf: section .symtab, size 144, link 1, flags 0, type=2 libbpf: map 0 is "my_table" libbpf: collecting relocating info for: 'func=sys_write' libbpf: relocation: insn_idx=7 Success unexpectedly: libbpf error when dealing with relocation test child finished with -1 ---- end ---- Test BPF filter subtest 2: FAILED! [1] https://llvm.org/bugs/show_bug.cgi?id=26243 [2] https://patchwork.ozlabs.org/patch/571385/ Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: "David S. 
Miller" Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1453715801-7732-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- tools/perf/tests/.gitignore | 1 + tools/perf/tests/Build | 9 ++++- tools/perf/tests/bpf-script-test-relocation.c | 50 +++++++++++++++++++++++++++ tools/perf/tests/bpf.c | 26 +++++++++++--- tools/perf/tests/llvm.c | 17 ++++++--- tools/perf/tests/llvm.h | 5 ++- 7 files changed, 98 insertions(+), 12 deletions(-) create mode 100644 tools/perf/tests/bpf-script-test-relocation.c (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5d34815c7ccb..97ce8695199e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -618,7 +618,7 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \ - $(OUTPUT)tests/llvm-src-{base,kbuild,prologue}.c + $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore index bf016c439fbd..8cc30e731c73 100644 --- a/tools/perf/tests/.gitignore +++ b/tools/perf/tests/.gitignore @@ -1,3 +1,4 @@ llvm-src-base.c llvm-src-kbuild.c llvm-src-prologue.c +llvm-src-relocation.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 614899b88b37..1ba628ed049a 100644 --- a/tools/perf/tests/Build +++ 
b/tools/perf/tests/Build @@ -31,7 +31,7 @@ perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o perf-y += thread-map.o -perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o +perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o perf-y += bpf.o perf-y += topology.o perf-y += cpumap.o @@ -59,6 +59,13 @@ $(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ $(Q)echo ';' >> $@ +$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build + $(call rule_mkdir) + $(Q)echo '#include ' > $@ + $(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@ + $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ + $(Q)echo ';' >> $@ + ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c new file mode 100644 index 000000000000..93af77421816 --- /dev/null +++ b/tools/perf/tests/bpf-script-test-relocation.c @@ -0,0 +1,50 @@ +/* + * bpf-script-test-relocation.c + * Test BPF loader checking relocation + */ +#ifndef LINUX_VERSION_CODE +# error Need LINUX_VERSION_CODE +# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' +#endif +#define BPF_ANY 0 +#define BPF_MAP_TYPE_ARRAY 2 +#define BPF_FUNC_map_lookup_elem 1 +#define BPF_FUNC_map_update_elem 2 + +static void *(*bpf_map_lookup_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_lookup_elem; +static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = + (void *) BPF_FUNC_map_update_elem; + +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; +}; + +#define SEC(NAME) __attribute__((section(NAME), used)) +struct bpf_map_def SEC("maps") 
my_table = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, +}; + +int this_is_a_global_val; + +SEC("func=sys_write") +int bpf_func__sys_write(void *ctx) +{ + int key = 0; + int value = 0; + + /* + * Incorrect relocation. Should not allow this program be + * loaded into kernel. + */ + bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0); + return 0; +} +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 33689a0cf821..952ca99aba6b 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -71,6 +71,15 @@ static struct { (NR_ITERS + 1) / 4, }, #endif + { + LLVM_TESTCASE_BPF_RELOCATION, + "Test BPF relocation checker", + "[bpf_relocation_test]", + "fix 'perf test LLVM' first", + "libbpf error when dealing with relocation", + NULL, + 0, + }, }; static int do_test(struct bpf_object *obj, int (*func)(void), @@ -190,7 +199,7 @@ static int __test__bpf(int idx) ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, bpf_testcase_table[idx].prog_id, - true); + true, NULL); if (ret != TEST_OK || !obj_buf || !obj_buf_sz) { pr_debug("Unable to get BPF object, %s\n", bpf_testcase_table[idx].msg_compile_fail); @@ -202,14 +211,21 @@ static int __test__bpf(int idx) obj = prepare_bpf(obj_buf, obj_buf_sz, bpf_testcase_table[idx].name); - if (!obj) { + if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) { + if (!obj) + pr_debug("Fail to load BPF object: %s\n", + bpf_testcase_table[idx].msg_load_fail); + else + pr_debug("Success unexpectedly: %s\n", + bpf_testcase_table[idx].msg_load_fail); ret = TEST_FAIL; goto out; } - ret = do_test(obj, - bpf_testcase_table[idx].target_func, - bpf_testcase_table[idx].expect_result); + if (obj) + ret = do_test(obj, + bpf_testcase_table[idx].target_func, + bpf_testcase_table[idx].expect_result); out: bpf__clear(); return ret; diff --git a/tools/perf/tests/llvm.c 
b/tools/perf/tests/llvm.c index 06f45c1d4256..70edcdfa5672 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -35,6 +35,7 @@ static int test__bpf_parsing(void *obj_buf __maybe_unused, static struct { const char *source; const char *desc; + bool should_load_fail; } bpf_source_table[__LLVM_TESTCASE_MAX] = { [LLVM_TESTCASE_BASE] = { .source = test_llvm__bpf_base_prog, @@ -48,14 +49,19 @@ static struct { .source = test_llvm__bpf_test_prologue_prog, .desc = "Compile source for BPF prologue generation test", }, + [LLVM_TESTCASE_BPF_RELOCATION] = { + .source = test_llvm__bpf_test_relocation, + .desc = "Compile source for BPF relocation test", + .should_load_fail = true, + }, }; - int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz, enum test_llvm__testcase idx, - bool force) + bool force, + bool *should_load_fail) { const char *source; const char *desc; @@ -68,6 +74,8 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf, source = bpf_source_table[idx].source; desc = bpf_source_table[idx].desc; + if (should_load_fail) + *should_load_fail = bpf_source_table[idx].should_load_fail; perf_config(perf_config_cb, NULL); @@ -136,14 +144,15 @@ int test__llvm(int subtest) int ret; void *obj_buf = NULL; size_t obj_buf_sz = 0; + bool should_load_fail = false; if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX)) return TEST_FAIL; ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, - subtest, false); + subtest, false, &should_load_fail); - if (ret == TEST_OK) { + if (ret == TEST_OK && !should_load_fail) { ret = test__bpf_parsing(obj_buf, obj_buf_sz); if (ret != TEST_OK) { pr_debug("Failed to parse test case '%s'\n", diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h index 5150b4d6ef50..0eaa604be99d 100644 --- a/tools/perf/tests/llvm.h +++ b/tools/perf/tests/llvm.h @@ -7,14 +7,17 @@ extern const char test_llvm__bpf_base_prog[]; extern const char test_llvm__bpf_test_kbuild_prog[]; extern const char test_llvm__bpf_test_prologue_prog[]; +extern 
const char test_llvm__bpf_test_relocation[]; enum test_llvm__testcase { LLVM_TESTCASE_BASE, LLVM_TESTCASE_KBUILD, LLVM_TESTCASE_BPF_PROLOGUE, + LLVM_TESTCASE_BPF_RELOCATION, __LLVM_TESTCASE_MAX, }; int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz, - enum test_llvm__testcase index, bool force); + enum test_llvm__testcase index, bool force, + bool *should_load_fail); #endif -- cgit v1.2.3 From 666810e86a3b7531cce892fbeda3b2f2322e1d72 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 25 Jan 2016 09:55:49 +0000 Subject: perf bpf: Check relocation target section Libbpf should check the target section before doing relocation to ensure the relocation is correct. If not, a bug in LLVM causes an error. See [1]. Also, if an incorrect BPF script uses both a global variable and a map, the global variable would be treated as a map and be relocated without error. This patch saves the id of the map section into obj->efile and compares the target section of a relocation symbol against it during relocation. The previous patch introduces a test case about this problem. After this patch: # ~/perf test BPF 37: Test BPF filter : 37.1: Test basic BPF filtering : Ok 37.2: Test BPF prologue generation : Ok 37.3: Test BPF relocation checker : Ok # perf test -v BPF ... 37.3: Test BPF relocation checker : ... 
libbpf: loading object '[bpf_relocation_test]' from buffer libbpf: section .strtab, size 126, link 0, flags 0, type=3 libbpf: section .text, size 0, link 0, flags 6, type=1 libbpf: section .data, size 0, link 0, flags 3, type=1 libbpf: section .bss, size 0, link 0, flags 3, type=8 libbpf: section func=sys_write, size 104, link 0, flags 6, type=1 libbpf: found program func=sys_write libbpf: section .relfunc=sys_write, size 16, link 10, flags 0, type=9 libbpf: section maps, size 16, link 0, flags 3, type=1 libbpf: maps in [bpf_relocation_test]: 16 bytes libbpf: section license, size 4, link 0, flags 3, type=1 libbpf: license of [bpf_relocation_test] is GPL libbpf: section version, size 4, link 0, flags 3, type=1 libbpf: kernel version of [bpf_relocation_test] is 40400 libbpf: section .symtab, size 144, link 1, flags 0, type=2 libbpf: map 0 is "my_table" libbpf: collecting relocating info for: 'func=sys_write' libbpf: Program 'func=sys_write' contains non-map related relo data pointing to section 65522 bpf: failed to load buffer Compile BPF program failed. test child finished with 0 ---- end ---- Test BPF filter subtest 2: Ok [1] https://llvm.org/bugs/show_bug.cgi?id=26243 Signed-off-by: Wang Nan Acked-by: Alexei Starovoitov Tested-by: Arnaldo Carvalho de Melo Cc: "David S. 
Miller" Cc: Brendan Gregg Cc: Daniel Borkmann Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1453715801-7732-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/libbpf.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8334a5a9d5d7..7e543c3102d4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -201,6 +201,7 @@ struct bpf_object { Elf_Data *data; } *reloc; int nr_reloc; + int maps_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -350,6 +351,7 @@ static struct bpf_object *bpf_object__new(const char *path, */ obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; + obj->efile.maps_shndx = -1; obj->loaded = false; @@ -529,12 +531,12 @@ bpf_object__init_maps(struct bpf_object *obj, void *data, } static int -bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx) +bpf_object__init_maps_name(struct bpf_object *obj) { int i; Elf_Data *symbols = obj->efile.symbols; - if (!symbols || maps_shndx < 0) + if (!symbols || obj->efile.maps_shndx < 0) return -EINVAL; for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { @@ -544,7 +546,7 @@ bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx) if (!gelf_getsym(symbols, i, &sym)) continue; - if (sym.st_shndx != maps_shndx) + if (sym.st_shndx != obj->efile.maps_shndx) continue; map_name = elf_strptr(obj->efile.elf, @@ -572,7 +574,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; Elf_Scn *scn = NULL; - int idx = 0, err = 0, maps_shndx = -1; + int idx = 0, err = 0; /* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { @@ -625,7 +627,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) else if (strcmp(name, "maps") == 0) { err = bpf_object__init_maps(obj, data->d_buf, data->d_size); - maps_shndx = idx; + obj->efile.maps_shndx = idx; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -674,8 +676,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } - if (maps_shndx >= 0) - err = bpf_object__init_maps_name(obj, maps_shndx); + if (obj->efile.maps_shndx >= 0) + err = bpf_object__init_maps_name(obj); out: return err; } @@ -697,7 +699,8 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) static int bpf_program__collect_reloc(struct bpf_program *prog, size_t nr_maps, GElf_Shdr *shdr, - Elf_Data *data, Elf_Data *symbols) + Elf_Data *data, Elf_Data *symbols, + int maps_shndx) { int i, nrels; @@ -724,9 +727,6 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__FORMAT; } - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - pr_debug("relocation: insn_idx=%u\n", insn_idx); - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { @@ -735,6 +735,15 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__FORMAT; } + if (sym.st_shndx != maps_shndx) { + pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", + prog->section_name, sym.st_shndx); + return -LIBBPF_ERRNO__RELOC; + } + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + pr_debug("relocation: insn_idx=%u\n", insn_idx); + if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", insn_idx, insns[insn_idx].code); @@ -863,7 +872,8 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) err = bpf_program__collect_reloc(prog, nr_maps, 
shdr, data, - obj->efile.symbols); + obj->efile.symbols, + obj->efile.maps_shndx); if (err) return err; } -- cgit v1.2.3 From 9fd4186ac19a4c8182dffc9b15dd288b50f09f76 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 25 Jan 2016 09:55:50 +0000 Subject: tools build: Allow subprojects select all feature checkers Put feature checkers not in original FEATURE_TESTS to a new list and allow subproject select all feature checkers by setting FEATURE_TESTS to 'all'. Signed-off-by: Wang Nan Cc: "David S. Miller" Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Link: http://lkml.kernel.org/r/1453715801-7732-4-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 02db3cdff20f..674c47d5f9d1 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -27,7 +27,7 @@ endef # the rule that uses them - an example for that is the 'bionic' # feature check. 
] # -FEATURE_TESTS ?= \ +FEATURE_TESTS_BASIC := \ backtrace \ dwarf \ fortify-source \ @@ -56,6 +56,25 @@ FEATURE_TESTS ?= \ get_cpuid \ bpf +# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list +# of all feature tests +FEATURE_TESTS_EXTRA := \ + bionic \ + compile-32 \ + compile-x32 \ + cplus-demangle \ + hello \ + libbabeltrace \ + liberty \ + liberty-z \ + libunwind-debug-frame + +FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) + +ifeq ($(FEATURE_TESTS),all) + FEATURE_TESTS := $(FEATURE_TESTS_BASIC) $(FEATURE_TESTS_EXTRA) +endif + FEATURE_DISPLAY ?= \ dwarf \ glibc \ -- cgit v1.2.3 From c053a1506faee399cbc2105f2131bb5a5d99eedd Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 25 Jan 2016 09:55:51 +0000 Subject: perf build: Select all feature checkers for feature-dump Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. Without this setting the output feature dump file misses some features, for example, liberty. Select all checkers so we won't get an incomplete feature dump file. Signed-off-by: Wang Nan Cc: "David S. Miller" Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Link: http://lkml.kernel.org/r/1453715801-7732-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 97ce8695199e..0ef3d97d7954 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -165,7 +165,16 @@ ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) endif endif +# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. +# Without this setting the output feature dump file misses some features, for +# example, liberty. Select all checkers so we won't get an incomplete feature +# dump file. 
ifeq ($(config),1) +ifdef MAKECMDGOALS +ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump) +FEATURE_TESTS := all +endif +endif include config/Makefile endif -- cgit v1.2.3 From b1baae89197e21cd115e9493d5a17f18fca81e6a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Jan 2016 15:37:30 -0300 Subject: perf hists browser: Skip scripting when perf.data file not available The script and data-switch context menu are only meaningful when it deals with a data file. So add a check so that it cannot be shown when perf-top is run. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa , Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1453555902-18401-4-git-send-email-namhyung@kernel.org [ Use goto skip_scripting instead of two is_report_browser() tests ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index d07e6be42ab1..1da30f8aa7a5 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2303,6 +2303,9 @@ skip_annotation: &options[nr_options], socked_id); /* perf script support */ + if (!is_report_browser(hbt)) + goto skip_scripting; + if (browser->he_selection) { if (sort__has_thread && thread) { nr_options += add_script_opt(browser, @@ -2330,6 +2333,7 @@ skip_annotation: &options[nr_options], NULL, NULL); nr_options += add_switch_opt(browser, &actions[nr_options], &options[nr_options]); +skip_scripting: nr_options += add_exit_opt(browser, &actions[nr_options], &options[nr_options]); -- cgit v1.2.3 From 5ac76283b32b116c58e362e99542182ddcfc8262 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 26 Jan 2016 15:51:46 -0300 Subject: perf cpumap: Auto initialize cpu__max_{node,cpu} Since it was always checking if the initialization was done, use that branch to do the 
initialization if not done already. With this we reduce the number of exported globals from these files. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Kan Liang Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20160125212955.GG22501@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/cpumap.h | 32 +++----------------------------- 2 files changed, 33 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index fa935093a599..9bcf2bed3a6d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -8,6 +8,10 @@ #include #include "asm/bug.h" +static int max_cpu_num; +static int max_node_num; +static int *cpunode_map; + static struct cpu_map *cpu_map__default_new(void) { struct cpu_map *cpus; @@ -486,6 +490,32 @@ out: pr_err("Failed to read max nodes, using default of %d\n", max_node_num); } +int cpu__max_node(void) +{ + if (unlikely(!max_node_num)) + set_max_node_num(); + + return max_node_num; +} + +int cpu__max_cpu(void) +{ + if (unlikely(!max_cpu_num)) + set_max_cpu_num(); + + return max_cpu_num; +} + +int cpu__get_node(int cpu) +{ + if (unlikely(cpunode_map == NULL)) { + pr_debug("cpu_map not initialized\n"); + return -1; + } + + return cpunode_map[cpu]; +} + static int init_cpunode_map(void) { int i; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 71c41b9efabb..81a2562aaa2b 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -57,37 +57,11 @@ static inline bool cpu_map__empty(const struct cpu_map *map) return map ? 
map->map[0] == -1 : true; } -int max_cpu_num; -int max_node_num; -int *cpunode_map; - int cpu__setup_cpunode_map(void); -static inline int cpu__max_node(void) -{ - if (unlikely(!max_node_num)) - pr_debug("cpu_map not initialized\n"); - - return max_node_num; -} - -static inline int cpu__max_cpu(void) -{ - if (unlikely(!max_cpu_num)) - pr_debug("cpu_map not initialized\n"); - - return max_cpu_num; -} - -static inline int cpu__get_node(int cpu) -{ - if (unlikely(cpunode_map == NULL)) { - pr_debug("cpu_map not initialized\n"); - return -1; - } - - return cpunode_map[cpu]; -} +int cpu__max_node(void); +int cpu__max_cpu(void); +int cpu__get_node(int cpu); int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, int (*f)(struct cpu_map *map, int cpu, void *data), -- cgit v1.2.3 From cf9162c290447cdf6fca7b64dd6e2200dc52f03b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 27 Jan 2016 11:22:22 +0000 Subject: tools build: Check basic headers for test-compile feature checker An i386 binary can be linked correctly even without correct headers. Which causes problem. For exmaple: $ mv /tmp/oxygen_root/usr/include/gnu/stubs-32.h{,.bak} $ make tools/perf Auto-detecting system features: ... dwarf: [ on ] [SNIP] GEN common-cmds.h CC perf-read-vdso32 In file included from /tmp/oxygen_root/usr/include/features.h:388:0, from /tmp/oxygen_root/usr/include/stdio.h:27, from perf-read-vdso.c:1: /tmp/oxygen_root/usr/include/gnu/stubs.h:7:27: fatal error: gnu/stubs-32.h: No such file or directory # include ^ compilation terminated. ... In this patch we checks not only compiler and linker, but also basic headers in test-compile test case, make it fail on a platform lacking correct headers. 
Signed-off-by: Wang Nan Acked-by: Jiri Olsa Cc: Li Zefan Link: http://lkml.kernel.org/r/1453893742-20603-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-compile.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/build/feature/test-compile.c b/tools/build/feature/test-compile.c index 31dbf45bf99c..c54e6551ae4c 100644 --- a/tools/build/feature/test-compile.c +++ b/tools/build/feature/test-compile.c @@ -1,4 +1,6 @@ +#include int main(void) { + printf("Hello World!\n"); return 0; } -- cgit v1.2.3 From 76c4aaec41b76afb7f914aaf6080f21ab331b0c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 28 Jan 2016 16:13:24 -0300 Subject: perf build: Fix feature-dump checks, we need to test all features I see problem with test-all case speedup, because it does not comprise checks for 32bits compilations, fix it. The problem could be noticed by calling: make -C tools/perf feature-dump That would end up misdetecting the feature-compile-x32, that, building using 'gcc -mx32' needs stub headers not present in a fedora 23 devel machine and thus fail to compile, but ended up appearing as detected, i.e. present in tools/perf/FEATURE-DUMP as 'feature-compile-x32=1'. With this fix it correctly appears as 'feature-compile-x32=0' and if we uninstall the libc devel files for 32 bits (glibc-devel.i686), then the relevant variable is flipped from 'feature-compile-32=1' to 'feature-compile-32=0'. 
The same thing happened for bionic and libbabeltrace, which were misdetected because they are not tested in test-all.c
Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Li Zefan Link: http://lkml.kernel.org/r/1454047050-204993-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/config/Makefile | 101 +++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 54 deletions(-) (limited to 'tools') diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 511141b102e8..0045a5ddd0ca 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -61,50 +61,45 @@ endif ifeq ($(LIBUNWIND_LIBS),) NO_LIBUNWIND := 1 -else - # - # For linking with debug library, run like: - # - # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ - # - ifdef LIBUNWIND_DIR - LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include - LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib - endif - LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) - - # Set per-feature check compilation flags - FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) - FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) - FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) - FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) endif +# +# For linking with debug library, run like: +# +# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ +# +ifdef LIBUNWIND_DIR + LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include + LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib +endif +LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) + +# Set per-feature check compilation flags +FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) +FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT endif -ifndef NO_LIBELF - # for linking with debug library, run like: - # make DEBUG=1 LIBDW_DIR=/opt/libdw/ - ifdef LIBDW_DIR - LIBDW_CFLAGS := -I$(LIBDW_DIR)/include - LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib - endif - 
FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) - FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw +# for linking with debug library, run like: +# make DEBUG=1 LIBDW_DIR=/opt/libdw/ +ifdef LIBDW_DIR + LIBDW_CFLAGS := -I$(LIBDW_DIR)/include + LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib endif +FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) +FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw -ifdef LIBBABELTRACE - # for linking with debug library, run like: - # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ - ifdef LIBBABELTRACE_DIR - LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include - LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib - endif - FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) - FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +# for linking with debug library, run like: +# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ +ifdef LIBBABELTRACE_DIR + LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include + LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib endif +FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) +FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi # include ARCH specific config @@ -145,28 +140,26 @@ ifdef PARSER_DEBUG $(call detected_var,PARSER_DEBUG_FLEX) endif -ifndef NO_LIBPYTHON - # Try different combinations to accommodate systems that only have - # python[2][-config] in weird combinations but always preferring - # python2 and python2-config as per pep-0394. If we catch a - # python[-config] in version 3, the version check will kill it. 
- PYTHON2 := $(if $(call get-executable,python2),python2,python) - override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) - PYTHON2_CONFIG := \ - $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) - override PYTHON_CONFIG := \ - $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) +# Try different combinations to accommodate systems that only have +# python[2][-config] in weird combinations but always preferring +# python2 and python2-config as per pep-0394. If we catch a +# python[-config] in version 3, the version check will kill it. +PYTHON2 := $(if $(call get-executable,python2),python2,python) +override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) +PYTHON2_CONFIG := \ + $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) +override PYTHON_CONFIG := \ + $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) - PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) +PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) - PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) +PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) +PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) - FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) - FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) - FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) -endif +FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) +FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) +FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) +FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) CFLAGS += -fno-omit-frame-pointer CFLAGS += -ggdb3 -- cgit v1.2.3 From 79191c89a049a9c525ce22a7d1e5674699c58818 Mon Sep 17 00:00:00 2001 From: Wang 
Nan Date: Fri, 29 Jan 2016 11:51:09 +0000 Subject: perf build: Use feature dump file for build-test To prevent the feature check tests to run repeately, one time per 'tests/make' target/test, this patch utilizes the previously introduced 'feature-dump' make target and FEATURES_DUMP variable, making sure that the feature checkers run only once when doing build-test for normal test cases. However, since standard users doesn't reuse features dump result, we'd better give an option to check their behaviors. The above feature should be used to make build-test faster only. Only utilize it for build-test. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1454068269-235999-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 +- tools/perf/tests/make | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index dcd9a70c7193..e4ff0bd08870 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -78,7 +78,7 @@ clean: # The build-test target is not really parallel, don't print the jobs info: # build-test: - @$(MAKE) SHUF=1 -f tests/make --no-print-directory + @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 --no-print-directory # # All other targets get passed through: diff --git a/tools/perf/tests/make b/tools/perf/tests/make index f918015512af..7f663f4611c6 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -15,6 +15,7 @@ else PERF := . 
PERF_O := $(PERF) O_OPT := +FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O)) ifneq ($(O),) FULL_O := $(shell readlink -f $(O) || echo $(O)) @@ -313,11 +314,43 @@ make_kernelsrc_tools: (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \ test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false) +FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP +FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC + all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools @echo OK + @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC) out: $(run_O) @echo OK + @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC) + +ifeq ($(REUSE_FEATURES_DUMP),1) +$(FEATURES_DUMP_FILE): + $(call clean) + @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \ + echo "- $@: $$cmd" && echo $$cmd && \ + ( eval $$cmd ) > /dev/null 2>&1 + +$(FEATURES_DUMP_FILE_STATIC): + $(call clean) + @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \ + echo "- $@: $$cmd" && echo $$cmd && \ + ( eval $$cmd ) > /dev/null 2>&1 + +# Add feature dump dependency for run/run_O targets +$(foreach t,$(run) $(run_O),$(eval \ + $(t): $(if $(findstring make_static,$(t)),\ + $(FEATURES_DUMP_FILE_STATIC),\ + $(FEATURES_DUMP_FILE)))) + +# Append 'FEATURES_DUMP=' option to all test cases. 
For example: +# make_no_libbpf: NO_LIBBPF=1 --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP +# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC +$(foreach t,$(run),$(if $(findstring make_static,$(t)),\ + $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\ + $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE)))) +endif .PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools endif # ifndef MK -- cgit v1.2.3 From a639a623904cc526cebd7679debf86e5c8e5590b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 29 Jan 2016 14:49:31 -0300 Subject: perf tools: Speed up build-tests by reducing the number of builds tested The 'tools/perf/test/make' makefile has in its default, 'all' target builds that will pollute the source code directory, i.e. that will not use O= variable. The 'build-test' should be run as often as possible, preferrably after each non strictly non-code commit, so speed it up by selecting just the O= targets. Furthermore it tests both the Makefile.perf file, that is normally driven by the main Makefile, and the Makefile, reduce the time in half by having just MK=Makefile, the most usual, tested by 'build-test'. Please run: make -C tools/perf -f tests/make from time to time for testing also the in-place build tests. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-jrt9utscsiqkmjy3ccufostd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index e4ff0bd08870..4b68f465195c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -75,10 +75,17 @@ clean: $(make) # -# The build-test target is not really parallel, don't print the jobs info: +# The build-test target is not really parallel, don't print the jobs info, +# it also uses only the tests/make targets that don't pollute the source +# repository, i.e. that uses O= or builds the tarpkg outside the source +# repo directories. +# +# For a full test, use: +# +# make -C tools/perf -f tests/make # build-test: - @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 --no-print-directory + @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile --no-print-directory tarpkg out # # All other targets get passed through: -- cgit v1.2.3 From 14a05e13a044c1cd6aaa3eb1a5fcdad7b4f6c990 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 14 Jan 2016 14:46:15 -0700 Subject: perf auxtrace: Add perf_evlist pointer to *info_priv_size() On some architecture the size of the private header may be dependent on the number of tracers used in the session. As such adding a "struct perf_evlist *" parameter, which should contain all the required information. Also adjusting the existing client of the interface to take the new parameter into account. 
Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Cc: Al Grant Cc: Chunyan Zhang Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Cc: Mike Leach Cc: Peter Zijlstra Cc: Rabin Vincent Cc: Tor Jeremiassen Link: http://lkml.kernel.org/r/1452807977-8069-22-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-bts.c | 4 +++- tools/perf/arch/x86/util/intel-pt.c | 4 +++- tools/perf/util/auxtrace.c | 7 ++++--- tools/perf/util/auxtrace.h | 6 ++++-- 4 files changed, 14 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 8d8150f1cf9b..d66f9ad4df2e 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -60,7 +60,9 @@ struct branch { u64 misc; }; -static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused) +static size_t +intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return INTEL_BTS_AUXTRACE_PRIV_SIZE; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index f05daacc9e78..6f7d453b0e32 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -273,7 +273,9 @@ intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) return attr; } -static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused) +static size_t +intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return INTEL_PT_AUXTRACE_PRIV_SIZE; } diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 360fda01f3b0..ec164fe70718 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -478,10 +478,11 @@ void auxtrace_heap__pop(struct auxtrace_heap *heap) heap_array[last].ordinal); } -size_t 
auxtrace_record__info_priv_size(struct auxtrace_record *itr) +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr, + struct perf_evlist *evlist) { if (itr) - return itr->info_priv_size(itr); + return itr->info_priv_size(itr, evlist); return 0; } @@ -852,7 +853,7 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, int err; pr_debug2("Synthesizing auxtrace information\n"); - priv_size = auxtrace_record__info_priv_size(itr); + priv_size = auxtrace_record__info_priv_size(itr, session->evlist); ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size); if (!ev) return -ENOMEM; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index b86f90db1352..e5a8e2d4f2af 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -293,7 +293,8 @@ struct auxtrace_record { int (*recording_options)(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts); - size_t (*info_priv_size)(struct auxtrace_record *itr); + size_t (*info_priv_size)(struct auxtrace_record *itr, + struct perf_evlist *evlist); int (*info_fill)(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, @@ -429,7 +430,8 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, int auxtrace_record__options(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts); -size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr); +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr, + struct perf_evlist *evlist); int auxtrace_record__info_fill(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, -- cgit v1.2.3 From fd786fac78affe4a005065bc2b6f90d8f8953961 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 29 Jan 2016 17:40:51 +0000 Subject: perf buildid: Fix cpumode of buildid event There is a nasty confusion that, for kernel module, dso->kernel is not necessary to be 
DSO_TYPE_KERNEL or DSO_TYPE_GUEST_KERNEL. These two enums are for vmlinux. See thread [1]. We tried to fix this part but it is costy. Code machine__write_buildid_table() is another unfortunate function fall into this trap that, when issuing buildid event for a kernel module, cpumode it gives to the event is PERF_RECORD_MISC_USER, not PERF_RECORD_MISC_KERNEL. However, even with this bug, most of the time it doesn't causes real problem. I find this issue when trying to use a perf before commit 3d39ac538629 ("perf machine: No need to have two DSOs lists") to parse a perf.data generated by newest perf. [1] https://lkml.org/lkml/2015/9/21/908 Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1454089251-203152-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 6a7e273a514a..b28100ee1732 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -211,6 +211,7 @@ static int machine__write_buildid_table(struct machine *machine, int fd) dsos__for_each_with_build_id(pos, &machine->dsos.head) { const char *name; size_t name_len; + bool in_kernel = false; if (!pos->hit) continue; @@ -227,8 +228,11 @@ static int machine__write_buildid_table(struct machine *machine, int fd) name_len = pos->long_name_len + 1; } + in_kernel = pos->kernel || + is_kernel_module(name, + PERF_RECORD_MISC_CPUMODE_UNKNOWN); err = write_buildid(name, name_len, pos->build_id, machine->pid, - pos->kernel ? kmisc : umisc, fd); + in_kernel ? 
kmisc : umisc, fd); if (err) break; } -- cgit v1.2.3 From 6a7d550e8b2eeb380ab85d9bc53571123b98345b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 25 Jan 2016 09:55:53 +0000 Subject: perf test: Check environment before start real BPF test Copying perf to old kernel system results: # perf test bpf 37: Test BPF filter : 37.1: Test basic BPF filtering : FAILED! 37.2: Test BPF prologue generation : Skip However, in case when kernel doesn't support a test case it should return 'Skip', 'FAILED!' should be reserved for kernel tests for when the kernel supports a feature that then fails to work as advertised. This patch checks environment before real testcase. Signed-off-by: Wang Nan Suggested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1453715801-7732-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 952ca99aba6b..4aed5cb4ac2d 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -1,7 +1,11 @@ #include #include +#include #include #include +#include +#include +#include #include "tests.h" #include "llvm.h" #include "debug.h" @@ -243,6 +247,36 @@ const char *test__bpf_subtest_get_desc(int i) return bpf_testcase_table[i].desc; } +static int check_env(void) +{ + int err; + unsigned int kver_int; + char license[] = "GPL"; + + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + + err = fetch_kernel_version(&kver_int, NULL, 0); + if (err) { + pr_debug("Unable to get kernel version\n"); + return err; + } + + err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, + sizeof(insns) / sizeof(insns[0]), + license, 
kver_int, NULL, 0); + if (err < 0) { + pr_err("Missing basic BPF support, skip this test: %s\n", + strerror(errno)); + return err; + } + close(err); + + return 0; +} + int test__bpf(int i) { int err; @@ -255,6 +289,9 @@ int test__bpf(int i) return TEST_SKIP; } + if (check_env()) + return TEST_SKIP; + err = __test__bpf(i); return err; } -- cgit v1.2.3 From 8fd34e1cce180eb0c726e7ed88f7b70c11c38e21 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 25 Jan 2016 09:55:55 +0000 Subject: perf test: Improve bp_signal Will Deacon [1] has some question on patch [2]. This patch improves test__bp_signal so we can test: 1. A watchpoint and a breakpoint that fire on the same instruction 2. Nested signals Test result: On x86_64 and ARM64 (result are similar with patch [2] on ARM64): # ./perf test -v signal 17: Test breakpoint overflow signal handler : --- start --- test child forked, pid 10213 count1 1, count2 3, count3 2, overflow 3, overflows_2 3 test child finished with 0 ---- end ---- Test breakpoint overflow signal handler: Ok So at least 2 cases Will doubted are handled correctly. 
[1] http://lkml.kernel.org/g/20160104165535.GI1616@arm.com [2] http://lkml.kernel.org/g/1450921362-198371-1-git-send-email-wangnan0@huawei.com Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: He Kuang Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1453715801-7732-9-git-send-email-wangnan0@huawei.com Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bp_signal.c | 140 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 118 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index fb80c9eb6a95..1d1bb489b4e8 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -29,14 +29,59 @@ static int fd1; static int fd2; +static int fd3; static int overflows; +static int overflows_2; + +volatile long the_var; + + +/* + * Use ASM to ensure watchpoint and breakpoint can be triggered + * at one instruction. 
+ */ +#if defined (__x86_64__) +extern void __test_function(volatile long *ptr); +asm ( + ".globl __test_function\n" + "__test_function:\n" + "incq (%rdi)\n" + "ret\n"); +#elif defined (__aarch64__) +extern void __test_function(volatile long *ptr); +asm ( + ".globl __test_function\n" + "__test_function:\n" + "str x30, [x0]\n" + "ret\n"); + +#else +static void __test_function(volatile long *ptr) +{ + *ptr = 0x1234; +} +#endif __attribute__ ((noinline)) static int test_function(void) { + __test_function(&the_var); + the_var++; return time(NULL); } +static void sig_handler_2(int signum __maybe_unused, + siginfo_t *oh __maybe_unused, + void *uc __maybe_unused) +{ + overflows_2++; + if (overflows_2 > 10) { + ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); + } +} + static void sig_handler(int signum __maybe_unused, siginfo_t *oh __maybe_unused, void *uc __maybe_unused) @@ -54,10 +99,11 @@ static void sig_handler(int signum __maybe_unused, */ ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); } } -static int bp_event(void *fn, int setup_signal) +static int __event(bool is_x, void *addr, int signal) { struct perf_event_attr pe; int fd; @@ -67,8 +113,8 @@ static int bp_event(void *fn, int setup_signal) pe.size = sizeof(struct perf_event_attr); pe.config = 0; - pe.bp_type = HW_BREAKPOINT_X; - pe.bp_addr = (unsigned long) fn; + pe.bp_type = is_x ? 
HW_BREAKPOINT_X : HW_BREAKPOINT_W; + pe.bp_addr = (unsigned long) addr; pe.bp_len = sizeof(long); pe.sample_period = 1; @@ -86,17 +132,25 @@ static int bp_event(void *fn, int setup_signal) return TEST_FAIL; } - if (setup_signal) { - fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); - fcntl(fd, F_SETSIG, SIGIO); - fcntl(fd, F_SETOWN, getpid()); - } + fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); + fcntl(fd, F_SETSIG, signal); + fcntl(fd, F_SETOWN, getpid()); ioctl(fd, PERF_EVENT_IOC_RESET, 0); return fd; } +static int bp_event(void *addr, int signal) +{ + return __event(true, addr, signal); +} + +static int wp_event(void *addr, int signal) +{ + return __event(false, addr, signal); +} + static long long bp_count(int fd) { long long count; @@ -114,7 +168,7 @@ static long long bp_count(int fd) int test__bp_signal(int subtest __maybe_unused) { struct sigaction sa; - long long count1, count2; + long long count1, count2, count3; /* setup SIGIO signal handler */ memset(&sa, 0, sizeof(struct sigaction)); @@ -126,21 +180,52 @@ int test__bp_signal(int subtest __maybe_unused) return TEST_FAIL; } + sa.sa_sigaction = (void *) sig_handler_2; + if (sigaction(SIGUSR1, &sa, NULL) < 0) { + pr_debug("failed setting up signal handler 2\n"); + return TEST_FAIL; + } + /* * We create following events: * - * fd1 - breakpoint event on test_function with SIGIO + * fd1 - breakpoint event on __test_function with SIGIO * signal configured. We should get signal * notification each time the breakpoint is hit * - * fd2 - breakpoint event on sig_handler without SIGIO + * fd2 - breakpoint event on sig_handler with SIGUSR1 + * configured. We should get SIGUSR1 each time when + * breakpoint is hit + * + * fd3 - watchpoint event on __test_function with SIGIO * configured. 
* * Following processing should happen: - * - execute test_function - * - fd1 event breakpoint hit -> count1 == 1 - * - SIGIO is delivered -> overflows == 1 - * - fd2 event breakpoint hit -> count2 == 1 + * Exec: Action: Result: + * incq (%rdi) - fd1 event breakpoint hit -> count1 == 1 + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 1 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 1 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows = 1 + * sys_rt_sigreturn - return from sig_handler + * incq (%rdi) - fd3 event watchpoint hit -> count3 == 1 (wp and bp in one insn) + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 2 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 2 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows = 2 + * sys_rt_sigreturn - return from sig_handler + * the_var++ - fd3 event watchpoint hit -> count3 == 2 (standalone watchpoint) + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 3 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 3 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows == 3 + * sys_rt_sigreturn - return from sig_handler * * The test case check following error conditions: * - we get stuck in signal handler because of debug @@ -152,11 +237,13 @@ int test__bp_signal(int subtest __maybe_unused) * */ - fd1 = bp_event(test_function, 1); - fd2 = bp_event(sig_handler, 0); + fd1 = bp_event(__test_function, SIGIO); + fd2 = bp_event(sig_handler, SIGUSR1); + fd3 = wp_event((void *)&the_var, SIGIO); ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0); /* * Kick off the test by trigering 'fd1' @@ -166,15 +253,18 @@ int test__bp_signal(int subtest __maybe_unused) ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd2, 
PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); count1 = bp_count(fd1); count2 = bp_count(fd2); + count3 = bp_count(fd3); close(fd1); close(fd2); + close(fd3); - pr_debug("count1 %lld, count2 %lld, overflow %d\n", - count1, count2, overflows); + pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n", + count1, count2, count3, overflows, overflows_2); if (count1 != 1) { if (count1 == 11) @@ -183,12 +273,18 @@ int test__bp_signal(int subtest __maybe_unused) pr_debug("failed: wrong count for bp1%lld\n", count1); } - if (overflows != 1) + if (overflows != 3) pr_debug("failed: wrong overflow hit\n"); - if (count2 != 1) + if (overflows_2 != 3) + pr_debug("failed: wrong overflow_2 hit\n"); + + if (count2 != 3) pr_debug("failed: wrong count for bp2\n"); - return count1 == 1 && overflows == 1 && count2 == 1 ? + if (count3 != 2) + pr_debug("failed: wrong count for bp3\n"); + + return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ? TEST_OK : TEST_FAIL; } -- cgit v1.2.3 From 37b20151efe002a4a43532d3791d11d39d080248 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 25 Jan 2016 09:56:13 +0000 Subject: perf tools: Move timestamp creation to util Timestamp generation becomes a publicly available helper, which will be used by 'perf record' to help it split its output into files based on time. For example: perf.data.2015122620363710 perf.data.2015122620364092 perf.data.2015122620365423 ...
Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1453715801-7732-27-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 14 +------------- tools/perf/util/util.c | 17 +++++++++++++++++ tools/perf/util/util.h | 1 + 3 files changed, 19 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index d93bff7fc0e4..632efc6b79a0 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -38,19 +38,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) static int build_id_cache__kcore_dir(char *dir, size_t sz) { - struct timeval tv; - struct tm tm; - char dt[32]; - - if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) - return -1; - - if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) - return -1; - - scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); - - return 0; + return fetch_current_timestamp(dir, sz); } static bool same_kallsyms_reloc(const char *from_dir, char *to_dir) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 7a2da7ef556e..b9e2843cfbe7 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -701,3 +701,20 @@ bool is_regular_file(const char *file) return S_ISREG(st.st_mode); } + +int fetch_current_timestamp(char *buf, size_t sz) +{ + struct timeval tv; + struct tm tm; + char dt[32]; + + if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) + return -1; + + if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) + return -1; + + scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); + + return 0; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h 
index 61650f05e5c1..a8615816a00d 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -344,5 +344,6 @@ int fetch_kernel_version(unsigned int *puint, const char *perf_tip(const char *dirpath); bool is_regular_file(const char *file); +int fetch_current_timestamp(char *buf, size_t sz); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From d2db9a98c3058a45780f7fcd0cc8584858cf6b29 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 25 Jan 2016 09:56:19 +0000 Subject: perf record: Use OPT_BOOLEAN_SET for buildid cache related options 'perf record' knows whether buildid cache is enabled (via --no-no-buildid-cache) deliberately. Buildid cache can be turned off in some situations. Output switching support needs this feature to turn off buildid cache by default. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1453715801-7732-33-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 319712a4e02b..0ee0d5cd31a7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -49,7 +49,9 @@ struct record { const char *progname; int realtime_prio; bool no_buildid; + bool no_buildid_set; bool no_buildid_cache; + bool no_buildid_cache_set; bool buildid_all; unsigned long long samples; }; @@ -1097,10 +1099,12 @@ struct option __record_options[] = { OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), - OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, - "do not update the buildid cache"), - 
OPT_BOOLEAN('B', "no-buildid", &record.no_buildid, - "do not collect buildids in perf.data"), + OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, + &record.no_buildid_cache_set, + "do not update the buildid cache"), + OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, + &record.no_buildid_set, + "do not collect buildids in perf.data"), OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only", parse_cgroups), -- cgit v1.2.3 From 162607ea20fafb4a76234ebe4314cd733345482e Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Thu, 28 Jan 2016 12:03:04 +0530 Subject: perf kvm/{x86,s390}: Remove dependency on uapi/kvm_perf.h It's better to remove the dependency on uapi/kvm_perf.h to allow dynamic discovery of kvm events (if it's needed). To do this, some extern variables have been introduced with which we can keep the generic functions generic. Signed-off-by: Hemant Kumar Acked-by: Alexander Yarygin Acked-by: David Ahern Cc: Michael Ellerman Cc: Naveen N.
Rao Cc: Paul Mackerras Cc: Scott Wood Cc: Srikar Dronamraju Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1453962787-15376-1-git-send-email-hemant@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/kvm-stat.c | 8 +++++++- tools/perf/arch/x86/util/kvm-stat.c | 14 +++++++++++--- tools/perf/builtin-kvm.c | 20 ++++++++++---------- tools/perf/util/kvm-stat.h | 5 +++++ 4 files changed, 33 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index a5dbc07ec9dc..b85a94b19c25 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -10,7 +10,7 @@ */ #include "../../util/kvm-stat.h" -#include +#include define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes); @@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes); define_exit_reasons_table(sie_diagnose_codes, diagnose_codes); define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes); +const char *vcpu_id_str = "id"; +const int decode_str_len = 40; +const char *kvm_exit_reason = "icptcode"; +const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter"; +const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit"; + static void event_icpt_insn_get_key(struct perf_evsel *evsel, struct perf_sample *sample, struct event_key *key) diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 14e4e668fad7..babefda4c862 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -1,5 +1,7 @@ #include "../../util/kvm-stat.h" -#include +#include +#include +#include define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS); define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS); @@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = { .name = "VM-EXIT" }; +const char *vcpu_id_str 
= "vcpu_id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "exit_reason"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + /* * For the mmio events, we treat: * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry @@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#lx:%s", + scnprintf(decode, decode_str_len, "%#lx:%s", (unsigned long)key->key, key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R"); } @@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#llx:%s", + scnprintf(decode, decode_str_len, "%#llx:%s", (unsigned long long)key->key, key->info ? "POUT" : "PIN"); } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 4418d9214872..ab5645cf39d2 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -30,7 +30,6 @@ #include #ifdef HAVE_KVM_STAT_SUPPORT -#include #include "util/kvm-stat.h" void exit_event_get_key(struct perf_evsel *evsel, @@ -38,12 +37,12 @@ void exit_event_get_key(struct perf_evsel *evsel, struct event_key *key) { key->info = 0; - key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON); + key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason); } bool kvm_exit_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_EXIT_TRACE); + return !strcmp(evsel->name, kvm_exit_trace); } bool exit_event_begin(struct perf_evsel *evsel, @@ -59,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel, bool kvm_entry_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_ENTRY_TRACE); + return !strcmp(evsel->name, kvm_entry_trace); } bool exit_event_end(struct perf_evsel *evsel, @@ -91,7 +90,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm, const char *exit_reason = 
get_exit_reason(kvm, key->exit_reasons, key->key); - scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason); + scnprintf(decode, decode_str_len, "%s", exit_reason); } static bool register_kvm_events_ops(struct perf_kvm_stat *kvm) @@ -357,7 +356,7 @@ static bool handle_end_event(struct perf_kvm_stat *kvm, time_diff = sample->time - time_begin; if (kvm->duration && time_diff > kvm->duration) { - char decode[DECODE_STR_LEN]; + char decode[decode_str_len]; kvm->events_ops->decode_key(kvm, &event->key, decode); if (!skip_event(decode)) { @@ -385,7 +384,8 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread, return NULL; } - vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID); + vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, + vcpu_id_str); thread__set_priv(thread, vcpu_record); } @@ -574,7 +574,7 @@ static void show_timeofday(void) static void print_result(struct perf_kvm_stat *kvm) { - char decode[DECODE_STR_LEN]; + char decode[decode_str_len]; struct kvm_event *event; int vcpu = kvm->trace_vcpu; @@ -585,7 +585,7 @@ static void print_result(struct perf_kvm_stat *kvm) pr_info("\n\n"); print_vcpu_info(kvm); - pr_info("%*s ", DECODE_STR_LEN, kvm->events_ops->name); + pr_info("%*s ", decode_str_len, kvm->events_ops->name); pr_info("%10s ", "Samples"); pr_info("%9s ", "Samples%"); @@ -604,7 +604,7 @@ static void print_result(struct perf_kvm_stat *kvm) min = get_event_min(event, vcpu); kvm->events_ops->decode_key(kvm, &event->key, decode); - pr_info("%*s ", DECODE_STR_LEN, decode); + pr_info("%*s ", decode_str_len, decode); pr_info("%10llu ", (unsigned long long)ecount); pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100); pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100); diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index ae825d4ec110..dd55548ef66a 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -136,5 +136,10 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char 
*cpuid); extern const char * const kvm_events_tp[]; extern struct kvm_reg_events_ops kvm_reg_events_ops[]; extern const char * const kvm_skip_events[]; +extern const char *vcpu_id_str; +extern const int decode_str_len; +extern const char *kvm_exit_reason; +extern const char *kvm_entry_trace; +extern const char *kvm_exit_trace; #endif /* __PERF_KVM_STAT_H */ -- cgit v1.2.3 From 48deaa74fcdad516a94fe38a4af706747d9e4745 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Thu, 28 Jan 2016 12:03:05 +0530 Subject: perf kvm/{x86,s390}: Remove const from kvm_events_tp This patch removes the "const" qualifier from kvm_events_tp declaration to account for the fact that some architectures may need to update this variable dynamically. For instance, powerpc will need to update this variable dynamically depending on the machine type. Signed-off-by: Hemant Kumar Acked-by: David Ahern Cc: Alexander Yarygin Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Paul Mackerras Cc: Scott Wood Cc: Srikar Dronamraju Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1453962787-15376-2-git-send-email-hemant@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/kvm-stat.c | 2 +- tools/perf/arch/x86/util/kvm-stat.c | 2 +- tools/perf/util/kvm-stat.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index b85a94b19c25..ed57df2e6d68 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -79,7 +79,7 @@ static struct kvm_events_ops exit_events = { .name = "VM-EXIT" }; -const char * const kvm_events_tp[] = { +const char *kvm_events_tp[] = { "kvm:kvm_s390_sie_enter", "kvm:kvm_s390_sie_exit", "kvm:kvm_s390_intercept_instruction", diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index babefda4c862..b63d4be655a2 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ 
b/tools/perf/arch/x86/util/kvm-stat.c @@ -129,7 +129,7 @@ static struct kvm_events_ops ioport_events = { .name = "IO Port Access" }; -const char * const kvm_events_tp[] = { +const char *kvm_events_tp[] = { "kvm:kvm_entry", "kvm:kvm_exit", "kvm:kvm_mmio", diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index dd55548ef66a..c965dc844df3 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -133,7 +133,7 @@ bool kvm_entry_event(struct perf_evsel *evsel); */ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid); -extern const char * const kvm_events_tp[]; +extern const char *kvm_events_tp[]; extern struct kvm_reg_events_ops kvm_reg_events_ops[]; extern const char * const kvm_skip_events[]; extern const char *vcpu_id_str; -- cgit v1.2.3 From 066d3593e1b14690dc1131d50cacbb0b7eb3f160 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Thu, 28 Jan 2016 12:03:06 +0530 Subject: perf kvm/powerpc: Port perf kvm stat to powerpc perf kvm can be used to analyze guest exit reasons. This support already exists in x86. Hence, porting it to powerpc. - To trace KVM events : perf kvm stat record If many guests are running, we can track for a specific guest by using --pid as in : perf kvm stat record --pid - To see the results : perf kvm stat report The result shows the number of exits (from the guest context to host/hypervisor context) grouped by their respective exit reasons with their frequency. Since, different powerpc machines have different KVM tracepoints, this patch discovers the available tracepoints dynamically and accordingly looks for them. If any single tracepoint is not present, this support won't be enabled for reporting. To record, this will fail if any of the events we are looking to record isn't available. Right now, its only supported on PowerPC Book3S_HV architectures. 
To analyze the different exits, group them and present them (in a slightly descriptive way) to the user, we need a mapping between the "exit code" (dumped in the kvm_guest_exit tracepoint data) and its related Interrupt vector description (exit reason). This patch adds this mapping in book3s_hv_exits.h. It records on two available KVM tracepoints for book3s_hv: "kvm_hv:kvm_guest_exit" and "kvm_hv:kvm_guest_enter". Here is a sample o/p: # pgrep qemu 19378 60515 2 Guests are running on the host. # perf kvm stat record -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ] # perf kvm stat report -p 60515 Analyze events for pid(s) 60515, all VCPUs: VM-EXIT Samples Samples% Time% MinTime MaxTime Avg time SYSCALL 9141 63.67% 7.49% 1.26us 5782.39us 9.87us (+- 6.46%) H_DATA_STORAGE 4114 28.66% 5.07% 1.72us 4597.68us 14.84us (+-20.06%) HV_DECREMENTER 418 2.91% 4.26% 0.70us 30002.22us 122.58us (+-70.29%) EXTERNAL 392 2.73% 0.06% 0.64us 104.10us 1.94us (+-18.83%) RETURN_TO_HOST 287 2.00% 83.11% 1.53us 124240.15us 3486.52us (+-16.81%) H_INST_STORAGE 5 0.03% 0.00% 1.88us 3.73us 2.39us (+-14.20%) Total Samples:14357, Total events handled time:1203918.42us. Signed-off-by: Hemant Kumar Cc: Alexander Yarygin Cc: David Ahern Cc: Michael Ellerman Cc: Naveen N.
Rao Cc: Paul Mackerras Cc: Scott Wood Cc: Srikar Dronamraju Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1453962787-15376-3-git-send-email-hemant@linux.vnet.ibm.com Signed-off-by: Srikar Dronamraju Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/Makefile | 2 + tools/perf/arch/powerpc/util/Build | 1 + tools/perf/arch/powerpc/util/book3s_hv_exits.h | 33 ++++++++ tools/perf/arch/powerpc/util/kvm-stat.c | 107 +++++++++++++++++++++++++ tools/perf/builtin-kvm.c | 18 +++++ tools/perf/util/kvm-stat.h | 1 + 6 files changed, 162 insertions(+) create mode 100644 tools/perf/arch/powerpc/util/book3s_hv_exits.h create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c (limited to 'tools') diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 7fbca175099e..9f9cea3478fd 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,3 +1,5 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif + +HAVE_KVM_STAT_SUPPORT := 1 diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 7b8b0d1a1b62..c8fe2074d217 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,6 @@ libperf-y += header.o libperf-y += sym-handling.o +libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h new file mode 100644 index 000000000000..e68ba2da8970 --- /dev/null +++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h @@ -0,0 +1,33 @@ +#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H +#define ARCH_PERF_BOOK3S_HV_EXITS_H + +/* + * PowerPC Interrupt vectors : exit code to name mapping + */ + +#define kvm_trace_symbol_exit \ + {0x0, "RETURN_TO_HOST"}, \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, 
"INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} + +#endif diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c new file mode 100644 index 000000000000..27bc559b8b3a --- /dev/null +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -0,0 +1,107 @@ +#include "util/kvm-stat.h" +#include "util/parse-events.h" + +#include "book3s_hv_exits.h" + +#define NR_TPS 2 + +const char *vcpu_id_str = "vcpu_id"; +const int decode_str_len = 40; +const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter"; +const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit"; + +define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit); + +/* Tracepoints specific to ppc_book3s_hv */ +const char *ppc_book3s_hv_kvm_tp[] = { + "kvm_hv:kvm_guest_enter", + "kvm_hv:kvm_guest_exit", +}; + +/* 1 extra placeholder for NULL */ +const char *kvm_events_tp[NR_TPS + 1]; +const char *kvm_exit_reason; + +static struct kvm_events_ops exit_events = { + .is_begin_event = exit_event_begin, + .is_end_event = exit_event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { .name = "vmexit", .ops = &exit_events }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + + +static int is_tracepoint_available(const char *str, struct perf_evlist *evlist) +{ + struct parse_events_error err; + int ret; + + err.str = NULL; + ret = parse_events(evlist, str, &err); + if (err.str) + pr_err("%s : %s\n", str, err.str); + return ret; +} + +static int ppc__setup_book3s_hv(struct 
perf_kvm_stat *kvm, + struct perf_evlist *evlist) +{ + const char **events_ptr; + int i, nr_tp = 0, err = -1; + + /* Check for book3s_hv tracepoints */ + for (events_ptr = ppc_book3s_hv_kvm_tp; *events_ptr; events_ptr++) { + err = is_tracepoint_available(*events_ptr, evlist); + if (err) + return -1; + nr_tp++; + } + + for (i = 0; i < nr_tp; i++) + kvm_events_tp[i] = ppc_book3s_hv_kvm_tp[i]; + + kvm_events_tp[i] = NULL; + kvm_exit_reason = "trap"; + kvm->exit_reasons = hv_exit_reasons; + kvm->exit_reasons_isa = "HV"; + + return 0; +} + +/* Wrapper to setup kvm tracepoints */ +static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm) +{ + struct perf_evlist *evlist = perf_evlist__new(); + + if (evlist == NULL) + return -ENOMEM; + + /* Right now, only supported on book3s_hv */ + return ppc__setup_book3s_hv(kvm, evlist); +} + +int setup_kvm_events_tp(struct perf_kvm_stat *kvm) +{ + return ppc__setup_kvm_tp(kvm); +} + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + int ret; + + ret = ppc__setup_kvm_tp(kvm); + if (ret) { + kvm->exit_reasons = NULL; + kvm->exit_reasons_isa = NULL; + } + + return ret; +} diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index ab5645cf39d2..bff666458b28 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1132,6 +1132,11 @@ exit: _p; \ }) +int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused) +{ + return 0; +} + static int kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) { @@ -1148,7 +1153,14 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) NULL }; const char * const *events_tp; + int ret; + events_tp_size = 0; + ret = setup_kvm_events_tp(kvm); + if (ret < 0) { + pr_err("Unable to setup the kvm tracepoints\n"); + return ret; + } for (events_tp = kvm_events_tp; *events_tp; events_tp++) events_tp_size++; @@ -1377,6 +1389,12 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, /* * generate the event 
list */ + err = setup_kvm_events_tp(kvm); + if (err < 0) { + pr_err("Unable to setup the kvm tracepoints\n"); + return err; + } + kvm->evlist = kvm_live_event_list(); if (kvm->evlist == NULL) { err = -1; diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index c965dc844df3..d01e73592f6e 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -122,6 +122,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm, bool kvm_exit_event(struct perf_evsel *evsel); bool kvm_entry_event(struct perf_evsel *evsel); +int setup_kvm_events_tp(struct perf_kvm_stat *kvm); #define define_exit_reasons_table(name, symbols) \ static struct exit_reasons_table name[] = { \ -- cgit v1.2.3 From 78e6c39b231a8e31e193534fdbe29291b7fd8f37 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Thu, 28 Jan 2016 12:03:07 +0530 Subject: perf kvm/powerpc: Add support for HCALL reasons Powerpc provides hcall events that also provides insights into guest behaviour. Enhance perf kvm stat to record and analyze hcall events. - To trace hcall events : perf kvm stat record - To show the results : perf kvm stat report --event=hcall The result shows the number of hypervisor calls from the guest grouped by their respective reasons displayed with the frequency. This patch makes use of two additional tracepoints "kvm_hv:kvm_hcall_enter" and "kvm_hv:kvm_hcall_exit". To map the hcall codes to their respective names, it needs a mapping. Such mapping is added in this patch in book3s_hcalls.h. # pgrep qemu A sample output : 19378 60515 2 VMs running. 
# perf kvm stat record -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ] # perf kvm stat report -p 60515 --event=hcall Analyze events for all VMs, all VCPUs: HCALL-EVENT Samples Samples% Time% MinTime MaxTime AvgTime H_IPI 822 66.08% 88.10% 0.63us 11.38us 2.05us (+- 1.42%) H_SEND_CRQ 144 11.58% 3.77% 0.41us 0.88us 0.50us (+- 1.47%) H_VIO_SIGNAL 118 9.49% 2.86% 0.37us 0.83us 0.47us (+- 1.43%) H_PUT_TERM_CHAR 76 6.11% 2.07% 0.37us 0.90us 0.52us (+- 2.43%) H_GET_TERM_CHAR 74 5.95% 2.23% 0.37us 1.70us 0.58us (+- 4.77%) H_RTAS 6 0.48% 0.85% 1.10us 9.25us 2.70us (+-48.57%) H_PERFMON 4 0.32% 0.12% 0.41us 0.96us 0.59us (+-20.92%) Total Samples:1244, Total events handled time:1916.69us. Signed-off-by: Hemant Kumar Cc: Alexander Yarygin Cc: David Ahern Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Paul Mackerras Cc: Scott Wood Cc: Srikar Dronamraju Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1453962787-15376-4-git-send-email-hemant@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/book3s_hcalls.h | 123 +++++++++++++++++++++++++++ tools/perf/arch/powerpc/util/kvm-stat.c | 65 +++++++++++++- 2 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/powerpc/util/book3s_hcalls.h (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h new file mode 100644 index 000000000000..0dd6b7f2d44f --- /dev/null +++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h @@ -0,0 +1,123 @@ +#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H +#define ARCH_PERF_BOOK3S_HV_HCALLS_H + +/* + * PowerPC HCALL codes : hcall code to name mapping + */ +#define kvm_trace_symbol_hcall \ + {0x4, "H_REMOVE"}, \ + {0x8, "H_ENTER"}, \ + {0xc, "H_READ"}, \ + {0x10, "H_CLEAR_MOD"}, \ + {0x14, "H_CLEAR_REF"}, \ + {0x18, "H_PROTECT"}, \ + {0x1c, "H_GET_TCE"}, \ + {0x20, "H_PUT_TCE"}, \ + {0x24, 
"H_SET_SPRG0"}, \ + {0x28, "H_SET_DABR"}, \ + {0x2c, "H_PAGE_INIT"}, \ + {0x30, "H_SET_ASR"}, \ + {0x34, "H_ASR_ON"}, \ + {0x38, "H_ASR_OFF"}, \ + {0x3c, "H_LOGICAL_CI_LOAD"}, \ + {0x40, "H_LOGICAL_CI_STORE"}, \ + {0x44, "H_LOGICAL_CACHE_LOAD"}, \ + {0x48, "H_LOGICAL_CACHE_STORE"}, \ + {0x4c, "H_LOGICAL_ICBI"}, \ + {0x50, "H_LOGICAL_DCBF"}, \ + {0x54, "H_GET_TERM_CHAR"}, \ + {0x58, "H_PUT_TERM_CHAR"}, \ + {0x5c, "H_REAL_TO_LOGICAL"}, \ + {0x60, "H_HYPERVISOR_DATA"}, \ + {0x64, "H_EOI"}, \ + {0x68, "H_CPPR"}, \ + {0x6c, "H_IPI"}, \ + {0x70, "H_IPOLL"}, \ + {0x74, "H_XIRR"}, \ + {0x78, "H_MIGRATE_DMA"}, \ + {0x7c, "H_PERFMON"}, \ + {0xdc, "H_REGISTER_VPA"}, \ + {0xe0, "H_CEDE"}, \ + {0xe4, "H_CONFER"}, \ + {0xe8, "H_PROD"}, \ + {0xec, "H_GET_PPP"}, \ + {0xf0, "H_SET_PPP"}, \ + {0xf4, "H_PURR"}, \ + {0xf8, "H_PIC"}, \ + {0xfc, "H_REG_CRQ"}, \ + {0x100, "H_FREE_CRQ"}, \ + {0x104, "H_VIO_SIGNAL"}, \ + {0x108, "H_SEND_CRQ"}, \ + {0x110, "H_COPY_RDMA"}, \ + {0x114, "H_REGISTER_LOGICAL_LAN"}, \ + {0x118, "H_FREE_LOGICAL_LAN"}, \ + {0x11c, "H_ADD_LOGICAL_LAN_BUFFER"}, \ + {0x120, "H_SEND_LOGICAL_LAN"}, \ + {0x124, "H_BULK_REMOVE"}, \ + {0x130, "H_MULTICAST_CTRL"}, \ + {0x134, "H_SET_XDABR"}, \ + {0x138, "H_STUFF_TCE"}, \ + {0x13c, "H_PUT_TCE_INDIRECT"}, \ + {0x14c, "H_CHANGE_LOGICAL_LAN_MAC"}, \ + {0x150, "H_VTERM_PARTNER_INFO"}, \ + {0x154, "H_REGISTER_VTERM"}, \ + {0x158, "H_FREE_VTERM"}, \ + {0x15c, "H_RESET_EVENTS"}, \ + {0x160, "H_ALLOC_RESOURCE"}, \ + {0x164, "H_FREE_RESOURCE"}, \ + {0x168, "H_MODIFY_QP"}, \ + {0x16c, "H_QUERY_QP"}, \ + {0x170, "H_REREGISTER_PMR"}, \ + {0x174, "H_REGISTER_SMR"}, \ + {0x178, "H_QUERY_MR"}, \ + {0x17c, "H_QUERY_MW"}, \ + {0x180, "H_QUERY_HCA"}, \ + {0x184, "H_QUERY_PORT"}, \ + {0x188, "H_MODIFY_PORT"}, \ + {0x18c, "H_DEFINE_AQP1"}, \ + {0x190, "H_GET_TRACE_BUFFER"}, \ + {0x194, "H_DEFINE_AQP0"}, \ + {0x198, "H_RESIZE_MR"}, \ + {0x19c, "H_ATTACH_MCQP"}, \ + {0x1a0, "H_DETACH_MCQP"}, \ + {0x1a4, "H_CREATE_RPT"}, \ + {0x1a8, 
"H_REMOVE_RPT"}, \ + {0x1ac, "H_REGISTER_RPAGES"}, \ + {0x1b0, "H_DISABLE_AND_GETC"}, \ + {0x1b4, "H_ERROR_DATA"}, \ + {0x1b8, "H_GET_HCA_INFO"}, \ + {0x1bc, "H_GET_PERF_COUNT"}, \ + {0x1c0, "H_MANAGE_TRACE"}, \ + {0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"}, \ + {0x1d8, "H_POLL_PENDING"}, \ + {0x1e4, "H_QUERY_INT_STATE"}, \ + {0x244, "H_ILLAN_ATTRIBUTES"}, \ + {0x250, "H_MODIFY_HEA_QP"}, \ + {0x254, "H_QUERY_HEA_QP"}, \ + {0x258, "H_QUERY_HEA"}, \ + {0x25c, "H_QUERY_HEA_PORT"}, \ + {0x260, "H_MODIFY_HEA_PORT"}, \ + {0x264, "H_REG_BCMC"}, \ + {0x268, "H_DEREG_BCMC"}, \ + {0x26c, "H_REGISTER_HEA_RPAGES"}, \ + {0x270, "H_DISABLE_AND_GET_HEA"}, \ + {0x274, "H_GET_HEA_INFO"}, \ + {0x278, "H_ALLOC_HEA_RESOURCE"}, \ + {0x284, "H_ADD_CONN"}, \ + {0x288, "H_DEL_CONN"}, \ + {0x298, "H_JOIN"}, \ + {0x2a4, "H_VASI_STATE"}, \ + {0x2b0, "H_ENABLE_CRQ"}, \ + {0x2b8, "H_GET_EM_PARMS"}, \ + {0x2d0, "H_SET_MPP"}, \ + {0x2d4, "H_GET_MPP"}, \ + {0x2ec, "H_HOME_NODE_ASSOCIATIVITY"}, \ + {0x2f4, "H_BEST_ENERGY"}, \ + {0x2fc, "H_XIRR_X"}, \ + {0x300, "H_RANDOM"}, \ + {0x304, "H_COP"}, \ + {0x314, "H_GET_MPP_X"}, \ + {0x31c, "H_SET_MODE"}, \ + {0xf000, "H_RTAS"} \ + +#endif diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 27bc559b8b3a..74eee30398f8 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -1,9 +1,11 @@ #include "util/kvm-stat.h" #include "util/parse-events.h" +#include "util/debug.h" #include "book3s_hv_exits.h" +#include "book3s_hcalls.h" -#define NR_TPS 2 +#define NR_TPS 4 const char *vcpu_id_str = "vcpu_id"; const int decode_str_len = 40; @@ -11,17 +13,77 @@ const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter"; const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit"; define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit); +define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall); /* Tracepoints specific to ppc_book3s_hv */ const char *ppc_book3s_hv_kvm_tp[] = { 
"kvm_hv:kvm_guest_enter", "kvm_hv:kvm_guest_exit", + "kvm_hv:kvm_hcall_enter", + "kvm_hv:kvm_hcall_exit", + NULL, }; /* 1 extra placeholder for NULL */ const char *kvm_events_tp[NR_TPS + 1]; const char *kvm_exit_reason; +static void hcall_event_get_key(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = perf_evsel__intval(evsel, sample, "req"); +} + +static const char *get_hcall_exit_reason(u64 exit_code) +{ + struct exit_reasons_table *tbl = hcall_reasons; + + while (tbl->reason != NULL) { + if (tbl->exit_code == exit_code) + return tbl->reason; + tbl++; + } + + pr_debug("Unknown hcall code: %lld\n", + (unsigned long long)exit_code); + return "UNKNOWN"; +} + +static bool hcall_event_end(struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return (!strcmp(evsel->name, kvm_events_tp[3])); +} + +static bool hcall_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + if (!strcmp(evsel->name, kvm_events_tp[2])) { + hcall_event_get_key(evsel, sample, key); + return true; + } + + return false; +} +static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, + struct event_key *key, + char *decode) +{ + const char *hcall_reason = get_hcall_exit_reason(key->key); + + scnprintf(decode, decode_str_len, "%s", hcall_reason); +} + +static struct kvm_events_ops hcall_events = { + .is_begin_event = hcall_event_begin, + .is_end_event = hcall_event_end, + .decode_key = hcall_event_decode_key, + .name = "HCALL-EVENT", +}; + static struct kvm_events_ops exit_events = { .is_begin_event = exit_event_begin, .is_end_event = exit_event_end, @@ -31,6 +93,7 @@ static struct kvm_events_ops exit_events = { struct kvm_reg_events_ops kvm_reg_events_ops[] = { { .name = "vmexit", .ops = &exit_events }, + { .name = "hcall", .ops = &hcall_events }, { NULL, NULL }, }; -- cgit v1.2.3 From 
814568db641f6587c1e98a3a85f214cb6a30fe10 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 29 Jan 2016 17:51:04 -0300 Subject: perf build: Align the names of the build tests: $ make -C tools/perf build-test make[1]: Entering directory `/home/acme/git/linux/tools/perf' make_pure_O: cd . && make -f Makefile O=/tmp/tmp.mPx0Cmik3f DESTDIR=/tmp/tmp.U0SUmVbtJm make_clean_all_O: cd . && make -f Makefile O=/tmp/tmp.Yl5UzhTU7T DESTDIR=/tmp/tmp.fop1E4jdER clean all make_debug_O: cd . && make -f Makefile O=/tmp/tmp.pMn2ozBoXC DESTDIR=/tmp/tmp.azxhDp5sEp DEBUG=1 make_no_libperl_O: cd . && make -f Makefile O=/tmp/tmp.qJPiINMtA7 DESTDIR=/tmp/tmp.KNMrLeGDxZ NO_LIBPERL=1 More needs to be done to make it more compact, i.e. elide the '-f Makefile', remove that 'cd . &&', move the DESTDIR= and O= to the end, as they don't convey that much information besides the fact that they are being set to some random directory just for this build, move the meat, i.e. the meaningful feature disabling bits to the start, etc. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wir3w3o4f1nmbgcxgnx8cj9c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/make | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 7f663f4611c6..cc72b67bde5e 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -261,6 +261,8 @@ run := $(shell shuf -e $(run)) run_O := $(shell shuf -e $(run_O)) endif +max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L) + ifdef DEBUG d := $(info run $(run)) d := $(info run_O $(run_O)) @@ -274,7 +276,7 @@ $(run): $(call clean) @TMP_DEST=$$(mktemp -d); \ cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \ - echo "- $@: $$cmd" && echo $$cmd > $@ && \ + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1; \ echo " test: $(call test,$@)" >> $@ 2>&1; \ $(call test,$@) && \ @@ -285,7 +287,7 @@ $(run_O): @TMP_O=$$(mktemp -d); \ TMP_DEST=$$(mktemp -d); \ cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \ - echo "- $@: $$cmd" && echo $$cmd > $@ && \ + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 && \ echo " test: $(call test_O,$@)" >> $@ 2>&1; \ $(call test_O,$@) && \ -- cgit v1.2.3 From 744070e0e4ac691bb43608f7bf46a9641a9cf342 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jan 2016 00:40:48 +0900 Subject: perf hists: Fix min callchain hits calculation The total period should be obtained using hists__total_period() since it takes filtered entries into account. In addition, if the callchain mode is 'fractal', the total period should be the entry's period.
Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1453909257-26015-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 81ce0aff69d1..b96194676c91 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1163,9 +1163,18 @@ static void __hists__insert_output_entry(struct rb_root *entries, struct rb_node *parent = NULL; struct hist_entry *iter; - if (use_callchain) + if (use_callchain) { + if (callchain_param.mode == CHAIN_GRAPH_REL) { + u64 total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (callchain_param.min_percent / 100); + } callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); + } while (*p != NULL) { parent = *p; @@ -1195,7 +1204,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) else use_callchain = symbol_conf.use_callchain; - min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); + min_callchain_hits = hists__total_period(hists) * (callchain_param.min_percent / 100); if (sort__need_collapse) root = &hists->entries_collapsed; -- cgit v1.2.3 From 0f58474ec835f6fc80af2cde2c7ed5495cd212ba Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jan 2016 00:40:49 +0900 Subject: perf hists: Update hists' total period when adding entries Currently the hist entry addition path doesn't update total_period of hists and it's calculated during the 'resort' path. But the resort path needs to know the total period before doing its job because it's used for calculating the percent limit of callchains in hist entries. So this patch updates the total period during the addition path.
It makes the percent limit of callchains work (again). Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1453909257-26015-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b96194676c91..098310bc4489 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -432,8 +432,12 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, cmp = hist_entry__cmp(he, entry); if (!cmp) { - if (sample_self) + if (sample_self) { he_stat__add_period(&he->stat, period, weight); + hists->stats.total_period += period; + if (!he->filtered) + hists->stats.total_non_filtered_period += period; + } if (symbol_conf.cumulate_callchain) he_stat__add_period(he->stat_acc, period, weight); @@ -466,7 +470,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!he) return NULL; - hists->nr_entries++; + if (sample_self) + hists__inc_stats(hists, he); + else + hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); -- cgit v1.2.3 From 2665b4528d0522ef073c2bde33cf9a7bd7391164 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jan 2016 00:40:50 +0900 Subject: perf report: Apply --percent-limit to callchains also Currently the --percent-limit option only works for hist entries.
However, it'd be better to have the same effect on callchains as well. Requested-by: Andi Kleen Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1453909257-26015-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2bf537f190a0..72ed0b46d5a1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -75,7 +75,10 @@ static int report__config(const char *var, const char *value, void *cb) return 0; } if (!strcmp(var, "report.percent-limit")) { - rep->min_percent = strtof(value, NULL); + double pcnt = strtof(value, NULL); + + rep->min_percent = pcnt; + callchain_param.min_percent = pcnt; return 0; } if (!strcmp(var, "report.children")) { @@ -633,8 +636,10 @@ parse_percent_limit(const struct option *opt, const char *str, int unset __maybe_unused) { struct report *rep = opt->value; + double pcnt = strtof(str, NULL); - rep->min_percent = strtof(str, NULL); + rep->min_percent = pcnt; + callchain_param.min_percent = pcnt; return 0; } -- cgit v1.2.3 From 7e597d327eca3d92a759542ff707cba61af3a718 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jan 2016 00:40:51 +0900 Subject: perf report: Get rid of hist_entry__callchain_fprintf() It's just a wrapper function to align the start position of callchains to 'comm' of each thread if it's the first sort key. But it doesn't work with tracepoint events, nor with the upcoming hierarchy view.
Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1453909257-26015-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 387110d50b00..8e25f7dd6e84 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -349,30 +349,6 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static size_t hist_entry__callchain_fprintf(struct hist_entry *he, - struct hists *hists, - FILE *fp) -{ - int left_margin = 0; - u64 total_period = hists->stats.total_period; - - if (field_order == NULL && (sort_order == NULL || - !prefixcmp(sort_order, "comm"))) { - struct perf_hpp_fmt *fmt; - - perf_hpp__for_each_format(fmt) { - if (!perf_hpp__is_sort_entry(fmt)) - continue; - - /* must be 'comm' sort entry */ - left_margin = fmt->width(fmt, NULL, hists_to_evsel(hists)); - left_margin -= thread__comm_len(he->thread); - break; - } - } - return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); -} - static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) { const char *sep = symbol_conf.field_sep; @@ -418,6 +394,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, .buf = bf, .size = size, }; + u64 total_period = hists->stats.total_period; if (size == 0 || size > bfsz) size = hpp.size = bfsz; @@ -427,7 +404,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, ret = fprintf(fp, "%s\n", bf); if (symbol_conf.use_callchain) - ret += hist_entry__callchain_fprintf(he, hists, fp); + ret += hist_entry_callchain__fprintf(he, total_period, 0, fp); return ret; } -- cgit v1.2.3 From 54d27b3119e2eecbb3dfbf821db90fab25f6c523 Mon Sep 17 00:00:00 2001 From: 
Namhyung Kim Date: Thu, 28 Jan 2016 00:40:52 +0900 Subject: perf callchain: Pass parent_samples to __callchain__fprintf_graph() Pass hist entry's period to graph callchain print function. This info is needed by later patch to determine whether it can omit percentage of top-level node or not. No functional change intended. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1453909257-26015-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 8e25f7dd6e84..96188ea12771 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -166,7 +166,8 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, } static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int left_margin) + u64 total_samples, u64 parent_samples, + int left_margin) { struct callchain_node *cnode; struct callchain_list *chain; @@ -213,6 +214,9 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, root = &cnode->rb_root; } + if (callchain_param.mode == CHAIN_GRAPH_REL) + total_samples = parent_samples; + ret += __callchain__fprintf_graph(fp, root, total_samples, 1, 1, left_margin); ret += fprintf(fp, "\n"); @@ -323,16 +327,19 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, u64 total_samples, int left_margin, FILE *fp) { + u64 parent_samples = he->stat.period; + + if (symbol_conf.cumulate_callchain) + parent_samples = he->stat_acc->period; + switch (callchain_param.mode) { case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, - symbol_conf.cumulate_callchain ? 
- he->stat_acc->period : he->stat.period, - left_margin); + return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, + parent_samples, left_margin); break; case CHAIN_GRAPH_ABS: return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, - left_margin); + parent_samples, left_margin); break; case CHAIN_FLAT: return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); -- cgit v1.2.3 From 7ed5d6e28a0a1a54f554b0ab9c38a6061e7cac9e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jan 2016 00:40:53 +0900 Subject: perf report: Fix percent display in callchains on --stdio When there's only a single callchain, perf doesn't print its percentage in front of the symbols. This is because it assumes that the percentage is same as parents. But if a percent limit is applied, it's possible that there are actually a couple of child nodes but only one of them is shown. In this case it should display the percent to prevent misunderstanding of its percentage is same as the parent's. For example, let's see the following callchain. $ perf report -s comm --percent-limit 0.01 --stdio ... 9.95% swapper | |--7.57%--intel_idle | cpuidle_enter_state | cpuidle_enter | call_cpuidle | cpu_startup_entry | | | |--4.89%--start_secondary | | | --2.68%--rest_init | start_kernel | x86_64_start_reservations | x86_64_start_kernel | |--0.15%--__schedule | | | |--0.13%--schedule | | schedule_preempt_disable | | cpu_startup_entry | | | | | |--0.09%--start_secondary | | | | | --0.04%--rest_init | | start_kernel | | x86_64_start_reservations | | x86_64_start_kernel | | | --0.01%--schedule_preempt_disabled | cpu_startup_entry ... Current code omits the percent if 'intel_idle' becomes the only node when percent limit is set to 0.5%, its percent is not 9.95% but users will assume it incorrectly. Before: $ perf report --percent-limit 0.5 --stdio ... 
9.95% swapper | ---intel_idle cpuidle_enter_state cpuidle_enter call_cpuidle cpu_startup_entry | |--4.89%--start_secondary | --2.68%--rest_init start_kernel x86_64_start_reservations x86_64_start_kernel After: $ perf report --percent-limit 0.5 --stdio ... 9.95% swapper | --7.57%--intel_idle cpuidle_enter_state cpuidle_enter call_cpuidle cpu_startup_entry | |--4.89%--start_secondary | --2.68%--rest_init start_kernel x86_64_start_reservations x86_64_start_kernel Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1453909257-26015-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 96188ea12771..76ff46becac8 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -165,6 +165,25 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, return ret; } +/* + * If have one single callchain root, don't bother printing + * its percentage (100 % in fractal mode and the same percentage + * than the hist in graph mode). This also avoid one level of column. + * + * However when percent-limit applied, it's possible that single callchain + * node have different (non-100% in fractal mode) percentage. 
+ */ +static bool need_percent_display(struct rb_node *node, u64 parent_samples) +{ + struct callchain_node *cnode; + + if (rb_next(node)) + return true; + + cnode = rb_entry(node, struct callchain_node, rb_node); + return callchain_cumul_hits(cnode) != parent_samples; +} + static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, u64 total_samples, u64 parent_samples, int left_margin) @@ -178,13 +197,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, int ret = 0; char bf[1024]; - /* - * If have one single callchain root, don't bother printing - * its percentage (100 % in fractal mode and the same percentage - * than the hist in graph mode). This also avoid one level of column. - */ node = rb_first(root); - if (node && !rb_next(node)) { + if (node && !need_percent_display(node, parent_samples)) { cnode = rb_entry(node, struct callchain_node, rb_node); list_for_each_entry(chain, &cnode->val, list) { /* -- cgit v1.2.3 From 0c841c6c16f320704f75970bbe6a9800c53e6cf5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jan 2016 00:40:54 +0900 Subject: perf hists browser: Fix dump to show correct callchain style The commit 8c430a348699 ("perf hists browser: Support folded callchains") missed to update hist_browser__dump() so it always shows graph-style callchains regardless of current setting. To fix that, factor out callchain printing code and rename the existing function which prints graph-style callchain. 
Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Fixes: 8c430a348699 ("perf hists browser: Support folded callchains") Link: http://lkml.kernel.org/r/1453909257-26015-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 73 ++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 1da30f8aa7a5..6b22baf525dd 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -844,7 +844,7 @@ next: return row - first_row; } -static int hist_browser__show_callchain(struct hist_browser *browser, +static int hist_browser__show_callchain_graph(struct hist_browser *browser, struct rb_root *root, int level, unsigned short row, u64 total, print_callchain_entry_fn print, @@ -898,7 +898,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser, else new_total = total; - row += hist_browser__show_callchain(browser, &child->rb_root, + row += hist_browser__show_callchain_graph(browser, &child->rb_root, new_level, row, new_total, print, arg, is_output_full); } @@ -910,6 +910,43 @@ out: return row - first_row; } +static int hist_browser__show_callchain(struct hist_browser *browser, + struct hist_entry *entry, int level, + unsigned short row, + print_callchain_entry_fn print, + struct callchain_print_arg *arg, + check_output_full_fn is_output_full) +{ + u64 total = hists__total_period(entry->hists); + int printed; + + if (callchain_param.mode == CHAIN_GRAPH_REL) { + if (symbol_conf.cumulate_callchain) + total = entry->stat_acc->period; + else + total = entry->stat.period; + } + + if (callchain_param.mode == CHAIN_FLAT) { + printed = hist_browser__show_callchain_flat(browser, + &entry->sorted_chain, row, total, + print, arg, is_output_full); + } else if (callchain_param.mode == 
CHAIN_FOLDED) { + printed = hist_browser__show_callchain_folded(browser, + &entry->sorted_chain, row, total, + print, arg, is_output_full); + } else { + printed = hist_browser__show_callchain_graph(browser, + &entry->sorted_chain, level, row, total, + print, arg, is_output_full); + } + + if (arg->is_current_entry) + browser->he_selection = entry; + + return printed; +} + struct hpp_arg { struct ui_browser *b; char folded_sign; @@ -1084,38 +1121,14 @@ static int hist_browser__show_entry(struct hist_browser *browser, --row_offset; if (folded_sign == '-' && row != browser->b.rows) { - u64 total = hists__total_period(entry->hists); struct callchain_print_arg arg = { .row_offset = row_offset, .is_current_entry = current_entry, }; - if (callchain_param.mode == CHAIN_GRAPH_REL) { - if (symbol_conf.cumulate_callchain) - total = entry->stat_acc->period; - else - total = entry->stat.period; - } - - if (callchain_param.mode == CHAIN_FLAT) { - printed += hist_browser__show_callchain_flat(browser, - &entry->sorted_chain, row, total, + printed += hist_browser__show_callchain(browser, entry, 1, row, hist_browser__show_callchain_entry, &arg, hist_browser__check_output_full); - } else if (callchain_param.mode == CHAIN_FOLDED) { - printed += hist_browser__show_callchain_folded(browser, - &entry->sorted_chain, row, total, - hist_browser__show_callchain_entry, &arg, - hist_browser__check_output_full); - } else { - printed += hist_browser__show_callchain(browser, - &entry->sorted_chain, 1, row, total, - hist_browser__show_callchain_entry, &arg, - hist_browser__check_output_full); - } - - if (arg.is_current_entry) - browser->he_selection = entry; } return printed; @@ -1380,15 +1393,11 @@ do_offset: static int hist_browser__fprintf_callchain(struct hist_browser *browser, struct hist_entry *he, FILE *fp) { - u64 total = hists__total_period(he->hists); struct callchain_print_arg arg = { .fp = fp, }; - if (symbol_conf.cumulate_callchain) - total = he->stat_acc->period; - - 
hist_browser__show_callchain(browser, &he->sorted_chain, 1, 0, total, + hist_browser__show_callchain(browser, he, 1, 0, hist_browser__fprintf_callchain_entry, &arg, hist_browser__check_dump_full); return arg.printed; -- cgit v1.2.3 From 5eca104eee7edfe7155523849750ced539b16e94 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jan 2016 00:40:55 +0900 Subject: perf hists browser: Pass parent_total to callchain print functions Pass parent node's total period to callchain print functions. This info is needed by later patch to determine whether it can omit percent or not correctly. No functional change intended. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1453909257-26015-9-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 44 +++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 6b22baf525dd..41dbb79c992e 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -660,6 +660,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, static int hist_browser__show_callchain_flat(struct hist_browser *browser, struct rb_root *root, unsigned short row, u64 total, + u64 parent_total __maybe_unused, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) @@ -763,6 +764,7 @@ static char *hist_browser__folded_callchain_str(struct hist_browser *browser, static int hist_browser__show_callchain_folded(struct hist_browser *browser, struct rb_root *root, unsigned short row, u64 total, + u64 parent_total __maybe_unused, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) @@ -847,14 +849,18 @@ next: static int 
hist_browser__show_callchain_graph(struct hist_browser *browser, struct rb_root *root, int level, unsigned short row, u64 total, + u64 parent_total, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) { struct rb_node *node; int first_row = row, offset = level * LEVEL_OFFSET_STEP; - u64 new_total; bool need_percent; + u64 percent_total = total; + + if (callchain_param.mode == CHAIN_GRAPH_REL) + percent_total = parent_total; node = rb_first(root); need_percent = node && rb_next(node); @@ -878,7 +884,7 @@ static int hist_browser__show_callchain_graph(struct hist_browser *browser, folded_sign = callchain_list__folded(chain); row += hist_browser__show_callchain_list(browser, child, - chain, row, total, + chain, row, percent_total, was_first && need_percent, offset + extra_offset, print, arg); @@ -893,13 +899,9 @@ static int hist_browser__show_callchain_graph(struct hist_browser *browser, if (folded_sign == '-') { const int new_level = level + (extra_offset ? 
2 : 1); - if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = child->children_hit; - else - new_total = total; - row += hist_browser__show_callchain_graph(browser, &child->rb_root, - new_level, row, new_total, + new_level, row, total, + child->children_hit, print, arg, is_output_full); } if (is_output_full(browser, row)) @@ -918,27 +920,29 @@ static int hist_browser__show_callchain(struct hist_browser *browser, check_output_full_fn is_output_full) { u64 total = hists__total_period(entry->hists); + u64 parent_total; int printed; - if (callchain_param.mode == CHAIN_GRAPH_REL) { - if (symbol_conf.cumulate_callchain) - total = entry->stat_acc->period; - else - total = entry->stat.period; - } + if (symbol_conf.cumulate_callchain) + parent_total = entry->stat_acc->period; + else + parent_total = entry->stat.period; if (callchain_param.mode == CHAIN_FLAT) { printed = hist_browser__show_callchain_flat(browser, - &entry->sorted_chain, row, total, - print, arg, is_output_full); + &entry->sorted_chain, row, + total, parent_total, print, arg, + is_output_full); } else if (callchain_param.mode == CHAIN_FOLDED) { printed = hist_browser__show_callchain_folded(browser, - &entry->sorted_chain, row, total, - print, arg, is_output_full); + &entry->sorted_chain, row, + total, parent_total, print, arg, + is_output_full); } else { printed = hist_browser__show_callchain_graph(browser, - &entry->sorted_chain, level, row, total, - print, arg, is_output_full); + &entry->sorted_chain, level, row, + total, parent_total, print, arg, + is_output_full); } if (arg->is_current_entry) -- cgit v1.2.3 From 59c624e2391080fa6315a376a4ee74d0eb393d1d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jan 2016 00:40:56 +0900 Subject: perf hists browser: Fix percent display in callchains When there's only a single callchain, perf doesn't print its percentage in front of the symbols. This is because it assumes that the percentage is same as parents. 
But if a percent limit is applied, it's possible that there are actually a couple of child nodes but only one of them is shown. In this case it should display the percent to prevent misunderstanding of its percentage is same as the parent's. For example, let's see the following callchain. $ perf report --no-children --percent-limit 0.01 --tui ... - 0.06% sleep [kernel.vmlinux] [k] kmem_cache_alloc_trace kmem_cache_alloc_trace - perf_event_mmap - 0.04% mmap_region do_mmap_pgoff - vm_mmap_pgoff + 0.02% sys_mmap_pgoff + 0.02% vm_mmap + 0.02% mprotect_fixup Current code omits the percent if 'mmap_region' becomes the only node when percent limit is set to 0.03%, its percent is not 0.06% but users will assume it incorrectly. Before: $ perf report --no-children --percent-limit 0.03 --tui ... 0.06% sleep [kernel.vmlinux] [k] kmem_cache_alloc_trace kmem_cache_alloc_trace - perf_event_mmap - mmap_region do_mmap_pgoff vm_mmap_pgoff After: $ perf report --no-children --percent-limit 0.03 --tui ... 0.06% sleep [kernel.vmlinux] [k] kmem_cache_alloc_trace kmem_cache_alloc_trace - perf_event_mmap - 0.04% mmap_region do_mmap_pgoff vm_mmap_pgoff Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1453909257-26015-10-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 41dbb79c992e..61d578bf4ffd 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -657,10 +657,24 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, return 1; } +static bool check_percent_display(struct rb_node *node, u64 parent_total) +{ + struct callchain_node *child; + + if (node == NULL) + return false; + + if 
(rb_next(node)) + return true; + + child = rb_entry(node, struct callchain_node, rb_node); + return callchain_cumul_hits(child) != parent_total; +} + static int hist_browser__show_callchain_flat(struct hist_browser *browser, struct rb_root *root, unsigned short row, u64 total, - u64 parent_total __maybe_unused, + u64 parent_total, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) @@ -670,7 +684,7 @@ static int hist_browser__show_callchain_flat(struct hist_browser *browser, bool need_percent; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); @@ -764,7 +778,7 @@ static char *hist_browser__folded_callchain_str(struct hist_browser *browser, static int hist_browser__show_callchain_folded(struct hist_browser *browser, struct rb_root *root, unsigned short row, u64 total, - u64 parent_total __maybe_unused, + u64 parent_total, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) @@ -774,7 +788,7 @@ static int hist_browser__show_callchain_folded(struct hist_browser *browser, bool need_percent; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); @@ -863,7 +877,7 @@ static int hist_browser__show_callchain_graph(struct hist_browser *browser, percent_total = parent_total; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); -- cgit v1.2.3 From 3848c23b19e07188bfa15e3d9a2ac27692f2ff3c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jan 2016 21:24:54 +0900 Subject: perf report: Don't 
show blank lines if entry has no callchain When all callchains of a hist entry are percent-limited, do not add a blank line at the end. It makes the entry look like it doesn't have callchains. Reported-and-Tested-by: Jiri Olsa Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20160128122454.GA27446@danjae.kornet Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 76ff46becac8..691e52ce7510 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -233,7 +233,10 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, ret += __callchain__fprintf_graph(fp, root, total_samples, 1, 1, left_margin); - ret += fprintf(fp, "\n"); + if (ret) { + /* do not add a blank line if it printed nothing */ + ret += fprintf(fp, "\n"); + } return ret; } -- cgit v1.2.3 From 01441af5df438a171bce36bc3c7cfb588bc98a7a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:23:59 +0100 Subject: perf hists: Factor output_resort from hists__output_resort Currently hists__output_resort() depends on hists based on hists_evsel struct, but we need to be able to sort common hists as well. Cut the base sorting code out into an output_resort() function, so it can be reused in the following patch.
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 098310bc4489..7797d06d4993 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1197,19 +1197,13 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *hists, struct ui_progress *prog) +static void output_resort(struct hists *hists, struct ui_progress *prog, + bool use_callchain) { struct rb_root *root; struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; - struct perf_evsel *evsel = hists_to_evsel(hists); - bool use_callchain; - - if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) - use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; - else - use_callchain = symbol_conf.use_callchain; min_callchain_hits = hists__total_period(hists) * (callchain_param.min_percent / 100); @@ -1239,6 +1233,19 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) } } +void hists__output_resort(struct hists *hists, struct ui_progress *prog) +{ + struct perf_evsel *evsel = hists_to_evsel(hists); + bool use_callchain; + + if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) + use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + else + use_callchain = symbol_conf.use_callchain; + + output_resort(hists, prog, use_callchain); +} + static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { -- cgit v1.2.3 From 452ce03b1e686f0b2da6c1644dce7cdc71e3c69c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:00 +0100 Subject: perf 
hists: Introduce perf_evsel__output_resort function Adding evsel specific function to sort hists_evsel based hists. The hists__output_resort can be now used to sort common hists object. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 10 ++++++---- tools/perf/tests/hists_cumulate.c | 2 +- tools/perf/tests/hists_filter.c | 2 +- tools/perf/tests/hists_output.c | 10 +++++----- tools/perf/util/hist.c | 10 +++++++--- tools/perf/util/hist.h | 1 + 8 files changed, 23 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index cc5c1267c738..cfe366375c4b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -245,7 +245,7 @@ static int __cmd_annotate(struct perf_annotate *ann) hists__collapse_resort(hists, NULL); /* Don't sort callchain */ perf_evsel__reset_sample_bit(pos, CALLCHAIN); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(pos, NULL); if (symbol_conf.event_group && !perf_evsel__is_group_leader(pos)) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 72ed0b46d5a1..54ce0479ca28 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -507,7 +507,7 @@ static void report__output_resort(struct report *rep) ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); evlist__for_each(rep->session->evlist, pos) - hists__output_resort(evsel__hists(pos), &prog); + perf_evsel__output_resort(pos, &prog); ui_progress__finish(); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index bf01cbb0ef23..f1bbe2a589f5 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -252,7 +252,8 @@ static void 
perf_top__print_sym_table(struct perf_top *top) char bf[160]; int printed = 0; const int win_width = top->winsize.ws_col - 1; - struct hists *hists = evsel__hists(top->sym_evsel); + struct perf_evsel *evsel = top->sym_evsel; + struct hists *hists = evsel__hists(evsel); puts(CONSOLE_CLEAR); @@ -288,7 +289,7 @@ static void perf_top__print_sym_table(struct perf_top *top) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); @@ -540,6 +541,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) static void perf_top__sort_new_samples(void *arg) { struct perf_top *t = arg; + struct perf_evsel *evsel = t->sym_evsel; struct hists *hists; perf_top__reset_sample_counters(t); @@ -547,7 +549,7 @@ static void perf_top__sort_new_samples(void *arg) if (t->evlist->selected != NULL) t->sym_evsel = t->evlist->selected; - hists = evsel__hists(t->sym_evsel); + hists = evsel__hists(evsel); if (t->evlist->enabled) { if (t->zero) { @@ -559,7 +561,7 @@ static void perf_top__sort_new_samples(void *arg) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); } static void *display_thread_tui(void *arg) diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 5e6a86e50fb9..ecf136c385d5 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -191,7 +191,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec * function since TEST_ASSERT_VAL() returns in case of failure. 
*/ hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(hists_to_evsel(hists), NULL); if (verbose > 2) { pr_info("use callchain: %d, cumulate callchain: %d\n", diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 351a42463444..34b945a55d4d 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -145,7 +145,7 @@ int test__hists_filter(int subtest __maybe_unused) struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("Normal histogram\n"); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index b231265148d8..23cce67c7e48 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -156,7 +156,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -256,7 +256,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -310,7 +310,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -388,7 +388,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 
2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -491,7 +491,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7797d06d4993..d07955c145e5 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1233,9 +1233,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, } } -void hists__output_resort(struct hists *hists, struct ui_progress *prog) +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) { - struct perf_evsel *evsel = hists_to_evsel(hists); bool use_callchain; if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) @@ -1243,7 +1242,12 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) else use_callchain = symbol_conf.use_callchain; - output_resort(hists, prog, use_callchain); + output_resort(evsel__hists(evsel), prog, use_callchain); +} + +void hists__output_resort(struct hists *hists, struct ui_progress *prog) +{ + output_resort(hists, prog, symbol_conf.use_callchain); } static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d4ec4822a103..bc2499794bef 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -128,6 +128,7 @@ int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); void hist_entry__delete(struct hist_entry *he); +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); -- cgit v1.2.3 From 
b21a763edd5f832c6d966d9e60376f3d21009859 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:01 +0100 Subject: perf hists: Add _idx fields into struct perf_hpp_fmt Currently there's no way of comparing hpp format entries, which is needed in following patches. Adding _idx fields into struct perf_hpp_fmt to recognize and be able to compare hpp format entries. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 25 ++++++++++++++----------- tools/perf/util/hist.h | 1 + 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index bf2a66e254ea..d392801ea17e 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -371,7 +371,7 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return 0; } -#define HPP__COLOR_PRINT_FNS(_name, _fn) \ +#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -381,9 +381,10 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ } -#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn) \ +#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -393,9 +394,10 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ } -#define HPP__PRINT_FNS(_name, _fn) \ +#define HPP__PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -404,17 +406,18 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = 
PERF_HPP__ ## _idx, \ } struct perf_hpp_fmt perf_hpp__format[] = { - HPP__COLOR_PRINT_FNS("Overhead", overhead), - HPP__COLOR_PRINT_FNS("sys", overhead_sys), - HPP__COLOR_PRINT_FNS("usr", overhead_us), - HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys), - HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us), - HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc), - HPP__PRINT_FNS("Samples", samples), - HPP__PRINT_FNS("Period", period) + HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD), + HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS), + HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US), + HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS), + HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US), + HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC), + HPP__PRINT_FNS("Samples", samples, SAMPLES), + HPP__PRINT_FNS("Period", period, PERIOD) }; LIST_HEAD(perf_hpp__list); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index bc2499794bef..8a0cbdeb449e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -221,6 +221,7 @@ struct perf_hpp_fmt { bool elide; int len; int user_len; + int idx; }; extern struct list_head perf_hpp__list; -- cgit v1.2.3 From 2e8b79e706f504801fbce19fa9f16f3c858a105e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:02 +0100 Subject: perf hists: Use struct perf_hpp_fmt::idx in perf_hpp__reset_width We are going to add dynamic hpp format fields, so we need to make the 'len' change for the format itself, not in the perf_hpp__format template. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index d392801ea17e..5a11bf0aabc7 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -629,20 +629,12 @@ unsigned int hists__sort_list_width(struct hists *hists) void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) { - int idx; - if (perf_hpp__is_sort_entry(fmt)) return perf_hpp__reset_sort_width(fmt, hists); - for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { - if (fmt == &perf_hpp__format[idx]) - break; - } - - if (idx == PERF_HPP__MAX_INDEX) - return; + BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX); - switch (idx) { + switch (fmt->idx) { case PERF_HPP__OVERHEAD: case PERF_HPP__OVERHEAD_SYS: case PERF_HPP__OVERHEAD_US: -- cgit v1.2.3 From 97358084b91e94e5f8fcf0379f0430c0ea16bd3b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:03 +0100 Subject: perf hists: Add 'equal' method to perf_hpp_fmt struct To easily compare format entries and make it available for all kinds of format entries. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 9 +++++++-- tools/perf/util/hist.h | 2 +- tools/perf/util/sort.c | 39 ++++++++++++++++++++------------------- 3 files changed, 28 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 5a11bf0aabc7..71c8bb71a350 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -524,6 +524,11 @@ void perf_hpp__cancel_cumulate(void) perf_hpp__format[PERF_HPP__OVERHEAD].name = "Overhead"; } +static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + return a->equal && a->equal(a, b); +} + void perf_hpp__setup_output_field(void) { struct perf_hpp_fmt *fmt; @@ -542,7 +547,7 @@ void perf_hpp__setup_output_field(void) struct perf_hpp_fmt *pos; perf_hpp__for_each_format(pos) { - if (perf_hpp__same_sort_entry(pos, fmt)) + if (fmt_equal(fmt, pos)) goto next; } } @@ -571,7 +576,7 @@ void perf_hpp__append_sort_keys(void) struct perf_hpp_fmt *pos; perf_hpp__for_each_sort_list(pos) { - if (perf_hpp__same_sort_entry(pos, fmt)) + if (fmt_equal(fmt, pos)) goto next; } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 8a0cbdeb449e..9a240d7b8d3b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -215,6 +215,7 @@ struct perf_hpp_fmt { struct hist_entry *a, struct hist_entry *b); int64_t (*sort)(struct perf_hpp_fmt *fmt, struct hist_entry *a, struct hist_entry *b); + bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); struct list_head list; struct list_head sort_list; @@ -268,7 +269,6 @@ void perf_hpp__reset_output_field(void); void perf_hpp__append_sort_keys(void); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); -bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); bool perf_hpp__is_dynamic_entry(struct 
perf_hpp_fmt *format); bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 898e4b0724bf..170f7f73fe93 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1441,20 +1441,6 @@ struct hpp_sort_entry { struct sort_entry *se; }; -bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) -{ - struct hpp_sort_entry *hse_a; - struct hpp_sort_entry *hse_b; - - if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b)) - return false; - - hse_a = container_of(a, struct hpp_sort_entry, hpp); - hse_b = container_of(b, struct hpp_sort_entry, hpp); - - return hse_a->se == hse_b->se; -} - void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) { struct hpp_sort_entry *hse; @@ -1540,6 +1526,25 @@ static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt, return sort_fn(a, b); } +bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) +{ + return format->header == __sort__hpp_header; +} + +static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + struct hpp_sort_entry *hse_a; + struct hpp_sort_entry *hse_b; + + if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b)) + return false; + + hse_a = container_of(a, struct hpp_sort_entry, hpp); + hse_b = container_of(b, struct hpp_sort_entry, hpp); + + return hse_a->se == hse_b->se; +} + static struct hpp_sort_entry * __sort_dimension__alloc_hpp(struct sort_dimension *sd) { @@ -1561,6 +1566,7 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.cmp = __sort__hpp_cmp; hse->hpp.collapse = __sort__hpp_collapse; hse->hpp.sort = __sort__hpp_sort; + hse->hpp.equal = __sort__hpp_equal; INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); @@ -1571,11 +1577,6 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) return hse; } -bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) -{ - return format->header == 
__sort__hpp_header; -} - static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd) { struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); -- cgit v1.2.3 From c0020efa079c5fc2388945ae7e856b362731442d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:04 +0100 Subject: perf hists: Add 'hpp__equal' callback function Adding 'hpp__equal' callback function to compare hpp output format entries. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 71c8bb71a350..b543f4b7d7d3 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -371,6 +371,19 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return 0; } +static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a) +{ + return a->header == hpp__header_fn; +} + +static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + if (!perf_hpp__is_hpp_entry(a) || !perf_hpp__is_hpp_entry(b)) + return false; + + return a->idx == b->idx; +} + #define HPP__COLOR_PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ @@ -382,6 +395,7 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } #define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx) \ @@ -395,6 +409,7 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } #define HPP__PRINT_FNS(_name, _fn, _idx) \ @@ -407,6 +422,7 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ .idx = 
PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } struct perf_hpp_fmt perf_hpp__format[] = { -- cgit v1.2.3 From 3f931f2c4274565fd6c6a642b16387358cbe6266 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:05 +0100 Subject: perf hists: Make hpp setup function generic Now that we have the 'equal' method implemented for hpp format entries we can ease up the logic in the following functions and make them generic wrt comparing format entries: perf_hpp__setup_output_field perf_hpp__append_sort_keys Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 36 ++++++++---------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index b543f4b7d7d3..b0fcaecb7d1d 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -551,21 +551,11 @@ void perf_hpp__setup_output_field(void) /* append sort keys to output field */ perf_hpp__for_each_sort_list(fmt) { - if (!list_empty(&fmt->list)) - continue; + struct perf_hpp_fmt *pos; - /* - * sort entry fields are dynamically created, - * so they can share a same sort key even though - * the list is empty. - */ - if (perf_hpp__is_sort_entry(fmt)) { - struct perf_hpp_fmt *pos; - - perf_hpp__for_each_format(pos) { - if (fmt_equal(fmt, pos)) - goto next; - } + perf_hpp__for_each_format(pos) { + if (fmt_equal(fmt, pos)) + goto next; } perf_hpp__column_register(fmt); @@ -580,21 +570,11 @@ void perf_hpp__append_sort_keys(void) /* append output fields to sort keys */ perf_hpp__for_each_format(fmt) { - if (!list_empty(&fmt->sort_list)) - continue; + struct perf_hpp_fmt *pos; - /* - * sort entry fields are dynamically created, - * so they can share a same sort key even though - * the list is empty. 
- */ - if (perf_hpp__is_sort_entry(fmt)) { - struct perf_hpp_fmt *pos; - - perf_hpp__for_each_sort_list(pos) { - if (fmt_equal(fmt, pos)) - goto next; - } + perf_hpp__for_each_sort_list(pos) { + if (fmt_equal(fmt, pos)) + goto next; } perf_hpp__register_sort_field(fmt); -- cgit v1.2.3 From 9887804d01abf7a4e03cfd6be0312d0a5c4e4aba Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:06 +0100 Subject: perf report: Move UI initialization ahead of sort setup The ui initialization changes hpp format callbacks, based on the used browser. Thus we need this init being processed before setup_sorting. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-9-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 54ce0479ca28..1eab50ac1ef6 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -912,15 +912,6 @@ repeat: symbol_conf.cumulate_callchain = false; } - if (setup_sorting(session->evlist) < 0) { - if (sort_order) - parse_options_usage(report_usage, options, "s", 1); - if (field_order) - parse_options_usage(sort_order ? NULL : report_usage, - options, "F", 1); - goto error; - } - /* Force tty output for header output and per-thread stat. */ if (report.header || report.header_only || report.show_threads) use_browser = 0; @@ -930,6 +921,15 @@ repeat: else use_browser = 0; + if (setup_sorting(session->evlist) < 0) { + if (sort_order) + parse_options_usage(report_usage, options, "s", 1); + if (field_order) + parse_options_usage(sort_order ? 
NULL : report_usage, + options, "F", 1); + goto error; + } + if (report.header || report.header_only) { perf_session__fprintf_info(session, stdout, report.show_full_info); -- cgit v1.2.3 From 3ee60c3b18bd4bf30ea9b70e7542116bb5c205ba Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Jan 2016 10:24:06 +0100 Subject: perf top: Move UI initialization ahead of sort setup The ui initialization changes hpp format callbacks, based on the used browser. Thus we need this init being processed before setup_sorting. Replica of a patch by Jiri for 'perf report'. Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-9-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f1bbe2a589f5..a75de3940b97 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1245,6 +1245,13 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) /* display thread wants entries to be collapsed in a different tree */ sort__need_collapse = 1; + if (top.use_stdio) + use_browser = 0; + else if (top.use_tui) + use_browser = 1; + + setup_browser(false); + if (setup_sorting(top.evlist) < 0) { if (sort_order) parse_options_usage(top_usage, options, "s", 1); @@ -1254,13 +1261,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) goto out_delete_evlist; } - if (top.use_stdio) - use_browser = 0; - else if (top.use_tui) - use_browser = 1; - - setup_browser(false); - status = target__validate(target); if (status) { target__strerror(target, status, errbuf, BUFSIZ); -- cgit v1.2.3 From 1945c3e734cd1f01535dc76de47c38bbe9a87352 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:07 +0100 Subject: perf hists: Allocate output sort field Currently we use 
static output fields, because we have single global list of all sort/output fields. We will add hists specific sort and output lists in following patches, so we need all format entries to be dynamically allocated. Adding support to allocate output sort field. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-10-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 16 ++++++++++++++-- tools/perf/util/sort.c | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index b0fcaecb7d1d..c877c52ff4bc 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -533,11 +533,23 @@ void perf_hpp__column_disable(unsigned col) void perf_hpp__cancel_cumulate(void) { + struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp; + if (is_strict_order(field_order)) return; - perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC); - perf_hpp__format[PERF_HPP__OVERHEAD].name = "Overhead"; + ovh = &perf_hpp__format[PERF_HPP__OVERHEAD]; + acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC]; + + perf_hpp__for_each_format_safe(fmt, tmp) { + if (acc->equal(acc, fmt)) { + perf_hpp__column_unregister(fmt); + continue; + } + + if (ovh->equal(ovh, fmt)) + fmt->name = "Overhead"; + } } static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 170f7f73fe93..52e4a3674985 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1577,6 +1577,19 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) return hse; } +static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd) +{ + struct perf_hpp_fmt *fmt; + + fmt = memdup(hd->fmt, sizeof(*fmt)); + if (fmt) { + INIT_LIST_HEAD(&fmt->list); + INIT_LIST_HEAD(&fmt->sort_list); + } + + return fmt; +} + static int 
__sort_dimension__add_hpp_sort(struct sort_dimension *sd) { struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); @@ -2066,11 +2079,17 @@ static int __sort_dimension__add(struct sort_dimension *sd) static int __hpp_dimension__add(struct hpp_dimension *hd) { - if (!hd->taken) { - hd->taken = 1; + struct perf_hpp_fmt *fmt; - perf_hpp__register_sort_field(hd->fmt); - } + if (hd->taken) + return 0; + + fmt = __hpp_dimension__alloc_hpp(hd); + if (!fmt) + return -1; + + hd->taken = 1; + perf_hpp__register_sort_field(fmt); return 0; } @@ -2088,11 +2107,17 @@ static int __sort_dimension__add_output(struct sort_dimension *sd) static int __hpp_dimension__add_output(struct hpp_dimension *hd) { - if (!hd->taken) { - hd->taken = 1; + struct perf_hpp_fmt *fmt; - perf_hpp__column_register(hd->fmt); - } + if (hd->taken) + return 0; + + fmt = __hpp_dimension__alloc_hpp(hd); + if (!fmt) + return -1; + + hd->taken = 1; + perf_hpp__column_register(fmt); return 0; } -- cgit v1.2.3 From 12cb4397fb398545207acf772b219bd751786015 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:08 +0100 Subject: perf hists: Remove perf_hpp__column_(disable|enable) Those functions are no longer needed. They operate over perf_hpp__format array which is now used only as template for dynamic entries. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-11-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 12 ------------ tools/perf/util/hist.h | 2 -- 2 files changed, 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index c877c52ff4bc..80d63a997287 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -519,18 +519,6 @@ void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) list_add_tail(&format->sort_list, &perf_hpp__sort_list); } -void perf_hpp__column_enable(unsigned col) -{ - BUG_ON(col >= PERF_HPP__MAX_INDEX); - perf_hpp__column_register(&perf_hpp__format[col]); -} - -void perf_hpp__column_disable(unsigned col) -{ - BUG_ON(col >= PERF_HPP__MAX_INDEX); - perf_hpp__column_unregister(&perf_hpp__format[col]); -} - void perf_hpp__cancel_cumulate(void) { struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 9a240d7b8d3b..1f9e21dd53f3 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -259,8 +259,6 @@ enum { void perf_hpp__init(void); void perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_unregister(struct perf_hpp_fmt *format); -void perf_hpp__column_enable(unsigned col); -void perf_hpp__column_disable(unsigned col); void perf_hpp__cancel_cumulate(void); void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); -- cgit v1.2.3 From 564132f3116cf376fdc04b2380e621f35efbb6c7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:09 +0100 Subject: perf hists: Properly release format fields With multiple lists holding format entries, we need support for properly releasing format output/sort fields.
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-12-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 8 ++++++++ tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 24 ++++++++++++++++++++++++ 3 files changed, 33 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 80d63a997287..2cd1a03bf375 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -583,6 +583,12 @@ next: } } +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + if (fmt->free) + fmt->free(fmt); +} + void perf_hpp__reset_output_field(void) { struct perf_hpp_fmt *fmt, *tmp; @@ -591,12 +597,14 @@ void perf_hpp__reset_output_field(void) perf_hpp__for_each_format_safe(fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); + fmt_free(fmt); } /* reset sort keys */ perf_hpp__for_each_sort_list_safe(fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); + fmt_free(fmt); } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 1f9e21dd53f3..f3bcf2d38733 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -216,6 +216,7 @@ struct perf_hpp_fmt { int64_t (*sort)(struct perf_hpp_fmt *fmt, struct hist_entry *a, struct hist_entry *b); bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); + void (*free)(struct perf_hpp_fmt *fmt); struct list_head list; struct list_head sort_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 52e4a3674985..b5389a54356d 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1545,6 +1545,14 @@ static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) return hse_a->se == hse_b->se; } +static void hse_free(struct perf_hpp_fmt *fmt) +{ + struct hpp_sort_entry *hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + free(hse); +} + static struct hpp_sort_entry * 
__sort_dimension__alloc_hpp(struct sort_dimension *sd) { @@ -1567,6 +1575,7 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.collapse = __sort__hpp_collapse; hse->hpp.sort = __sort__hpp_sort; hse->hpp.equal = __sort__hpp_equal; + hse->hpp.free = hse_free; INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); @@ -1577,6 +1586,11 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) return hse; } +static void hpp_free(struct perf_hpp_fmt *fmt) +{ + free(fmt); +} + static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd) { struct perf_hpp_fmt *fmt; @@ -1585,6 +1599,7 @@ static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd) if (fmt) { INIT_LIST_HEAD(&fmt->list); INIT_LIST_HEAD(&fmt->sort_list); + fmt->free = hpp_free; } return fmt; @@ -1818,6 +1833,14 @@ bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt) return fmt->cmp == __sort__hde_cmp; } +static void hde_free(struct perf_hpp_fmt *fmt) +{ + struct hpp_dynamic_entry *hde; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + free(hde); +} + static struct hpp_dynamic_entry * __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field) { @@ -1842,6 +1865,7 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field) hde->hpp.cmp = __sort__hde_cmp; hde->hpp.collapse = __sort__hde_cmp; hde->hpp.sort = __sort__hde_cmp; + hde->hpp.free = hde_free; INIT_LIST_HEAD(&hde->hpp.list); INIT_LIST_HEAD(&hde->hpp.sort_list); -- cgit v1.2.3 From 2fbaa39079672bf52a9208ec1263385b48933cc3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:10 +0100 Subject: perf hists: Separate sort fields parsing into setup_sort_list function Separating sort fields parsing into setup_sort_list function, so it's separated from sort_order string setup and could be reused later in following patches. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-13-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index b5389a54356d..ab1c21a950f6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2241,6 +2241,26 @@ static int sort_dimension__add(const char *tok, return -ESRCH; } +static int setup_sort_list(char *str, struct perf_evlist *evlist) +{ + char *tmp, *tok; + int ret = 0; + + for (tok = strtok_r(str, ", ", &tmp); + tok; tok = strtok_r(NULL, ", ", &tmp)) { + ret = sort_dimension__add(tok, evlist); + if (ret == -EINVAL) { + error("Invalid --sort key: `%s'", tok); + break; + } else if (ret == -ESRCH) { + error("Unknown --sort key: `%s'", tok); + break; + } + } + + return ret; +} + static const char *get_default_sort_order(struct perf_evlist *evlist) { const char *default_sort_orders[] = { @@ -2335,7 +2355,7 @@ static char *setup_overhead(char *keys) static int __setup_sorting(struct perf_evlist *evlist) { - char *tmp, *tok, *str; + char *str; const char *sort_keys; int ret = 0; @@ -2373,17 +2393,7 @@ static int __setup_sorting(struct perf_evlist *evlist) } } - for (tok = strtok_r(str, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = sort_dimension__add(tok, evlist); - if (ret == -EINVAL) { - error("Invalid --sort key: `%s'", tok); - break; - } else if (ret == -ESRCH) { - error("Unknown --sort key: `%s'", tok); - break; - } - } + ret = setup_sort_list(str, evlist); free(str); return ret; -- cgit v1.2.3 From 6d3375efebe906ad0ce55ddaa883bf41fd8c444b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:11 +0100 Subject: perf hists: Separate output fields parsing into setup_output_list function Separating output fields parsing into 
setup_output_list function, so it's separated from field_order string setup and could be reused later in following patches. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-14-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ab1c21a950f6..36dbd5554f0e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2535,6 +2535,26 @@ static int output_field_add(char *tok) return -ESRCH; } +static int setup_output_list(char *str) +{ + char *tmp, *tok; + int ret = 0; + + for (tok = strtok_r(str, ", ", &tmp); + tok; tok = strtok_r(NULL, ", ", &tmp)) { + ret = output_field_add(tok); + if (ret == -EINVAL) { + error("Invalid --fields key: `%s'", tok); + break; + } else if (ret == -ESRCH) { + error("Unknown --fields key: `%s'", tok); + break; + } + } + + return ret; +} + static void reset_dimensions(void) { unsigned int i; @@ -2559,7 +2579,7 @@ bool is_strict_order(const char *order) static int __setup_output_field(void) { - char *tmp, *tok, *str, *strp; + char *str, *strp; int ret = -EINVAL; if (field_order == NULL) @@ -2579,17 +2599,7 @@ static int __setup_output_field(void) goto out; } - for (tok = strtok_r(strp, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = output_field_add(tok); - if (ret == -EINVAL) { - error("Invalid --fields key: `%s'", tok); - break; - } else if (ret == -ESRCH) { - error("Unknown --fields key: `%s'", tok); - break; - } - } + ret = setup_output_list(strp); out: free(str); -- cgit v1.2.3 From 7c31e10266bd18de163d5c60899591c0540bb002 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:12 +0100 Subject: perf hists: Introduce struct perf_hpp_list Gather output and sort lists under struct perf_hpp_list, so we could 
have multiple instances of sort/output format entries. Replacing current perf_hpp__list and perf_hpp__sort_list lists with a single perf_hpp_list instance. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-15-git-send-email-jolsa@kernel.org [ Renamed fields to .{fields,sorts} as suggested by Namhyung and acked by Jiri ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 11 ++++++----- tools/perf/util/hist.h | 16 ++++++++++------ 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 2cd1a03bf375..74dbeac4753b 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -436,9 +436,10 @@ struct perf_hpp_fmt perf_hpp__format[] = { HPP__PRINT_FNS("Period", period, PERIOD) }; -LIST_HEAD(perf_hpp__list); -LIST_HEAD(perf_hpp__sort_list); - +struct perf_hpp_list perf_hpp_list = { + .fields = LIST_HEAD_INIT(perf_hpp_list.fields), + .sorts = LIST_HEAD_INIT(perf_hpp_list.sorts), +}; #undef HPP__COLOR_PRINT_FNS #undef HPP__COLOR_ACC_PRINT_FNS @@ -506,7 +507,7 @@ void perf_hpp__init(void) void perf_hpp__column_register(struct perf_hpp_fmt *format) { - list_add_tail(&format->list, &perf_hpp__list); + list_add_tail(&format->list, &perf_hpp_list.fields); } void perf_hpp__column_unregister(struct perf_hpp_fmt *format) @@ -516,7 +517,7 @@ void perf_hpp__column_unregister(struct perf_hpp_fmt *format) void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) { - list_add_tail(&format->sort_list, &perf_hpp__sort_list); + list_add_tail(&format->sort_list, &perf_hpp_list.sorts); } void perf_hpp__cancel_cumulate(void) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f3bcf2d38733..203397a6ea07 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -226,20 +226,24 @@ struct perf_hpp_fmt { int idx; }; -extern struct list_head perf_hpp__list; -extern struct list_head 
perf_hpp__sort_list; +struct perf_hpp_list { + struct list_head fields; + struct list_head sorts; +}; + +extern struct perf_hpp_list perf_hpp_list; #define perf_hpp__for_each_format(format) \ - list_for_each_entry(format, &perf_hpp__list, list) + list_for_each_entry(format, &perf_hpp_list.fields, list) #define perf_hpp__for_each_format_safe(format, tmp) \ - list_for_each_entry_safe(format, tmp, &perf_hpp__list, list) + list_for_each_entry_safe(format, tmp, &perf_hpp_list.fields, list) #define perf_hpp__for_each_sort_list(format) \ - list_for_each_entry(format, &perf_hpp__sort_list, sort_list) + list_for_each_entry(format, &perf_hpp_list.sorts, sort_list) #define perf_hpp__for_each_sort_list_safe(format, tmp) \ - list_for_each_entry_safe(format, tmp, &perf_hpp__sort_list, sort_list) + list_for_each_entry_safe(format, tmp, &perf_hpp_list.sorts, sort_list) extern struct perf_hpp_fmt perf_hpp__format[]; -- cgit v1.2.3 From 94b3dc3865097e11073f1abf5b20b5f80af223af Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:13 +0100 Subject: perf hists: Introduce perf_hpp_list__init function Introducing perf_hpp_list__init function to have an easy way to initialize perf_hpp_list struct. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-16-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 6 ++++++ tools/perf/util/hist.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index d07955c145e5..b762ecc31505 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1642,3 +1642,9 @@ int hists__init(void) return err; } + +void perf_hpp_list__init(struct perf_hpp_list *list) +{ + INIT_LIST_HEAD(&list->fields); + INIT_LIST_HEAD(&list->sorts); +} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 203397a6ea07..e22f98e3fc6d 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -386,4 +386,6 @@ int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused); int perf_hist_config(const char *var, const char *value); +void perf_hpp_list__init(struct perf_hpp_list *list); + #endif /* __PERF_HIST_H */ -- cgit v1.2.3 From ebdd98e030f5ed6dd1bae9ab01b084f97685bd60 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:14 +0100 Subject: perf hists: Add perf_hpp_list register helpers Adding 2 perf_hpp_list register helpers: perf_hpp_list__column_register() perf_hpp_list__register_sort_field() to be called within existing helpers: perf_hpp__column_register() perf_hpp__register_sort_field() to register format entries within global perf_hpp_list object. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-17-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 14 ++++++++------ tools/perf/util/hist.h | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 74dbeac4753b..1655c0d9c089 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -505,19 +505,21 @@ void perf_hpp__init(void) hpp_dimension__add_output(PERF_HPP__PERIOD); } -void perf_hpp__column_register(struct perf_hpp_fmt *format) +void perf_hpp_list__column_register(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) { - list_add_tail(&format->list, &perf_hpp_list.fields); + list_add_tail(&format->list, &list->fields); } -void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) { - list_del(&format->list); + list_add_tail(&format->sort_list, &list->sorts); } -void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) +void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { - list_add_tail(&format->sort_list, &perf_hpp_list.sorts); + list_del(&format->list); } void perf_hpp__cancel_cumulate(void) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index e22f98e3fc6d..a7769d778374 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -233,6 +233,21 @@ struct perf_hpp_list { extern struct perf_hpp_list perf_hpp_list; +void perf_hpp_list__column_register(struct perf_hpp_list *list, + struct perf_hpp_fmt *format); +void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format); + +static inline void perf_hpp__column_register(struct perf_hpp_fmt *format) +{ + perf_hpp_list__column_register(&perf_hpp_list, format); +} + +static inline void 
perf_hpp__register_sort_field(struct perf_hpp_fmt *format) +{ + perf_hpp_list__register_sort_field(&perf_hpp_list, format); +} + #define perf_hpp__for_each_format(format) \ list_for_each_entry(format, &perf_hpp_list.fields, list) @@ -262,11 +277,8 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__cancel_cumulate(void); - -void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); void perf_hpp__setup_output_field(void); void perf_hpp__reset_output_field(void); void perf_hpp__append_sort_keys(void); -- cgit v1.2.3 From 07600027fb7114bf7bcabdd121e5178f200d8a44 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:16 +0100 Subject: perf hists: Pass perf_hpp_list all the way through setup_output_list Passing perf_hpp_list all the way through setup_output_list so the output entry can be added to an arbitrary list. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-19-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 36dbd5554f0e..f643bed8f63b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1616,14 +1616,15 @@ static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd) return 0; } -static int __sort_dimension__add_hpp_output(struct sort_dimension *sd) +static int __sort_dimension__add_hpp_output(struct perf_hpp_list *list, + struct sort_dimension *sd) { struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); if (hse == NULL) return -1; - perf_hpp__column_register(&hse->hpp); + perf_hpp_list__column_register(list, &hse->hpp); return 0; } @@ -2117,19 +2118,21 @@ static int 
__hpp_dimension__add(struct hpp_dimension *hd) return 0; } -static int __sort_dimension__add_output(struct sort_dimension *sd) +static int __sort_dimension__add_output(struct perf_hpp_list *list, + struct sort_dimension *sd) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(sd) < 0) + if (__sort_dimension__add_hpp_output(list, sd) < 0) return -1; sd->taken = 1; return 0; } -static int __hpp_dimension__add_output(struct hpp_dimension *hd) +static int __hpp_dimension__add_output(struct perf_hpp_list *list, + struct hpp_dimension *hd) { struct perf_hpp_fmt *fmt; @@ -2141,14 +2144,14 @@ static int __hpp_dimension__add_output(struct hpp_dimension *hd) return -1; hd->taken = 1; - perf_hpp__column_register(fmt); + perf_hpp_list__column_register(list, fmt); return 0; } int hpp_dimension__add_output(unsigned col) { BUG_ON(col >= PERF_HPP__MAX_INDEX); - return __hpp_dimension__add_output(&hpp_sort_dimensions[col]); + return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); } static int sort_dimension__add(const char *tok, @@ -2492,7 +2495,7 @@ void sort__setup_elide(FILE *output) } } -static int output_field_add(char *tok) +static int output_field_add(struct perf_hpp_list *list, char *tok) { unsigned int i; @@ -2502,7 +2505,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { @@ -2511,7 +2514,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __hpp_dimension__add_output(hd); + return __hpp_dimension__add_output(list, hd); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -2520,7 +2523,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } 
for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { @@ -2529,20 +2532,20 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } return -ESRCH; } -static int setup_output_list(char *str) +static int setup_output_list(struct perf_hpp_list *list, char *str) { char *tmp, *tok; int ret = 0; for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = output_field_add(tok); + ret = output_field_add(list, tok); if (ret == -EINVAL) { error("Invalid --fields key: `%s'", tok); break; @@ -2599,7 +2602,7 @@ static int __setup_output_field(void) goto out; } - ret = setup_output_list(strp); + ret = setup_output_list(&perf_hpp_list, strp); out: free(str); -- cgit v1.2.3 From cf094045d718437e3d5cd42ac09d77561cb2f368 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:17 +0100 Subject: perf hists: Introduce perf_hpp_list__for_each_format macro Introducing perf_hpp_list__for_each_format macro to iterate perf_hpp_list object's output entries. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-20-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 8 ++++---- tools/perf/ui/gtk/hists.c | 6 +++--- tools/perf/ui/hist.c | 8 ++++---- tools/perf/ui/stdio/hist.c | 8 ++++---- tools/perf/util/hist.h | 4 ++-- tools/perf/util/sort.c | 8 ++++---- 6 files changed, 21 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 61d578bf4ffd..df0aedfaea75 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1095,7 +1095,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, hist_browser__gotorc(browser, row, 0); - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (perf_hpp__should_skip(fmt, entry->hists) || column++ < browser->b.horiz_scroll) continue; @@ -1175,7 +1175,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * return ret; } - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll) continue; @@ -1441,7 +1441,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, if (symbol_conf.use_callchain) printed += fprintf(fp, "%c ", folded_sign); - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) continue; @@ -2104,7 +2104,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, memset(options, 0, sizeof(options)); memset(actions, 0, sizeof(actions)); - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { perf_hpp__reset_width(fmt, hists); /* * This is done just once, and activates the horizontal scrolling diff --git 
a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 0f8dcfdfb10f..eca5151f91d7 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -306,7 +306,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, nr_cols = 0; - perf_hpp__for_each_format(fmt) + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) col_types[nr_cols++] = G_TYPE_STRING; store = gtk_tree_store_newv(nr_cols, col_types); @@ -317,7 +317,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -367,7 +367,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (perf_hpp__should_skip(fmt, h->hists)) continue; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 1655c0d9c089..7b5e8cedf853 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -556,7 +556,7 @@ void perf_hpp__setup_output_field(void) perf_hpp__for_each_sort_list(fmt) { struct perf_hpp_fmt *pos; - perf_hpp__for_each_format(pos) { + perf_hpp_list__for_each_format(&perf_hpp_list, pos) { if (fmt_equal(fmt, pos)) goto next; } @@ -572,7 +572,7 @@ void perf_hpp__append_sort_keys(void) struct perf_hpp_fmt *fmt; /* append output fields to sort keys */ - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { struct perf_hpp_fmt *pos; perf_hpp__for_each_sort_list(pos) { @@ -621,7 +621,7 @@ unsigned int hists__sort_list_width(struct hists *hists) bool first = true; struct perf_hpp dummy_hpp; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -674,7 +674,7 @@ void perf_hpp__set_user_width(const char *width_list_str) struct perf_hpp_fmt *fmt; const char *ptr = 
width_list_str; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { char *p; int len = strtol(ptr, &p, 10); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 691e52ce7510..83e0bf2ab986 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -384,7 +384,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) if (symbol_conf.exclude_other && !he->parent) return 0; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) continue; @@ -453,7 +453,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, init_rem_hits(); - perf_hpp__for_each_format(fmt) + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) perf_hpp__reset_width(fmt, hists); if (symbol_conf.col_width_list_str) @@ -464,7 +464,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -488,7 +488,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { unsigned int i; if (perf_hpp__should_skip(fmt, hists)) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index a7769d778374..eadffca1a501 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -248,8 +248,8 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) perf_hpp_list__register_sort_field(&perf_hpp_list, format); } -#define perf_hpp__for_each_format(format) \ - list_for_each_entry(format, &perf_hpp_list.fields, list) +#define perf_hpp_list__for_each_format(_list, format) \ + list_for_each_entry(format, &(_list)->fields, list) #define perf_hpp__for_each_format_safe(format, tmp) \ 
list_for_each_entry_safe(format, tmp, &perf_hpp_list.fields, list) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index f643bed8f63b..1e134ff56ad4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2407,7 +2407,7 @@ void perf_hpp__set_elide(int idx, bool elide) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2467,7 +2467,7 @@ void sort__setup_elide(FILE *output) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2479,7 +2479,7 @@ void sort__setup_elide(FILE *output) * It makes no sense to elide all of sort entries. * Just revert them to show up again. */ - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2487,7 +2487,7 @@ void sort__setup_elide(FILE *output) return; } - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; -- cgit v1.2.3 From 7a1799e0a276069d8b903ba17179b4983b98c04b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:18 +0100 Subject: perf hists: Introduce perf_hpp_list__for_each_format_safe macro Introducing perf_hpp_list__for_each_format_safe macro to iterate perf_hpp_list object's output entries safely. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-21-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 4 ++-- tools/perf/util/hist.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 7b5e8cedf853..348706a908f9 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -532,7 +532,7 @@ void perf_hpp__cancel_cumulate(void) ovh = &perf_hpp__format[PERF_HPP__OVERHEAD]; acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC]; - perf_hpp__for_each_format_safe(fmt, tmp) { + perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) { if (acc->equal(acc, fmt)) { perf_hpp__column_unregister(fmt); continue; @@ -597,7 +597,7 @@ void perf_hpp__reset_output_field(void) struct perf_hpp_fmt *fmt, *tmp; /* reset output fields */ - perf_hpp__for_each_format_safe(fmt, tmp) { + perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); fmt_free(fmt); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index eadffca1a501..f5b2309de16e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -251,8 +251,8 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) #define perf_hpp_list__for_each_format(_list, format) \ list_for_each_entry(format, &(_list)->fields, list) -#define perf_hpp__for_each_format_safe(format, tmp) \ - list_for_each_entry_safe(format, tmp, &perf_hpp_list.fields, list) +#define perf_hpp_list__for_each_format_safe(_list, format, tmp) \ + list_for_each_entry_safe(format, tmp, &(_list)->fields, list) #define perf_hpp__for_each_sort_list(format) \ list_for_each_entry(format, &perf_hpp_list.sorts, sort_list) -- cgit v1.2.3 From d29a497090845002ee449c8dc682dd59ad8bab42 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:19 +0100 Subject: 
perf hists: Introduce perf_hpp_list__for_each_sort_list macro Introducing perf_hpp_list__for_each_sort_list macro to iterate perf_hpp_list object's sort entries. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-22-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 4 ++-- tools/perf/util/hist.c | 6 +++--- tools/perf/util/hist.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 348706a908f9..f09eabe18167 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -553,7 +553,7 @@ void perf_hpp__setup_output_field(void) struct perf_hpp_fmt *fmt; /* append sort keys to output field */ - perf_hpp__for_each_sort_list(fmt) { + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { struct perf_hpp_fmt *pos; perf_hpp_list__for_each_format(&perf_hpp_list, pos) { @@ -575,7 +575,7 @@ void perf_hpp__append_sort_keys(void) perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { struct perf_hpp_fmt *pos; - perf_hpp__for_each_sort_list(pos) { + perf_hpp_list__for_each_sort_list(&perf_hpp_list, pos) { if (fmt_equal(fmt, pos)) goto next; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b762ecc31505..dea475d1fab0 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -961,7 +961,7 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { cmp = fmt->cmp(fmt, left, right); if (cmp) break; @@ -976,7 +976,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { cmp = fmt->collapse(fmt, left, right); if (cmp) break; @@ -1120,7 +1120,7 
@@ static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { if (perf_hpp__should_skip(fmt, a->hists)) continue; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f5b2309de16e..c9b2ea4a4929 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -254,8 +254,8 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) #define perf_hpp_list__for_each_format_safe(_list, format, tmp) \ list_for_each_entry_safe(format, tmp, &(_list)->fields, list) -#define perf_hpp__for_each_sort_list(format) \ - list_for_each_entry(format, &perf_hpp_list.sorts, sort_list) +#define perf_hpp_list__for_each_sort_list(_list, format) \ + list_for_each_entry(format, &(_list)->sorts, sort_list) #define perf_hpp__for_each_sort_list_safe(format, tmp) \ list_for_each_entry_safe(format, tmp, &perf_hpp_list.sorts, sort_list) -- cgit v1.2.3 From 1a8ebd243a0b65c2a6d1458705d04dece937ab52 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:20 +0100 Subject: perf hists: Introduce perf_hpp_list__for_each_sort_list_safe macro Introducing perf_hpp_list__for_each_sort_list_safe macro to iterate perf_hpp_list object's sort entries safely. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-23-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 2 +- tools/perf/util/hist.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index f09eabe18167..9cda51edfdbd 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -604,7 +604,7 @@ void perf_hpp__reset_output_field(void) } /* reset sort keys */ - perf_hpp__for_each_sort_list_safe(fmt, tmp) { + perf_hpp_list__for_each_sort_list_safe(&perf_hpp_list, fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); fmt_free(fmt); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index c9b2ea4a4929..61d35a9f928b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -257,8 +257,8 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) #define perf_hpp_list__for_each_sort_list(_list, format) \ list_for_each_entry(format, &(_list)->sorts, sort_list) -#define perf_hpp__for_each_sort_list_safe(format, tmp) \ - list_for_each_entry_safe(format, tmp, &perf_hpp_list.sorts, sort_list) +#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp) \ + list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list) extern struct perf_hpp_fmt perf_hpp__format[]; -- cgit v1.2.3 From 43e0a68f13047750a3728c983a539c61fb4121c5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:21 +0100 Subject: perf hists: Add struct perf_hpp_list argument to helper functions Adding struct perf_hpp_list argument to following helper functions: void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); void perf_hpp__append_sort_keys(struct perf_hpp_list *list); so they could be used on hists's hpp_list. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-24-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 19 ++++++++++--------- tools/perf/util/hist.h | 7 ++++--- tools/perf/util/sort.c | 6 +++--- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 9cda51edfdbd..8075d4cc54a8 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -548,15 +548,15 @@ static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) return a->equal && a->equal(a, b); } -void perf_hpp__setup_output_field(void) +void perf_hpp__setup_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt; /* append sort keys to output field */ - perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + perf_hpp_list__for_each_sort_list(list, fmt) { struct perf_hpp_fmt *pos; - perf_hpp_list__for_each_format(&perf_hpp_list, pos) { + perf_hpp_list__for_each_format(list, pos) { if (fmt_equal(fmt, pos)) goto next; } @@ -567,15 +567,15 @@ next: } } -void perf_hpp__append_sort_keys(void) +void perf_hpp__append_sort_keys(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt; /* append output fields to sort keys */ - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + perf_hpp_list__for_each_format(list, fmt) { struct perf_hpp_fmt *pos; - perf_hpp_list__for_each_sort_list(&perf_hpp_list, pos) { + perf_hpp_list__for_each_sort_list(list, pos) { if (fmt_equal(fmt, pos)) goto next; } @@ -586,25 +586,26 @@ next: } } + static void fmt_free(struct perf_hpp_fmt *fmt) { if (fmt->free) fmt->free(fmt); } -void perf_hpp__reset_output_field(void) +void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; /* reset output fields */ - perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) { + perf_hpp_list__for_each_format_safe(list, fmt, tmp) { 
list_del_init(&fmt->list); list_del_init(&fmt->sort_list); fmt_free(fmt); } /* reset sort keys */ - perf_hpp_list__for_each_sort_list_safe(&perf_hpp_list, fmt, tmp) { + perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); fmt_free(fmt); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 61d35a9f928b..a39c9c1159ff 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -279,9 +279,10 @@ enum { void perf_hpp__init(void); void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__cancel_cumulate(void); -void perf_hpp__setup_output_field(void); -void perf_hpp__reset_output_field(void); -void perf_hpp__append_sort_keys(void); +void perf_hpp__setup_output_field(struct perf_hpp_list *list); +void perf_hpp__reset_output_field(struct perf_hpp_list *list); +void perf_hpp__append_sort_keys(struct perf_hpp_list *list); + bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 1e134ff56ad4..de620f7f40f4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2636,9 +2636,9 @@ int setup_sorting(struct perf_evlist *evlist) return err; /* copy sort keys to output fields */ - perf_hpp__setup_output_field(); + perf_hpp__setup_output_field(&perf_hpp_list); /* and then copy output fields to sort keys */ - perf_hpp__append_sort_keys(); + perf_hpp__append_sort_keys(&perf_hpp_list); return 0; } @@ -2654,5 +2654,5 @@ void reset_output_field(void) sort_order = NULL; reset_dimensions(); - perf_hpp__reset_output_field(); + perf_hpp__reset_output_field(&perf_hpp_list); } -- cgit v1.2.3 From 5b65855e20348a9e2772a1cb7c1e6ab477859ba6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:22 +0100 Subject: perf tools: Add hpp_list into struct hists object Adding hpp_list into struct hists object. 
Initializing struct hists_evsel hists object to carry global perf_hpp_list list. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-25-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 5 +++-- tools/perf/util/hist.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index dea475d1fab0..2b9cc9129692 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1578,7 +1578,7 @@ int perf_hist_config(const char *var, const char *value) return 0; } -int __hists__init(struct hists *hists) +int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) { memset(hists, 0, sizeof(*hists)); hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; @@ -1587,6 +1587,7 @@ int __hists__init(struct hists *hists) hists->entries = RB_ROOT; pthread_mutex_init(&hists->lock, NULL); hists->socket_filter = -1; + hists->hpp_list = hpp_list; return 0; } @@ -1623,7 +1624,7 @@ static int hists_evsel__init(struct perf_evsel *evsel) { struct hists *hists = evsel__hists(evsel); - __hists__init(hists); + __hists__init(hists, &perf_hpp_list); return 0; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index a39c9c1159ff..b296ff5b9683 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,6 +75,7 @@ struct hists { u64 event_stream; u16 col_len[HISTC_NR_COLS]; int socket_filter; + struct perf_hpp_list *hpp_list; }; struct hist_entry_iter; @@ -186,7 +187,7 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel) } int hists__init(void); -int __hists__init(struct hists *hists); +int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); struct rb_root *hists__get_rotate_entries_in(struct hists *hists); bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, -- cgit v1.2.3 From 
f0786af536bb0ba54cb516eee493af03cefdbaa3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:23 +0100 Subject: perf hists: Introduce hists__for_each_format macro With the hist object having the perf_hpp_list we can now iterate output format entries based in the hists object. Adding hists__for_each_format macro to do that. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-26-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 8 ++++---- tools/perf/ui/gtk/hists.c | 6 +++--- tools/perf/ui/hist.c | 2 +- tools/perf/ui/stdio/hist.c | 8 ++++---- tools/perf/util/hist.h | 3 +++ 5 files changed, 15 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index df0aedfaea75..3a1e0965a8fd 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1095,7 +1095,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, hist_browser__gotorc(browser, row, 0); - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(browser->hists, fmt) { if (perf_hpp__should_skip(fmt, entry->hists) || column++ < browser->b.horiz_scroll) continue; @@ -1175,7 +1175,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * return ret; } - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(browser->hists, fmt) { if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll) continue; @@ -1441,7 +1441,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, if (symbol_conf.use_callchain) printed += fprintf(fp, "%c ", folded_sign); - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(browser->hists, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) continue; @@ -2104,7 +2104,7 @@ static int 
perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, memset(options, 0, sizeof(options)); memset(actions, 0, sizeof(actions)); - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(browser->hists, fmt) { perf_hpp__reset_width(fmt, hists); /* * This is done just once, and activates the horizontal scrolling diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index eca5151f91d7..32cc38a5b57f 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -306,7 +306,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, nr_cols = 0; - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) + hists__for_each_format(hists, fmt) col_types[nr_cols++] = G_TYPE_STRING; store = gtk_tree_store_newv(nr_cols, col_types); @@ -317,7 +317,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -367,7 +367,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, h->hists)) continue; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 8075d4cc54a8..1ba4117d9c2d 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -622,7 +622,7 @@ unsigned int hists__sort_list_width(struct hists *hists) bool first = true; struct perf_hpp dummy_hpp; - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 83e0bf2ab986..1a6e8f7f38c4 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -384,7 +384,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) if 
(symbol_conf.exclude_other && !he->parent) return 0; - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(he->hists, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) continue; @@ -453,7 +453,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, init_rem_hits(); - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) + hists__for_each_format(hists, fmt) perf_hpp__reset_width(fmt, hists); if (symbol_conf.col_width_list_str) @@ -464,7 +464,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -488,7 +488,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { + hists__for_each_format(hists, fmt) { unsigned int i; if (perf_hpp__should_skip(fmt, hists)) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index b296ff5b9683..bc900448e36f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -261,6 +261,9 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) #define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp) \ list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list) +#define hists__for_each_format(hists, format) \ + perf_hpp_list__for_each_format((hists)->hpp_list, fmt) + extern struct perf_hpp_fmt perf_hpp__format[]; enum { -- cgit v1.2.3 From aa6f50af822a552b579252ecd42224e09e11e879 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 18 Jan 2016 10:24:24 +0100 Subject: perf hists: Introduce hists__for_each_sort_list macro With the hist object having the perf_hpp_list we can now iterate sort format entries based in the hists object. Adding hists__for_each_sort_list macro to do that. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1453109064-1026-27-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 9 ++++++--- tools/perf/util/hist.h | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 2b9cc9129692..12f2d794dc28 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -958,10 +958,11 @@ out: int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { + struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + hists__for_each_sort_list(hists, fmt) { cmp = fmt->cmp(fmt, left, right); if (cmp) break; @@ -973,10 +974,11 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) { + struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + hists__for_each_sort_list(hists, fmt) { cmp = fmt->collapse(fmt, left, right); if (cmp) break; @@ -1117,10 +1119,11 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) { + struct hists *hists = a->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + hists__for_each_sort_list(hists, fmt) { if (perf_hpp__should_skip(fmt, a->hists)) continue; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index bc900448e36f..1c7544a8fe1a 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -264,6 +264,9 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) #define hists__for_each_format(hists, format) \ perf_hpp_list__for_each_format((hists)->hpp_list, 
fmt) +#define hists__for_each_sort_list(hists, format) \ + perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt) + extern struct perf_hpp_fmt perf_hpp__format[]; enum { -- cgit v1.2.3 From c6f5f6b662719ded53700deefec7dbc4227c9778 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 3 Feb 2016 23:11:20 +0900 Subject: perf report: Update documentation of --sort option The description of the memory sort key (used by --mem-mode) was misplaced. Move it under the --sort option so that it can be referenced properly. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1454508683-5735-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8a301f6afb37..1cb8fac596b1 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -117,6 +117,22 @@ OPTIONS And default sort keys are changed to comm, dso_from, symbol_from, dso_to and symbol_to, see '--branch-stack'. + If the --mem-mode option is used, the following sort keys are also available + (incompatible with --branch-stack): + symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. 
+ + - symbol_daddr: name of data symbol being executed on at the time of sample + - dso_daddr: name of library or module containing the data being executed + on at the time of the sample + - locked: whether the bus was locked at the time of the sample + - tlb: type of tlb access for the data at the time of the sample + - mem: type of memory access for the data at the time of the sample + - snoop: type of snoop (if any) for the data at the time of the sample + - dcacheline: the cacheline the data address is on at the time of the sample + + And the default sort keys are changed to local_weight, mem, sym, dso, + symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. + If the data file has tracepoint event(s), following (dynamic) sort keys are also available: trace, trace_fields, [.][/raw] @@ -151,22 +167,6 @@ OPTIONS By default, every sort keys not specified in -F will be appended automatically. - If --mem-mode option is used, following sort keys are also available - (incompatible with --branch-stack): - symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. - - - symbol_daddr: name of data symbol being executed on at the time of sample - - dso_daddr: name of library or module containing the data being executed - on at the time of sample - - locked: whether the bus was locked at the time of sample - - tlb: type of tlb access for the data at the time of sample - - mem: type of memory access for the data at the time of sample - - snoop: type of snoop (if any) for the data at the time of sample - - dcacheline: the cacheline the data address is on at the time of sample - - And default sort keys are changed to local_weight, mem, sym, dso, - symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. - -p:: --parent=:: A regex filter to identify parent. 
The parent is a caller of this -- cgit v1.2.3 From 1ba2fc6de4ac1a87e3ece65651795760ea5cf658 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 3 Feb 2016 23:11:21 +0900 Subject: perf report: Update documentation of --percent-limit option The --percent-limit option was recently changed to be applied to callchains as well as to hist entries, but the documentation was not updated accordingly. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1454508683-5735-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 1cb8fac596b1..89cab84e92fd 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -351,7 +351,10 @@ OPTIONS --percent-limit:: Do not show entries which have an overhead under that percent. - (Default: 0). + (Default: 0). Note that this option also sets the percent limit (threshold) + of callchains. However the default value of callchain threshold is + different than the default value of hist entries. Please see the + --call-graph option for details. --percentage:: Determine how to display the overhead percentage of filtered entries. -- cgit v1.2.3 From b62e8dfcda8cb133c062c0e1207afea2476eb7fd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 3 Feb 2016 23:11:23 +0900 Subject: perf hists browser: Add 'L' hotkey to change percent limit Add 'L' key action to change the percent limit applied to both hist entries and callchains.
Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1454508683-5735-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 3a1e0965a8fd..a5a5390476ac 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2029,6 +2029,42 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb) hb->nr_non_filtered_entries = nr_entries; } +static void hist_browser__update_percent_limit(struct hist_browser *hb, + double percent) +{ + struct hist_entry *he; + struct rb_node *nd = rb_first(&hb->hists->entries); + u64 total = hists__total_period(hb->hists); + u64 min_callchain_hits = total * (percent / 100); + + hb->min_pcnt = callchain_param.min_percent = percent; + + if (!symbol_conf.use_callchain) + return; + + while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { + he = rb_entry(nd, struct hist_entry, rb_node); + + if (callchain_param.mode == CHAIN_GRAPH_REL) { + total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (percent / 100); + } + + callchain_param.sort(&he->sorted_chain, he->callchain, + min_callchain_hits, &callchain_param); + + /* force to re-evaluate folding state of callchains */ + he->init_have_children = false; + hist_entry__set_folding(he, false); + + nd = rb_next(nd); + } +} + static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, const char *helpline, bool left_exits, @@ -2064,6 +2100,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "E Expand all callchains\n" \ "F Toggle percentage of filtered 
entries\n" \ "H Display column headers\n" \ + "L Change percent limit\n" \ "m Display context menu\n" \ "S Zoom into current Processor Socket\n" \ @@ -2219,6 +2256,24 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, top->zero = !top->zero; } continue; + case 'L': + if (ui_browser__input_window("Percent Limit", + "Please enter the value you want to hide entries under that percent.", + buf, "ENTER: OK, ESC: Cancel", + delay_secs * 2) == K_ENTER) { + char *end; + double new_percent = strtod(buf, &end); + + if (new_percent < 0 || new_percent > 100) { + ui_browser__warning(&browser->b, delay_secs * 2, + "Invalid percent: %.2f", new_percent); + continue; + } + + hist_browser__update_percent_limit(browser, new_percent); + hist_browser__reset(browser); + } + continue; case K_F1: case 'h': case '?': -- cgit v1.2.3 From 5978531b296ab7e61abef43f2a1a2d9b92246de1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Feb 2016 17:16:32 -0300 Subject: perf build tests: Elide "-f Makefile" from make invocation Since this is the name that 'make' will look for if no explicit -f file is passed. This in turn makes the output of 'build-test' more compact: Before: $ perf stat make -C tools/perf build-test cd . && make FEATURE_DUMP_COPY=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP feature-dump make_no_libaudit_O: cd . && make -f Makefile O=/tmp/tmp.tHIa0Kkk2Y DESTDIR=/tmp/tmp.foK7rckkVi NO_LIBAUDIT=1 FEATURES_DUMP=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP After: $ perf stat make -C tools/perf build-test make_no_libaudit_O: cd . 
&& make O=/tmp/tmp.tHIa0Kkk2Y DESTDIR=/tmp/tmp.foK7rckkVi NO_LIBAUDIT=1 FEATURES_DUMP=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-m440lb8dkfsywsyah0htif6t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/make | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/make b/tools/perf/tests/make index cc72b67bde5e..0b70cf16a562 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -111,6 +111,9 @@ run := make_pure # disable features detection ifeq ($(MK),Makefile) run += make_clean_all +MAKE_F := $(MAKE) +else +MAKE_F := $(MAKE) -f $(MK) endif run += make_python_perf_so run += make_debug @@ -270,12 +273,12 @@ endif MAKEFLAGS := --no-print-directory -clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null) +clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null) $(run): $(call clean) @TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \ + cmd="cd $(PERF) && $(MAKE_F) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \ printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1; \ echo " test: $(call test,$@)" >> $@ 2>&1; \ @@ -286,7 +289,7 @@ $(run_O): $(call clean) @TMP_O=$$(mktemp -d); \ TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \ + cmd="cd $(PERF) && $(MAKE_F) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \ printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 && \ echo " test: $(call test_O,$@)" >> $@ 2>&1; \ -- cgit v1.2.3 From 3c7a152b0d1c81b9bac5ab922dc57168046668bf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Feb 2016 
17:24:11 -0300 Subject: perf build tests: Move the feature related vars to the front of the make cmdline So that we do less visual searching on the 'make build-test' output to see the feature related variables: After: $ make -C tools/perf build-test make_no_newt_O: cd . && make NO_NEWT=1 FEATURES_DUMP=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP O=/tmp/tmp.dz55IX DESTDIR=/tmp/tmp.X29xxo make_tags_O: cd . && make tags FEATURES_DUMP=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP O=/tmp/tmp.6ecLh8 DESTDIR=/tmp/tmp.6vIla578Ho make_util_pmu_bison_o_O: cd . && make util/pmu-bison.o FEATURES_DUMP=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP O=/tmp/tmp.SVPM2G DESTDIR=/tmp/tmp.C0oAam Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-dx4krgzqa566v1pedrbrcchi@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/make | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 0b70cf16a562..12dcae7aa515 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -278,7 +278,7 @@ clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null) $(run): $(call clean) @TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && $(MAKE_F) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \ + cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \ printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1; \ echo " test: $(call test,$@)" >> $@ 2>&1; \ @@ -289,7 +289,7 @@ $(run_O): $(call clean) @TMP_O=$$(mktemp -d); \ TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && $(MAKE_F) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \ + cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \ printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo 
$$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 && \ echo " test: $(call test_O,$@)" >> $@ 2>&1; \ -- cgit v1.2.3 From 67f43c009778ddaae812aae29731bb04c256165e Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 4 Feb 2016 18:25:06 +0900 Subject: perf config: Document 'ui.show-headers' variable in man page This option controls display of column headers (like 'Overhead' and 'Symbol') in 'report' and 'top'. If this option is false, they are hidden. This option is only applied to TUI. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1454577913-16401-2-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 74589c68558a..42787222ad15 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -296,6 +296,12 @@ hist.*:: and 'baz' to 50.00% for each, while 'absolute' would show their current overhead (33.33%). +ui.*:: + ui.show-headers:: + This option controls display of column headers (like 'Overhead' and 'Symbol') + in 'report' and 'top'. If this option is false, they are hidden. + This option is only applied to TUI. + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 56c94dc56f9e4c1c09fbe26ad65715caa2259438 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 4 Feb 2016 18:25:07 +0900 Subject: perf config: Document variables for 'call-graph' section in man page Explain 'call-graph' section and its variables: 'record-mode', 'dump-size', 'print-type', 'order', 'sort-key', 'threshold' and 'print-limit'. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1454577913-16401-3-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 67 ++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 42787222ad15..42310ae7e636 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -302,6 +302,73 @@ ui.*:: in 'report' and 'top'. If this option is false, they are hidden. This option is only applied to TUI. +call-graph.*:: + When sub-commands 'top' and 'report' work with -g/--children + there are options that control the call-graph. + + call-graph.record-mode:: + The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'. + The value of 'dwarf' is effective only if perf detects the needed library + (libunwind or a recent version of libdw). + 'lbr' only works for CPUs that support it. + + call-graph.dump-size:: + The size of stack to dump in order to do post-unwinding. Default is 8192 (bytes). + When using 'dwarf' as the record-mode, the default size will be used if omitted. + + call-graph.print-type:: + The print-types can be graph (graph absolute), fractal (graph relative), + flat and folded. This option controls a way to show overhead for each callchain + entry. Consider the following example. + + Overhead Symbols + ........ ....... + 40.00% foo + | + ---foo + | + |--50.00%--bar + | main + | + --50.00%--baz + main + + This output is a 'fractal' format. The 'foo' came from 'bar' and 'baz' exactly + half and half so 'fractal' shows 50.00% for each + (meaning that it assumes 100% total overhead of 'foo'). + + The 'graph' uses absolute overhead value of 'foo' as total so each of + 'bar' and 'baz' callchain will have 20.00% of overhead. + If 'flat' is used, call chains are shown linearly in a single column. 
+ 'folded' mean call chains are displayed in a line, separated by semicolons. + + call-graph.order:: + This option controls print order of callchains. The default is + 'callee' which means callee is printed at top and then followed by its + caller and so on. The 'caller' prints it in reverse order. + + If this option is not set and report.children or top.children is + set to true (or the equivalent command line option is given), + the default value of this option is changed to 'caller' for the + execution of 'perf report' or 'perf top'. Other commands will + still default to 'callee'. + + call-graph.sort-key:: + The callchains are merged if they contain same information. + The sort-key option determines a way to compare the callchains. + A value of 'sort-key' can be 'function' or 'address'. + The default is 'function'. + + call-graph.threshold:: + When there're many callchains it'd print tons of lines. So perf omits + small callchains under a certain overhead (threshold) and this option + control the threshold. Default is 0.5 (%). The overhead is calculated + by value depends on call-graph.print-type. + + call-graph.print-limit:: + This is a maximum number of lines of callchain printed for a single + histogram entry. Default is 0 which means no limitation. + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 806cb95bb6cb25105b37d971d9916105898cb6fe Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 4 Feb 2016 18:25:08 +0900 Subject: perf config: Document variables for 'report' section in man page Explain 'report' section's variables: 'percent-limit', 'queue-size' and 'children'. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1454577913-16401-4-git-send-email-treeze.taeung@gmail.com [ Fix some grammar issues, add some more info ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 42310ae7e636..f38f46f67d74 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -369,6 +369,42 @@ call-graph.*:: This is a maximum number of lines of callchain printed for a single histogram entry. Default is 0 which means no limitation. +report.*:: + report.percent-limit:: + This one is mostly the same as call-graph.threshold but works for + histogram entries. Entries having an overhead lower than this + percentage will not be printed. Default is '0'. If percent-limit + is '10', only entries which have more than 10% of overhead will be + printed. + + report.queue-size:: + This option sets up the maximum allocation size of the internal + event queue for ordering events. Default is 0, meaning no limit. + + report.children:: + 'Children' means functions called from another function. + If this option is true, 'perf report' cumulates callchains of children + and show (accumulated) total overhead as well as 'Self' overhead. + Please refer to the 'perf report' manual. The default is 'true'. + + report.group:: + This option is to show event group information together. + Example output with this turned on, notice that there is one column + per event in the group, ref-cycles and cycles: + + # group: {ref-cycles,cycles} + # ======== + # + # Samples: 7K of event 'anon group { ref-cycles, cycles }' + # Event count (approx.): 6876107743 + # + # Overhead Command Shared Object Symbol + # ................ ....... ................. ................... 
+ # + 99.84% 99.76% noploop noploop [.] main + 0.07% 0.00% noploop ld-2.15.so [.] strcmp + 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 0b04c84087d3188c648628a6c73738314724c921 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 4 Feb 2016 18:25:09 +0900 Subject: perf config: Document 'top.children' variable in man page Explain 'top.children' variable. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1454577913-16401-5-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index f38f46f67d74..5e1db5ae53c4 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -405,6 +405,13 @@ report.*:: 0.07% 0.00% noploop ld-2.15.so [.] strcmp 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del +top.*:: + top.children:: + Same as 'report.children'. So if it is enabled, the output of 'top' + command will have 'Children' overhead column as well as 'Self' overhead + column by default. + The default is 'true'. + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 08b75b409e3799553a3536e628f1dba4c87d7c14 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 4 Feb 2016 18:25:10 +0900 Subject: perf config: Document 'man.viewer' variable in man page Explain 'man.viewer' variable and how to add new man viewer tools. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1454577913-16401-6-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 5e1db5ae53c4..fd3f048c9644 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -412,6 +412,15 @@ top.*:: column by default. The default is 'true'. +man.*:: + man.viewer:: + This option can assign a tool to view manual pages when 'help' + subcommand was invoked. Supported tools are 'man', 'woman' + (with emacs client) and 'konqueror'. Default is 'man'. + + New man viewer tool can be also added using 'man.<tool>.cmd' + or use different path using 'man.<tool>.path' config option. + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From ab2e08e8ba683f3e923a56b1e81b5c5e115bad0b Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 4 Feb 2016 18:25:11 +0900 Subject: perf config: Document 'pager.<subcommand>' variables in man page Explain 'pager.<subcommand>' variables. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1454577913-16401-7-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index fd3f048c9644..99aa72e5e9cf 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -421,6 +421,11 @@ man.*:: New man viewer tool can be also added using 'man.<tool>.cmd' or use different path using 'man.<tool>.path' config option. +pager.*:: + pager.<subcommand>:: + When the subcommand is run on stdio, determine whether it uses + pager or not based on this value.
Default is 'unspecified'. + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 57f0dafe6a41de6c9d81bc6c403349a261e10fc4 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 4 Feb 2016 18:25:12 +0900 Subject: perf config: Document 'kmem.default' variable in man page Explain 'kmem.default' variable. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1454577913-16401-8-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 99aa72e5e9cf..fb1f4a984e63 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -426,6 +426,11 @@ pager.*:: When the subcommand is run on stdio, determine whether it uses pager or not based on this value. Default is 'unspecified'. +kmem.*:: + kmem.default:: + This option decides which allocator is to be analyzed if neither + '--slab' nor '--page' option is used. Default is 'slab'. + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From a9edec3ce211d776736b35b14b9bd2c0b5ed860b Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 4 Feb 2016 18:25:13 +0900 Subject: perf config: Document 'record.build-id' variable in man page Explain 'record.build-id' variable. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1454577913-16401-9-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index fb1f4a984e63..c7158bfb1649 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -431,6 +431,14 @@ kmem.*:: This option decides which allocator is to be analyzed if neither '--slab' nor '--page' option is used. Default is 'slab'. +record.*:: + record.build-id:: + This option can be 'cache', 'no-cache' or 'skip'. + 'cache' is to post-process data and save/update the binaries into + the build-id cache (in ~/.debug). This is the default. + But if this option is 'no-cache', it will not update the build-id cache. + 'skip' skips post-processing and does not update the cache. + SEE ALSO -------- linkperf:perf[1] -- cgit v1.2.3 From 3e2751d9169563486c2bfe7382726f1315cb156b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 4 Feb 2016 12:30:36 +0100 Subject: perf tools: Fix parallel build including 'clean' target Do not parallelize 'clean' with other targets, figure out if it is present and do it first, then the other targets. Noticed with: tools/perf> make -j24 clean all LD arch/libperf-in.o LD plugin_xen-in.o arch//libperf-in.o: file not recognized: File truncated make[3]: *** [arch/libperf-in.o] Error 1 make[2]: *** [arch] Error 2 make[2]: *** Waiting for unfinished jobs.... 
AR libapi.a Reported-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Acked-by: Wang Nan Link: http://lkml.kernel.org/n/tip-kb0qs29zbz7hxn32mc5zbsoz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 4b68f465195c..67837c6cdbd8 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -68,6 +68,20 @@ all tags TAGS: $(print_msg) $(make) +ifdef MAKECMDGOALS +has_clean := 0 +ifneq ($(filter clean,$(MAKECMDGOALS)),) + has_clean := 1 +endif # clean + +ifeq ($(has_clean),1) + rest := $(filter-out clean,$(MAKECMDGOALS)) + ifneq ($(rest),) +$(rest): clean + endif # rest +endif # has_clean +endif # MAKECMDGOALS + # # The clean target is not really parallel, don't print the jobs info: # -- cgit v1.2.3 From be9e49911123516ef883836906269832aec37e01 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Feb 2016 17:28:45 -0300 Subject: perf build tests: Do parallel builds with 'build-test' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-jhmnf9g7y9ryqcjql00unk5y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 67837c6cdbd8..32a64e619028 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -99,7 +99,7 @@ clean: # make -C tools/perf -f tests/make # build-test: - @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile --no-print-directory tarpkg out + @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out # # All other targets get passed through: -- cgit v1.2.3 From 89fee59b504f86925894fcc9ba79d5c933842f93 Mon Sep 17 00:00:00 2001 From: Marcin Ślusarz Date: Tue, 19 Jan 2016 20:03:03 +0100
Subject: perf tools: handle spaces in file names obtained from /proc/pid/maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Steam frequently puts game binaries in folders with spaces. Note: "(deleted)" markers are now treated as part of the file name. Signed-off-by: Marcin Ślusarz Acked-by: Namhyung Kim Fixes: 6064803313ba ("perf tools: Use sscanf for parsing /proc/pid/maps") Link: http://lkml.kernel.org/r/20160119190303.GA17579@marcin-Inspiron-7720 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 85155e91b61b..7bad5c3fa7b7 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -282,7 +282,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n", &event->mmap2.start, &event->mmap2.len, prot, &event->mmap2.pgoff, &event->mmap2.maj, &event->mmap2.min, -- cgit v1.2.3 From e9c4bcdd349eb00f6c704450a063b3dcbea25864 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 30 Nov 2015 10:02:20 +0100 Subject: perf symbols: add Java demangling support Add Java function descriptor demangling support. Something bfd cannot do. Use the JAVA_DEMANGLE_NORET flag to avoid decoding the return type of functions. 
Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: Carl Love Cc: David Ahern Cc: Jiri Olsa Cc: John McCutchan Cc: Namhyung Kim Cc: Pawel Moll Cc: Peter Zijlstra Cc: Sonny Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1448874143-7269-2-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/demangle-java.c | 199 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/demangle-java.h | 10 ++ tools/perf/util/symbol-elf.c | 3 + 4 files changed, 213 insertions(+) create mode 100644 tools/perf/util/demangle-java.c create mode 100644 tools/perf/util/demangle-java.h (limited to 'tools') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 5eec53a3f4ac..edae107416b6 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -105,6 +105,7 @@ libperf-y += scripting-engines/ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o +libperf-y += demangle-java.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c new file mode 100644 index 000000000000..3e6062ab2cdd --- /dev/null +++ b/tools/perf/util/demangle-java.c @@ -0,0 +1,199 @@ +#include +#include +#include +#include "util.h" +#include "debug.h" +#include "symbol.h" + +#include "demangle-java.h" + +enum { + MODE_PREFIX = 0, + MODE_CLASS = 1, + MODE_FUNC = 2, + MODE_TYPE = 3, + MODE_CTYPE = 3, /* class arg */ +}; + +#define BASE_ENT(c, n) [c - 'A']=n +static const char *base_types['Z' - 'A' + 1] = { + BASE_ENT('B', "byte" ), + BASE_ENT('C', "char" ), + BASE_ENT('D', "double" ), + BASE_ENT('F', "float" ), + BASE_ENT('I', "int" ), + BASE_ENT('J', "long" ), + BASE_ENT('S', "short" ), + BASE_ENT('Z', "bool" ), +}; + +/* + * demangle Java symbol between str and end positions and stores + * up to maxlen characters into buf. The parser starts in mode. 
+ * + * Use MODE_PREFIX to process entire prototype till end position + * Use MODE_TYPE to process return type if str starts on return type char + * + * Return: + * success: buf + * error : NULL + */ +static char * +__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode) +{ + int rlen = 0; + int array = 0; + int narg = 0; + const char *q; + + if (!end) + end = str + strlen(str); + + for (q = str; q != end; q++) { + + if (rlen == (maxlen - 1)) + break; + + switch (*q) { + case 'L': + if (mode == MODE_PREFIX || mode == MODE_CTYPE) { + if (mode == MODE_CTYPE) { + if (narg) + rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); + narg++; + } + rlen += scnprintf(buf + rlen, maxlen - rlen, "class "); + if (mode == MODE_PREFIX) + mode = MODE_CLASS; + } else + buf[rlen++] = *q; + break; + case 'B': + case 'C': + case 'D': + case 'F': + case 'I': + case 'J': + case 'S': + case 'Z': + if (mode == MODE_TYPE) { + if (narg) + rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); + rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']); + while (array--) + rlen += scnprintf(buf + rlen, maxlen - rlen, "[]"); + array = 0; + narg++; + } else + buf[rlen++] = *q; + break; + case 'V': + if (mode == MODE_TYPE) { + rlen += scnprintf(buf + rlen, maxlen - rlen, "void"); + while (array--) + rlen += scnprintf(buf + rlen, maxlen - rlen, "[]"); + array = 0; + } else + buf[rlen++] = *q; + break; + case '[': + if (mode != MODE_TYPE) + goto error; + array++; + break; + case '(': + if (mode != MODE_FUNC) + goto error; + buf[rlen++] = *q; + mode = MODE_TYPE; + break; + case ')': + if (mode != MODE_TYPE) + goto error; + buf[rlen++] = *q; + narg = 0; + break; + case ';': + if (mode != MODE_CLASS && mode != MODE_CTYPE) + goto error; + /* safe because at least one other char to process */ + if (isalpha(*(q + 1))) + rlen += scnprintf(buf + rlen, maxlen - rlen, "."); + if (mode == MODE_CLASS) + mode = MODE_FUNC; + else if (mode == MODE_CTYPE) + mode = 
MODE_TYPE; + break; + case '/': + if (mode != MODE_CLASS && mode != MODE_CTYPE) + goto error; + rlen += scnprintf(buf + rlen, maxlen - rlen, "."); + break; + default : + buf[rlen++] = *q; + } + } + buf[rlen] = '\0'; + return buf; +error: + return NULL; +} + +/* + * Demangle Java function signature (openJDK, not GCJ) + * input: + * str: string to parse. String is not modified + * flags: comobination of JAVA_DEMANGLE_* flags to modify demangling + * return: + * if input can be demangled, then a newly allocated string is returned. + * if input cannot be demangled, then NULL is returned + * + * Note: caller is responsible for freeing demangled string + */ +char * +java_demangle_sym(const char *str, int flags) +{ + char *buf, *ptr; + char *p; + size_t len, l1 = 0; + + if (!str) + return NULL; + + /* find start of retunr type */ + p = strrchr(str, ')'); + if (!p) + return NULL; + + /* + * expansion factor estimated to 3x + */ + len = strlen(str) * 3 + 1; + buf = malloc(len); + if (!buf) + return NULL; + + buf[0] = '\0'; + if (!(flags & JAVA_DEMANGLE_NORET)) { + /* + * get return type first + */ + ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE); + if (!ptr) + goto error; + + /* add space between return type and function prototype */ + l1 = strlen(buf); + buf[l1++] = ' '; + } + + /* process function up to return type */ + ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX); + if (!ptr) + goto error; + + return buf; +error: + free(buf); + return NULL; +} diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h new file mode 100644 index 000000000000..a981c1f968fe --- /dev/null +++ b/tools/perf/util/demangle-java.h @@ -0,0 +1,10 @@ +#ifndef __PERF_DEMANGLE_JAVA +#define __PERF_DEMANGLE_JAVA 1 +/* + * demangle function flags + */ +#define JAVA_DEMANGLE_NORET 0x1 /* do not process return type */ + +char * java_demangle_sym(const char *str, int flags); + +#endif /* __PERF_DEMANGLE_JAVA */ diff --git 
a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 562b8ebeae5b..b1dd68f358fc 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -6,6 +6,7 @@ #include #include "symbol.h" +#include "demangle-java.h" #include "machine.h" #include "vdso.h" #include @@ -1077,6 +1078,8 @@ new_symbol: demangle_flags = DMGL_PARAMS | DMGL_ANSI; demangled = bfd_demangle(NULL, elf_name, demangle_flags); + if (demangled == NULL) + demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); if (demangled != NULL) elf_name = demangled; } -- cgit v1.2.3 From 8ee4646038e47d065d35703e3e343136c4cd42aa Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 30 Nov 2015 10:02:21 +0100 Subject: perf build: Add libcrypto feature detection Will be used to generate build-ids in the jitdump code. Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: Carl Love Cc: David Ahern Cc: Jiri Olsa Cc: John McCutchan Cc: Namhyung Kim Cc: Pawel Moll Cc: Peter Zijlstra Cc: Sonny Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1448874143-7269-3-git-send-email-eranian@google.com [ tools/perf/Makefile.perf comment about NO_LIBCRYPTO and added it to tests/make ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 2 ++ tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-libcrypto.c | 17 +++++++++++++++++ tools/perf/Makefile.perf | 3 +++ tools/perf/config/Makefile | 11 +++++++++++ tools/perf/tests/make | 2 ++ 7 files changed, 44 insertions(+) create mode 100644 tools/build/feature/test-libcrypto.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 7bff2ea831cf..6b7707270aa3 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -46,6 +46,7 @@ FEATURE_TESTS_BASIC := \ libpython \ libpython-version \ libslang \ + libcrypto \ libunwind \ pthread-attr-setaffinity-np \ stackprotector-all \ @@ -87,6 +88,7 
@@ FEATURE_DISPLAY ?= \ libperl \ libpython \ libslang \ + libcrypto \ libunwind \ libdw-dwarf-unwind \ zlib \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index bf8f0352264d..c5f4c417428d 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -23,6 +23,7 @@ FILES= \ test-libpython.bin \ test-libpython-version.bin \ test-libslang.bin \ + test-libcrypto.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ test-pthread-attr-setaffinity-np.bin \ @@ -105,6 +106,9 @@ $(OUTPUT)test-libaudit.bin: $(OUTPUT)test-libslang.bin: $(BUILD) -I/usr/include/slang -lslang +$(OUTPUT)test-libcrypto.bin: + $(BUILD) -lcrypto + $(OUTPUT)test-gtk2.bin: $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 81025cade45f..e499a36c1e4a 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -129,6 +129,10 @@ # include "test-bpf.c" #undef main +#define main main_test_libcrypto +# include "test-libcrypto.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -158,6 +162,7 @@ int main(int argc, char *argv[]) main_test_lzma(); main_test_get_cpuid(); main_test_bpf(); + main_test_libcrypto(); return 0; } diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c new file mode 100644 index 000000000000..bd79dc7f28d3 --- /dev/null +++ b/tools/build/feature/test-libcrypto.c @@ -0,0 +1,17 @@ +#include +#include + +int main(void) +{ + MD5_CTX context; + unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH]; + unsigned char dat[] = "12345"; + + MD5_Init(&context); + MD5_Update(&context, &dat[0], sizeof(dat)); + MD5_Final(&md[0], &context); + + SHA1(&dat[0], sizeof(dat), &md[0]); + + return 0; +} diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0ef3d97d7954..d404117810a7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ 
-58,6 +58,9 @@ include config/utilities.mak # # Define NO_LIBBIONIC if you do not want bionic support # +# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support +# used for generating build-ids for ELFs generated by jitdump. +# # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support # for dwarf backtrace post unwind. # diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 0045a5ddd0ca..f7aeaf303f5a 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -404,6 +404,17 @@ ifndef NO_LIBAUDIT endif endif +ifndef NO_LIBCRYPTO + ifneq ($(feature-libcrypto), 1) + msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev); + NO_LIBCRYPTO := 1 + else + CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT + EXTLIBS += -lcrypto + $(call detected,CONFIG_CRYPTO) + endif +endif + ifdef NO_NEWT NO_SLANG=1 endif diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 12dcae7aa515..cac15d93aea6 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -80,6 +80,7 @@ make_no_libaudit := NO_LIBAUDIT=1 make_no_libbionic := NO_LIBBIONIC=1 make_no_auxtrace := NO_AUXTRACE=1 make_no_libbpf := NO_LIBBPF=1 +make_no_libcrypto := NO_LIBCRYPTO=1 make_tags := tags make_cscope := cscope make_help := help @@ -103,6 +104,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 +make_minimal += NO_LIBCRYPTO=1 # $(run) contains all available tests run := make_pure -- cgit v1.2.3 From 921f3fadbc48c7c3799b415b895297cd476cf7f1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 22 Jan 2016 18:41:00 -0300 Subject: perf inject: Make sure mmap records are ordered when injecting build_ids To make sure the mmap records are ordered correctly and so that the correct 
especially due to jitted code mmaps. We cannot generate the buildid hit list and inject the jit mmaps (will come right after this patch) at the same time for now. Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: Carl Love Cc: David Ahern Cc: Jiri Olsa Cc: John McCutchan Cc: Namhyung Kim Cc: Pawel Moll Cc: Peter Zijlstra Cc: Sonny Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1448874143-7269-3-git-send-email-eranian@google.com [ Carved out from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 0022e02ed31a..6567baedd92a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -755,6 +755,17 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (inject.session == NULL) return -1; + if (inject.build_ids) { + /* + * to make sure the mmap records are ordered correctly + * and so that the correct especially due to jitted code + * mmaps. We cannot generate the buildid hit list and + * inject the jit mmaps at the same time for now. + */ + inject.tool.ordered_events = true; + inject.tool.ordering_requires_timestamps = true; + } + ret = symbol__init(&inject.session->header.env); if (ret < 0) goto out_delete; -- cgit v1.2.3 From 9b07e27f88b9cd785cdb23f9a2231c12521dda94 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 30 Nov 2015 10:02:21 +0100 Subject: perf inject: Add jitdump mmap injection support This patch adds a --jit/-j option to perf inject. This option injects MMAP records into the perf.data file to cover the jitted code mmaps. It also emits ELF images for each function in the jitdump file. Those images are created where the jitdump file is. The MMAP records point to that location as well.
Typical flow: $ perf record -k mono -- java -agentpath:libpjvmti.so java_class $ perf inject --jit -i perf.data -o perf.data.jitted $ perf report -i perf.data.jitted Note that jitdump.h support is not limited to Java, it works with any jitted environment modified to emit the jitdump file format, including those where code can be jitted multiple times and moved around. The jitdump.h format is adapted from the Oprofile project. The genelf.c (ELF binary generation) depends on MD5 hash encoding for the buildid. To enable this, libssl-dev must be installed. If not, then genelf.c defaults to using urandom to generate the buildid, which is not ideal. The Makefile auto-detects the presence of libssl-dev. This version mmaps the jitdump file to create a marker MMAP record in the perf.data file. The marker is used to detect jitdump and cause perf inject to inject the jitted mmaps and generate ELF images for jitted functions. In V8, the following fixes and changes were made among other things: - the jitdump header format includes a new flags field to be used to carry information about the configuration of the runtime agent. Contributed by: Adrian Hunter - Fix mmap pgoff: MMAP event pgoff must be the offset within the ELF file at which the code resides. Contributed by: Adrian Hunter - Fix ELF virtual addresses: perf tools expect the ELF virtual addresses of dynamic objects to match the file offset. Contributed by: Adrian Hunter - JIT MMAP injection does not obey finished_round semantics. JIT MMAP injection injects all MMAP events in one go, so it does not obey finished_round semantics, so drop the finished_round events from the output perf.data file.
Contributed by: Adrian Hunter Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: Carl Love Cc: David Ahern Cc: Jiri Olsa Cc: John McCutchan Cc: Namhyung Kim Cc: Pawel Moll Cc: Peter Zijlstra Cc: Sonny Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1448874143-7269-3-git-send-email-eranian@google.com [ Moved inject.build_ids ordering bits to a separate patch, fixed the NO_LIBELF=1 build ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-inject.txt | 7 + tools/perf/builtin-inject.c | 98 ++++- tools/perf/util/Build | 2 + tools/perf/util/genelf.c | 442 ++++++++++++++++++++ tools/perf/util/genelf.h | 63 +++ tools/perf/util/jit.h | 15 + tools/perf/util/jitdump.c | 670 +++++++++++++++++++++++++++++++ tools/perf/util/jitdump.h | 124 ++++++ 8 files changed, 1418 insertions(+), 3 deletions(-) create mode 100644 tools/perf/util/genelf.c create mode 100644 tools/perf/util/genelf.h create mode 100644 tools/perf/util/jit.h create mode 100644 tools/perf/util/jitdump.c create mode 100644 tools/perf/util/jitdump.h (limited to 'tools') diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 0b1cedeef895..87b2588d1cbd 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -53,6 +53,13 @@ include::itrace.txt[] --strip:: Use with --itrace to strip out non-synthesized events. +-j:: +--jit:: + Process jitdump files by injecting the mmap records corresponding to jitted + functions. This option also generates the ELF images for each jitted function + found in the jitdumps files captured in the input perf.data file. Use this option + if you are monitoring environment using JIT runtimes, such as Java, DART or V8. 
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6567baedd92a..b38445f08c2f 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -17,6 +17,7 @@ #include "util/build-id.h" #include "util/data.h" #include "util/auxtrace.h" +#include "util/jit.h" #include @@ -29,6 +30,7 @@ struct perf_inject { bool sched_stat; bool have_auxtrace; bool strip; + bool jit_mode; const char *input_name; struct perf_data_file output; u64 bytes_written; @@ -71,6 +73,15 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +#ifdef HAVE_LIBELF_SUPPORT +static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct ordered_events *oe __maybe_unused) +{ + return 0; +} +#endif + static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, struct perf_session *session @@ -234,6 +245,27 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } +#ifdef HAVE_LIBELF_SUPPORT +static int perf_event__jit_repipe_mmap(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + u64 n = 0; + + /* + * if jit marker, then inject jit mmaps and generate ELF images + */ + if (!jit_process(inject->session, &inject->output, machine, + event->mmap.filename, sample->pid, &n)) { + inject->bytes_written += n; + return 0; + } + return perf_event__repipe_mmap(tool, event, sample, machine); +} +#endif + static int perf_event__repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -247,6 +279,27 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool, return err; } +#ifdef HAVE_LIBELF_SUPPORT +static int perf_event__jit_repipe_mmap2(struct 
perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + u64 n = 0; + + /* + * if jit marker, then inject jit mmaps and generate ELF images + */ + if (!jit_process(inject->session, &inject->output, machine, + event->mmap2.filename, sample->pid, &n)) { + inject->bytes_written += n; + return 0; + } + return perf_event__repipe_mmap2(tool, event, sample, machine); +} +#endif + static int perf_event__repipe_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -664,6 +717,23 @@ static int __cmd_inject(struct perf_inject *inject) return ret; } +#ifdef HAVE_LIBELF_SUPPORT +static int +jit_validate_events(struct perf_session *session) +{ + struct perf_evsel *evsel; + + /* + * check that all events use CLOCK_MONOTONIC + */ + evlist__for_each(session->evlist, evsel) { + if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC) + return -1; + } + return 0; +} +#endif + int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_inject inject = { @@ -703,7 +773,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) }; int ret; - const struct option options[] = { + struct option options[] = { OPT_BOOLEAN('b', "build-ids", &inject.build_ids, "Inject build-ids into the output stream"), OPT_STRING('i', "input", &inject.input_name, "file", @@ -713,6 +783,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, "Merge sched-stat and sched-switch for getting events " "where and how long tasks slept"), + OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", @@ -729,7 +800,9 @@ int cmd_inject(int argc, const 
char **argv, const char *prefix __maybe_unused) "perf inject []", NULL }; - +#ifndef HAVE_LIBELF_SUPPORT + set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); +#endif argc = parse_options(argc, argv, options, inject_usage, 0); /* @@ -765,7 +838,26 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) inject.tool.ordered_events = true; inject.tool.ordering_requires_timestamps = true; } - +#ifdef HAVE_LIBELF_SUPPORT + if (inject.jit_mode) { + /* + * validate event is using the correct clockid + */ + if (jit_validate_events(inject.session)) { + fprintf(stderr, "error, jitted code must be sampled with perf record -k 1\n"); + return -1; + } + inject.tool.mmap2 = perf_event__jit_repipe_mmap2; + inject.tool.mmap = perf_event__jit_repipe_mmap; + inject.tool.ordered_events = true; + inject.tool.ordering_requires_timestamps = true; + /* + * JIT MMAP injection injects all MMAP events in one go, so it + * does not obey finished_round semantics. + */ + inject.tool.finished_round = perf_event__drop_oe; + } +#endif ret = symbol__init(&inject.session->header.env); if (ret < 0) goto out_delete; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index edae107416b6..52a4a806ee2f 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -106,6 +106,8 @@ libperf-y += scripting-engines/ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o libperf-y += demangle-java.o +libperf-$(CONFIG_LIBELF) += jitdump.o +libperf-$(CONFIG_LIBELF) += genelf.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c new file mode 100644 index 000000000000..145f8116ef56 --- /dev/null +++ b/tools/perf/util/genelf.c @@ -0,0 +1,442 @@ +/* + * genelf.c + * Copyright (C) 2014, Google, Inc + * + * Contributed by: + * Stephane Eranian + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "perf.h" +#include "genelf.h" +#include "../util/jitdump.h" + +#define JVMTI + +#define BUILD_ID_URANDOM /* different uuid for each run */ + +#ifdef HAVE_LIBCRYPTO + +#define BUILD_ID_MD5 +#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */ +#undef BUILD_ID_URANDOM /* different uuid for each run */ + +#ifdef BUILD_ID_SHA +#include +#endif + +#ifdef BUILD_ID_MD5 +#include +#endif +#endif + + +typedef struct { + unsigned int namesz; /* Size of entry's owner string */ + unsigned int descsz; /* Size of the note descriptor */ + unsigned int type; /* Interpretation of the descriptor */ + char name[0]; /* Start of the name+desc data */ +} Elf_Note; + +struct options { + char *output; + int fd; +}; + +static char shd_string_table[] = { + 0, + '.', 't', 'e', 'x', 't', 0, /* 1 */ + '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /* 7 */ + '.', 's', 'y', 'm', 't', 'a', 'b', 0, /* 17 */ + '.', 's', 't', 'r', 't', 'a', 'b', 0, /* 25 */ + '.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */ +}; + +static struct buildid_note { + Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */ + char name[4]; /* GNU\0 */ + char build_id[20]; +} bnote; + +static Elf_Sym symtab[]={ + /* symbol 0 MUST be the undefined symbol */ + { .st_name = 0, /* index in sym_string table */ + .st_info = ELF_ST_TYPE(STT_NOTYPE), + .st_shndx = 0, /* for now */ + .st_value = 0x0, + .st_other = ELF_ST_VIS(STV_DEFAULT), + .st_size = 0, + }, + { .st_name = 1, /* index in sym_string table */ + .st_info = ELF_ST_BIND(STB_LOCAL) | 
ELF_ST_TYPE(STT_FUNC), + .st_shndx = 1, + .st_value = 0, /* for now */ + .st_other = ELF_ST_VIS(STV_DEFAULT), + .st_size = 0, /* for now */ + } +}; + +#ifdef BUILD_ID_URANDOM +static void +gen_build_id(struct buildid_note *note, + unsigned long load_addr __maybe_unused, + const void *code __maybe_unused, + size_t csize __maybe_unused) +{ + int fd; + size_t sz = sizeof(note->build_id); + ssize_t sret; + + fd = open("/dev/urandom", O_RDONLY); + if (fd == -1) + err(1, "cannot access /dev/urandom for builid"); + + sret = read(fd, note->build_id, sz); + + close(fd); + + if (sret != (ssize_t)sz) + memset(note->build_id, 0, sz); +} +#endif + +#ifdef BUILD_ID_SHA +static void +gen_build_id(struct buildid_note *note, + unsigned long load_addr __maybe_unused, + const void *code, + size_t csize) +{ + if (sizeof(note->build_id) < SHA_DIGEST_LENGTH) + errx(1, "build_id too small for SHA1"); + + SHA1(code, csize, (unsigned char *)note->build_id); +} +#endif + +#ifdef BUILD_ID_MD5 +static void +gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize) +{ + MD5_CTX context; + + if (sizeof(note->build_id) < 16) + errx(1, "build_id too small for MD5"); + + MD5_Init(&context); + MD5_Update(&context, &load_addr, sizeof(load_addr)); + MD5_Update(&context, code, csize); + MD5_Final((unsigned char *)note->build_id, &context); +} +#endif + +/* + * fd: file descriptor open for writing for the output file + * load_addr: code load address (could be zero, just used for buildid) + * sym: function name (for native code - used as the symbol) + * code: the native code + * csize: the code size in bytes + */ +int +jit_write_elf(int fd, uint64_t load_addr, const char *sym, + const void *code, int csize) +{ + Elf *e; + Elf_Data *d; + Elf_Scn *scn; + Elf_Ehdr *ehdr; + Elf_Shdr *shdr; + char *strsym = NULL; + int symlen; + int retval = -1; + + if (elf_version(EV_CURRENT) == EV_NONE) { + warnx("ELF initialization failed"); + return -1; + } + + e = elf_begin(fd, 
ELF_C_WRITE, NULL); + if (!e) { + warnx("elf_begin failed"); + goto error; + } + + /* + * setup ELF header + */ + ehdr = elf_newehdr(e); + if (!ehdr) { + warnx("cannot get ehdr"); + goto error; + } + + ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN; + ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS; + ehdr->e_machine = GEN_ELF_ARCH; + ehdr->e_type = ET_DYN; + ehdr->e_entry = GEN_ELF_TEXT_OFFSET; + ehdr->e_version = EV_CURRENT; + ehdr->e_shstrndx= 2; /* shdr index for section name */ + + /* + * setup text section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 16; + d->d_off = 0LL; + d->d_buf = (void *)code; + d->d_type = ELF_T_BYTE; + d->d_size = csize; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 1; + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = GEN_ELF_TEXT_OFFSET; + shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + shdr->sh_entsize = 0; + + /* + * setup section headers string table + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = shd_string_table; + d->d_type = ELF_T_BYTE; + d->d_size = sizeof(shd_string_table); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */ + shdr->sh_type = SHT_STRTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup symtab section + */ + symtab[1].st_size = csize; + symtab[1].st_value = GEN_ELF_TEXT_OFFSET; + + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new 
data"); + goto error; + } + + d->d_align = 8; + d->d_off = 0LL; + d->d_buf = symtab; + d->d_type = ELF_T_SYM; + d->d_size = sizeof(symtab); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */ + shdr->sh_type = SHT_SYMTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = sizeof(Elf_Sym); + shdr->sh_link = 4; /* index of .strtab section */ + + /* + * setup symbols string table + * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry + */ + symlen = 2 + strlen(sym); + strsym = calloc(1, symlen); + if (!strsym) { + warnx("cannot allocate strsym"); + goto error; + } + strcpy(strsym + 1, sym); + + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = strsym; + d->d_type = ELF_T_BYTE; + d->d_size = symlen; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 25; /* offset in shd_string_table */ + shdr->sh_type = SHT_STRTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup build-id section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + /* + * build-id generation + */ + gen_build_id(&bnote, load_addr, code, csize); + bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */ + bnote.desc.descsz = sizeof(bnote.build_id); + bnote.desc.type = NT_GNU_BUILD_ID; + strcpy(bnote.name, "GNU"); + + d->d_align = 4; + d->d_off = 0LL; + d->d_buf = &bnote; + d->d_type = ELF_T_BYTE; + d->d_size = sizeof(bnote); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get 
section header"); + goto error; + } + + shdr->sh_name = 33; /* offset in shd_string_table */ + shdr->sh_type = SHT_NOTE; + shdr->sh_addr = 0x0; + shdr->sh_flags = SHF_ALLOC; + shdr->sh_size = sizeof(bnote); + shdr->sh_entsize = 0; + + if (elf_update(e, ELF_C_WRITE) < 0) { + warnx("elf_update 4 failed"); + goto error; + } + + retval = 0; +error: + (void)elf_end(e); + + free(strsym); + + + return retval; +} + +#ifndef JVMTI + +static unsigned char x86_code[] = { + 0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */ + 0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */ + 0xCD, 0x80 /* int $0x80 */ +}; + +static struct options options; + +int main(int argc, char **argv) +{ + int c, fd, ret; + + while ((c = getopt(argc, argv, "o:h")) != -1) { + switch (c) { + case 'o': + options.output = optarg; + break; + case 'h': + printf("Usage: genelf -o output_file [-h]\n"); + return 0; + default: + errx(1, "unknown option"); + } + } + + fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666); + if (fd == -1) + err(1, "cannot create file %s", options.output); + + ret = jit_write_elf(fd, "main", x86_code, sizeof(x86_code)); + close(fd); + + if (ret != 0) + unlink(options.output); + + return ret; +} +#endif diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h new file mode 100644 index 000000000000..d8e9ece13c8b --- /dev/null +++ b/tools/perf/util/genelf.h @@ -0,0 +1,63 @@ +#ifndef __GENELF_H__ +#define __GENELF_H__ + +/* genelf.c */ +extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym, + const void *code, int csize); + +#if defined(__arm__) +#define GEN_ELF_ARCH EM_ARM +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__aarch64__) +#define GEN_ELF_ARCH EM_AARCH64 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__x86_64__) +#define GEN_ELF_ARCH EM_X86_64 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__i386__) +#define GEN_ELF_ARCH EM_386 +#define 
GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__ppcle__) +#define GEN_ELF_ARCH EM_PPC +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__powerpc__) +#define GEN_ELF_ARCH EM_PPC64 +#define GEN_ELF_ENDIAN ELFDATA2MSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__powerpcle__) +#define GEN_ELF_ARCH EM_PPC64 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#else +#error "unsupported architecture" +#endif + +#if GEN_ELF_CLASS == ELFCLASS64 +#define elf_newehdr elf64_newehdr +#define elf_getshdr elf64_getshdr +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define ELF_ST_TYPE(a) ELF64_ST_TYPE(a) +#define ELF_ST_BIND(a) ELF64_ST_BIND(a) +#define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a) +#else +#define elf_newehdr elf32_newehdr +#define elf_getshdr elf32_getshdr +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define ELF_ST_TYPE(a) ELF32_ST_TYPE(a) +#define ELF_ST_BIND(a) ELF32_ST_BIND(a) +#define ELF_ST_VIS(a) ELF32_ST_VISIBILITY(a) +#endif + +/* The .text section is directly after the ELF header */ +#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr) + +#endif diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h new file mode 100644 index 000000000000..a1e99da0715a --- /dev/null +++ b/tools/perf/util/jit.h @@ -0,0 +1,15 @@ +#ifndef __JIT_H__ +#define __JIT_H__ + +#include + +extern int jit_process(struct perf_session *session, + struct perf_data_file *output, + struct machine *machine, + char *filename, + pid_t pid, + u64 *nbytes); + +extern int jit_inject_record(const char *filename); + +#endif /* __JIT_H__ */ diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c new file mode 100644 index 000000000000..9f7a01289efe --- /dev/null +++ b/tools/perf/util/jitdump.c @@ -0,0 +1,670 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" 
+#include "event.h" +#include "debug.h" +#include "evlist.h" +#include "symbol.h" +#include "strlist.h" +#include + +#include "session.h" +#include "jit.h" +#include "jitdump.h" +#include "genelf.h" +#include "../builtin.h" + +struct jit_buf_desc { + struct perf_data_file *output; + struct perf_session *session; + struct machine *machine; + union jr_entry *entry; + void *buf; + uint64_t sample_type; + size_t bufsize; + FILE *in; + bool needs_bswap; /* handles cross-endianess */ + void *debug_data; + size_t nr_debug_entries; + uint32_t code_load_count; + u64 bytes_written; + struct rb_root code_root; + char dir[PATH_MAX]; +}; + +struct debug_line_info { + unsigned long vma; + unsigned int lineno; + /* The filename format is unspecified, absolute path, relative etc. */ + char const filename[0]; +}; + +struct jit_tool { + struct perf_tool tool; + struct perf_data_file output; + struct perf_data_file input; + u64 bytes_written; +}; + +#define hmax(a, b) ((a) > (b) ? (a) : (b)) +#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool)) + +static int +jit_emit_elf(char *filename, + const char *sym, + uint64_t code_addr, + const void *code, + int csize) +{ + int ret, fd; + + if (verbose > 0) + fprintf(stderr, "write ELF image %s\n", filename); + + fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644); + if (fd == -1) { + pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno)); + return -1; + } + + ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize); + + close(fd); + + if (ret) + unlink(filename); + + return ret; +} + +static void +jit_close(struct jit_buf_desc *jd) +{ + if (!(jd && jd->in)) + return; + funlockfile(jd->in); + fclose(jd->in); + jd->in = NULL; +} + +static int +jit_open(struct jit_buf_desc *jd, const char *name) +{ + struct jitheader header; + struct jr_prefix *prefix; + ssize_t bs, bsz = 0; + void *n, *buf = NULL; + int ret, retval = -1; + + jd->in = fopen(name, "r"); + if (!jd->in) + return -1; + + bsz = 
hmax(sizeof(header), sizeof(*prefix)); + + buf = malloc(bsz); + if (!buf) + goto error; + + /* + * protect from writer modifying the file while we are reading it + */ + flockfile(jd->in); + + ret = fread(buf, sizeof(header), 1, jd->in); + if (ret != 1) + goto error; + + memcpy(&header, buf, sizeof(header)); + + if (header.magic != JITHEADER_MAGIC) { + if (header.magic != JITHEADER_MAGIC_SW) + goto error; + jd->needs_bswap = true; + } + + if (jd->needs_bswap) { + header.version = bswap_32(header.version); + header.total_size = bswap_32(header.total_size); + header.pid = bswap_32(header.pid); + header.elf_mach = bswap_32(header.elf_mach); + header.timestamp = bswap_64(header.timestamp); + header.flags = bswap_64(header.flags); + } + + if (verbose > 2) + pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n", + header.version, + header.total_size, + (unsigned long long)header.timestamp, + header.pid, + header.elf_mach); + + if (header.flags & JITDUMP_FLAGS_RESERVED) { + pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", + (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED); + goto error; + } + + bs = header.total_size - sizeof(header); + + if (bs > bsz) { + n = realloc(buf, bs); + if (!n) + goto error; + bsz = bs; + buf = n; + /* read extra we do not know about */ + ret = fread(buf, bs - bsz, 1, jd->in); + if (ret != 1) + goto error; + } + /* + * keep dirname for generating files and mmap records + */ + strcpy(jd->dir, name); + dirname(jd->dir); + + return 0; +error: + funlockfile(jd->in); + fclose(jd->in); + return retval; +} + +static union jr_entry * +jit_get_next_entry(struct jit_buf_desc *jd) +{ + struct jr_prefix *prefix; + union jr_entry *jr; + void *addr; + size_t bs, size; + int id, ret; + + if (!(jd && jd->in)) + return NULL; + + if (jd->buf == NULL) { + size_t sz = getpagesize(); + if (sz < sizeof(*prefix)) + sz = sizeof(*prefix); + + jd->buf = malloc(sz); + if (jd->buf == NULL) + return NULL; + + jd->bufsize = sz; + 
} + + prefix = jd->buf; + + /* + * file is still locked at this point + */ + ret = fread(prefix, sizeof(*prefix), 1, jd->in); + if (ret != 1) + return NULL; + + if (jd->needs_bswap) { + prefix->id = bswap_32(prefix->id); + prefix->total_size = bswap_32(prefix->total_size); + prefix->timestamp = bswap_64(prefix->timestamp); + } + id = prefix->id; + size = prefix->total_size; + + bs = (size_t)size; + if (bs < sizeof(*prefix)) + return NULL; + + if (id >= JIT_CODE_MAX) { + pr_warning("next_entry: unknown prefix %d, skipping\n", id); + return NULL; + } + if (bs > jd->bufsize) { + void *n; + n = realloc(jd->buf, bs); + if (!n) + return NULL; + jd->buf = n; + jd->bufsize = bs; + } + + addr = ((void *)jd->buf) + sizeof(*prefix); + + ret = fread(addr, bs - sizeof(*prefix), 1, jd->in); + if (ret != 1) + return NULL; + + jr = (union jr_entry *)jd->buf; + + switch(id) { + case JIT_CODE_DEBUG_INFO: + if (jd->needs_bswap) { + uint64_t n; + jr->info.code_addr = bswap_64(jr->info.code_addr); + jr->info.nr_entry = bswap_64(jr->info.nr_entry); + for (n = 0 ; n < jr->info.nr_entry; n++) { + jr->info.entries[n].addr = bswap_64(jr->info.entries[n].addr); + jr->info.entries[n].lineno = bswap_32(jr->info.entries[n].lineno); + jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim); + } + } + break; + case JIT_CODE_CLOSE: + break; + case JIT_CODE_LOAD: + if (jd->needs_bswap) { + jr->load.pid = bswap_32(jr->load.pid); + jr->load.tid = bswap_32(jr->load.tid); + jr->load.vma = bswap_64(jr->load.vma); + jr->load.code_addr = bswap_64(jr->load.code_addr); + jr->load.code_size = bswap_64(jr->load.code_size); + jr->load.code_index= bswap_64(jr->load.code_index); + } + jd->code_load_count++; + break; + case JIT_CODE_MOVE: + if (jd->needs_bswap) { + jr->move.pid = bswap_32(jr->move.pid); + jr->move.tid = bswap_32(jr->move.tid); + jr->move.vma = bswap_64(jr->move.vma); + jr->move.old_code_addr = bswap_64(jr->move.old_code_addr); + jr->move.new_code_addr = 
bswap_64(jr->move.new_code_addr); + jr->move.code_size = bswap_64(jr->move.code_size); + jr->move.code_index = bswap_64(jr->move.code_index); + } + break; + case JIT_CODE_MAX: + default: + return NULL; + } + return jr; +} + +static int +jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) +{ + ssize_t size; + + size = perf_data_file__write(jd->output, event, event->header.size); + if (size < 0) + return -1; + + jd->bytes_written += size; + return 0; +} + +static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) +{ + struct perf_sample sample; + union perf_event *event; + struct perf_tool *tool = jd->session->tool; + uint64_t code, addr; + uintptr_t uaddr; + char *filename; + struct stat st; + size_t size; + u16 idr_size; + const char *sym; + uint32_t count; + int ret, csize; + pid_t pid, tid; + struct { + u32 pid, tid; + u64 time; + } *id; + + pid = jr->load.pid; + tid = jr->load.tid; + csize = jr->load.code_size; + addr = jr->load.code_addr; + sym = (void *)((unsigned long)jr + sizeof(jr->load)); + code = (unsigned long)jr + jr->load.p.total_size - csize; + count = jr->load.code_index; + idr_size = jd->machine->id_hdr_size; + + event = calloc(1, sizeof(*event) + idr_size); + if (!event) + return -1; + + filename = event->mmap2.filename; + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so", + jd->dir, + pid, + count); + + size++; /* for \0 */ + + size = PERF_ALIGN(size, sizeof(u64)); + uaddr = (uintptr_t)code; + ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize); + + if (jd->debug_data && jd->nr_debug_entries) { + free(jd->debug_data); + jd->debug_data = NULL; + jd->nr_debug_entries = 0; + } + + if (ret) { + free(event); + return -1; + } + if (stat(filename, &st)) + memset(&st, 0, sizeof(stat)); + + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.misc = PERF_RECORD_MISC_USER; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + idr_size); + + 
event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; + event->mmap2.start = addr; + event->mmap2.len = csize; + event->mmap2.pid = pid; + event->mmap2.tid = tid; + event->mmap2.ino = st.st_ino; + event->mmap2.maj = major(st.st_dev); + event->mmap2.min = minor(st.st_dev); + event->mmap2.prot = st.st_mode; + event->mmap2.flags = MAP_SHARED; + event->mmap2.ino_generation = 1; + + id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); + if (jd->sample_type & PERF_SAMPLE_TID) { + id->pid = pid; + id->tid = tid; + } + if (jd->sample_type & PERF_SAMPLE_TIME) + id->time = jr->load.p.timestamp; + + /* + * create pseudo sample to induce dso hit increment + * use first address as sample address + */ + memset(&sample, 0, sizeof(sample)); + sample.pid = pid; + sample.tid = tid; + sample.time = id->time; + sample.ip = addr; + + ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); + if (ret) + return ret; + + ret = jit_inject_event(jd, event); + /* + * mark dso as use to generate buildid in the header + */ + if (!ret) + build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); + + return ret; +} + +static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) +{ + struct perf_sample sample; + union perf_event *event; + struct perf_tool *tool = jd->session->tool; + char *filename; + size_t size; + struct stat st; + u16 idr_size; + int ret; + pid_t pid, tid; + struct { + u32 pid, tid; + u64 time; + } *id; + + pid = jr->move.pid; + tid = jr->move.tid; + idr_size = jd->machine->id_hdr_size; + + /* + * +16 to account for sample_id_all (hack) + */ + event = calloc(1, sizeof(*event) + 16); + if (!event) + return -1; + + filename = event->mmap2.filename; + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64, + jd->dir, + pid, + jr->move.code_index); + + size++; /* for \0 */ + + if (stat(filename, &st)) + memset(&st, 0, sizeof(stat)); + + size = PERF_ALIGN(size, sizeof(u64)); + + event->mmap2.header.type = PERF_RECORD_MMAP2; + 
event->mmap2.header.misc = PERF_RECORD_MISC_USER; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + idr_size); + event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; + event->mmap2.start = jr->move.new_code_addr; + event->mmap2.len = jr->move.code_size; + event->mmap2.pid = pid; + event->mmap2.tid = tid; + event->mmap2.ino = st.st_ino; + event->mmap2.maj = major(st.st_dev); + event->mmap2.min = minor(st.st_dev); + event->mmap2.prot = st.st_mode; + event->mmap2.flags = MAP_SHARED; + event->mmap2.ino_generation = 1; + + id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); + if (jd->sample_type & PERF_SAMPLE_TID) { + id->pid = pid; + id->tid = tid; + } + if (jd->sample_type & PERF_SAMPLE_TIME) + id->time = jr->load.p.timestamp; + + /* + * create pseudo sample to induce dso hit increment + * use first address as sample address + */ + memset(&sample, 0, sizeof(sample)); + sample.pid = pid; + sample.tid = tid; + sample.time = id->time; + sample.ip = jr->move.new_code_addr; + + ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); + if (ret) + return ret; + + ret = jit_inject_event(jd, event); + if (!ret) + build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); + + return ret; +} + +static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr) +{ + void *data; + size_t sz; + + if (!(jd && jr)) + return -1; + + sz = jr->prefix.total_size - sizeof(jr->info); + data = malloc(sz); + if (!data) + return -1; + + memcpy(data, &jr->info.entries, sz); + + jd->debug_data = data; + + /* + * we must use nr_entry instead of size here because + * we cannot distinguish actual entry from padding otherwise + */ + jd->nr_debug_entries = jr->info.nr_entry; + + return 0; +} + +static int +jit_process_dump(struct jit_buf_desc *jd) +{ + union jr_entry *jr; + int ret; + + while ((jr = jit_get_next_entry(jd))) { + switch(jr->prefix.id) { + case JIT_CODE_LOAD: + ret = jit_repipe_code_load(jd, jr); + 
break; + case JIT_CODE_MOVE: + ret = jit_repipe_code_move(jd, jr); + break; + case JIT_CODE_DEBUG_INFO: + ret = jit_repipe_debug_info(jd, jr); + break; + default: + ret = 0; + continue; + } + } + return ret; +} + +static int +jit_inject(struct jit_buf_desc *jd, char *path) +{ + int ret; + + if (verbose > 0) + fprintf(stderr, "injecting: %s\n", path); + + ret = jit_open(jd, path); + if (ret) + return -1; + + ret = jit_process_dump(jd); + + jit_close(jd); + + if (verbose > 0) + fprintf(stderr, "injected: %s (%d)\n", path, ret); + + return 0; +} + +/* + * File must be with pattern .../jit-XXXX.dump + * where XXXX is the PID of the process which did the mmap() + * as captured in the RECORD_MMAP record + */ +static int +jit_detect(char *mmap_name, pid_t pid) + { + char *p; + char *end = NULL; + pid_t pid2; + + if (verbose > 2) + fprintf(stderr, "jit marker trying : %s\n", mmap_name); + /* + * get file name + */ + p = strrchr(mmap_name, '/'); + if (!p) + return -1; + + /* + * match prefix + */ + if (strncmp(p, "/jit-", 5)) + return -1; + + /* + * skip prefix + */ + p += 5; + + /* + * must be followed by a pid + */ + if (!isdigit(*p)) + return -1; + + pid2 = (int)strtol(p, &end, 10); + if (!end) + return -1; + + /* + * pid does not match mmap pid + * pid==0 in system-wide mode (synthesized) + */ + if (pid && pid2 != pid) + return -1; + /* + * validate suffix + */ + if (strcmp(end, ".dump")) + return -1; + + if (verbose > 0) + fprintf(stderr, "jit marker found: %s\n", mmap_name); + + return 0; +} + +int +jit_process(struct perf_session *session, + struct perf_data_file *output, + struct machine *machine, + char *filename, + pid_t pid, + u64 *nbytes) +{ + struct perf_evsel *first; + struct jit_buf_desc jd; + int ret; + + /* + * first, detect marker mmap (i.e., the jitdump mmap) + */ + if (jit_detect(filename, pid)) + return -1; + + memset(&jd, 0, sizeof(jd)); + + jd.session = session; + jd.output = output; + jd.machine = machine; + + /* + * track sample_type to compute 
id_all layout + * perf sets the same sample type to all events as of now + */ + first = perf_evlist__first(session->evlist); + jd.sample_type = first->attr.sample_type; + + *nbytes = 0; + + ret = jit_inject(&jd, filename); + if (!ret) + *nbytes = jd.bytes_written; + + return ret; +} diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h new file mode 100644 index 000000000000..b66c1f503d9e --- /dev/null +++ b/tools/perf/util/jitdump.h @@ -0,0 +1,124 @@ +/* + * jitdump.h: jitted code info encapsulation file format + * + * Adapted from OProfile GPLv2 support jidump.h: + * Copyright 2007 OProfile authors + * Jens Wilke + * Daniel Hansel + * Copyright IBM Corporation 2007 + */ +#ifndef JITDUMP_H +#define JITDUMP_H + +#include +#include +#include + +/* JiTD */ +#define JITHEADER_MAGIC 0x4A695444 +#define JITHEADER_MAGIC_SW 0x4454694A + +#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7) + +#define JITHEADER_VERSION 1 + +enum jitdump_flags_bits { + JITDUMP_FLAGS_MAX_BIT, +}; + +#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? 
\ + (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0) + +struct jitheader { + uint32_t magic; /* characters "jItD" */ + uint32_t version; /* header version */ + uint32_t total_size; /* total size of header */ + uint32_t elf_mach; /* elf mach target */ + uint32_t pad1; /* reserved */ + uint32_t pid; /* JIT process id */ + uint64_t timestamp; /* timestamp */ + uint64_t flags; /* flags */ +}; + +enum jit_record_type { + JIT_CODE_LOAD = 0, + JIT_CODE_MOVE = 1, + JIT_CODE_DEBUG_INFO = 2, + JIT_CODE_CLOSE = 3, + + JIT_CODE_MAX, +}; + +/* record prefix (mandatory in each record) */ +struct jr_prefix { + uint32_t id; + uint32_t total_size; + uint64_t timestamp; +}; + +struct jr_code_load { + struct jr_prefix p; + + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t code_addr; + uint64_t code_size; + uint64_t code_index; +}; + +struct jr_code_close { + struct jr_prefix p; +}; + +struct jr_code_move { + struct jr_prefix p; + + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t old_code_addr; + uint64_t new_code_addr; + uint64_t code_size; + uint64_t code_index; +}; + +struct debug_entry { + uint64_t addr; + int lineno; /* source line number starting at 1 */ + int discrim; /* column discriminator, 0 is default */ + const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */ +}; + +struct jr_code_debug_info { + struct jr_prefix p; + + uint64_t code_addr; + uint64_t nr_entry; + struct debug_entry entries[0]; +}; + +union jr_entry { + struct jr_code_debug_info info; + struct jr_code_close close; + struct jr_code_load load; + struct jr_code_move move; + struct jr_prefix prefix; +}; + +static inline struct debug_entry * +debug_entry_next(struct debug_entry *ent) +{ + void *a = ent + 1; + size_t l = strlen(ent->name) + 1; + return a + l; +} + +static inline char * +debug_entry_file(struct debug_entry *ent) +{ + void *a = ent + 1; + return a; +} + +#endif /* !JITDUMP_H */ -- cgit v1.2.3 From 209045adc2bbdb2b315fa5539cec54d01cd3e7db Mon Sep 17 
00:00:00 2001 From: Stephane Eranian Date: Mon, 30 Nov 2015 10:02:22 +0100 Subject: perf tools: add JVMTI agent library This is a standalone JVMTI library to help profile Java jitted code with perf record/perf report. The library is not installed or compiled automatically by the perf Makefile. It is not used directly by perf. It is arch agnostic and has been tested on X86 and ARM. It needs to be used with a Java runtime, such as OpenJDK, as follows: $ java -agentpath:libjvmti.so ....... See the "Committer Notes" below on how to build it. When used this way, java will generate a jitdump binary file in $HOME/.debug/jit/java-jit-* This binary dump file contains information to help symbolize and annotate jitted code. The jitdump information must be injected into the perf.data file using: $ perf inject --jit -i perf.data -o perf.data.jitted This injects the MMAP records to cover the jitted code and also generates one ELF image for each jitted function. The ELF images are created in the same subdir as the jitdump file. The MMAP records point there too. Then, to visualize the function or asm profile, simply use the regular perf commands: $ perf report -i perf.data.jitted or $ perf annotate -i perf.data.jitted JVMTI agent code adapted from OProfile's opagent code. This version of the JVMTI agent uses CLOCK_MONOTONIC as the time source to timestamp jit samples. To correlate with perf_events samples, it needs to run on kernel 4.0.0-rc5+ or later with the following commit from Peter Zijlstra: 34f439278cef ("perf: Add per event clockid support") With this patch, recording jitted code is done as follows: $ perf record -k mono -- java -agentpath:libjvmti.so .......
-------------------------------------------------------------------------- Committer Notes: Extended testing instructions: $ cd tools/perf/jvmti/ $ dnf install java-devel $ make Then, create some simple java stuff to record some samples: $ cat hello.java public class hello { public static void main(String[] args) { System.out.println("Hello, World"); } } $ javac hello.java $ java hello Hello, World $ And then record it using this jvmti thing: $ perf record -k mono java -agentpath:/home/acme/git/linux/tools/perf/jvmti/libjvmti.so hello java: jvmti: jitdump in /home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jit-1908.dump Hello, World [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.030 MB perf.data (268 samples) ] $ Now let's insert the PERF_RECORD_MMAP2 records to point jitted mmaps to files created by the agent: $ perf inject --jit -i perf.data -o perf.data.jitted And finally see that it did its job: $ perf report -D -i perf.data.jitted | grep PERF_RECORD_MMAP2 | tail -5 79197149129422 0xfe10 [0xa0]: PERF_RECORD_MMAP2 1908/1923: [0x7f172428bd60(0x80) @ 0x40 fd:02 1840554 1]: --xs /home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jitted-1908-283.so 79197149235701 0xfeb0 [0xa0]: PERF_RECORD_MMAP2 1908/1923: [0x7f172428ba60(0x180) @ 0x40 fd:02 1840555 1]: --xs /home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jitted-1908-284.so 79197149250558 0xff50 [0xa0]: PERF_RECORD_MMAP2 1908/1923: [0x7f172428b860(0x180) @ 0x40 fd:02 1840556 1]: --xs /home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jitted-1908-285.so 79197149714746 0xfff0 [0xa0]: PERF_RECORD_MMAP2 1908/1923: [0x7f172428b660(0x180) @ 0x40 fd:02 1840557 1]: --xs /home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jitted-1908-286.so 79197149806558 0x10090 [0xa0]: PERF_RECORD_MMAP2 1908/1923: [0x7f172428b460(0x180) @ 0x40 fd:02 1840558 1]: --xs /home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jitted-1908-287.so $ So: $ perf report -D -i perf.data | grep PERF_RECORD_MMAP2 | wc -l Failed to
open /tmp/perf-1908.map, continuing without symbols 21 $ perf report -D -i perf.data.jitted | grep PERF_RECORD_MMAP2 | wc -l 307 $ echo $((307 - 21)) 286 $ 286 extra PERF_RECORD_MMAP2 records. All for these tiny ELF files, each with just one function: $ file /home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jitted-1908-9.so /home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jitted-1908-9.so: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), corrupted program header size, BuildID[sha1]=ae54a2ebc3ecf0ba547bfc8cabdea1519df5203f, not stripped $ readelf -sw /home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jitted-1908-9.so Symbol table '.symtab' contains 2 entries: Num: Value Size Type Bind Vis Ndx Name 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND 1: 0000000000000040 9 FUNC LOCAL DEFAULT 1 atomic_cmpxchg_long $ Inserted into the build-id cache: $ ls -la ~/.debug/.build-id/ae/54a2ebc3ecf0ba547bfc8cabdea1519df5203f lrwxrwxrwx. 1 acme acme 111 Feb 5 11:30 /home/acme/.debug/.build-id/ae/54a2ebc3ecf0ba547bfc8cabdea1519df5203f -> ../../home/acme/.debug/jit/java-jit-20160205.XXWIEDls/jitted-1908-9.so/ae54a2ebc3ecf0ba547bfc8cabdea1519df5203f Note: check why 'file' reports that 'corrupted program header size'.
With a stupid java hog to do some profiling: $ cat hog.java public class hog { private static double do_something_else(int i) { double total = 0; while (i > 0) { total += Math.log(i--); } return total; } private static double do_something(int i) { double total = 0; while (i > 0) { total += Math.sqrt(i--) + do_something_else(i / 100); } return total; } public static void main(String[] args) { System.out.println(String.format("%s=%f & %f", args[0], do_something(Integer.parseInt(args[0])), do_something_else(Integer.parseInt(args[1])))); } } $ javac hog.java $ perf record -F 10000 -g -k mono java -agentpath:/home/acme/git/linux/tools/perf/jvmti/libjvmti.so hog 100000 2345000 java: jvmti: jitdump in /home/acme/.debug/jit/java-jit-20160205.XX4sqd14/jit-8670.dump 100000=291561592.669602 & 32050989.778714 [ perf record: Woken up 6 times to write data ] [ perf record: Captured and wrote 1.536 MB perf.data (12538 samples) ] $ perf inject --jit -i perf.data -o perf.data.jitted Looking at the 'perf report' TUI, at one expanded callchain leading to the jitted code: $ perf report --no-children -i perf.data.jitted Samples: 12K of event 'cycles:pp', Event count (approx.): 3829569932 Overhead Comm Shared Object Symbol - 93.38% java jitted-8670-291.so [.] 
class hog.do_something_else(int) class hog.do_something_else(int) - Interpreter - 75.86% call_stub JavaCalls::call_helper jni_invoke_static jni_CallStaticVoidMethod JavaMain start_thread - 17.52% JavaCalls::call_helper jni_invoke_static jni_CallStaticVoidMethod JavaMain start_thread Signed-off-by: Stephane Eranian Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: Carl Love Cc: David Ahern Cc: Jiri Olsa Cc: John McCutchan Cc: Namhyung Kim Cc: Pawel Moll Cc: Peter Zijlstra Cc: Sonny Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1448874143-7269-4-git-send-email-eranian@google.com [ Made it build on fedora23, added some build/usage instructions ] [ Check if filename != NULL in compiled_method_load_cb, fixing segfault ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/Makefile | 76 +++++++ tools/perf/jvmti/jvmti_agent.c | 465 +++++++++++++++++++++++++++++++++++++++++ tools/perf/jvmti/jvmti_agent.h | 29 +++ tools/perf/jvmti/libjvmti.c | 208 ++++++++++++++++++ 4 files changed, 778 insertions(+) create mode 100644 tools/perf/jvmti/Makefile create mode 100644 tools/perf/jvmti/jvmti_agent.c create mode 100644 tools/perf/jvmti/jvmti_agent.h create mode 100644 tools/perf/jvmti/libjvmti.c (limited to 'tools') diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile new file mode 100644 index 000000000000..5968f8332a28 --- /dev/null +++ b/tools/perf/jvmti/Makefile @@ -0,0 +1,76 @@ +ARCH=$(shell uname -m) + +ifeq ($(ARCH), x86_64) +JARCH=amd64 +endif +ifeq ($(ARCH), armv7l) +JARCH=armhf +endif +ifeq ($(ARCH), armv6l) +JARCH=armhf +endif +ifeq ($(ARCH), aarch64) +JARCH=aarch64 +endif +ifeq ($(ARCH), ppc64) +JARCH=powerpc +endif +ifeq ($(ARCH), ppc64le) +JARCH=powerpc +endif + +DESTDIR=/usr/local + +VERSION=1 +REVISION=0 +AGE=0 + +LN=ln -sf +RM=rm + +SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE) +VLIBJVMTI=libjvmti.so.$(VERSION) +SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI) +SOLIBEXT=so + +# The following works at 
least on fedora 23, you may need the next +# line for other distros. +JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') +#JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +# -lrt required in 32-bit mode for clock_gettime() +LIBS=-lelf -lrt +INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux + +TARGETS=$(SLIBJVMTI) + +SRCS=libjvmti.c jvmti_agent.c +OBJS=$(SRCS:.c=.o) +SOBJS=$(OBJS:.o=.lo) +OPT=-O2 -g -Werror -Wall + +CFLAGS=$(INCDIR) $(OPT) + +all: $(TARGETS) + +.c.o: + $(CC) $(CFLAGS) -c $*.c +.c.lo: + $(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo + +$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h + +$(SLIBJVMTI): $(SOBJS) + $(CC) $(CFLAGS) $(SLDFLAGS) -o $@ $(SOBJS) $(LIBS) + $(LN) $@ libjvmti.$(SOLIBEXT) + +clean: + $(RM) -f *.o *.so.* *.so *.lo + +install: + -mkdir -p $(DESTDIR)/lib + install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/ + (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI)) + (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT)) + ldconfig + +.SUFFIXES: .c .S .o .lo diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c new file mode 100644 index 000000000000..cbab139de5a4 --- /dev/null +++ b/tools/perf/jvmti/jvmti_agent.c @@ -0,0 +1,465 @@ +/* + * jvmti_agent.c: JVMTI agent interface + * + * Adapted from the Oprofile code in opagent.c: + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright 2007 OProfile authors + * Jens Wilke + * Daniel Hansel + * Copyright IBM Corporation 2007 + */ +#include +#include /* for mkdir() */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for gettid() */ +#include + +#include "jvmti_agent.h" +#include "../util/jitdump.h" + +#define JIT_LANG "java" + +static char jit_path[PATH_MAX]; +static void *marker_addr; + +/* + * padding buffer + */ +static const char pad_bytes[7]; + +static inline pid_t gettid(void) +{ + return (pid_t)syscall(__NR_gettid); +} + +static int get_e_machine(struct jitheader *hdr) +{ + ssize_t sret; + char id[16]; + int fd, ret = -1; + int m = -1; + struct { + uint16_t e_type; + uint16_t e_machine; + } info; + + fd = open("/proc/self/exe", O_RDONLY); + if (fd == -1) + return -1; + + sret = read(fd, id, sizeof(id)); + if (sret != sizeof(id)) + goto error; + + /* check ELF signature */ + if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') + goto error; + + sret = read(fd, &info, sizeof(info)); + if (sret != sizeof(info)) + goto error; + + m = info.e_machine; + if (m < 0) + m = 0; /* ELF EM_NONE */ + + hdr->elf_mach = m; + ret = 0; +error: + close(fd); + return ret; +} + +#define NSEC_PER_SEC 1000000000 +static int perf_clk_id = CLOCK_MONOTONIC; + +static inline uint64_t +timespec_to_ns(const struct timespec *ts) +{ + return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +static inline uint64_t +perf_get_timestamp(void) +{ + struct timespec ts; + int ret; + + ret = clock_gettime(perf_clk_id, &ts); + if (ret) + return 0; + + return timespec_to_ns(&ts); +} + +static int +debug_cache_init(void) +{ + char str[32]; + char *base, *p; + struct tm tm; + time_t t; + int ret; + + time(&t); + 
localtime_r(&t, &tm); + + base = getenv("JITDUMPDIR"); + if (!base) + base = getenv("HOME"); + if (!base) + base = "."; + + strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); + + ret = mkdir(jit_path, 0755); + if (ret == -1) { + if (errno != EEXIST) { + warn("jvmti: cannot create jit cache dir %s", jit_path); + return -1; + } + } + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = mkdir(jit_path, 0755); + if (ret == -1) { + if (errno != EEXIST) { + warn("cannot create jit cache dir %s", jit_path); + return -1; + } + } + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); + + p = mkdtemp(jit_path); + if (p != jit_path) { + warn("cannot create jit cache dir %s", jit_path); + return -1; + } + + return 0; +} + +static int +perf_open_marker_file(int fd) +{ + long pgsz; + + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz == -1) + return -1; + + /* + * we mmap the jitdump to create an MMAP RECORD in perf.data file. + * The mmap is captured either live (perf record running when we mmap) + * or in deferred mode, via /proc/PID/maps + * the MMAP record is used as a marker of a jitdump file for more meta + * data info about the jitted code. Perf report/annotate detect this + * special filename and process the jitdump file. + * + * mapping must be PROT_EXEC to ensure it is captured by perf record + * even when not using -d option + */ + marker_addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0); + return (marker_addr == MAP_FAILED) ? 
-1 : 0; +} + +static void +perf_close_marker_file(void) +{ + long pgsz; + + if (!marker_addr) + return; + + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz == -1) + return; + + munmap(marker_addr, pgsz); +} + +void *jvmti_open(void) +{ + int pad_cnt; + char dump_path[PATH_MAX]; + struct jitheader header; + int fd; + FILE *fp; + + /* + * check if clockid is supported + */ + if (!perf_get_timestamp()) + warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + + memset(&header, 0, sizeof(header)); + + debug_cache_init(); + + /* + * jitdump file name + */ + snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + + fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); + if (fd == -1) + return NULL; + + /* + * create perf.data maker for the jitdump file + */ + if (perf_open_marker_file(fd)) { + warnx("jvmti: failed to create marker file"); + return NULL; + } + + fp = fdopen(fd, "w+"); + if (!fp) { + warn("jvmti: cannot create %s", dump_path); + close(fd); + goto error; + } + + warnx("jvmti: jitdump in %s", dump_path); + + if (get_e_machine(&header)) { + warn("get_e_machine failed\n"); + goto error; + } + + header.magic = JITHEADER_MAGIC; + header.version = JITHEADER_VERSION; + header.total_size = sizeof(header); + header.pid = getpid(); + + /* calculate amount of padding '\0' */ + pad_cnt = PADDING_8ALIGNED(header.total_size); + header.total_size += pad_cnt; + + header.timestamp = perf_get_timestamp(); + + if (!fwrite(&header, sizeof(header), 1, fp)) { + warn("jvmti: cannot write dumpfile header"); + goto error; + } + + /* write padding '\0' if necessary */ + if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) { + warn("jvmti: cannot write dumpfile header padding"); + goto error; + } + + return fp; +error: + fclose(fp); + return NULL; +} + +int +jvmti_close(void *agent) +{ + struct jr_code_close rec; + FILE *fp = agent; + + if (!fp) { + warnx("jvmti: incalid fd in close_agent"); + return -1; + } + + rec.p.id = JIT_CODE_CLOSE; + rec.p.total_size = 
sizeof(rec); + + rec.p.timestamp = perf_get_timestamp(); + + if (!fwrite(&rec, sizeof(rec), 1, fp)) + return -1; + + fclose(fp); + + fp = NULL; + + perf_close_marker_file(); + + return 0; +} + +int +jvmti_write_code(void *agent, char const *sym, + uint64_t vma, void const *code, unsigned int const size) +{ + static int code_generation = 1; + struct jr_code_load rec; + size_t sym_len; + size_t padding_count; + FILE *fp = agent; + int ret = -1; + + /* don't care about 0 length function, no samples */ + if (size == 0) + return 0; + + if (!fp) { + warnx("jvmti: invalid fd in write_native_code"); + return -1; + } + + sym_len = strlen(sym) + 1; + + rec.p.id = JIT_CODE_LOAD; + rec.p.total_size = sizeof(rec) + sym_len; + padding_count = PADDING_8ALIGNED(rec.p.total_size); + rec.p. total_size += padding_count; + rec.p.timestamp = perf_get_timestamp(); + + rec.code_size = size; + rec.vma = vma; + rec.code_addr = vma; + rec.pid = getpid(); + rec.tid = gettid(); + + if (code) + rec.p.total_size += size; + + /* + * If JVM is multi-threaded, nultiple concurrent calls to agent + * may be possible, so protect file writes + */ + flockfile(fp); + + /* + * get code index inside lock to avoid race condition + */ + rec.code_index = code_generation++; + + ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp); + fwrite_unlocked(sym, sym_len, 1, fp); + + if (padding_count) + fwrite_unlocked(pad_bytes, padding_count, 1, fp); + + if (code) + fwrite_unlocked(code, size, 1, fp); + + funlockfile(fp); + + ret = 0; + + return ret; +} + +int +jvmti_write_debug_info(void *agent, uint64_t code, const char *file, + jvmtiAddrLocationMap const *map, + jvmtiLineNumberEntry *li, jint num) +{ + static const char *prev_str = "\xff"; + struct jr_code_debug_info rec; + size_t sret, len, size, flen; + size_t padding_count; + FILE *fp = agent; + int i; + + /* + * no entry to write + */ + if (!num) + return 0; + + if (!fp) { + warnx("jvmti: invalid fd in write_debug_info"); + return -1; + } + + flen = strlen(file) + 
1; + + rec.p.id = JIT_CODE_DEBUG_INFO; + size = sizeof(rec); + rec.p.timestamp = perf_get_timestamp(); + rec.code_addr = (uint64_t)(uintptr_t)code; + rec.nr_entry = num; + + /* + * on disk source line info layout: + * uint64_t : addr + * int : line number + * file[] : source file name + * padding : pad to multiple of 8 bytes + */ + size += num * (sizeof(uint64_t) + sizeof(int)); + size += flen + (num - 1) * 2; + /* + * pad to 8 bytes + */ + padding_count = PADDING_8ALIGNED(size); + + rec.p.total_size = size + padding_count; + + /* + * If JVM is multi-threaded, nultiple concurrent calls to agent + * may be possible, so protect file writes + */ + flockfile(fp); + + sret = fwrite_unlocked(&rec, sizeof(rec), 1, fp); + if (sret != 1) + goto error; + + for (i = 0; i < num; i++) { + uint64_t addr; + + addr = (uint64_t)map[i].start_address; + len = sizeof(addr); + sret = fwrite_unlocked(&addr, len, 1, fp); + if (sret != 1) + goto error; + + len = sizeof(int); + sret = fwrite_unlocked(&li[i].line_number, len, 1, fp); + if (sret != 1) + goto error; + + if (i == 0) { + sret = fwrite_unlocked(file, flen, 1, fp); + } else { + sret = fwrite_unlocked(prev_str, 2, 1, fp); + } + if (sret != 1) + goto error; + + } + if (padding_count) + sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp); + if (sret != 1) + goto error; + + funlockfile(fp); + return 0; +error: + funlockfile(fp); + return -1; +} diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h new file mode 100644 index 000000000000..8251a1c5ee3f --- /dev/null +++ b/tools/perf/jvmti/jvmti_agent.h @@ -0,0 +1,29 @@ +#ifndef __JVMTI_AGENT_H__ +#define __JVMTI_AGENT_H__ + +#include +#include +#include + +#define __unused __attribute__((unused)) + +#if defined(__cplusplus) +extern "C" { +#endif + +void *jvmti_open(void); +int jvmti_close(void *agent); +int jvmti_write_code(void *agent, char const *symbol_name, + uint64_t vma, void const *code, + const unsigned int code_size); +int jvmti_write_debug_info(void 
*agent, + uint64_t code, + const char *file, + jvmtiAddrLocationMap const *map, + jvmtiLineNumberEntry *tab, jint nr); + +#if defined(__cplusplus) +} + +#endif +#endif /* __JVMTI_H__ */ diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c new file mode 100644 index 000000000000..92ffbe4ff160 --- /dev/null +++ b/tools/perf/jvmti/libjvmti.c @@ -0,0 +1,208 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "jvmti_agent.h" + +static int has_line_numbers; +void *jvmti_agent; + +static void JNICALL +compiled_method_load_cb(jvmtiEnv *jvmti, + jmethodID method, + jint code_size, + void const *code_addr, + jint map_length, + jvmtiAddrLocationMap const *map, + void const *compile_info __unused) +{ + jvmtiLineNumberEntry *tab = NULL; + jclass decl_class; + char *class_sign = NULL; + char *func_name = NULL; + char *func_sign = NULL; + char *file_name= NULL; + char fn[PATH_MAX]; + uint64_t addr = (uint64_t)(uintptr_t)code_addr; + jvmtiError ret; + jint nr_lines = 0; + size_t len; + + ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, + &decl_class); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get declaring class"); + return; + } + + if (has_line_numbers && map && map_length) { + + ret = (*jvmti)->GetLineNumberTable(jvmti, method, &nr_lines, &tab); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get line table for method"); + } else { + ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get source filename ret=%d", ret); + nr_lines = 0; + } + } + } + + ret = (*jvmti)->GetClassSignature(jvmti, decl_class, + &class_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: getclassignature failed"); + goto error; + } + + ret = (*jvmti)->GetMethodName(jvmti, method, &func_name, + &func_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: failed getmethodname"); + goto error; + } + + /* + * Assume path name is class hierarchy, 
this is a common practice with Java programs + */ + if (*class_sign == 'L') { + int j, i = 0; + char *p = strrchr(class_sign, '/'); + if (p) { + /* drop the 'L' prefix and copy up to the final '/' */ + for (i = 0; i < (p - class_sign); i++) + fn[i] = class_sign[i+1]; + } + /* + * append file name, we use loops and not string ops to avoid modifying + * class_sign which is used later for the symbol name + */ + for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++) + fn[i] = file_name[j]; + fn[i] = '\0'; + } else { + /* fallback case */ + strcpy(fn, file_name); + } + /* + * write source line info record if we have it + */ + if (jvmti_write_debug_info(jvmti_agent, addr, fn, map, tab, nr_lines)) + warnx("jvmti: write_debug_info() failed"); + + len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; + { + char str[len]; + snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign); + if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size)) + warnx("jvmti: write_code() failed"); + } +error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name); + (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign); + (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); + (*jvmti)->Deallocate(jvmti, (unsigned char *)tab); + (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); +} + +static void JNICALL +code_generated_cb(jvmtiEnv *jvmti, + char const *name, + void const *code_addr, + jint code_size) +{ + uint64_t addr = (uint64_t)(unsigned long)code_addr; + int ret; + + ret = jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size); + if (ret) + warnx("jvmti: write_code() failed for code_generated"); +} + +JNIEXPORT jint JNICALL +Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused) +{ + jvmtiEventCallbacks cb; + jvmtiCapabilities caps1; + jvmtiJlocationFormat format; + jvmtiEnv *jvmti = NULL; + jint ret; + + jvmti_agent = jvmti_open(); + if (!jvmti_agent) { + warnx("jvmti: open_agent failed"); + return 
-1; + } + + /* + * Request a JVMTI interface version 1 environment + */ + ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1); + if (ret != JNI_OK) { + warnx("jvmti: jvmti version 1 not supported"); + return -1; + } + + /* + * acquire method_load capability, we require it + * request line numbers (optional) + */ + memset(&caps1, 0, sizeof(caps1)); + caps1.can_generate_compiled_method_load_events = 1; + + ret = (*jvmti)->AddCapabilities(jvmti, &caps1); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: acquire compiled_method capability failed"); + return -1; + } + ret = (*jvmti)->GetJLocationFormat(jvmti, &format); + if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) { + memset(&caps1, 0, sizeof(caps1)); + caps1.can_get_line_numbers = 1; + caps1.can_get_source_file_name = 1; + ret = (*jvmti)->AddCapabilities(jvmti, &caps1); + if (ret == JVMTI_ERROR_NONE) + has_line_numbers = 1; + } + + memset(&cb, 0, sizeof(cb)); + + cb.CompiledMethodLoad = compiled_method_load_cb; + cb.DynamicCodeGenerated = code_generated_cb; + + ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb)); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot set event callbacks"); + return -1; + } + + ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, + JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: setnotification failed for method_load"); + return -1; + } + + ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, + JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: setnotification failed on code_generated"); + return -1; + } + return 0; +} + +JNIEXPORT void JNICALL +Agent_OnUnload(JavaVM *jvm __unused) +{ + int ret; + + ret = jvmti_close(jvmti_agent); + if (ret) + errx(1, "Error: op_close_agent()"); +} -- cgit v1.2.3 From 598b7c6919c7bbcc1243009721a01bc12275ff3e Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 30 Nov 2015 10:02:23 +0100 Subject: perf jit: add 
source line info support This patch adds source line information support to perf for jitted code. The source line info must be emitted by the runtime, such as JVMTI. Perf injects extract the source line info from the jitdump file and adds the corresponding .debug_lines section in the ELF image generated for each jitted function. The source line enables matching any address in the profile with a source file and line number. The improvement is visible in perf annotate with the source code displayed alongside the assembly code. The dwarf code leverages the support from OProfile which is also released under GPLv2. Copyright 2007 OProfile authors. Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: Carl Love Cc: David Ahern Cc: Jiri Olsa Cc: John McCutchan Cc: Namhyung Kim Cc: Pawel Moll Cc: Peter Zijlstra Cc: Sonny Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1448874143-7269-5-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/jvmti_agent.c | 32 +-- tools/perf/jvmti/jvmti_agent.h | 11 +- tools/perf/jvmti/libjvmti.c | 122 ++++++++- tools/perf/util/Build | 3 + tools/perf/util/genelf.c | 15 +- tools/perf/util/genelf.h | 6 +- tools/perf/util/genelf_debug.c | 610 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/jitdump.c | 8 +- 8 files changed, 768 insertions(+), 39 deletions(-) create mode 100644 tools/perf/util/genelf_debug.c (limited to 'tools') diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index cbab139de5a4..6461e02ab940 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -374,20 +374,20 @@ jvmti_write_code(void *agent, char const *sym, int jvmti_write_debug_info(void *agent, uint64_t code, const char *file, - jvmtiAddrLocationMap const *map, - jvmtiLineNumberEntry *li, jint num) + jvmti_line_info_t *li, int nr_lines) { - static const char *prev_str = "\xff"; struct jr_code_debug_info rec; size_t sret, len, size, flen; 
size_t padding_count; + uint64_t addr; + const char *fn = file; FILE *fp = agent; int i; /* * no entry to write */ - if (!num) + if (!nr_lines) return 0; if (!fp) { @@ -401,17 +401,18 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, size = sizeof(rec); rec.p.timestamp = perf_get_timestamp(); rec.code_addr = (uint64_t)(uintptr_t)code; - rec.nr_entry = num; + rec.nr_entry = nr_lines; /* * on disk source line info layout: * uint64_t : addr * int : line number + * int : column discriminator * file[] : source file name * padding : pad to multiple of 8 bytes */ - size += num * (sizeof(uint64_t) + sizeof(int)); - size += flen + (num - 1) * 2; + size += nr_lines * sizeof(struct debug_entry); + size += flen * nr_lines; /* * pad to 8 bytes */ @@ -429,28 +430,27 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, if (sret != 1) goto error; - for (i = 0; i < num; i++) { - uint64_t addr; + for (i = 0; i < nr_lines; i++) { - addr = (uint64_t)map[i].start_address; + addr = (uint64_t)li[i].pc; len = sizeof(addr); sret = fwrite_unlocked(&addr, len, 1, fp); if (sret != 1) goto error; - len = sizeof(int); + len = sizeof(li[0].line_number); sret = fwrite_unlocked(&li[i].line_number, len, 1, fp); if (sret != 1) goto error; - if (i == 0) { - sret = fwrite_unlocked(file, flen, 1, fp); - } else { - sret = fwrite_unlocked(prev_str, 2, 1, fp); - } + len = sizeof(li[0].discrim); + sret = fwrite_unlocked(&li[i].discrim, len, 1, fp); if (sret != 1) goto error; + sret = fwrite_unlocked(fn, flen, 1, fp); + if (sret != 1) + goto error; } if (padding_count) sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp); diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h index 8251a1c5ee3f..bedf5d0ba9ff 100644 --- a/tools/perf/jvmti/jvmti_agent.h +++ b/tools/perf/jvmti/jvmti_agent.h @@ -11,16 +11,23 @@ extern "C" { #endif +typedef struct { + unsigned long pc; + int line_number; + int discrim; /* discriminator -- 0 for now */ +} 
jvmti_line_info_t; + void *jvmti_open(void); int jvmti_close(void *agent); int jvmti_write_code(void *agent, char const *symbol_name, uint64_t vma, void const *code, const unsigned int code_size); + int jvmti_write_debug_info(void *agent, uint64_t code, const char *file, - jvmtiAddrLocationMap const *map, - jvmtiLineNumberEntry *tab, jint nr); + jvmti_line_info_t *li, + int nr_lines); #if defined(__cplusplus) } diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index 92ffbe4ff160..ac12e4b91a92 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "jvmti_agent.h" @@ -11,6 +12,100 @@ static int has_line_numbers; void *jvmti_agent; +static jvmtiError +do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, + jvmti_line_info_t *tab, jint *nr) +{ + jint i, lines = 0; + jint nr_lines = 0; + jvmtiLineNumberEntry *loc_tab = NULL; + jvmtiError ret; + + ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab); + if (ret != JVMTI_ERROR_NONE) + return ret; + + for (i = 0; i < nr_lines; i++) { + if (loc_tab[i].start_location < bci) { + tab[lines].pc = (unsigned long)pc; + tab[lines].line_number = loc_tab[i].line_number; + tab[lines].discrim = 0; /* not yet used */ + lines++; + } else { + break; + } + } + (*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab); + *nr = lines; + return JVMTI_ERROR_NONE; +} + +static jvmtiError +get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines) +{ + const jvmtiCompiledMethodLoadRecordHeader *hdr; + jvmtiCompiledMethodLoadInlineRecord *rec; + jvmtiLineNumberEntry *lne = NULL; + PCStackInfo *c; + jint nr, ret; + int nr_total = 0; + int i, lines_total = 0; + + if (!(tab && nr_lines)) + return JVMTI_ERROR_NULL_POINTER; + + /* + * Phase 1 -- get the number of lines necessary + */ + for (hdr = compile_info; hdr != NULL; hdr = hdr->next) { + if (hdr->kind == 
JVMTI_CMLR_INLINE_INFO) { + rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; + for (i = 0; i < rec->numpcs; i++) { + c = rec->pcinfo + i; + nr = 0; + /* + * unfortunately, need a tab to get the number of lines! + */ + ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne); + if (ret == JVMTI_ERROR_NONE) { + /* free what was allocated for nothing */ + (*jvmti)->Deallocate(jvmti, (unsigned char *)lne); + nr_total += (int)nr; + } + } + } + } + + if (nr_total == 0) + return JVMTI_ERROR_NOT_FOUND; + + /* + * Phase 2 -- allocate big enough line table + */ + *tab = malloc(nr_total * sizeof(**tab)); + if (!*tab) + return JVMTI_ERROR_OUT_OF_MEMORY; + + for (hdr = compile_info; hdr != NULL; hdr = hdr->next) { + if (hdr->kind == JVMTI_CMLR_INLINE_INFO) { + rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; + for (i = 0; i < rec->numpcs; i++) { + c = rec->pcinfo + i; + nr = 0; + ret = do_get_line_numbers(jvmti, c->pc, + c->methods[0], + c->bcis[0], + *tab + lines_total, + &nr); + if (ret == JVMTI_ERROR_NONE) + lines_total += nr; + } + } + } + *nr_lines = lines_total; + return JVMTI_ERROR_NONE; +} + static void JNICALL compiled_method_load_cb(jvmtiEnv *jvmti, jmethodID method, @@ -18,9 +113,9 @@ compiled_method_load_cb(jvmtiEnv *jvmti, void const *code_addr, jint map_length, jvmtiAddrLocationMap const *map, - void const *compile_info __unused) + const void *compile_info) { - jvmtiLineNumberEntry *tab = NULL; + jvmti_line_info_t *line_tab = NULL; jclass decl_class; char *class_sign = NULL; char *func_name = NULL; @@ -29,7 +124,7 @@ compiled_method_load_cb(jvmtiEnv *jvmti, char fn[PATH_MAX]; uint64_t addr = (uint64_t)(uintptr_t)code_addr; jvmtiError ret; - jint nr_lines = 0; + int nr_lines = 0; /* in line_tab[] */ size_t len; ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, @@ -40,19 +135,19 @@ compiled_method_load_cb(jvmtiEnv *jvmti, } if (has_line_numbers && map && map_length) { - - ret = (*jvmti)->GetLineNumberTable(jvmti, method, &nr_lines, &tab); + ret = 
get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines); if (ret != JVMTI_ERROR_NONE) { warnx("jvmti: cannot get line table for method"); - } else { - ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); - if (ret != JVMTI_ERROR_NONE) { - warnx("jvmti: cannot get source filename ret=%d", ret); - nr_lines = 0; - } + nr_lines = 0; } } + ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get source filename ret=%d", ret); + goto error; + } + ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL); if (ret != JVMTI_ERROR_NONE) { @@ -92,13 +187,14 @@ compiled_method_load_cb(jvmtiEnv *jvmti, /* * write source line info record if we have it */ - if (jvmti_write_debug_info(jvmti_agent, addr, fn, map, tab, nr_lines)) + if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines)) warnx("jvmti: write_debug_info() failed"); len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; { char str[len]; snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign); + if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size)) warnx("jvmti: write_code() failed"); } @@ -106,8 +202,8 @@ error: (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name); (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign); (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); - (*jvmti)->Deallocate(jvmti, (unsigned char *)tab); (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); + free(line_tab); } static void JNICALL diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 52a4a806ee2f..a34752d28488 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -108,8 +108,11 @@ libperf-$(CONFIG_LZMA) += lzma.o libperf-y += demangle-java.o libperf-$(CONFIG_LIBELF) += jitdump.o libperf-$(CONFIG_LIBELF) += genelf.o +libperf-$(CONFIG_LIBELF) += genelf_debug.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +# avoid compiler 
warnings in 32-bit mode +CFLAGS_genelf_debug.o += -Wno-packed $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c $(call rule_mkdir) diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index 145f8116ef56..c1ef805c6a8f 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -156,7 +156,8 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod */ int jit_write_elf(int fd, uint64_t load_addr, const char *sym, - const void *code, int csize) + const void *code, int csize, + void *debug, int nr_debug_entries) { Elf *e; Elf_Data *d; @@ -385,9 +386,15 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_size = sizeof(bnote); shdr->sh_entsize = 0; - if (elf_update(e, ELF_C_WRITE) < 0) { - warnx("elf_update 4 failed"); - goto error; + if (debug && nr_debug_entries) { + retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); + if (retval) + goto error; + } else { + if (elf_update(e, ELF_C_WRITE) < 0) { + warnx("elf_update 4 failed"); + goto error; + } } retval = 0; diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index d8e9ece13c8b..45bf9c6d3257 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -3,7 +3,11 @@ /* genelf.c */ extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym, - const void *code, int csize); + const void *code, int csize, + void *debug, int nr_debug_entries); +/* genelf_debug.c */ +extern int jit_add_debug_info(Elf *e, uint64_t code_addr, + void *debug, int nr_debug_entries); #if defined(__arm__) #define GEN_ELF_ARCH EM_ARM diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c new file mode 100644 index 000000000000..5980f7d256b1 --- /dev/null +++ b/tools/perf/util/genelf_debug.c @@ -0,0 +1,610 @@ +/* + * genelf_debug.c + * Copyright (C) 2015, Google, Inc + * + * Contributed by: + * Stephane Eranian + * + * Released under the GPL v2. 
+ * + * based on GPLv2 source code from Oprofile + * @remark Copyright 2007 OProfile authors + * @author Philippe Elie + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "perf.h" +#include "genelf.h" +#include "../util/jitdump.h" + +#define BUFFER_EXT_DFL_SIZE (4 * 1024) + +typedef uint32_t uword; +typedef uint16_t uhalf; +typedef int32_t sword; +typedef int16_t shalf; +typedef uint8_t ubyte; +typedef int8_t sbyte; + +struct buffer_ext { + size_t cur_pos; + size_t max_sz; + void *data; +}; + +static void +buffer_ext_dump(struct buffer_ext *be, const char *msg) +{ + size_t i; + warnx("DUMP for %s", msg); + for (i = 0 ; i < be->cur_pos; i++) + warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff); +} + +static inline int +buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz) +{ + void *tmp; + size_t be_sz = be->max_sz; + +retry: + if ((be->cur_pos + sz) < be_sz) { + memcpy(be->data + be->cur_pos, addr, sz); + be->cur_pos += sz; + return 0; + } + + if (!be_sz) + be_sz = BUFFER_EXT_DFL_SIZE; + else + be_sz <<= 1; + + tmp = realloc(be->data, be_sz); + if (!tmp) + return -1; + + be->data = tmp; + be->max_sz = be_sz; + + goto retry; +} + +static void +buffer_ext_init(struct buffer_ext *be) +{ + be->data = NULL; + be->cur_pos = 0; + be->max_sz = 0; +} + +static inline size_t +buffer_ext_size(struct buffer_ext *be) +{ + return be->cur_pos; +} + +static inline void * +buffer_ext_addr(struct buffer_ext *be) +{ + return be->data; +} + +struct debug_line_header { + // Not counting this field + uword total_length; + // version number (2 currently) + uhalf version; + // relative offset from next field to + // program statement + uword prolog_length; + ubyte minimum_instruction_length; + ubyte default_is_stmt; + // line_base - see DWARF 2 specs + sbyte line_base; + // line_range - see DWARF 2 specs + ubyte line_range; + // number of opcode + 1 + ubyte opcode_base; + /* follow the array 
of opcode args nr: ubytes [nr_opcode_base] */ + /* follow the search directories index, zero terminated string + * terminated by an empty string. + */ + /* follow an array of { filename, LEB128, LEB128, LEB128 }, first is + * the directory index entry, 0 means current directory, then mtime + * and filesize, last entry is followed by an empty string. + */ + /* follow the first program statement */ +} __attribute__((packed)); + +/* DWARF 2 spec talk only about one possible compilation unit header while + * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not + * related to the used arch, an ELF 32 can hold more than 4 GB of debug + * information. For now we handle only DWARF 2 32 bits comp unit. It'll only + * become a problem if we generate more than 4GB of debug information. + */ +struct compilation_unit_header { + uword total_length; + uhalf version; + uword debug_abbrev_offset; + ubyte pointer_size; +} __attribute__((packed)); + +#define DW_LNS_num_opcode (DW_LNS_set_isa + 1) + +/* field filled at run time are marked with -1 */ +static struct debug_line_header const default_debug_line_header = { + .total_length = -1, + .version = 2, + .prolog_length = -1, + .minimum_instruction_length = 1, /* could be better when min instruction size != 1 */ + .default_is_stmt = 1, /* we don't take care about basic block */ + .line_base = -5, /* sensible value for line base ... */ + .line_range = -14, /* ... 
and line range are guessed statically */ + .opcode_base = DW_LNS_num_opcode +}; + +static ubyte standard_opcode_length[] = +{ + 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 +}; +#if 0 +{ + [DW_LNS_advance_pc] = 1, + [DW_LNS_advance_line] = 1, + [DW_LNS_set_file] = 1, + [DW_LNS_set_column] = 1, + [DW_LNS_fixed_advance_pc] = 1, + [DW_LNS_set_isa] = 1, +}; +#endif + +/* field filled at run time are marked with -1 */ +static struct compilation_unit_header default_comp_unit_header = { + .total_length = -1, + .version = 2, + .debug_abbrev_offset = 0, /* we reuse the same abbrev entries for all comp unit */ + .pointer_size = sizeof(void *) +}; + +static void emit_uword(struct buffer_ext *be, uword data) +{ + buffer_ext_add(be, &data, sizeof(uword)); +} + +static void emit_string(struct buffer_ext *be, const char *s) +{ + buffer_ext_add(be, (void *)s, strlen(s) + 1); +} + +static void emit_unsigned_LEB128(struct buffer_ext *be, + unsigned long data) +{ + do { + ubyte cur = data & 0x7F; + data >>= 7; + if (data) + cur |= 0x80; + buffer_ext_add(be, &cur, 1); + } while (data); +} + +static void emit_signed_LEB128(struct buffer_ext *be, long data) +{ + int more = 1; + int negative = data < 0; + int size = sizeof(long) * CHAR_BIT; + while (more) { + ubyte cur = data & 0x7F; + data >>= 7; + if (negative) + data |= - (1 << (size - 7)); + if ((data == 0 && !(cur & 0x40)) || + (data == -1l && (cur & 0x40))) + more = 0; + else + cur |= 0x80; + buffer_ext_add(be, &cur, 1); + } +} + +static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode, + void *data, size_t data_len) +{ + buffer_ext_add(be, (char *)"", 1); + + emit_unsigned_LEB128(be, data_len + 1); + + buffer_ext_add(be, &opcode, 1); + buffer_ext_add(be, data, data_len); +} + +static void emit_opcode(struct buffer_ext *be, ubyte opcode) +{ + buffer_ext_add(be, &opcode, 1); +} + +static void emit_opcode_signed(struct buffer_ext *be, + ubyte opcode, long data) +{ + buffer_ext_add(be, &opcode, 1); + emit_signed_LEB128(be, data); 
+} + +static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode, + unsigned long data) +{ + buffer_ext_add(be, &opcode, 1); + emit_unsigned_LEB128(be, data); +} + +static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc) +{ + emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc); +} + +static void emit_advance_lineno(struct buffer_ext *be, long delta_lineno) +{ + emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno); +} + +static void emit_lne_end_of_sequence(struct buffer_ext *be) +{ + emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0); +} + +static void emit_set_file(struct buffer_ext *be, unsigned long idx) +{ + emit_opcode_unsigned(be, DW_LNS_set_file, idx); +} + +static void emit_lne_define_filename(struct buffer_ext *be, + const char *filename) +{ + buffer_ext_add(be, (void *)"", 1); + + /* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */ + emit_unsigned_LEB128(be, strlen(filename) + 5); + emit_opcode(be, DW_LNE_define_file); + emit_string(be, filename); + /* directory index 0=do not know */ + emit_unsigned_LEB128(be, 0); + /* last modification date on file 0=do not know */ + emit_unsigned_LEB128(be, 0); + /* filesize 0=do not know */ + emit_unsigned_LEB128(be, 0); +} + +static void emit_lne_set_address(struct buffer_ext *be, + void *address) +{ + emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long)); +} + +static ubyte get_special_opcode(struct debug_entry *ent, + unsigned int last_line, + unsigned long last_vma) +{ + unsigned int temp; + unsigned long delta_addr; + + /* + * delta from line_base + */ + temp = (ent->lineno - last_line) - default_debug_line_header.line_base; + + if (temp >= default_debug_line_header.line_range) + return 0; + + /* + * delta of addresses + */ + delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length; + + /* This is not sufficient to ensure opcode will be in [0-256] but + * sufficient 
to ensure when summing with the delta lineno we will + * not overflow the unsigned long opcode */ + + if (delta_addr <= 256 / default_debug_line_header.line_range) { + unsigned long opcode = temp + + (delta_addr * default_debug_line_header.line_range) + + default_debug_line_header.opcode_base; + + return opcode <= 255 ? opcode : 0; + } + return 0; +} + +static void emit_lineno_info(struct buffer_ext *be, + struct debug_entry *ent, size_t nr_entry, + unsigned long code_addr) +{ + size_t i; + + /* + * Machine state at start of a statement program + * address = 0 + * file = 1 + * line = 1 + * column = 0 + * is_stmt = default_is_stmt as given in the debug_line_header + * basic block = 0 + * end sequence = 0 + */ + + /* start state of the state machine we take care of */ + unsigned long last_vma = code_addr; + char const *cur_filename = NULL; + unsigned long cur_file_idx = 0; + int last_line = 1; + + emit_lne_set_address(be, (void *)code_addr); + + for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) { + int need_copy = 0; + ubyte special_opcode; + + /* + * check if filename changed, if so add it + */ + if (!cur_filename || strcmp(cur_filename, ent->name)) { + emit_lne_define_filename(be, ent->name); + cur_filename = ent->name; + emit_set_file(be, ++cur_file_idx); + need_copy = 1; + } + + special_opcode = get_special_opcode(ent, last_line, last_vma); + if (special_opcode != 0) { + last_line = ent->lineno; + last_vma = ent->addr; + emit_opcode(be, special_opcode); + } else { + /* + * lines differ, emit line delta + */ + if (last_line != ent->lineno) { + emit_advance_lineno(be, ent->lineno - last_line); + last_line = ent->lineno; + need_copy = 1; + } + /* + * addresses differ, emit address delta + */ + if (last_vma != ent->addr) { + emit_advance_pc(be, ent->addr - last_vma); + last_vma = ent->addr; + need_copy = 1; + } + /* + * add new row to matrix + */ + if (need_copy) + emit_opcode(be, DW_LNS_copy); + } + } +} + +static void add_debug_line(struct buffer_ext *be, 
+ struct debug_entry *ent, size_t nr_entry, + unsigned long code_addr) +{ + struct debug_line_header * dbg_header; + size_t old_size; + + old_size = buffer_ext_size(be); + + buffer_ext_add(be, (void *)&default_debug_line_header, + sizeof(default_debug_line_header)); + + buffer_ext_add(be, &standard_opcode_length, sizeof(standard_opcode_length)); + + // empty directory entry + buffer_ext_add(be, (void *)"", 1); + + // empty filename directory + buffer_ext_add(be, (void *)"", 1); + + dbg_header = buffer_ext_addr(be) + old_size; + dbg_header->prolog_length = (buffer_ext_size(be) - old_size) - + offsetof(struct debug_line_header, minimum_instruction_length); + + emit_lineno_info(be, ent, nr_entry, code_addr); + + emit_lne_end_of_sequence(be); + + dbg_header = buffer_ext_addr(be) + old_size; + dbg_header->total_length = (buffer_ext_size(be) - old_size) - + offsetof(struct debug_line_header, version); +} + +static void +add_debug_abbrev(struct buffer_ext *be) +{ + emit_unsigned_LEB128(be, 1); + emit_unsigned_LEB128(be, DW_TAG_compile_unit); + emit_unsigned_LEB128(be, DW_CHILDREN_yes); + emit_unsigned_LEB128(be, DW_AT_stmt_list); + emit_unsigned_LEB128(be, DW_FORM_data4); + emit_unsigned_LEB128(be, 0); + emit_unsigned_LEB128(be, 0); + emit_unsigned_LEB128(be, 0); +} + +static void +add_compilation_unit(struct buffer_ext *be, + size_t offset_debug_line) +{ + struct compilation_unit_header *comp_unit_header; + size_t old_size = buffer_ext_size(be); + + buffer_ext_add(be, &default_comp_unit_header, + sizeof(default_comp_unit_header)); + + emit_unsigned_LEB128(be, 1); + emit_uword(be, offset_debug_line); + + comp_unit_header = buffer_ext_addr(be) + old_size; + comp_unit_header->total_length = (buffer_ext_size(be) - old_size) - + offsetof(struct compilation_unit_header, version); +} + +static int +jit_process_debug_info(uint64_t code_addr, + void *debug, int nr_debug_entries, + struct buffer_ext *dl, + struct buffer_ext *da, + struct buffer_ext *di) +{ + struct debug_entry 
*ent = debug; + int i; + + for (i = 0; i < nr_debug_entries; i++) { + ent->addr = ent->addr - code_addr; + ent = debug_entry_next(ent); + } + add_compilation_unit(di, buffer_ext_size(dl)); + add_debug_line(dl, debug, nr_debug_entries, 0); + add_debug_abbrev(da); + if (0) buffer_ext_dump(da, "abbrev"); + + return 0; +} + +int +jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries) +{ + Elf_Data *d; + Elf_Scn *scn; + Elf_Shdr *shdr; + struct buffer_ext dl, di, da; + int ret; + + buffer_ext_init(&dl); + buffer_ext_init(&di); + buffer_ext_init(&da); + + ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di); + if (ret) + return -1; + /* + * setup .debug_line section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&dl); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&dl); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 52; /* .debug_line */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup .debug_info section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&di); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&di); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 64; /* .debug_info */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + 
shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup .debug_abbrev section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&da); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&da); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 76; /* .debug_abbrev */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * now we update the ELF image with all the sections + */ + if (elf_update(e, ELF_C_WRITE) < 0) { + warnx("elf_update debug failed"); + return -1; + } + return 0; +} diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 9f7a01289efe..99fa5eee9fe0 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -63,7 +63,9 @@ jit_emit_elf(char *filename, const char *sym, uint64_t code_addr, const void *code, - int csize) + int csize, + void *debug, + int nr_debug_entries) { int ret, fd; @@ -76,7 +78,7 @@ jit_emit_elf(char *filename, return -1; } - ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize); + ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries); close(fd); @@ -347,7 +349,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) size = PERF_ALIGN(size, sizeof(u64)); uaddr = (uintptr_t)code; - ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize); + ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries); if (jd->debug_data && jd->nr_debug_entries) { free(jd->debug_data); -- cgit v1.2.3 From c7ac24178c50a01f14eebcddf5c7b7a2e54676cc Mon Sep 17 00:00:00 2001 From: Taeung Song 
Date: Thu, 11 Feb 2016 02:51:17 +0900 Subject: perf config: Add '--system' and '--user' options to select which config file is used The '--system' option means $(sysconfdir)/perfconfig and '--user' means $HOME/.perfconfig. If none is used, both system and user config files are read. E.g.: # perf config [<file-option>] [options] With a specific config file: # perf config --user | --system or both user and system config file: # perf config Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1455126685-32367-2-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 14 +++++++++++++- tools/perf/builtin-config.c | 27 ++++++++++++++++++++++++--- tools/perf/util/cache.h | 3 +++ tools/perf/util/config.c | 4 ++-- 4 files changed, 42 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index c7158bfb1649..15949e2a7805 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -8,7 +8,7 @@ perf-config - Get and set variables in a configuration file. SYNOPSIS -------- [verse] -'perf config' -l | --list +'perf config' [<file-option>] -l | --list DESCRIPTION ----------- @@ -21,6 +21,14 @@ OPTIONS --list:: Show current config variables, name and value, for all sections. +--user:: + For writing and reading options: write to user + '$HOME/.perfconfig' file or read it. + +--system:: + For writing and reading options: write to system-wide + '$(sysconfdir)/perfconfig' or read it. + CONFIGURATION FILE ------------------ @@ -30,6 +38,10 @@ The '$HOME/.perfconfig' file is used to store a per-user configuration. The file '$(sysconfdir)/perfconfig' can be used to store a system-wide default configuration. 
+When reading or writing, the values are read from the system and user +configuration files by default, and options '--system' and '--user' +can be used to tell the command to read from or write to only that location. + Syntax ~~~~~~ diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index f04e804a9fad..c42448ed5dfe 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -13,8 +13,10 @@ #include "util/util.h" #include "util/debug.h" +static bool use_system_config, use_user_config; + static const char * const config_usage[] = { - "perf config [options]", + "perf config [] [options]", NULL }; @@ -25,6 +27,8 @@ enum actions { static struct option config_options[] = { OPT_SET_UINT('l', "list", &actions, "show current config variables", ACTION_LIST), + OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"), + OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"), OPT_END() }; @@ -42,10 +46,23 @@ static int show_config(const char *key, const char *value, int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) { int ret = 0; + char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); argc = parse_options(argc, argv, config_options, config_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (use_system_config && use_user_config) { + pr_err("Error: only one config file at a time\n"); + parse_options_usage(config_usage, config_options, "user", 0); + parse_options_usage(NULL, config_options, "system", 0); + return -1; + } + + if (use_system_config) + config_exclusive_filename = perf_etc_perfconfig(); + else if (use_user_config) + config_exclusive_filename = user_config; + switch (actions) { case ACTION_LIST: if (argc) { @@ -53,9 +70,13 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) parse_options_usage(config_usage, config_options, "l", 1); } else { ret = perf_config(show_config, NULL); - if (ret < 0) + if (ret < 0) { + const char * config_filename = 
config_exclusive_filename; + if (!config_exclusive_filename) + config_filename = user_config; pr_err("Nothing configured, " - "please check your ~/.perfconfig file\n"); + "please check your %s \n", config_filename); + } } break; default: diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 07b5d63947b1..3ca453f0c51f 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -23,6 +23,8 @@ #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" #define PERF_PAGER_ENVIRONMENT "PERF_PAGER" +extern const char *config_exclusive_filename; + typedef int (*config_fn_t)(const char *, const char *, void *); extern int perf_default_config(const char *, const char *, void *); extern int perf_config(config_fn_t fn, void *); @@ -31,6 +33,7 @@ extern u64 perf_config_u64(const char *, const char *); extern int perf_config_bool(const char *, const char *); extern int config_error_nonbool(const char *); extern const char *perf_config_dirname(const char *, const char *); +extern const char *perf_etc_perfconfig(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index d3e12e30e1d5..4e727635476e 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -26,7 +26,7 @@ static const char *config_file_name; static int config_linenr; static int config_file_eof; -static const char *config_exclusive_filename; +const char *config_exclusive_filename; static int get_next_char(void) { @@ -434,7 +434,7 @@ static int perf_config_from_file(config_fn_t fn, const char *filename, void *dat return ret; } -static const char *perf_etc_perfconfig(void) +const char *perf_etc_perfconfig(void) { static const char *system_wide; if (!system_wide) -- cgit v1.2.3 From e7ee404757609067c8f261d90251f1e96459c535 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 5 Feb 2016 14:01:27 +0000 Subject: perf symbols: Fix symbols searching for module in buildid-cache Before 
this patch, if a sample is triggered inside a module not in /lib/modules/`uname -r`/, even if the module is in buildid-cache, 'perf report' will still be unable to find the correct symbol. For example: # rm -rf ~/.debug/ # perf buildid-cache -a ./mymodule.ko # perf probe -m ./mymodule.ko -a get_mymodule_val Added new event: probe:get_mymodule_val (on get_mymodule_val in mymodule) You can now use it in all perf tools, such as: perf record -e probe:get_mymodule_val -aR sleep 1 # perf record -e probe:get_mymodule_val cat /proc/mymodule mymodule:3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.011 MB perf.data (1 samples) ] # perf report --stdio [SNIP] # # Overhead Command Shared Object Symbol # ........ ....... ................ ...................... # 100.00% cat [mymodule] [k] 0x0000000000000001 # perf report -vvvv --stdio dso__load_sym: adjusting symbol: st_value: 0 sh_addr: 0 sh_offset: 0x70 symbol__new: get_mymodule_val 0x70-0x8a [SNIP] This is caused by dso__load() -> dso__load_sym(). In dso__load(), kmod is true only when its file is found in some well know directories. All files loaded from buildid-cache are treated as user programs. Following dso__load_sym() set map->pgoff incorrectly. This patch gives kernel modules in buildid-cache a chance to adjust value of kmod. After dso__load() get the type of symbols, if it is buildid, check the last 3 chars of original filename against '.ko', and adjust the value of kmod if the file is a kernel module. 
Signed-off-by: Wang Nan Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1454680939-24963-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/build-id.h | 1 + tools/perf/util/symbol.c | 4 ++++ 3 files changed, 49 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index b28100ee1732..f1479eeef7da 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -166,6 +166,50 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return build_id__filename(build_id_hex, bf, size); } +bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) +{ + char *id_name, *ch; + struct stat sb; + + id_name = dso__build_id_filename(dso, bf, size); + if (!id_name) + goto err; + if (access(id_name, F_OK)) + goto err; + if (lstat(id_name, &sb) == -1) + goto err; + if ((size_t)sb.st_size > size - 1) + goto err; + if (readlink(id_name, bf, size - 1) < 0) + goto err; + + bf[sb.st_size] = '\0'; + + /* + * link should be: + * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92 + */ + ch = strrchr(bf, '/'); + if (!ch) + goto err; + if (ch - 3 < bf) + goto err; + + return strncmp(".ko", ch - 3, 3) == 0; +err: + /* + * If dso__build_id_filename work, get id_name again, + * because id_name points to bf and is broken. + */ + if (id_name) + id_name = dso__build_id_filename(dso, bf, size); + pr_err("Invalid build id: %s\n", id_name ? : + dso->long_name ? : + dso->short_name ? 
: + "[unknown]"); + return false; +} + #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 27a14a8a945b..64af3e20610d 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -16,6 +16,7 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); +bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 90cedfa30e43..e7588dc91518 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1529,6 +1529,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (!runtime_ss && syms_ss) runtime_ss = syms_ss; + if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + if (dso__build_id_is_kmod(dso, name, PATH_MAX)) + kmod = true; + if (syms_ss) ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod); else -- cgit v1.2.3 From 37b4e2020a5f4dbecf22ee3efe92de6dbea1c5f0 Mon Sep 17 00:00:00 2001 From: Zubair Lutfullah Kakakhel Date: Tue, 9 Feb 2016 13:33:38 +0000 Subject: perf build: Add EXTRA_LDFLAGS option to makefile To compile for little-endian systems, you need to pass -EL to CC and LD. EXTRA_CFLAGS works to pass -EL to CC. Add EXTRA_LDFLAGS to pass -EL to LD. 
Signed-off-by: Zubair Lutfullah Kakakhel Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455024818-15842-1-git-send-email-Zubair.Kakakhel@imgtec.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d404117810a7..4a4fad4182f5 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -139,6 +139,8 @@ $(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) $(call allow-override,LD,$(CROSS_COMPILE)ld) +LD += $(EXTRA_LDFLAGS) + PKG_CONFIG = $(CROSS_COMPILE)pkg-config RM = rm -f -- cgit v1.2.3 From b416e204f88dd91d9e99f6deee3d57fbc90aee40 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Tue, 9 Feb 2016 20:53:10 +0900 Subject: perf python scripting: Append examples to err msg about audit-libs-python To print syscall names, the audit-libs-python package is required.. If not installed, it prints this error string: # perf script syscall-counts Install the audit-libs-python package to get syscall names. But the package name is different in Ubuntu, mention that in the error message, similar to a error message of util/trace-event-scripting.c: # perf script syscall-counts Install the audit-libs-python package to get syscall names. For example: # apt-get install python-audit (Ubuntu) # yum install audit-libs-python (Fedora) etc. 
Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1455018790-13425-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index 15c8400240fd..1d95009592eb 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -71,7 +71,10 @@ try: except: if not audit_package_warned: audit_package_warned = True - print "Install the audit-libs-python package to get syscall names" + print "Install the audit-libs-python package to get syscall names.\n" \ + "For example:\n # apt-get install python-audit (Ubuntu)" \ + "\n # yum install audit-libs-python (Fedora)" \ + "\n etc.\n" def syscall_name(id): try: -- cgit v1.2.3 From 37d9bb580aa73c171c51fb93edf67a902bcb186f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Feb 2016 11:27:51 -0300 Subject: perf tools: Add comment explaining the repsep_snprintf function Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-4j67nvlfwbnkg85b969ewnkr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index de620f7f40f4..8b54ede7ec1f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -28,7 +28,15 @@ int sort__has_socket = 0; int sort__has_thread = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; - +/* + * Replaces all occurrences of a char used with the: + * + * -t, --field-separator + * + * option, that uses a special separator character and don't pad 
with spaces, + * replacing all occurrences of this separator in symbol names (and other + * output) with a '.' character, that thus it's the only non valid separator. +*/ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) { int n; -- cgit v1.2.3 From 89fee70943232d73e3cc328634e0da253b6de9b5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 11 Feb 2016 17:14:13 -0300 Subject: perf hists: Do column alignment on the format iterator We were doing column alignment in the format function for each cell, returning a string padded with spaces so that when the next column is printed the cursor is at its column alignment. This ends up needlessly printing trailing spaces, do it at the format iterator, that is where we know if it is needed, i.e. if there are more columns to be printed. This eliminates the need for trimming lines when doing a dump using 'P' in the TUI browser and also produces far saner results with things like piping 'perf report' to 'less'. Right now only the formatters for sym->name and the 'locked' column (perf mem report), that are the ones that end up at the end of lines in the default 'perf report', 'perf top' and 'perf mem report' tools, the others will be done in a subsequent patch. In the end the 'width' parameter for the formatters now means, in 'printf' terms, the 'precision', where before it was the field 'width'. 
Reported-by: Dave Jones Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-s7iwl2gj23w92l6tibnrcqzr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 27 ++++++++++++++++++--------- tools/perf/ui/stdio/hist.c | 1 + tools/perf/util/hist.c | 21 +++++++++++++++++++++ tools/perf/util/hist.h | 5 +++++ tools/perf/util/sort.c | 13 +++---------- 5 files changed, 48 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a5a5390476ac..1819771243f9 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1061,7 +1061,6 @@ static int hist_browser__show_entry(struct hist_browser *browser, struct hist_entry *entry, unsigned short row) { - char s[256]; int printed = 0; int width = browser->b.width; char folded_sign = ' '; @@ -1086,16 +1085,18 @@ static int hist_browser__show_entry(struct hist_browser *browser, .folded_sign = folded_sign, .current_entry = current_entry, }; - struct perf_hpp hpp = { - .buf = s, - .size = sizeof(s), - .ptr = &arg, - }; int column = 0; hist_browser__gotorc(browser, row, 0); hists__for_each_format(browser->hists, fmt) { + char s[2048]; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, + }; + if (perf_hpp__should_skip(fmt, entry->hists) || column++ < browser->b.horiz_scroll) continue; @@ -1120,11 +1121,18 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (fmt->color) { - width -= fmt->color(fmt, &hpp, entry); + int ret = fmt->color(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + /* + * fmt->color() already used ui_browser to + * print the non alignment bits, skip it (+ret): + */ + ui_browser__printf(&browser->b, "%s", s + ret); } else { - width -= fmt->entry(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry)); ui_browser__printf(&browser->b, "%s", s); } + width -= 
hpp.buf - s; } /* The scroll bar isn't being used */ @@ -1452,9 +1460,10 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, first = false; ret = fmt->entry(fmt, &hpp, he); + ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret); advance_hpp(&hpp, ret); } - printed += fprintf(fp, "%s\n", rtrim(s)); + printed += fprintf(fp, "%s\n", s); if (folded_sign == '-') printed += hist_browser__fprintf_callchain(browser, he, fp); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 1a6e8f7f38c4..87b022ff03d8 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -403,6 +403,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) else ret = fmt->entry(fmt, hpp, he); + ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret); advance_hpp(hpp, ret); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 12f2d794dc28..561e9473a915 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1014,6 +1014,27 @@ void hist_entry__delete(struct hist_entry *he) free(he); } +/* + * If this is not the last column, then we need to pad it according to the + * pre-calculated max lenght for this column, otherwise don't bother adding + * spaces because that would break viewing this with, for instance, 'less', + * that would show tons of trailing spaces when a long C++ demangled method + * names is sampled. 
+*/ +int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_fmt *fmt, int printed) +{ + if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) { + const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists)); + if (printed < width) { + advance_hpp(hpp, printed); + printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " "); + } + } + + return printed; +} + /* * collapse the histogram */ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 1c7544a8fe1a..840b6d6aa44f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -122,11 +122,16 @@ struct hist_entry *__hists__add_entry(struct hists *hists, int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg); +struct perf_hpp; +struct perf_hpp_fmt; + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); +int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_fmt *fmt, int printed); void hist_entry__delete(struct hist_entry *he); void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 8b54ede7ec1f..de715756f281 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -255,10 +255,8 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name); ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", ip - map->unmap_ip(map, sym->start)); - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", - width - ret, ""); } else { - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + ret += repsep_snprintf(bf + ret, size - ret, 
"%.*s", width - ret, sym->name); } @@ -266,14 +264,9 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, size_t len = BITS_PER_LONG / 4; ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, ip); - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", - width - ret, ""); } - if (ret > width) - bf[width] = '\0'; - - return width; + return ret; } static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, @@ -819,7 +812,7 @@ static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf, else out = "No"; - return repsep_snprintf(bf, size, "%-*s", width, out); + return repsep_snprintf(bf, size, "%.*s", width, out); } static int64_t -- cgit v1.2.3 From a8adfceb389a0045e06af22517fa3326797b160a Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 12 Feb 2016 16:31:23 -0300 Subject: perf tools: Unlink entries from terms list We were just freeing them, better unlink and init its nodes to catch bugs faster if we keep dangling references to them. Signed-off-by: Wang Nan Acked-by: Jiri Olsa Cc: Alexei Starovoitov Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com [ Spun off from another patch, use list_del_init() instead of list_del() ] Link: http://lkml.kernel.org/r/1454680939-24963-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 813d9b272c81..133c8d28f36c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2072,8 +2072,10 @@ void parse_events__free_terms(struct list_head *terms) { struct parse_events_term *term, *h; - list_for_each_entry_safe(term, h, terms, list) + list_for_each_entry_safe(term, h, terms, list) { + list_del_init(&term->list); free(term); + } } void parse_events_evlist_error(struct parse_events_evlist *data, -- cgit v1.2.3 
From fc0a2c1d59beac70b8738f4ce14431b798837374 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Feb 2016 16:43:02 -0300 Subject: perf tools: Introduce parse_events_terms__purge() Purges 'struct parse_events_term' entries from a list_head. Some users need this because they don't allocate space for the list head, it may be on the stack or embedded into some other struct. Next patch will convert users that need just purging and then the parse_events__free_terms() routine will free the list head as well, finally being renamed to parse_events_terms__delete(). Acked-by: Jiri Olsa Cc: Alexei Starovoitov Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/n/tip-4w3zl4ifcl0ed0j4bu3tckqp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 7 ++++++- tools/perf/util/parse-events.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 133c8d28f36c..668afdccfcca 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2068,7 +2068,7 @@ int parse_events_term__clone(struct parse_events_term **new, term->err_term, term->err_val); } -void parse_events__free_terms(struct list_head *terms) +void parse_events_terms__purge(struct list_head *terms) { struct parse_events_term *term, *h; @@ -2078,6 +2078,11 @@ void parse_events__free_terms(struct list_head *terms) } } +void parse_events__free_terms(struct list_head *terms) +{ + parse_events_terms__purge(terms); +} + void parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str) { diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index f1a6db107241..f90a04ccab39 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -116,6 +116,7 @@ int parse_events_term__sym_hw(struct parse_events_term **term, int
parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term); void parse_events__free_terms(struct list_head *terms); +void parse_events_terms__purge(struct list_head *terms); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); -- cgit v1.2.3 From 682dc24c2a0f13d5a16ac8f4303671eb8f11519f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Feb 2016 16:48:00 -0300 Subject: perf tools: Use perf_event_terms__purge() for non-malloced terms In these two cases, a 'perf test' entry and in the PMU code the list_head is on the stack, so we can't use perf_event__free_terms() (soon to be renamed to perf_event_terms__delete()), because it will free the list_head as well. Acked-by: Jiri Olsa Cc: Alexei Starovoitov Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/n/tip-i956ryjhz97gnnqe8iqe7m7s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 2 +- tools/perf/util/pmu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index abe8849d1d70..6648274f4601 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1666,7 +1666,7 @@ static int test_term(struct terms_test *t) } ret = t->check(&terms); - parse_events__free_terms(&terms); + parse_events_terms__purge(&terms); return ret; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 41a9c875e492..cf59fbaee491 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -354,7 +354,7 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, list_for_each_entry(term, &alias->terms, list) { ret = parse_events_term__clone(&cloned, term); if (ret) { - 
parse_events__free_terms(&list); + parse_events_terms__purge(&list); return ret; } list_add_tail(&cloned->list, &list); -- cgit v1.2.3 From d20a5f2b277b2f46548fb60f2bb95ad9a601d3fe Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 12 Feb 2016 17:01:17 -0300 Subject: perf tools: Free the terms list_head in parse_events__free_terms() Fixing a leak, since code calling parse_events__free_terms() expect it to free the list_head too. Signed-off-by: Wang Nan Acked-by: Jiri Olsa Cc: Alexei Starovoitov Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com [ Spun off from another patch ] Link: http://lkml.kernel.org/r/1454680939-24963-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 668afdccfcca..d1b49ec7ae46 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2081,6 +2081,7 @@ void parse_events_terms__purge(struct list_head *terms) void parse_events__free_terms(struct list_head *terms) { parse_events_terms__purge(terms); + free(terms); } void parse_events_evlist_error(struct parse_events_evlist *data, -- cgit v1.2.3 From 2146afc6b45b3f1b967f5605d4e6d97dd9e31061 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Feb 2016 17:09:17 -0300 Subject: perf tools: Rename parse_events__free_terms() to parse_events_terms__delete() To follow convention used in other tools/perf/ areas. Also remove the need to check if it is NULL before calling the destructor, again, to follow convention that goes back to free(). 
Cc: Alexei Starovoitov Cc: He Kuang cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/n/tip-w6owu7rb8a46gvunlinxaqwx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/util/parse-events.c | 7 ++++--- tools/perf/util/parse-events.h | 2 +- tools/perf/util/parse-events.y | 8 ++++---- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 6f7d453b0e32..a3395179c9ee 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -89,7 +89,7 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats, *config = attr.config; out_free: - parse_events__free_terms(terms); + parse_events_terms__delete(terms); return err; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d1b49ec7ae46..e5583fd4e7bd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1386,8 +1386,7 @@ int parse_events_terms(struct list_head *terms, const char *str) return 0; } - if (data.terms) - parse_events__free_terms(data.terms); + parse_events_terms__delete(data.terms); return ret; } @@ -2078,8 +2077,10 @@ void parse_events_terms__purge(struct list_head *terms) } } -void parse_events__free_terms(struct list_head *terms) +void parse_events_terms__delete(struct list_head *terms) { + if (!terms) + return; parse_events_terms__purge(terms); free(terms); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index f90a04ccab39..53628bf3da67 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -115,7 +115,7 @@ int parse_events_term__sym_hw(struct parse_events_term **term, char *config, unsigned idx); int parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term); -void 
parse_events__free_terms(struct list_head *terms); +void parse_events_terms__delete(struct list_head *terms); void parse_events_terms__purge(struct list_head *terms); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index ad379968d4c1..c0eac88ef474 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -218,7 +218,7 @@ PE_NAME '/' event_config '/' ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, $1, $3)); - parse_events__free_terms($3); + parse_events_terms__delete($3); $$ = list; } | @@ -246,7 +246,7 @@ PE_KERNEL_PMU_EVENT sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events__free_terms(head); + parse_events_terms__delete(head); $$ = list; } | @@ -266,7 +266,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events__free_terms(head); + parse_events_terms__delete(head); $$ = list; } @@ -285,7 +285,7 @@ value_sym '/' event_config '/' ALLOC_LIST(list); ABORT_ON(parse_events_add_numeric(data, list, type, config, $3)); - parse_events__free_terms($3); + parse_events_terms__delete($3); $$ = list; } | -- cgit v1.2.3 From 5141d7350d3d8a12f1f76b1015b937f14d2b97e2 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 5 Feb 2016 14:01:30 +0000 Subject: perf data: Fix releasing event_class A new patch in libbabeltrace [1] reveals an object leak problem in 'perf data' CTF support: perf code never releases the event_class which is allocated in add_event() and stored in evsel's private field. If libbabeltrace has the above patch applied, leaking event_class prevents the writer from being destroyed and flushing metadata.
For example: $ perf record ls perf.data [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data (12 samples) ] $ perf data convert --to-ctf ./out.ctf [ perf data convert: Converted 'perf.data' into CTF data './out.ctf' ] [ perf data convert: Converted and wrote 0.000 MB (12 samples) ] $ cat ./out.ctf/metadata $ ls -l ./out.ctf/metadata -rw-r----- 1 w00229757 mm 0 Jan 27 10:49 ./out.ctf/metadata The correct result should be: ... $ cat ./out.ctf/metadata /* CTF 1.8 */ trace { [SNIP] $ ls -l ./out.ctf/metadata -rw-r----- 1 w00229757 mm 2446 Jan 27 10:52 ./out.ctf/metadata The full story is: Patch [1] of babeltrace redesigns its reference counting scheme. In that patch: * writer <- trace (bt_ctf_writer_create) * trace <- stream_class (bt_ctf_trace_add_stream_class) * stream_class <- event_class (bt_ctf_stream_class_add_event_class) ('<-' means 'is a parent of') Holding of event_class causes reference count of corresponding 'writer' to increase through parent chain. Perf expects that 'writer' is released (so metadata is flushed) through bt_ctf_writer_put() in ctf_writer__cleanup(). However, since it never releases event_class, the reference of 'writer' won't be dropped, so bt_ctf_writer_put() won't lead to the release of writer. Before this CTF patch, !(writer <- trace). Even with event_class leaking, the writer ends up being released. 
[1] https://github.com/efficios/babeltrace/commit/e6a8e8e4744633807083a077ff9f101eb97d9801 Signed-off-by: Wang Nan Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1454680939-24963-6-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 34cd1e4039d3..b722e57d5a87 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -858,6 +858,23 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session) return 0; } +static void cleanup_events(struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + struct evsel_priv *priv; + + priv = evsel->priv; + bt_ctf_event_class_put(priv->event_class); + zfree(&evsel->priv); + } + + perf_evlist__delete(evlist); + session->evlist = NULL; +} + static int setup_streams(struct ctf_writer *cw, struct perf_session *session) { struct ctf_stream **stream; @@ -1171,6 +1188,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force) (double) c.events_size / 1024.0 / 1024.0, c.events_count); + cleanup_events(session); perf_session__delete(session); ctf_writer__cleanup(cw); -- cgit v1.2.3 From 1ad826bad5bd0b6ccfb203f78c70302b764df0be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Feb 2016 18:30:01 -0300 Subject: perf tests: Fix build on older systems where 'signal' is reserved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixing the following problems, for instance, 
on RHEL6.7: CC /tmp/build/perf/tests/bp_signal.o cc1: warnings being treated as errors tests/bp_signal.c: In function ‘__event’: tests/bp_signal.c:106: error: declaration of ‘signal’ shadows a global declaration /usr/include/signal.h:101: error: shadowed declaration is here tests/bp_signal.c: In function ‘bp_event’: tests/bp_signal.c:144: error: declaration of ‘signal’ shadows a global declaration /usr/include/signal.h:101: error: shadowed declaration is here tests/bp_signal.c: In function ‘wp_event’: tests/bp_signal.c:149: error: declaration of ‘signal’ shadows a global declaration /usr/include/signal.h:101: error: shadowed declaration is here mv: cannot stat `/tmp/build/perf/tests/.bp_signal.o.tmp': No such file or directory make[3]: *** [/tmp/build/perf/tests/bp_signal.o] Error 1 make[2]: *** [tests] Error 2 make[1]: *** [/tmp/build/perf/perf-in.o] Error 2 make[1]: *** Waiting for unfinished jobs.... Reported-by: Vinson Lee Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: He Kuang Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: Will Deacon Cc: pi3orama@163.com Fixes: 8fd34e1cce18 ("perf test: Improve bp_signal") Link: http://lkml.kernel.org/n/tip-wlpx6tik1b0jirlkw64bv400@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bp_signal.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index 1d1bb489b4e8..e7664fe3bd33 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -103,7 +103,7 @@ static void sig_handler(int signum __maybe_unused, } } -static int __event(bool is_x, void *addr, int signal) +static int __event(bool is_x, void *addr, int sig) { struct perf_event_attr pe; int fd; @@ -133,7 +133,7 @@ static int __event(bool is_x, void *addr, int signal) } fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); - fcntl(fd, F_SETSIG, signal); + fcntl(fd, 
F_SETSIG, sig); fcntl(fd, F_SETOWN, getpid()); ioctl(fd, PERF_EVENT_IOC_RESET, 0); @@ -141,14 +141,14 @@ static int __event(bool is_x, void *addr, int signal) return fd; } -static int bp_event(void *addr, int signal) +static int bp_event(void *addr, int sig) { - return __event(true, addr, signal); + return __event(true, addr, sig); } -static int wp_event(void *addr, int signal) +static int wp_event(void *addr, int sig) { - return __event(false, addr, signal); + return __event(false, addr, sig); } static long long bp_count(int fd) -- cgit v1.2.3 From d646ae0a73deb0d80792a6a9c0757317ad8049c5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 16 Feb 2016 07:37:41 +0100 Subject: perf jvmti: Add check for java alternatives cmd in Makefile This patch modifies the jvmti makefile to check if the /usr/sbin/update-java-alternatives utility is present. If so, then use it, if not then use the alternatives command. This helps handle the difference between Ubuntu and Fedora Linux distributions. Signed-off-by: Stephane Eranian Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455604661-9357-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile index 5968f8332a28..0277a64b391b 100644 --- a/tools/perf/jvmti/Makefile +++ b/tools/perf/jvmti/Makefile @@ -35,8 +35,12 @@ SOLIBEXT=so # The following works at least on fedora 23, you may need the next # line for other distros.
+ifeq (,$(wildcard /usr/sbin/update-java-alternatives)) JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') -#JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +else +JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +endif + # -lrt required in 32-bit mode for clock_gettime() LIBS=-lelf -lrt INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux -- cgit v1.2.3 From 975f14fa8f2996604f248552eee4403def34bf5e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 14 Feb 2016 17:03:42 +0100 Subject: tools lib api: Add debug output support Adding support for warning/info/debug output within libapi code. Adding following macros: pr_warning(fmt, ...) pr_info(fmt, ...) pr_debug(fmt, ...) Also adding libapi_set_print function to set above functions. This will be used in perf to set standard debug handlers for libapi. Adding 2 header files: debug.h - to be used outside libapi, contains libapi_set_print interface debug-internal.h - to be used within libapi, contains pr_warning/pr_info/pr_debug definitions Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455465826-8426-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Build | 1 + tools/lib/api/Makefile | 1 + tools/lib/api/debug-internal.h | 20 ++++++++++++++++++++ tools/lib/api/debug.c | 28 ++++++++++++++++++++++++++++ tools/lib/api/debug.h | 10 ++++++++++ 5 files changed, 60 insertions(+) create mode 100644 tools/lib/api/debug-internal.h create mode 100644 tools/lib/api/debug.c create mode 100644 tools/lib/api/debug.h (limited to 'tools') diff --git a/tools/lib/api/Build b/tools/lib/api/Build index e8b8a23b9bf4..954c644f7ad9 100644 --- a/tools/lib/api/Build +++ b/tools/lib/api/Build @@ -1,3 +1,4 @@ libapi-y += fd/ libapi-y += fs/ libapi-y += cpu.o +libapi-y += debug.o diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile 
index d85904dc9b38..bbc82c614bee 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -18,6 +18,7 @@ LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +CFLAGS += -I$(srctree)/tools/lib/api RM = rm -f diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h new file mode 100644 index 000000000000..188f7880eafe --- /dev/null +++ b/tools/lib/api/debug-internal.h @@ -0,0 +1,20 @@ +#ifndef __API_DEBUG_INTERNAL_H__ +#define __API_DEBUG_INTERNAL_H__ + +#include "debug.h" + +#define __pr(func, fmt, ...) \ +do { \ + if ((func)) \ + (func)("libapi: " fmt, ##__VA_ARGS__); \ +} while (0) + +extern libapi_print_fn_t __pr_warning; +extern libapi_print_fn_t __pr_info; +extern libapi_print_fn_t __pr_debug; + +#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) + +#endif /* __API_DEBUG_INTERNAL_H__ */ diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c new file mode 100644 index 000000000000..5fa5cf500a1f --- /dev/null +++ b/tools/lib/api/debug.c @@ -0,0 +1,28 @@ +#include +#include +#include "debug.h" +#include "debug-internal.h" + +static int __base_pr(const char *format, ...) 
+{ + va_list args; + int err; + + va_start(args, format); + err = vfprintf(stderr, format, args); + va_end(args); + return err; +} + +libapi_print_fn_t __pr_warning = __base_pr; +libapi_print_fn_t __pr_info = __base_pr; +libapi_print_fn_t __pr_debug; + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug) +{ + __pr_warning = warn; + __pr_info = info; + __pr_debug = debug; +} diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h new file mode 100644 index 000000000000..a0872f68fc56 --- /dev/null +++ b/tools/lib/api/debug.h @@ -0,0 +1,10 @@ +#ifndef __API_DEBUG_H__ +#define __API_DEBUG_H__ + +typedef int (*libapi_print_fn_t)(const char *, ...); + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug); + +#endif /* __API_DEBUG_H__ */ -- cgit v1.2.3 From 607bfbd7ffc60156ae0831c917497dc91a57dd8d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 14 Feb 2016 17:03:43 +0100 Subject: tools lib api fs: Adopt filename__read_str from perf We already moved similar functions in here, also it'll be useful for sysfs__read_str addition in following patch. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455465826-8426-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 51 +++++++++++++++++++++++++++++++++++++++++++ tools/lib/api/fs/fs.h | 2 ++ tools/perf/util/trace-event.c | 1 + tools/perf/util/util.c | 48 ---------------------------------------- tools/perf/util/util.h | 1 - 5 files changed, 54 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 459599d1b6c4..2cbf6773ca5d 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -13,6 +13,7 @@ #include #include "fs.h" +#include "debug-internal.h" #define _STR(x) #x #define STR(x) _STR(x) @@ -300,6 +301,56 @@ int filename__read_ull(const char *filename, unsigned long long *value) return err; } +#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ + +int filename__read_str(const char *filename, char **buf, size_t *sizep) +{ + size_t size = 0, alloc_size = 0; + void *bf = NULL, *nbf; + int fd, n, err = 0; + char sbuf[STRERR_BUFSIZE]; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -errno; + + do { + if (size == alloc_size) { + alloc_size += BUFSIZ; + nbf = realloc(bf, alloc_size); + if (!nbf) { + err = -ENOMEM; + break; + } + + bf = nbf; + } + + n = read(fd, bf + size, alloc_size - size); + if (n < 0) { + if (size) { + pr_warning("read failed %d: %s\n", errno, + strerror_r(errno, sbuf, sizeof(sbuf))); + err = 0; + } else + err = -errno; + + break; + } + + size += n; + } while (n > 0); + + if (!err) { + *sizep = size; + *buf = bf; + } else + free(bf); + + close(fd); + return err; +} + int sysfs__read_ull(const char *entry, unsigned long long *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index d024a7f682f6..858922b61141 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -2,6 +2,7 @@ #define __API_FS__ #include 
+#include /* * On most systems would have given us this, but not on some systems @@ -26,6 +27,7 @@ FS(tracefs) int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); +int filename__read_str(const char *filename, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 802bb868d446..8ae051e0ec79 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "trace-event.h" #include "machine.h" #include "util.h" diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b9e2843cfbe7..35b20dd454de 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -507,54 +507,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) return ret; } -int filename__read_str(const char *filename, char **buf, size_t *sizep) -{ - size_t size = 0, alloc_size = 0; - void *bf = NULL, *nbf; - int fd, n, err = 0; - char sbuf[STRERR_BUFSIZE]; - - fd = open(filename, O_RDONLY); - if (fd < 0) - return -errno; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (!nbf) { - err = -ENOMEM; - break; - } - - bf = nbf; - } - - n = read(fd, bf + size, alloc_size - size); - if (n < 0) { - if (size) { - pr_warning("read failed %d: %s\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); - err = 0; - } else - err = -errno; - - break; - } - - size += n; - } while (n > 0); - - if (!err) { - *sizep = size; - *buf = bf; - } else - free(bf); - - close(fd); - return err; -} - const char *get_filename_for_perf_kvm(void) { const char *filename; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index a8615816a00d..3dd04089e8be 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -303,7 +303,6 
@@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool unwind_inlines); void free_srcline(char *srcline); -int filename__read_str(const char *filename, char **buf, size_t *sizep); int perf_event_paranoid(void); void mem_bswap_64(void *src, int byte_size); -- cgit v1.2.3 From 51c0396c600f49de9c3ff8f6fd83055de3709c3d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 14 Feb 2016 17:03:44 +0100 Subject: tools lib api fs: Add sysfs__read_str function Adding sysfs__read_str function to ease up reading string files from sysfs. New interface is: int sysfs__read_str(const char *entry, char **buf, size_t *sizep); Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455465826-8426-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 13 +++++++++++++ tools/lib/api/fs/fs.h | 1 + 2 files changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 2cbf6773ca5d..ef78c22ff44d 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -377,6 +377,19 @@ int sysfs__read_int(const char *entry, int *value) return filename__read_int(path, value); } +int sysfs__read_str(const char *entry, char **buf, size_t *sizep) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + + return filename__read_str(path, buf, sizep); +} + int sysctl__read_int(const char *sysctl, int *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 858922b61141..9f6598098dc5 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -32,4 +32,5 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long 
long *value); +int sysfs__read_str(const char *entry, char **buf, size_t *sizep); #endif /* __API_FS__ */ -- cgit v1.2.3 From bedbdd4297224efcd7d668198e32fab14b76b98b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Feb 2016 11:48:38 -0300 Subject: perf debug: Rename __eprintf(va_list args) to veprintf Adhering to the naming convention used when va_args is in a printf like function, e.g. stdio.h. Cc: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-b5l3wt77ct28dcnriguxtvn6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 86d9c7302598..d6c8d2b2cd70 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -22,7 +22,7 @@ int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; -static int _eprintf(int level, int var, const char *fmt, va_list args) +int veprintf(int level, int var, const char *fmt, va_list args) { int ret = 0; @@ -36,24 +36,19 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) return ret; } -int veprintf(int level, int var, const char *fmt, va_list args) -{ - return _eprintf(level, var, fmt, args); -} - int eprintf(int level, int var, const char *fmt, ...) { va_list args; int ret; va_start(args, fmt); - ret = _eprintf(level, var, fmt, args); + ret = veprintf(level, var, fmt, args); va_end(args); return ret; } -static int __eprintf_time(u64 t, const char *fmt, va_list args) +static int veprintf_time(u64 t, const char *fmt, va_list args) { int ret = 0; u64 secs, usecs, nsecs = t; @@ -75,7 +70,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) 
if (var >= level) { va_start(args, fmt); - ret = __eprintf_time(t, fmt, args); + ret = veprintf_time(t, fmt, args); va_end(args); } @@ -91,7 +86,7 @@ void pr_stat(const char *fmt, ...) va_list args; va_start(args, fmt); - _eprintf(1, verbose, fmt, args); + veprintf(1, verbose, fmt, args); va_end(args); eprintf(1, verbose, "\n"); } -- cgit v1.2.3 From dd629cc0975349c99d5483402bca1ef16313c209 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 14 Feb 2016 17:03:45 +0100 Subject: perf tools: Initialize libapi debug output Setting libapi debug output functions to use perf functions. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455465826-8426-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 2 ++ tools/perf/util/debug.c | 21 +++++++++++++++++++++ tools/perf/util/debug.h | 1 + 3 files changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/perf/perf.c b/tools/perf/perf.c index a929618b8eb6..144047c396f0 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -613,6 +613,8 @@ int main(int argc, const char **argv) */ pthread__block_sigwinch(); + perf_debug_setup(); + while (1) { static int done_help; int was_alias = run_argv(&argc, &argv); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index d6c8d2b2cd70..ff7e86ad1b06 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "cache.h" #include "color.h" @@ -187,3 +188,23 @@ int perf_debug_option(const char *str) free(s); return 0; } + +#define DEBUG_WRAPPER(__n, __l) \ +static int pr_ ## __n ## _wrapper(const char *fmt, ...) 
\ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + ret = veprintf(__l, verbose, fmt, args); \ + va_end(args); \ + return ret; \ +} + +DEBUG_WRAPPER(warning, 0); +DEBUG_WRAPPER(debug, 1); + +void perf_debug_setup(void) +{ + libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 8b9a088c32ab..14bafda79eda 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -53,5 +53,6 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__( int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +void perf_debug_setup(void); #endif /* __PERF_DEBUG_H */ -- cgit v1.2.3 From 720e98b5faf10cfd12b7821dbdcc41c9747bd13e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Feb 2016 16:01:43 +0100 Subject: perf tools: Add perf data cache feature Storing CPU cache details under perf data. It's stored as new HEADER_CACHE feature and it's displayed under header info with -I option: $ perf report --header-only -I ... # CPU cache info: # L1 Data 32K [0-1] # L1 Instruction 32K [0-1] # L1 Data 32K [2-3] # L1 Instruction 32K [2-3] # L2 Unified 256K [0-1] # L2 Unified 256K [2-3] # L3 Unified 4096K [0-3] ... All distinct caches are stored/displayed. 
Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160216150143.GA7119@krava.brq.redhat.com [ Fixed leak on process_caches(), s/cache_level/cpu_cache_level/g ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 13 +++ tools/perf/util/env.h | 15 +++ tools/perf/util/header.c | 270 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 1 + 4 files changed, 299 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 7dd5939dea2e..49a11d9d8b8f 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -6,6 +6,8 @@ struct perf_env perf_env; void perf_env__exit(struct perf_env *env) { + int i; + zfree(&env->hostname); zfree(&env->os_release); zfree(&env->version); @@ -19,6 +21,10 @@ void perf_env__exit(struct perf_env *env) zfree(&env->numa_nodes); zfree(&env->pmu_mappings); zfree(&env->cpu); + + for (i = 0; i < env->caches_cnt; i++) + cpu_cache_level__free(&env->caches[i]); + zfree(&env->caches); } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) @@ -75,3 +81,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) env->nr_cpus_avail = nr_cpus; return 0; } + +void cpu_cache_level__free(struct cpu_cache_level *cache) +{ + free(cache->type); + free(cache->map); + free(cache->size); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 0132b9557c02..56cffb60a0b4 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -1,11 +1,23 @@ #ifndef __PERF_ENV_H #define __PERF_ENV_H +#include + struct cpu_topology_map { int socket_id; int core_id; }; +struct cpu_cache_level { + u32 level; + u32 line_size; + u32 sets; + u32 ways; + char *type; + char *size; + char *map; +}; + struct perf_env { char *hostname; char *os_release; @@ -31,6 +43,8 @@ struct perf_env { char *numa_nodes; char *pmu_mappings; struct cpu_topology_map *cpu; + 
struct cpu_cache_level *caches; + int caches_cnt; }; extern struct perf_env perf_env; @@ -41,4 +55,5 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); +void cpu_cache_level__free(struct cpu_cache_level *cache); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index f50b7235ecb6..73e38e472ecd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -23,6 +23,8 @@ #include "strbuf.h" #include "build-id.h" #include "data.h" +#include +#include "asm/bug.h" /* * magic2 = "PERFILE2" @@ -868,6 +870,199 @@ static int write_auxtrace(int fd, struct perf_header *h, return err; } +static int cpu_cache_level__sort(const void *a, const void *b) +{ + struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; + struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b; + + return cache_a->level - cache_b->level; +} + +static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b) +{ + if (a->level != b->level) + return false; + + if (a->line_size != b->line_size) + return false; + + if (a->sets != b->sets) + return false; + + if (a->ways != b->ways) + return false; + + if (strcmp(a->type, b->type)) + return false; + + if (strcmp(a->size, b->size)) + return false; + + if (strcmp(a->map, b->map)) + return false; + + return true; +} + +static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level) +{ + char path[PATH_MAX], file[PATH_MAX]; + struct stat st; + size_t len; + + scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level); + scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path); + + if (stat(file, &st)) + return 1; + + scnprintf(file, PATH_MAX, "%s/level", path); + if (sysfs__read_int(file, (int *) &cache->level)) + return -1; + + scnprintf(file, PATH_MAX, "%s/coherency_line_size", path); + if (sysfs__read_int(file, (int *) &cache->line_size)) + 
return -1; + + scnprintf(file, PATH_MAX, "%s/number_of_sets", path); + if (sysfs__read_int(file, (int *) &cache->sets)) + return -1; + + scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path); + if (sysfs__read_int(file, (int *) &cache->ways)) + return -1; + + scnprintf(file, PATH_MAX, "%s/type", path); + if (sysfs__read_str(file, &cache->type, &len)) + return -1; + + cache->type[len] = 0; + cache->type = rtrim(cache->type); + + scnprintf(file, PATH_MAX, "%s/size", path); + if (sysfs__read_str(file, &cache->size, &len)) { + free(cache->type); + return -1; + } + + cache->size[len] = 0; + cache->size = rtrim(cache->size); + + scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); + if (sysfs__read_str(file, &cache->map, &len)) { + free(cache->size); /* ->map is not set when this read fails; release ->size instead */ + free(cache->type); + return -1; + } + + cache->map[len] = 0; + cache->map = rtrim(cache->map); + return 0; +} + +static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c) +{ + fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map); +} + +static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp) +{ + u32 i, cnt = 0; + long ncpus; + u32 nr, cpu; + u16 level; + + ncpus = sysconf(_SC_NPROCESSORS_CONF); + if (ncpus < 0) + return -1; + + nr = (u32)(ncpus & UINT_MAX); + + for (cpu = 0; cpu < nr; cpu++) { + for (level = 0; level < 10; level++) { + struct cpu_cache_level c; + int err; + + err = cpu_cache_level__read(&c, cpu, level); + if (err < 0) + return err; + + if (err == 1) + break; + + for (i = 0; i < cnt; i++) { + if (cpu_cache_level__cmp(&c, &caches[i])) + break; + } + + if (i == cnt) + caches[cnt++] = c; + else + cpu_cache_level__free(&c); + + if (WARN_ONCE(cnt == size, "way too many cpu caches..")) + goto out; + } + } + out: + *cntp = cnt; + return 0; +} + +#define MAX_CACHES 2000 + +static int write_cache(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + struct cpu_cache_level caches[MAX_CACHES]; + u32 cnt = 0, i, 
version = 1; + int ret; + + ret = build_caches(caches, MAX_CACHES, &cnt); + if (ret) + goto out; + + qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort); + + ret = do_write(fd, &version, sizeof(u32)); + if (ret < 0) + goto out; + + ret = do_write(fd, &cnt, sizeof(u32)); + if (ret < 0) + goto out; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level *c = &caches[i]; + + #define _W(v) \ + ret = do_write(fd, &c->v, sizeof(u32)); \ + if (ret < 0) \ + goto out; + + _W(level) + _W(line_size) + _W(sets) + _W(ways) + #undef _W + + #define _W(v) \ + ret = do_write_string(fd, (const char *) c->v); \ + if (ret < 0) \ + goto out; + + _W(type) + _W(size) + _W(map) + #undef _W + } + +out: + for (i = 0; i < cnt; i++) + cpu_cache_level__free(&caches[i]); + return ret; +} + static int write_stat(int fd __maybe_unused, struct perf_header *h __maybe_unused, struct perf_evlist *evlist __maybe_unused) @@ -1172,6 +1367,18 @@ static void print_stat(struct perf_header *ph __maybe_unused, fprintf(fp, "# contains stat data\n"); } +static void print_cache(struct perf_header *ph __maybe_unused, + int fd __maybe_unused, FILE *fp __maybe_unused) +{ + int i; + + fprintf(fp, "# CPU cache info:\n"); + for (i = 0; i < ph->env.caches_cnt; i++) { + fprintf(fp, "# "); + cpu_cache_level__fprintf(fp, &ph->env.caches[i]); + } +} + static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { @@ -1920,6 +2127,68 @@ static int process_auxtrace(struct perf_file_section *section, return err; } +static int process_cache(struct perf_file_section *section __maybe_unused, + struct perf_header *ph __maybe_unused, int fd __maybe_unused, + void *data __maybe_unused) +{ + struct cpu_cache_level *caches; + u32 cnt, i, version; + + if (readn(fd, &version, sizeof(version)) != sizeof(version)) + return -1; + + if (ph->needs_swap) + version = bswap_32(version); + + if (version != 1) + return -1; + + if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt)) + return -1; + + if 
(ph->needs_swap) + cnt = bswap_32(cnt); + + caches = zalloc(sizeof(*caches) * cnt); + if (!caches) + return -1; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level c; + + #define _R(v) \ + if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\ + goto out_free_caches; \ + if (ph->needs_swap) \ + c.v = bswap_32(c.v); \ + + _R(level) + _R(line_size) + _R(sets) + _R(ways) + #undef _R + + #define _R(v) \ + c.v = do_read_string(fd, ph); \ + if (!c.v) \ + goto out_free_caches; + + _R(type) + _R(size) + _R(map) + #undef _R + + caches[i] = c; + } + + ph->env.caches = caches; + ph->env.caches_cnt = cnt; + return 0; +out_free_caches: + free(caches); + return -1; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1962,6 +2231,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPP(HEADER_GROUP_DESC, group_desc), FEAT_OPP(HEADER_AUXTRACE, auxtrace), FEAT_OPA(HEADER_STAT, stat), + FEAT_OPF(HEADER_CACHE, cache), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index cff9892452ee..3d87ca823c0a 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -32,6 +32,7 @@ enum { HEADER_GROUP_DESC, HEADER_AUXTRACE, HEADER_STAT, + HEADER_CACHE, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; -- cgit v1.2.3 From 140aeadc1fb51d38130fd06a272a381f22e3070c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Jan 2016 09:06:49 -0800 Subject: perf stat: Abstract stat metrics printing Abstract the printing of shadow metrics. Instead of every metric calling fprintf directly and taking care of indentation, use two call backs: one to print metrics and another to start a new line. This will allow adding metrics to CSV mode and also using them for other purposes. The computation of padding is now done in the central callback, instead of every metric doing it manually. This makes it easier to add new metrics. 
v2: Refactor functions, printout now does more. Move shadow printing. Improve fallback callbacks. Don't use void * callback data. v3: Remove unnecessary hunk. Add typedef for new_line v4: Remove unnecessary hunk. Don't print metrics for CSV/interval mode yet. Move printout change to separate patch. v5: Fix bisect bugs. Avoid bogus frontend cycles printing. Fix indentation in different aggregation modes. v6: Delay newline handling Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1454173616-17710-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 71 ++++++++++++-- tools/perf/util/stat-shadow.c | 211 +++++++++++++++++++++++------------------- tools/perf/util/stat.h | 15 ++- 3 files changed, 194 insertions(+), 103 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 038e877081b6..fabcadba1f19 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -735,6 +735,58 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) } } +struct outstate { + FILE *fh; + bool newline; +}; + +#define METRIC_LEN 35 + +static void new_line_std(void *ctx) +{ + struct outstate *os = ctx; + + os->newline = true; +} + +static void do_new_line_std(struct outstate *os) +{ + fputc('\n', os->fh); + if (stat_config.aggr_mode == AGGR_NONE) + fprintf(os->fh, " "); + if (stat_config.aggr_mode == AGGR_CORE) + fprintf(os->fh, " "); + if (stat_config.aggr_mode == AGGR_SOCKET) + fprintf(os->fh, " "); + fprintf(os->fh, " "); +} + +static void print_metric_std(void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + bool newline = os->newline; + + os->newline = false; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%-*s", METRIC_LEN, ""); + return; + } + + if (newline) + do_new_line_std(os); + + n = fprintf(out, " # "); + if 
(color) + n += color_fprintf(out, color, fmt, val); + else + n += fprintf(out, fmt, val); + fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -795,20 +847,27 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) static void printout(int id, int nr, struct perf_evsel *counter, double uval) { - int cpu = cpu_map__id_to_cpu(id); + struct outstate os = { .fh = stat_config.output }; + struct perf_stat_output_ctx out; + print_metric_t pm = print_metric_std; + void (*nl)(void *); - if (stat_config.aggr_mode == AGGR_GLOBAL) - cpu = 0; + nl = new_line_std; if (nsec_counter(counter)) nsec_printout(id, nr, counter, uval); else abs_printout(id, nr, counter, uval); + out.print_metric = pm; + out.new_line = nl; + out.ctx = &os; + if (!csv_output && !stat_config.interval) - perf_stat__print_shadow_stats(stat_config.output, counter, - uval, cpu, - stat_config.aggr_mode); + perf_stat__print_shadow_stats(counter, uval, + stat_config.aggr_mode == AGGR_GLOBAL ? 
0 : + cpu_map__id_to_cpu(id), + &out); } static void print_aggr(char *prefix) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 6ac03146889d..4d8f18581b9b 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -137,9 +137,10 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static void print_stalled_cycles_frontend(FILE *out, int cpu, +static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel - __maybe_unused, double avg) + __maybe_unused, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -152,14 +153,17 @@ static void print_stalled_cycles_frontend(FILE *out, int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " frontend cycles idle "); + if (ratio) + out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", + ratio); + else + out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); } -static void print_stalled_cycles_backend(FILE *out, int cpu, +static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel - __maybe_unused, double avg) + __maybe_unused, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -172,14 +176,13 @@ static void print_stalled_cycles_backend(FILE *out, int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " backend cycles idle "); + out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio); } -static void print_branch_misses(FILE *out, int cpu, +static void print_branch_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -192,14 +195,13 @@ static void print_branch_misses(FILE 
*out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all branches "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); } -static void print_l1_dcache_misses(FILE *out, int cpu, +static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -212,14 +214,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-dcache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); } -static void print_l1_icache_misses(FILE *out, int cpu, +static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -231,15 +232,13 @@ static void print_l1_icache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-icache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); } -static void print_dtlb_cache_misses(FILE *out, int cpu, +static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -251,15 +250,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all dTLB cache hits "); + out->print_metric(out->ctx, color, 
"%7.2f%%", "of all dTLB cache hits", ratio); } -static void print_itlb_cache_misses(FILE *out, int cpu, +static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -271,15 +268,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all iTLB cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); } -static void print_ll_cache_misses(FILE *out, int cpu, +static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -291,15 +286,15 @@ static void print_ll_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all LL-cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr) +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out) { + void *ctxp = out->ctx; + print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; int ctx = evsel_context(evsel); @@ -307,119 +302,145 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %5.2f insns per cycle ", ratio); + print_metric(ctxp, NULL, "%7.2f ", + "insn per cycle", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, 
NULL, NULL, "insn per cycle", 0); } total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + out->new_line(ctxp); if (total && avg) { ratio = total / avg; - fprintf(out, "\n"); - if (aggr == AGGR_NONE) - fprintf(out, " "); - fprintf(out, " # %5.2f stalled cycles per insn", ratio); + print_metric(ctxp, NULL, "%7.2f ", + "stalled cycles per insn", + ratio); + } else { + print_metric(ctxp, NULL, NULL, + "stalled cycles per insn", 0); } - - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && - runtime_branches_stats[ctx][cpu].n != 0) { - print_branch_misses(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { + if (runtime_branches_stats[ctx][cpu].n != 0) + print_branch_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_dcache_stats[ctx][cpu].n != 0) { - print_l1_dcache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_l1_dcache_stats[ctx][cpu].n != 0) + print_l1_dcache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_icache_stats[ctx][cpu].n != 0) { - print_l1_icache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_l1_icache_stats[ctx][cpu].n != 0) + print_l1_icache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( 
PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_dtlb_cache_stats[ctx][cpu].n != 0) { - print_dtlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_itlb_cache_stats[ctx][cpu].n != 0) { - print_itlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_itlb_cache_stats[ctx][cpu].n != 0) + print_itlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_ll_cache_stats[ctx][cpu].n != 0) { - print_ll_cache_misses(out, cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && - runtime_cacherefs_stats[ctx][cpu].n != 0) { + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_ll_cache_stats[ctx][cpu].n != 0) + print_ll_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); + } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); if (total) ratio = avg * 100 / total; - fprintf(out, " # %8.3f %% of all cache refs ", ratio); - + if (runtime_cacherefs_stats[ctx][cpu].n != 0) + print_metric(ctxp, NULL, "%8.3f %%", + "of all cache refs", ratio); + else + print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, 
HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(out, cpu, evsel, avg); + print_stalled_cycles_frontend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(out, cpu, evsel, avg); + print_stalled_cycles_backend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = avg_stats(&runtime_nsecs_stats[cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %8.3f GHz ", ratio); + print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) - fprintf(out, - " # %5.2f%% transactional cycles ", - 100.0 * (avg / total)); + print_metric(ctxp, NULL, + "%7.2f%%", "transactional cycles", + 100.0 * (avg / total)); + else + print_metric(ctxp, NULL, NULL, "transactional cycles", + 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (total2 < avg) total2 = avg; if (total) - fprintf(out, - " # %5.2f%% aborted cycles ", + print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", 100.0 * ((total2-avg) / total)); - } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + else + print_metric(ctxp, NULL, NULL, "aborted cycles", 0); + } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / transaction ", ratio); - } else if (perf_stat_evsel__is(evsel, ELISION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + print_metric(ctxp, NULL, "%8.0f", + "cycles / transaction", ratio); + else + print_metric(ctxp, NULL, NULL, "cycles / 
transaction", + 0); + } else if (perf_stat_evsel__is(evsel, ELISION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / elision ", ratio); + print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) - fprintf(out, " # %8.3f CPUs utilized ", avg / ratio); + print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", + avg / ratio); else - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; + char unit_buf[10]; total = avg_stats(&runtime_nsecs_stats[cpu]); @@ -429,9 +450,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, ratio *= 1000; unit = 'K'; } - - fprintf(out, " # %8.3f %c/sec ", ratio, unit); + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); + print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, NULL, 0); } } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2af63c9cb59f..f02af68adc04 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -68,11 +68,22 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel); extern struct stats walltime_nsecs_stats; +typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, + const char *fmt, double val); +typedef void (*new_line_t )(void *ctx); + void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, int cpu); -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr); +struct perf_stat_output_ctx { + void *ctx; + print_metric_t print_metric; + new_line_t new_line; +}; + +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out); 
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); -- cgit v1.2.3 From f94833929032ad23412d3970beed6769a2fdbc19 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Jan 2016 09:06:50 -0800 Subject: perf stat: Add support for metrics in interval mode Now that we can modify the metrics printout functions easily, it's straight forward to support metric printing for interval mode. All that is needed is to print the time stamp on every new line. Pass the prefix into the context and print it out. v2: Move wrong hunk to here. Committer note: Before: [root@jouet ~]# perf stat -I 1000 -e instructions,cycles sleep 1 # time counts unit events 1.000168216 538,913 instructions 1.000168216 748,765 cycles 1.000660048 153,741 instructions 1.000660048 214,066 cycles After: # perf stat -I 1000 -e instructions,cycles sleep 1 # time counts unit events 1.000215928 519,620 instructions # 0.69 insn per cycle 1.000215928 752,003 cycles 1.000946033 148,502 instructions # 0.33 insn per cycle 1.000946033 160,104 cycles Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1454173616-17710-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fabcadba1f19..5710bdb058d2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -738,6 +738,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) struct outstate { FILE *fh; bool newline; + const char *prefix; }; #define METRIC_LEN 35 @@ -752,6 +753,7 @@ static void new_line_std(void *ctx) static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); + fputs(os->prefix, os->fh); if (stat_config.aggr_mode == AGGR_NONE) 
fprintf(os->fh, " "); if (stat_config.aggr_mode == AGGR_CORE) @@ -845,10 +847,14 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } -static void printout(int id, int nr, struct perf_evsel *counter, double uval) +static void printout(int id, int nr, struct perf_evsel *counter, double uval, + char *prefix) { - struct outstate os = { .fh = stat_config.output }; struct perf_stat_output_ctx out; + struct outstate os = { + .fh = stat_config.output, + .prefix = prefix ? prefix : "" + }; print_metric_t pm = print_metric_std; void (*nl)(void *); @@ -863,7 +869,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval) out.new_line = nl; out.ctx = &os; - if (!csv_output && !stat_config.interval) + if (!csv_output) perf_stat__print_shadow_stats(counter, uval, stat_config.aggr_mode == AGGR_GLOBAL ? 0 : cpu_map__id_to_cpu(id), @@ -923,7 +929,7 @@ static void print_aggr(char *prefix) continue; } uval = val * counter->scale; - printout(id, nr, counter, uval); + printout(id, nr, counter, uval, prefix); if (!csv_output) print_noise(counter, 1.0); @@ -954,7 +960,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval); + printout(thread, 0, counter, uval, prefix); if (!csv_output) print_noise(counter, 1.0); @@ -1004,7 +1010,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) } uval = avg * counter->scale; - printout(-1, 0, counter, uval); + printout(-1, 0, counter, uval, prefix); print_noise(counter, avg); @@ -1057,7 +1063,7 @@ static void print_counter(struct perf_evsel *counter, char *prefix) } uval = val * counter->scale; - printout(cpu, 0, counter, uval); + printout(cpu, 0, counter, uval, prefix); if (!csv_output) print_noise(counter, 1.0); print_running(run, ena); -- cgit v1.2.3 From cb110f471025f3278978aaccb18f3164ea2b8189 Mon 
Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Jan 2016 09:06:51 -0800 Subject: perf stat: Move noise/running printing into printout Move the running/noise printing into printout to avoid duplicated code in the callers. v2: Merged with other patches. Remove unnecessary hunk. Readd hunk that ended in earlier patch. v3: Fix noise/running output in CSV mode v4: Merge with later patch that also moves not supported printing. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1454173616-17710-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 123 ++++++++++++---------------------------------- 1 file changed, 32 insertions(+), 91 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5710bdb058d2..15e4fcf34e0c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -848,7 +848,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } static void printout(int id, int nr, struct perf_evsel *counter, double uval, - char *prefix) + char *prefix, u64 run, u64 ena, double noise) { struct perf_stat_output_ctx out; struct outstate os = { @@ -860,6 +860,30 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, nl = new_line_std; + if (run == 0 || ena == 0) { + aggr_printout(counter, id, nr); + + fprintf(stat_config.output, "%*s%s", + csv_output ? 0 : 18, + counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + csv_sep); + + fprintf(stat_config.output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(stat_config.output, "%*s", + csv_output ? 
0 : -25, + perf_evsel__name(counter)); + + if (counter->cgrp) + fprintf(stat_config.output, "%s%s", + csv_sep, counter->cgrp->name); + + print_running(run, ena); + return; + } + if (nsec_counter(counter)) nsec_printout(id, nr, counter, uval); else @@ -874,6 +898,9 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, stat_config.aggr_mode == AGGR_GLOBAL ? 0 : cpu_map__id_to_cpu(id), &out); + + print_noise(counter, noise); + print_running(run, ena); } static void print_aggr(char *prefix) @@ -904,36 +931,8 @@ static void print_aggr(char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (run == 0 || ena == 0) { - aggr_printout(counter, id, nr); - - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); - - print_running(run, ena); - fputc('\n', output); - continue; - } uval = val * counter->scale; - printout(id, nr, counter, uval, prefix); - if (!csv_output) - print_noise(counter, 1.0); - - print_running(run, ena); + printout(id, nr, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } } @@ -960,12 +959,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix); - - if (!csv_output) - print_noise(counter, 1.0); - - print_running(run, ena); + printout(thread, 0, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } } @@ -979,7 +973,6 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) FILE *output = stat_config.output; struct perf_stat_evsel *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); - int scaled = counter->counts->scaled; double uval; double 
avg_enabled, avg_running; @@ -989,32 +982,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (scaled == -1 || !counter->supported) { - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", csv_sep, counter->cgrp->name); - - print_running(avg_running, avg_enabled); - fputc('\n', output); - return; - } - uval = avg * counter->scale; - printout(-1, 0, counter, uval, prefix); - - print_noise(counter, avg); - - print_running(avg_running, avg_enabled); + printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); fprintf(output, "\n"); } @@ -1037,36 +1006,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (run == 0 || ena == 0) { - fprintf(output, "CPU%*d%s%*s%s", - csv_output ? 0 : -4, - perf_evsel__cpus(counter)->map[cpu], csv_sep, - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - - fprintf(output, "%*s", - csv_output ? 
0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); - - print_running(run, ena); - fputc('\n', output); - continue; - } - uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix); - if (!csv_output) - print_noise(counter, 1.0); - print_running(run, ena); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } -- cgit v1.2.3 From a55e5663761366fb883f6f25375dd68bc958b9db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Feb 2016 10:57:19 -0300 Subject: perf evlist: Reference count the cpu and thread maps at set_maps() We were dropping the reference we possibly held but not obtaining one for the new maps, which we will drop at perf_evlist__delete(), fix it. This was caught by Steven Noonan in some of the machines which would produce this output when caught by glibc debug mechanisms: $ sudo perf test 21 21: Test object code reading :*** Error in `perf': corrupted double-linked list: 0x00000000023ffcd0 *** ======= Backtrace: ========= /usr/lib/libc.so.6(+0x72055)[0x7f25be0f3055] /usr/lib/libc.so.6(+0x779b6)[0x7f25be0f89b6] /usr/lib/libc.so.6(+0x7a0ed)[0x7f25be0fb0ed] /usr/lib/libc.so.6(__libc_calloc+0xba)[0x7f25be0fceda] perf(parse_events_lex_init_extra+0x38)[0x4cfff8] perf(parse_events+0x55)[0x4a0615] perf(perf_evlist__config+0xcf)[0x4eeb2f] perf[0x479f82] perf(test__code_reading+0x1e)[0x47ad4e] perf(cmd_test+0x5dd)[0x46452d] perf[0x47f4e3] perf(main+0x603)[0x42c723] /usr/lib/libc.so.6(__libc_start_main+0xf0)[0x7f25be0a1610] perf(_start+0x29)[0x42c859] Further investigation using valgrind led to the reference count imbalance fixed in this patch. 
Reported-and-Tested-by: Steven Noonan Report-Link: http://lkml.kernel.org/r/CAKbGBLjC2Dx5vshxyGmQkcD+VwiAQLbHoXA9i7kvRB2-2opHZQ@mail.gmail.com Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: f30a79b012e5 ("perf tools: Add reference counting for cpu_map object") Link: http://lkml.kernel.org/n/tip-j0u1bdhr47sa511sgg76kb8h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d81f13de2476..a7eb0eae9938 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1181,12 +1181,12 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, */ if (cpus != evlist->cpus) { cpu_map__put(evlist->cpus); - evlist->cpus = cpus; + evlist->cpus = cpu_map__get(cpus); } if (threads != evlist->threads) { thread_map__put(evlist->threads); - evlist->threads = threads; + evlist->threads = thread_map__get(threads); } perf_evlist__propagate_maps(evlist); -- cgit v1.2.3 From 85723885feb823b4fc352b727ece0b6d00306c4d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Feb 2016 09:34:31 +0100 Subject: perf record: Add --all-user/--all-kernel options Allow user to easily switch all events to user or kernel space with simple --all-user or --all-kernel options. This will be handy within perf mem/c2c wrappers to switch easily monitoring modes. Committer note: Testing it: # perf record --all-kernel --all-user -a sleep 2 Error: option `all-user' cannot be used with all-kernel Usage: perf record [] [] or: perf record [] -- [] --all-user Configure all used events to run in user space. --all-kernel Configure all used events to run in kernel space. 
# perf record --all-user --all-kernel -a sleep 2 Error: option `all-kernel' cannot be used with all-user Usage: perf record [] [] or: perf record [] -- [] --all-kernel Configure all used events to run in kernel space. --all-user Configure all used events to run in user space. # perf record --all-user -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.416 MB perf.data (162 samples) ] # perf report | grep '\[k\]' # perf record --all-kernel -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.423 MB perf.data (296 samples) ] # perf report | grep '\[\.\]' # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1455525293-8671-2-git-send-email-jolsa@kernel.org [ Made those options to be mutually exclusive ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 6 ++++++ tools/perf/builtin-record.c | 6 ++++++ tools/perf/perf.h | 2 ++ tools/perf/util/evsel.c | 10 ++++++++++ 4 files changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index fbceb631387c..19aa17532a16 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -341,6 +341,12 @@ Specify vmlinux path which has debuginfo. --buildid-all:: Record build-id of all DSOs regardless whether it's actually hit or not. +--all-kernel:: +Configure all used events to run in kernel space. + +--all-user:: +Configure all used events to run in user space. 
+ SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0ee0d5cd31a7..cf3a28d83066 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1140,6 +1140,12 @@ struct option __record_options[] = { "per thread proc mmap processing timeout in ms"), OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, "Record context switch events"), + OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, + "Configure all used events to run in kernel space.", + PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, + "Configure all used events to run in user space.", + PARSE_OPT_EXCLUSIVE), OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", "clang binary to use for compiling BPF scriptlets"), OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 90129accffbe..5381a01c0610 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -58,6 +58,8 @@ struct record_opts { bool full_auxtrace; bool auxtrace_snapshot_mode; bool record_switch_events; + bool all_kernel; + bool all_user; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 467808680ee4..6ae20d0056de 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -898,6 +898,16 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (evsel->precise_max) perf_event_attr__set_max_precise_ip(attr); + if (opts->all_user) { + attr->exclude_kernel = 1; + attr->exclude_user = 0; + } + + if (opts->all_kernel) { + attr->exclude_kernel = 0; + attr->exclude_user = 1; + } + /* * Apply event specific term settings, * it overloads any global configuration. 
-- cgit v1.2.3 From d9aade7fd27a604bbffd363e6a68416ef51bab88 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Feb 2016 13:34:09 -0300 Subject: perf evlist: Handle -EINVAL for sample_freq > max_sample_rate in strerror_open() When running the "code reading" test we get: # perf test -v "code reading" 2>&1 | tail -5 Parsing event 'cycles:u' perf_evlist__open failed test child finished with -1 ---- end ---- Test object code reading: FAILED! # And with -vv we get the errno value, -22, i.e. -EINVAL, but we can do better and handle the case at hand, with this patch it becomes: # perf test -v "code reading" 2>&1 | tail -7 perf_evlist__open() failed! Error: Invalid argument. Hint: Check /proc/sys/kernel/perf_event_max_sample_rate. Hint: The current value is 1000 and 4000 is being requested. test child finished with -1 ---- end ---- Test object code reading: FAILED! # Next patch will make this 'perf test' entry to use perf_evlist__strerror() Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Noonan Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-i31ai6kfefn75eapejjokfhc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a7eb0eae9938..0f577162c699 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1624,7 +1624,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) return printed + fprintf(fp, "\n"); } -int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, +int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size) { int printed, value; @@ -1652,7 +1652,25 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" "Hint:\tThe current value is %d.", value); break; + 
case EINVAL: { + struct perf_evsel *first = perf_evlist__first(evlist); + int max_freq; + + if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) + goto out_default; + + if (first->attr.sample_freq < (u64)max_freq) + goto out_default; + + printed = scnprintf(buf, size, + "Error:\t%s.\n" + "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" + "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", + emsg, max_freq, first->attr.sample_freq); + break; + } default: +out_default: scnprintf(buf, size, "%s", emsg); break; } -- cgit v1.2.3 From 6880bbf96930ec6f8b40b5b93f21973f3297672a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Feb 2016 13:40:57 -0300 Subject: perf tests: Use perf_evlist__strerror_open() to provide hints about max_freq Before: # perf test -v "code reading" 2>&1 | tail -4 perf_evlist__open failed test child finished with -1 ---- end ---- Test object code reading: FAILED! # After: # perf test -v "code reading" 2>&1 | tail -7 perf_evlist__open() failed! Error: Invalid argument. Hint: Check /proc/sys/kernel/perf_event_max_sample_rate. Hint: The current value is 1000 and 4000 is being requested. test child finished with -1 ---- end ---- Test object code reading: FAILED! 
# Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Noonan Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ifbx7vmrc38loe6317owz2jx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 313a48c6b2bc..f84339cb7f95 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -559,7 +559,13 @@ static int do_test_code_reading(bool try_kcore) evlist = NULL; continue; } - pr_debug("perf_evlist__open failed\n"); + + if (verbose) { + char errbuf[512]; + perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); + pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); + } + goto out_put; } break; -- cgit v1.2.3 From 5243ba76a585a6481c4d7b931e7e3d98900cbdbe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Feb 2016 13:45:25 -0300 Subject: perf test: Reduce the sample_freq for the 'object code reading' test Using 4 kHz is not necessary and sometimes is more than what was auto-tuned: # dmesg | grep max_sample_rate | tail -2 [ 2499.144373] perf interrupt took too long (2501 > 2500), lowering kernel.perf_event_max_sample_rate to 50000 [ 3592.413606] perf interrupt took too long (5069 > 5000), lowering kernel.perf_event_max_sample_rate to 25000 Simulating an auto-tune of 2000 we make the test fail, as reported by Steven Noonan for one of his machines, so reduce it to 500 Hz, which is enough to get a good number of samples for this test: # perf test -v 21 2>&1 | grep '^Reading object code for memory address' | tee /tmp/out | tail -5 Reading object code for memory address: 0x479f40 Reading object code for memory address: 0x7f29b7eea80d Reading object code for memory address: 0x7f29b7eea80d Reading object code for memory address: 0x7f29b7eea800 Reading object code for memory address: 0xffffffff813b2f23 [root@jouet ~]# wc -l
/tmp/out 40 /tmp/out [root@jouet ~]# For systems that auto-tune below that, the previous patches will tell the user what is happening so that he may either ignore the result of this test or bump /proc/sys/kernel/perf_event_max_sample_rate. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Noonan Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-6kufyy1iprdfzrbtuqgxir70@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index f84339cb7f95..afc9ad0a0515 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -439,7 +439,7 @@ static int do_test_code_reading(bool try_kcore) .mmap_pages = UINT_MAX, .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, - .freq = 4000, + .freq = 500, .target = { .uses_mmap = true, }, -- cgit v1.2.3 From b002f3bbd321993c1a6d56b86544065420156ab9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 17 Feb 2016 14:44:00 -0800 Subject: perf stat: Handled scaled == -1 case for counters Arnaldo pointed out that the earlier cb110f471025 ("perf stat: Move noise/running printing into printout") change changed behavior for not counted counters. This patch fixes it again. 
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Stephane Eranian Fixes: cb110f471025 ("perf stat: Move noise/running printing into printout") Link: http://lkml.kernel.org/r/1455749045-18098-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 15e4fcf34e0c..86289dfcb452 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -860,7 +860,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, nl = new_line_std; - if (run == 0 || ena == 0) { + if (run == 0 || ena == 0 || counter->counts->scaled == -1) { aggr_printout(counter, id, nr); fprintf(stat_config.output, "%*s%s", -- cgit v1.2.3 From 80cdce7666924158af63ba5071805a28800ebe4b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 19 Feb 2016 11:43:51 +0000 Subject: perf bpf: Rename bpf_prog_priv__clear() to clear_prog_priv() The name of bpf_prog_priv__clear() doesn't follow perf's naming convention. bpf_prog_priv__delete() seems to be a better name. However, bpf_prog_priv__delete() should be a method of 'struct bpf_prog_priv', but its first parameter is 'struct bpf_program'. It is callback from libbpf to clear priv structures when destroying a bpf program. It is actually a method of bpf_program (libbpf object), but bpf_program__ functions should be provided by libbpf. This patch removes the prefix of that function. 
Signed-off-by: Wang Nan Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1455882283-79592-4-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 540a7efa657e..0bdccf423b27 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -108,8 +108,8 @@ void bpf__clear(void) } static void -bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused, - void *_priv) +clear_prog_priv(struct bpf_program *prog __maybe_unused, + void *_priv) { struct bpf_prog_priv *priv = _priv; @@ -337,7 +337,7 @@ config_bpf_program(struct bpf_program *prog) } pr_debug("bpf: config '%s' is ok\n", config_str); - err = bpf_program__set_private(prog, priv, bpf_prog_priv__clear); + err = bpf_program__set_private(prog, priv, clear_prog_priv); if (err) { pr_debug("Failed to set priv for program '%s'\n", config_str); goto errout; -- cgit v1.2.3 From 26dee028d365fbc0e3326606a8520260b4462381 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 19 Feb 2016 11:43:52 +0000 Subject: perf tools: Fix checking asprintf return value According to the man page, asprintf returns -1 on failure. This patch fixes two incorrect return value checks.
Signed-off-by: Wang Nan Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Cc: stable@vger.kernel.org # v4.4+ Fixes: ffeb883e5662 ("perf tools: Show proper error message for wrong terms of hw/sw events") Link: http://lkml.kernel.org/r/1455882283-79592-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e5583fd4e7bd..72524c755b11 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2110,11 +2110,11 @@ char *parse_events_formats_error_string(char *additional_terms) /* valid terms */ if (additional_terms) { - if (!asprintf(&str, "valid terms: %s,%s", - additional_terms, static_terms)) + if (asprintf(&str, "valid terms: %s,%s", + additional_terms, static_terms) < 0) goto fail; } else { - if (!asprintf(&str, "valid terms: %s", static_terms)) + if (asprintf(&str, "valid terms: %s", static_terms) < 0) goto fail; } return str; -- cgit v1.2.3 From 17cb5f84b89fd39a143f1c899836f40420a6b799 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 19 Feb 2016 11:43:57 +0000 Subject: perf tools: Create config_term_names array config_term_names[] is introduced for future commits which will be able to retrieve the config name through the config term. Utilize this array in parse_events_formats_error_string() so the missing '{,no-}inherit' terms are added. 
Signed-off-by: Wang Nan Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1455882283-79592-10-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 51 +++++++++++++++++++++++++++++++++++++++--- tools/perf/util/parse-events.h | 3 ++- tools/perf/util/parse-events.l | 3 +-- 3 files changed, 51 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 72524c755b11..fd085d5f5c79 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -746,6 +746,25 @@ static int check_type_val(struct parse_events_term *term, return -EINVAL; } +/* + * Update according to parse-events.l + */ +static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { + [PARSE_EVENTS__TERM_TYPE_USER] = "", + [PARSE_EVENTS__TERM_TYPE_CONFIG] = "config", + [PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1", + [PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2", + [PARSE_EVENTS__TERM_TYPE_NAME] = "name", + [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period", + [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq", + [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE] = "branch_type", + [PARSE_EVENTS__TERM_TYPE_TIME] = "time", + [PARSE_EVENTS__TERM_TYPE_CALLGRAPH] = "call-graph", + [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size", + [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", + [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", +}; + typedef int config_term_func_t(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_error *err); @@ -2097,6 +2116,31 @@ void parse_events_evlist_error(struct parse_events_evlist *data, WARN_ONCE(!err->str, "WARNING: failed to allocate error string"); } +static void 
config_terms_list(char *buf, size_t buf_sz) +{ + int i; + bool first = true; + + buf[0] = '\0'; + for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) { + const char *name = config_term_names[i]; + + if (!name) + continue; + if (name[0] == '<') + continue; + + if (strlen(buf) + strlen(name) + 2 >= buf_sz) + return; + + if (!first) + strcat(buf, ","); + else + first = false; + strcat(buf, name); + } +} + /* * Return string contains valid config terms of an event. * @additional_terms: For terms such as PMU sysfs terms. @@ -2104,10 +2148,11 @@ void parse_events_evlist_error(struct parse_events_evlist *data, char *parse_events_formats_error_string(char *additional_terms) { char *str; - static const char *static_terms = "config,config1,config2,name," - "period,freq,branch_type,time," - "call-graph,stack-size\n"; + /* "branch_type" is the longest name */ + char static_terms[__PARSE_EVENTS__TERM_TYPE_NR * + (sizeof("branch_type") - 1)]; + config_terms_list(static_terms, sizeof(static_terms)); /* valid terms */ if (additional_terms) { if (asprintf(&str, "valid terms: %s,%s", diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 53628bf3da67..b50d50b96f95 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -68,7 +68,8 @@ enum { PARSE_EVENTS__TERM_TYPE_CALLGRAPH, PARSE_EVENTS__TERM_TYPE_STACKSIZE, PARSE_EVENTS__TERM_TYPE_NOINHERIT, - PARSE_EVENTS__TERM_TYPE_INHERIT + PARSE_EVENTS__TERM_TYPE_INHERIT, + __PARSE_EVENTS__TERM_TYPE_NR, }; struct parse_events_term { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 58c5831ffd5c..99486e6a8b97 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -178,8 +178,7 @@ modifier_bp [rwx]{1,3} { /* - * Please update parse_events_formats_error_string any time - * new static term is added. + * Please update config_term_names when new static term is added. 
*/ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } -- cgit v1.2.3 From 1669e509ea25e4e3e871d913d21b1cac4a96d1e8 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 19 Feb 2016 11:43:58 +0000 Subject: perf stat: Bail out on unsupported event config modifiers 'perf stat' accepts some config terms but doesn't apply them. For example: # perf stat -e 'instructions/no-inherit/' -e 'instructions/inherit/' bash # ls # exit Performance counter stats for 'bash': 266258061 instructions/no-inherit/ 266258061 instructions/inherit/ 1.402183915 seconds time elapsed The result is confusing, because the user may expect the first 'instructions' event to exclude the 'ls' command. This patch forbids most of these config terms for 'perf stat'. Result: # ./perf stat -e 'instructions/no-inherit/' -e 'instructions/inherit/' bash event syntax error: 'instructions/no-inherit/' \___ 'no-inherit' is not usable in 'perf stat' ... We can add blocked config terms back when 'perf stat' really supports them.
This patch also removes unavailable config term from error message: # ./perf stat -e 'instructions/badterm/' ls event syntax error: 'instructions/badterm/' \___ unknown term valid terms: config,config1,config2,name # ./perf stat -e 'cpu/badterm/' ls event syntax error: 'cpu/badterm/' \___ unknown term valid terms: pc,any,inv,edge,cmask,event,in_tx,ldlat,umask,in_tx_cp,offcore_rsp,config,config1,config2,name Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1455882283-79592-11-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + tools/perf/util/parse-events.c | 48 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/parse-events.h | 1 + 3 files changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 86289dfcb452..8c0bc0fe5179 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1831,6 +1831,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if (evsel_list == NULL) return -ENOMEM; + parse_events__shrink_config_terms(); argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, (const char **) stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index fd085d5f5c79..eb5df43ec68f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -765,6 +765,41 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", }; +static bool config_term_shrinked; + +static bool +config_term_avail(int term_type, struct parse_events_error *err) +{ + if 
(term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) { + err->str = strdup("Invalid term_type"); + return false; + } + if (!config_term_shrinked) + return true; + + switch (term_type) { + case PARSE_EVENTS__TERM_TYPE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + case PARSE_EVENTS__TERM_TYPE_NAME: + return true; + default: + if (!err) + return false; + + /* term_type is validated so indexing is safe */ + if (asprintf(&err->str, "'%s' is not usable in 'perf stat'", + config_term_names[term_type]) < 0) + err->str = NULL; + return false; + } +} + +void parse_events__shrink_config_terms(void) +{ + config_term_shrinked = true; +} + typedef int config_term_func_t(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_error *err); @@ -834,6 +869,17 @@ do { \ return -EINVAL; } + /* + * Check term availbility after basic checking so + * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered. + * + * If check availbility at the entry of this function, + * user will see "'' is not usable in 'perf stat'" + * if an invalid config term is provided for legacy events + * (for example, instructions/badterm/...), which is confusing. 
+ */ + if (!config_term_avail(term->type_term, err)) + return -EINVAL; return 0; #undef CHECK_TYPE_VAL } @@ -2125,6 +2171,8 @@ static void config_terms_list(char *buf, size_t buf_sz) for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) { const char *name = config_term_names[i]; + if (!config_term_avail(i, NULL)) + continue; if (!name) continue; if (name[0] == '<') diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index b50d50b96f95..76151f9f00d2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -105,6 +105,7 @@ struct parse_events_terms { struct list_head *terms; }; +void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, int type_term, char *config, u64 num, -- cgit v1.2.3 From e814fddde18fec43fa41a27ae94c09b54772697e Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 19 Feb 2016 11:43:59 +0000 Subject: perf tools: Rename and move pmu_event_name to get_config_name Following commits will make more events obey /name=newname/ options. This patch makes pmu_event_name() a generic helper. Makes new get_config_name() accept NULL input to make life easier. 
Signed-off-by: Wang Nan Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1455882283-79592-12-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index eb5df43ec68f..3243e95eb1c7 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -279,7 +279,24 @@ const char *event_type(int type) return "unknown"; } +static int parse_events__is_name_term(struct parse_events_term *term) +{ + return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; +} +static char *get_config_name(struct list_head *head_terms) +{ + struct parse_events_term *term; + + if (!head_terms) + return NULL; + + list_for_each_entry(term, head_terms, list) + if (parse_events__is_name_term(term)) + return term->val.str; + + return NULL; +} static struct perf_evsel * __add_event(struct list_head *list, int *idx, @@ -1029,22 +1046,6 @@ int parse_events_add_numeric(struct parse_events_evlist *data, return add_event(list, &data->idx, &attr, NULL, &config_terms); } -static int parse_events__is_name_term(struct parse_events_term *term) -{ - return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; -} - -static char *pmu_event_name(struct list_head *head_terms) -{ - struct parse_events_term *term; - - list_for_each_entry(term, head_terms, list) - if (parse_events__is_name_term(term)) - return term->val.str; - - return NULL; -} - int parse_events_add_pmu(struct parse_events_evlist *data, struct list_head *list, char *name, struct list_head *head_config) @@ -1089,7 +1090,7 @@ int parse_events_add_pmu(struct 
parse_events_evlist *data, return -EINVAL; evsel = __add_event(list, &data->idx, &attr, - pmu_event_name(head_config), pmu->cpus, + get_config_name(head_config), pmu->cpus, &config_terms); if (evsel) { evsel->unit = info.unit; -- cgit v1.2.3 From 1d55e8ef340dad1ccd5aaf53071de41fc3d8dba4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Feb 2016 18:45:12 -0300 Subject: perf tools: Introduce opt_event_config nonterminal To remove duplicated code that differs only in using the matching '/a,b,c/' part or NULL if no event configuration is done ('//' or no pair of slashes at all). Will be used by some new targets allowing the configuration of hardware events, etc. Lifted part of the 'opt_event_config' nonterminal from a patch by Wang Nan. Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/n/tip-e3xzpx9cqsmwnaguaxyw6r42@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 47 ++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index c0eac88ef474..ce68746bdc89 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -64,6 +64,7 @@ static inc_group_count(struct list_head *list, %type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT %type value_sym %type event_config +%type opt_event_config %type event_term %type event_pmu %type event_legacy_symbol @@ -222,16 +223,6 @@ PE_NAME '/' event_config '/' $$ = list; } | -PE_NAME '/' '/' -{ - struct parse_events_evlist *data = _data; - struct list_head *list; - - ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, NULL)); - $$ = list; -} -| PE_KERNEL_PMU_EVENT sep_dc { 
struct parse_events_evlist *data = _data; @@ -378,7 +369,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc } event_legacy_tracepoint: -tracepoint_name +tracepoint_name opt_event_config { struct parse_events_evlist *data = _data; struct parse_events_error *error = data->error; @@ -389,24 +380,7 @@ tracepoint_name error->idx = @1.first_column; if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, - error, NULL)) - return -1; - - $$ = list; -} -| -tracepoint_name '/' event_config '/' -{ - struct parse_events_evlist *data = _data; - struct parse_events_error *error = data->error; - struct list_head *list; - - ALLOC_LIST(list); - if (error) - error->idx = @1.first_column; - - if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, - error, $3)) + error, $2)) return -1; $$ = list; @@ -476,6 +450,21 @@ PE_BPF_SOURCE $$ = list; } +opt_event_config: +'/' event_config '/' +{ + $$ = $2; +} +| +'/' '/' +{ + $$ = NULL; +} +| +{ + $$ = NULL; +} + start_terms: event_config { struct parse_events_terms *data = _data; -- cgit v1.2.3 From 10bf358a1b79fa1311eb05ee31f2cefdcad01741 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 19 Feb 2016 11:44:00 +0000 Subject: perf tools: Enable config raw and numeric events This patch allows setting config terms for raw and numeric events. For example: # perf stat -e cycles/name=cyc/ ls ... 1821108 cyc ... # perf stat -e r6530160/name=event/ ls ... 1103195 event ... # perf record -e cycles -e 4:0x6530160/name=evtx,call-graph=fp/ -a sleep 1 ... # perf report --stdio ... # Samples: 124 of event 'cycles' 46.61% 0.00% swapper [kernel.vmlinux] [k] cpu_startup_entry 41.26% 0.00% swapper [kernel.vmlinux] [k] start_secondary ... # Samples: 91 of event 'evtx' ... 93.76% 0.00% swapper [kernel.vmlinux] [k] cpu_startup_entry | ---cpu_startup_entry | |--66.63%--call_cpuidle | cpuidle_enter | | ... 3 test cases are introduced to test config terms for symbol, raw and numeric events. 
Committer note: Further testing shows that we can retrieve the event name using 'perf evlist -v' and looking at the 'config' perf_event_attr field, i.e.: # perf record -e cycles -e 4:0x6530160/name=evtx,call-graph=fp/ -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.724 MB perf.data (2076 samples) ] # perf evlist cycles evtx # perf evlist -v cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 evtx: type: 4, size: 112, config: 0x6530160, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1, exclude_guest: 1 # Signed-off-by: Wang Nan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1455882283-79592-13-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 40 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/parse-events.c | 3 ++- tools/perf/util/parse-events.y | 10 ++++++---- 3 files changed, 48 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 6648274f4601..15e2d055321e 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1271,6 +1271,31 @@ static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist) return 0; } +static int test__checkevent_config_symbol(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + 
TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0); + return 0; +} + +static int test__checkevent_config_raw(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0); + return 0; +} + +static int test__checkevent_config_num(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0); + return 0; +} + + static int count_tracepoints(void) { struct dirent *events_ent; @@ -1579,6 +1604,21 @@ static struct evlist_test test__events[] = { .check = test__checkevent_precise_max_modifier, .id = 47, }, + { + .name = "instructions/name=insn/", + .check = test__checkevent_config_symbol, + .id = 48, + }, + { + .name = "r1234/name=rawpmu/", + .check = test__checkevent_config_raw, + .id = 49, + }, + { + .name = "4:0x6530160/name=numpmu/", + .check = test__checkevent_config_num, + .id = 50, + }, }; static struct evlist_test test__events_pmu[] = { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3243e95eb1c7..75576e130e16 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1043,7 +1043,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data, return -ENOMEM; } - return add_event(list, &data->idx, &attr, NULL, &config_terms); + return add_event(list, &data->idx, &attr, + get_config_name(head_config), &config_terms); } int parse_events_add_pmu(struct parse_events_evlist *data, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index ce68746bdc89..82029f92c4d2 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -407,24 +407,26 @@ PE_NAME ':' PE_NAME } event_legacy_numeric: -PE_VALUE ':' PE_VALUE +PE_VALUE ':' PE_VALUE opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - 
ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL)); + ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, $4)); + parse_events_terms__delete($4); $$ = list; } event_legacy_raw: -PE_RAW +PE_RAW opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL)); + ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, $2)); + parse_events_terms__delete($2); $$ = list; } -- cgit v1.2.3 From 43d0b97817a41b274aaec0476e912dae3ae1f93d Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 19 Feb 2016 11:44:01 +0000 Subject: perf tools: Enable config and setting names for legacy cache events This patch allows setting config terms for legacy cache events. For example: # perf stat -e L1-icache-misses/name=valA/ -e branches/name=valB/ ls ... Performance counter stats for 'ls': 11299 valA 451605 valB 0.000779091 seconds time elapsed # perf record -e cache-misses/name=inh/ -e cache-misses/name=noinh,no-inherit/ bash # ls # exit [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.023 MB perf.data (131 samples) ] # perf report --stdio | grep -B 1 'Event count' # Samples: 105 of event 'inh' # Event count (approx.): 109118 -- # Samples: 26 of event 'noinh' # Event count (approx.): 48302 A test case is introduced to test this feature. 
Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1455882283-79592-14-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 12 ++++++++++++ tools/perf/util/parse-events.c | 30 +++++++++++++++++++++++++++--- tools/perf/util/parse-events.h | 4 +++- tools/perf/util/parse-events.y | 18 ++++++++++++------ 4 files changed, 54 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 15e2d055321e..7865f68dc0d8 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1295,6 +1295,13 @@ static int test__checkevent_config_num(struct perf_evlist *evlist) return 0; } +static int test__checkevent_config_cache(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0); + return 0; +} static int count_tracepoints(void) { @@ -1619,6 +1626,11 @@ static struct evlist_test test__events[] = { .check = test__checkevent_config_num, .id = 50, }, + { + .name = "L1-dcache-misses/name=cachepmu/", + .check = test__checkevent_config_cache, + .id = 51, + }, }; static struct evlist_test test__events_pmu[] = { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 75576e130e16..2996aa4207bd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -350,11 +350,25 @@ static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES] return -1; } +typedef int config_term_func_t(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error 
*err); +static int config_term_common(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err); +static int config_attr(struct perf_event_attr *attr, + struct list_head *head, + struct parse_events_error *err, + config_term_func_t config_term); + int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2) + char *type, char *op_result1, char *op_result2, + struct parse_events_error *error, + struct list_head *head_config) { struct perf_event_attr attr; - char name[MAX_NAME_LEN]; + LIST_HEAD(config_terms); + char name[MAX_NAME_LEN], *config_name; int cache_type = -1, cache_op = -1, cache_result = -1; char *op_result[2] = { op_result1, op_result2 }; int i, n; @@ -368,6 +382,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, if (cache_type == -1) return -EINVAL; + config_name = get_config_name(head_config); n = snprintf(name, MAX_NAME_LEN, "%s", type); for (i = 0; (i < 2) && (op_result[i]); i++) { @@ -408,7 +423,16 @@ int parse_events_add_cache(struct list_head *list, int *idx, memset(&attr, 0, sizeof(attr)); attr.config = cache_type | (cache_op << 8) | (cache_result << 16); attr.type = PERF_TYPE_HW_CACHE; - return add_event(list, idx, &attr, name, NULL); + + if (head_config) { + if (config_attr(&attr, head_config, error, + config_term_common)) + return -EINVAL; + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + } + return add_event(list, idx, &attr, config_name ? 
: name, &config_terms); } static void tracepoint_error(struct parse_events_error *e, int err, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 76151f9f00d2..d5eb2af78826 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -140,7 +140,9 @@ int parse_events_add_numeric(struct parse_events_evlist *data, u32 type, u64 config, struct list_head *head_config); int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2); + char *type, char *op_result1, char *op_result2, + struct parse_events_error *error, + struct list_head *head_config); int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_evlist *data, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 82029f92c4d2..6a2d006ea77f 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -293,33 +293,39 @@ value_sym sep_slash_dc } event_legacy_cache: -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT +PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5)); + ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5, error, $6)); + parse_events_terms__delete($6); $$ = list; } | -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT +PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL)); + ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL, error, $4)); + parse_events_terms__delete($4); $$ 
= list; } | -PE_NAME_CACHE_TYPE +PE_NAME_CACHE_TYPE opt_event_config { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL)); + ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL, error, $2)); + parse_events_terms__delete($2); $$ = list; } -- cgit v1.2.3 From 467ef10c68b90b940412390dcd14bbfe8cc40e73 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Feb 2016 23:08:19 +0900 Subject: perf hists browser: Fix percentage update on key press Currently 'perf top --tui' decrements percentage of all entries on any key press. This is because it adds total period as new samples are added to hists. As perf-top does it currently but added samples are not passed to the display thread, the percentages are decreasing continuously. So separate total period stat into a different variable so that it cannot affect the output total period. These new total period stats are used only for calculating callchain percent limit. 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Fixes: 0f58474ec835 ("perf hists: Update hists' total period when adding entries") Link: http://lkml.kernel.org/r/1455631723-17345-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 26 +++++++++++++++++++------- tools/perf/util/hist.h | 2 ++ 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 561e9473a915..a856617be744 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -405,6 +405,16 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } +static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) +{ + if (!symbol_conf.use_callchain) + return; + + he->hists->callchain_period += period; + if (!he->filtered) + he->hists->callchain_non_filtered_period += period; +} + static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *entry, struct addr_location *al, @@ -434,9 +444,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { he_stat__add_period(&he->stat, period, weight); - hists->stats.total_period += period; - if (!he->filtered) - hists->stats.total_non_filtered_period += period; + hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) he_stat__add_period(he->stat_acc, period, weight); @@ -471,9 +479,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, return NULL; if (sample_self) - hists__inc_stats(hists, he); - else - hists->nr_entries++; + hist_entry__add_callchain_period(he, period); + hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); @@ -1227,9 +1234,14 @@ static void output_resort(struct hists *hists, struct 
ui_progress *prog, struct rb_root *root; struct rb_node *next; struct hist_entry *n; + u64 callchain_total; u64 min_callchain_hits; - min_callchain_hits = hists__total_period(hists) * (callchain_param.min_percent / 100); + callchain_total = hists->callchain_period; + if (symbol_conf.filter_relative) + callchain_total = hists->callchain_non_filtered_period; + + min_callchain_hits = callchain_total * (callchain_param.min_percent / 100); if (sort__need_collapse) root = &hists->entries_collapsed; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 840b6d6aa44f..045a9e785a34 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -66,6 +66,8 @@ struct hists { struct rb_root entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; + u64 callchain_period; + u64 callchain_non_filtered_period; struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; -- cgit v1.2.3 From 7565bd39c1a63c82350d26a66ea1a1f1bb49ad2e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Feb 2016 23:08:20 +0900 Subject: perf callchain: Check return value of add_child() The create_child() in add_child() can return NULL in case of memory allocation failure. So check the return value and bail out. The proper error handling will be added later. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1455631723-17345-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 53c43eb9489e..134d88b33fc1 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -453,6 +453,9 @@ add_child(struct callchain_node *parent, struct callchain_node *new; new = create_child(parent, false); + if (new == NULL) + return NULL; + fill_node(new, cursor); new->children_hit = 0; @@ -524,6 +527,8 @@ split_add_child(struct callchain_node *parent, node = callchain_cursor_current(cursor); new = add_child(parent, cursor, period); + if (new == NULL) + return; /* * This is second child since we moved parent's children @@ -585,6 +590,9 @@ append_chain_children(struct callchain_node *root, } /* nothing in children, add to the current node */ rnode = add_child(root, cursor, period); + if (rnode == NULL) + return; + rb_link_node(&rnode->rb_node_in, parent, p); rb_insert_color(&rnode->rb_node_in, &root->rb_root_in); -- cgit v1.2.3 From 8451cbb9b174a9b6e016d7f1bff81ff12dbd1990 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Feb 2016 23:08:21 +0900 Subject: perf callchain: Check return value of fill_node() Memory allocation in the fill_node() can fail so change its return type to int and check it in add_child() too. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1455631723-17345-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 134d88b33fc1..a82ea6f6fc0f 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -416,7 +416,7 @@ create_child(struct callchain_node *parent, bool inherit_children) /* * Fill the node with callchain values */ -static void +static int fill_node(struct callchain_node *node, struct callchain_cursor *cursor) { struct callchain_cursor_node *cursor_node; @@ -433,7 +433,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call = zalloc(sizeof(*call)); if (!call) { perror("not enough memory for the code path tree"); - return; + return -1; } call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; @@ -443,6 +443,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) callchain_cursor_advance(cursor); cursor_node = callchain_cursor_current(cursor); } + return 0; } static struct callchain_node * @@ -456,7 +457,16 @@ add_child(struct callchain_node *parent, if (new == NULL) return NULL; - fill_node(new, cursor); + if (fill_node(new, cursor) < 0) { + struct callchain_list *call, *tmp; + + list_for_each_entry_safe(call, tmp, &new->val, list) { + list_del(&call->list); + free(call); + } + free(new); + return NULL; + } new->children_hit = 0; new->hit = period; -- cgit v1.2.3 From 2d713b809d89a3d10c6a85162bf7cce0468e45d9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Feb 2016 23:08:22 +0900 Subject: perf callchain: Add enum match_result for match_chain() The append_chain() might return either result of match_chain() or other 
(error) code. But match_chain() can return any value in s64 type so it's hard to check the error case. Add new enum match_result and make match_chain() return non-negative values only so that we can check the error cases. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1455631723-17345-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 52 +++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index a82ea6f6fc0f..dab2c1f1e86b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -475,16 +475,32 @@ add_child(struct callchain_node *parent, return new; } -static s64 match_chain(struct callchain_cursor_node *node, - struct callchain_list *cnode) +enum match_result { + MATCH_ERROR = -1, + MATCH_EQ, + MATCH_LT, + MATCH_GT, +}; + +static enum match_result match_chain(struct callchain_cursor_node *node, + struct callchain_list *cnode) { struct symbol *sym = node->sym; + u64 left, right; if (cnode->ms.sym && sym && - callchain_param.key == CCKEY_FUNCTION) - return cnode->ms.sym->start - sym->start; - else - return cnode->ip - node->ip; + callchain_param.key == CCKEY_FUNCTION) { + left = cnode->ms.sym->start; + right = sym->start; + } else { + left = cnode->ip; + right = node->ip; + } + + if (left == right) + return MATCH_EQ; + + return left > right ? 
MATCH_GT : MATCH_LT; } /* @@ -549,7 +565,7 @@ split_add_child(struct callchain_node *parent, cnode = list_first_entry(&first->val, struct callchain_list, list); - if (match_chain(node, cnode) < 0) + if (match_chain(node, cnode) == MATCH_LT) pp = &p->rb_left; else pp = &p->rb_right; @@ -562,7 +578,7 @@ split_add_child(struct callchain_node *parent, } } -static int +static enum match_result append_chain(struct callchain_node *root, struct callchain_cursor *cursor, u64 period); @@ -583,17 +599,17 @@ append_chain_children(struct callchain_node *root, /* lookup in childrens */ while (*p) { - s64 ret; + enum match_result ret; parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node_in); /* If at least first entry matches, rely to children */ ret = append_chain(rnode, cursor, period); - if (ret == 0) + if (ret == MATCH_EQ) goto inc_children_hit; - if (ret < 0) + if (ret == MATCH_LT) p = &parent->rb_left; else p = &parent->rb_right; @@ -611,7 +627,7 @@ inc_children_hit: root->children_count++; } -static int +static enum match_result append_chain(struct callchain_node *root, struct callchain_cursor *cursor, u64 period) @@ -620,7 +636,7 @@ append_chain(struct callchain_node *root, u64 start = cursor->pos; bool found = false; u64 matches; - int cmp = 0; + enum match_result cmp = MATCH_ERROR; /* * Lookup in the current node @@ -636,7 +652,7 @@ append_chain(struct callchain_node *root, break; cmp = match_chain(node, cnode); - if (cmp) + if (cmp != MATCH_EQ) break; found = true; @@ -646,7 +662,7 @@ append_chain(struct callchain_node *root, /* matches not, relay no the parent */ if (!found) { - WARN_ONCE(!cmp, "Chain comparison error\n"); + WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n"); return cmp; } @@ -655,20 +671,20 @@ append_chain(struct callchain_node *root, /* we match only a part of the node. 
Split it and add the new chain */ if (matches < root->val_nr) { split_add_child(root, cursor, cnode, start, matches, period); - return 0; + return MATCH_EQ; } /* we match 100% of the path, increment the hit */ if (matches == root->val_nr && cursor->pos == cursor->nr) { root->hit += period; root->count++; - return 0; + return MATCH_EQ; } /* We match the node and still have a part remaining */ append_chain_children(root, cursor, period); - return 0; + return MATCH_EQ; } int callchain_append(struct callchain_root *root, -- cgit v1.2.3 From f2bb4c5af4fe16d8b1e4ae371e1ceaa817380a88 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Feb 2016 23:08:23 +0900 Subject: perf callchain: Check return value of split_add_child() Now create_child() and add_child() return errors so check and pass it to the caller. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1455631723-17345-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index dab2c1f1e86b..5259379892e1 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -508,7 +508,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node, * give a part of its callchain to the created child. 
* Then create another child to host the given callchain of new branch */ -static void +static int split_add_child(struct callchain_node *parent, struct callchain_cursor *cursor, struct callchain_list *to_split, @@ -520,6 +520,8 @@ split_add_child(struct callchain_node *parent, /* split */ new = create_child(parent, true); + if (new == NULL) + return -1; /* split the callchain and move a part to the new child */ old_tail = parent->val.prev; @@ -554,7 +556,7 @@ split_add_child(struct callchain_node *parent, node = callchain_cursor_current(cursor); new = add_child(parent, cursor, period); if (new == NULL) - return; + return -1; /* * This is second child since we moved parent's children @@ -576,6 +578,7 @@ split_add_child(struct callchain_node *parent, parent->hit = period; parent->count = 1; } + return 0; } static enum match_result @@ -670,7 +673,10 @@ append_chain(struct callchain_node *root, /* we match only a part of the node. Split it and add the new chain */ if (matches < root->val_nr) { - split_add_child(root, cursor, cnode, start, matches, period); + if (split_add_child(root, cursor, cnode, start, matches, + period) < 0) + return MATCH_ERROR; + return MATCH_EQ; } -- cgit v1.2.3 From dca0d122e498c054b117bd4aa5568ce90ee142d5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Feb 2016 23:08:24 +0900 Subject: perf callchain: Check return value of append_chain_children() Now it can check the error case, so check and pass it to the caller. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1455631723-17345-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 5259379892e1..24b4bd0d7754 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -586,7 +586,7 @@ append_chain(struct callchain_node *root, struct callchain_cursor *cursor, u64 period); -static void +static int append_chain_children(struct callchain_node *root, struct callchain_cursor *cursor, u64 period) @@ -598,7 +598,7 @@ append_chain_children(struct callchain_node *root, node = callchain_cursor_current(cursor); if (!node) - return; + return -1; /* lookup in childrens */ while (*p) { @@ -611,6 +611,8 @@ append_chain_children(struct callchain_node *root, ret = append_chain(rnode, cursor, period); if (ret == MATCH_EQ) goto inc_children_hit; + if (ret == MATCH_ERROR) + return -1; if (ret == MATCH_LT) p = &parent->rb_left; @@ -620,7 +622,7 @@ append_chain_children(struct callchain_node *root, /* nothing in children, add to the current node */ rnode = add_child(root, cursor, period); if (rnode == NULL) - return; + return -1; rb_link_node(&rnode->rb_node_in, parent, p); rb_insert_color(&rnode->rb_node_in, &root->rb_root_in); @@ -628,6 +630,7 @@ append_chain_children(struct callchain_node *root, inc_children_hit: root->children_hit += period; root->children_count++; + return 0; } static enum match_result @@ -688,7 +691,8 @@ append_chain(struct callchain_node *root, } /* We match the node and still have a part remaining */ - append_chain_children(root, cursor, period); + if (append_chain_children(root, cursor, period) < 0) + return MATCH_ERROR; return MATCH_EQ; } @@ 
-702,7 +706,8 @@ int callchain_append(struct callchain_root *root, callchain_cursor_commit(cursor); - append_chain_children(&root->node, cursor, period); + if (append_chain_children(&root->node, cursor, period) < 0) + return -1; if (cursor->nr > root->max_depth) root->max_depth = cursor->nr; @@ -730,7 +735,8 @@ merge_chain_branch(struct callchain_cursor *cursor, if (src->hit) { callchain_cursor_commit(cursor); - append_chain_children(dst, cursor, src->hit); + if (append_chain_children(dst, cursor, src->hit) < 0) + return -1; } n = rb_first(&src->rb_root_in); -- cgit v1.2.3 From bba58cdfaace2eb96d2b3cabc610d2ba033371c8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Feb 2016 23:08:25 +0900 Subject: perf hists: Return error from hists__collapse_resort() Currently hists__collapse_resort() and hists__collapse_insert_entry() don't return an error code. Now that callchain_merge() can check for errors, abort and pass the error to the user. A later patch can add more work which also can fail. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1455631723-17345-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 29 +++++++++++++++++++---------- tools/perf/util/hist.h | 4 ++-- 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a856617be744..827c6cbcd05d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1046,8 +1046,8 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, * collapse the histogram */ -bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, - struct rb_root *root, struct hist_entry *he) +int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, + struct hist_entry *he) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -1061,18 +1061,21 @@ bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, cmp = hist_entry__collapse(iter, he); if (!cmp) { + int ret = 0; + he_stat__add_stat(&iter->stat, &he->stat); if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); if (symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); - callchain_merge(&callchain_cursor, - iter->callchain, - he->callchain); + if (callchain_merge(&callchain_cursor, + iter->callchain, + he->callchain) < 0) + ret = -1; } hist_entry__delete(he); - return false; + return ret; } if (cmp < 0) @@ -1084,7 +1087,7 @@ bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, root); - return true; + return 1; } struct rb_root *hists__get_rotate_entries_in(struct hists *hists) @@ -1110,14 +1113,15 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) 
hists__filter_entry_by_socket(hists, he); } -void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) +int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) { struct rb_root *root; struct rb_node *next; struct hist_entry *n; + int ret; if (!sort__need_collapse) - return; + return 0; hists->nr_entries = 0; @@ -1132,7 +1136,11 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) next = rb_next(&n->rb_node_in); rb_erase(&n->rb_node_in, root); - if (hists__collapse_insert_entry(hists, &hists->entries_collapsed, n)) { + ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n); + if (ret < 0) + return -1; + + if (ret) { /* * If it wasn't combined with one of the entries already * collapsed, we need to apply the filters that may have @@ -1143,6 +1151,7 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) if (prog) ui_progress__update(prog, 1); } + return 0; } static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 045a9e785a34..97baa1d6ae5f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -138,7 +138,7 @@ void hist_entry__delete(struct hist_entry *he); void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); -void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); +int hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); void hists__delete_entries(struct hists *hists); @@ -197,7 +197,7 @@ int hists__init(void); int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); struct rb_root *hists__get_rotate_entries_in(struct hists *hists); -bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, +int 
hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he); struct perf_hpp { -- cgit v1.2.3 From 5b2ea6f2f6ac81a230e6cc68e1473e796a583f00 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Feb 2016 23:08:26 +0900 Subject: perf report: Check error during report__collapse_hists() If it returns an error, warn user and bail out instead of silently ignoring it. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1455631723-17345-9-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1eab50ac1ef6..760e886ca9d9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -469,10 +469,11 @@ static int report__browse_hists(struct report *rep) return ret; } -static void report__collapse_hists(struct report *rep) +static int report__collapse_hists(struct report *rep) { struct ui_progress prog; struct perf_evsel *pos; + int ret = 0; ui_progress__init(&prog, rep->nr_entries, "Merging related events..."); @@ -484,7 +485,9 @@ static void report__collapse_hists(struct report *rep) hists->socket_filter = rep->socket_filter; - hists__collapse_resort(hists, &prog); + ret = hists__collapse_resort(hists, &prog); + if (ret < 0) + break; /* Non-group events are considered as leader */ if (symbol_conf.event_group && @@ -497,6 +500,7 @@ static void report__collapse_hists(struct report *rep) } ui_progress__finish(); + return ret; } static void report__output_resort(struct report *rep) @@ -564,7 +568,11 @@ static int __cmd_report(struct report *rep) } } - report__collapse_hists(rep); + ret = report__collapse_hists(rep); + if (ret) { + ui__error("failed to process hist entry\n"); + return ret; + } if 
(session_done()) return 0; -- cgit v1.2.3 From 2c97b0d4a757eec7b83acfe3895d94ad4db13827 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Feb 2016 19:47:04 -0300 Subject: perf tools: Fix build on older systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In RHEL 6.7: CC /tmp/build/perf/util/parse-events.o cc1: warnings being treated as errors util/parse-events.c: In function ‘parse_events_add_cache’: util/parse-events.c:366: error: declaration of ‘error’ shadows a global declaration util/util.h:136: error: shadowed declaration is here Rename it to 'err'. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 43d0b97817a4 ("perf tools: Enable config and setting names for legacy cache events") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2996aa4207bd..2b8770821365 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -363,7 +363,7 @@ static int config_attr(struct perf_event_attr *attr, int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2, - struct parse_events_error *error, + struct parse_events_error *err, struct list_head *head_config) { struct perf_event_attr attr; @@ -425,7 +425,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, attr.type = PERF_TYPE_HW_CACHE; if (head_config) { - if (config_attr(&attr, head_config, error, + if (config_attr(&attr, head_config, err, config_term_common)) return -EINVAL; -- cgit v1.2.3 From 58de6ed0a9a32e4b1cf22cc0c46ca16056763f19 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Feb 2016 19:51:13 -0300 Subject: perf tools: Remove duplicate typedef config_term_func_t definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Older compilers don't like this, for instance, on RHEL6.7: CC /tmp/build/perf/util/parse-events.o util/parse-events.c:844: error: redefinition of typedef ‘config_term_func_t’ util/parse-events.c:353: note: previous declaration of ‘config_term_func_t’ was here So remove the second definition, that should've been just moved in 43d0b97817a4 ("perf tools: Enable config and setting names for legacy cache events"), not copied. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 43d0b97817a4 ("perf tools: Enable config and setting names for legacy cache events") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2b8770821365..b0b329539db5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -841,10 +841,6 @@ void parse_events__shrink_config_terms(void) config_term_shrinked = true; } -typedef int config_term_func_t(struct perf_event_attr *attr, - struct parse_events_term *term, - struct parse_events_error *err); - static int config_term_common(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_error *err) -- cgit v1.2.3 From 665aa75700edda07bd7f05acab86cef1a1a1ea66 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 21 Feb 2016 23:22:35 +0900 Subject: perf tools: Fix segfault on dynamic entries A dynamic entry is created for each tracepoint event. When the sort key is set up, it is compared against the existing keys using the ->equal() callback, but the ->equal callback was never set for dynamic entries. The following segfault was due to the missing ->equal() callback. (gdb) bt #0 0x0000000000140003 in ??
() #1 0x0000000000537769 in fmt_equal (b=0x2106980, a=0x21067a0) at ui/hist.c:548 #2 perf_hpp__setup_output_field (list=0x8c6d80 ) at ui/hist.c:560 #3 0x00000000004e927e in setup_sorting (evlist=) at util/sort.c:2642 #4 0x000000000043cf50 in cmd_report (argc=, argv=, prefix=) at builtin-report.c:932 #5 0x00000000004865a1 in run_builtin (p=p@entry=0x8bbce0 , argc=argc@entry=7, argv=argv@entry=0x7ffd24d56ce0) at perf.c:390 #6 0x000000000042dc1f in handle_internal_command (argv=0x7ffd24d56ce0, argc=7) at perf.c:451 #7 run_argv (argv=0x7ffd24d56a70, argcp=0x7ffd24d56a7c) at perf.c:495 #8 main (argc=7, argv=0x7ffd24d56ce0) at perf.c:620 Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1456064558-13086-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index de715756f281..7daea71691df 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1835,6 +1835,20 @@ bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt) return fmt->cmp == __sort__hde_cmp; } +static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + struct hpp_dynamic_entry *hde_a; + struct hpp_dynamic_entry *hde_b; + + if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b)) + return false; + + hde_a = container_of(a, struct hpp_dynamic_entry, hpp); + hde_b = container_of(b, struct hpp_dynamic_entry, hpp); + + return hde_a->field == hde_b->field; +} + static void hde_free(struct perf_hpp_fmt *fmt) { struct hpp_dynamic_entry *hde; @@ -1867,6 +1881,7 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field) hde->hpp.cmp = __sort__hde_cmp; hde->hpp.collapse = __sort__hde_cmp; hde->hpp.sort = __sort__hde_cmp; + hde->hpp.equal = 
__sort__hde_equal; hde->hpp.free = hde_free; INIT_LIST_HEAD(&hde->hpp.list); -- cgit v1.2.3 From cecaec635de3719ef56a9261c10cd8f2f74ebdb1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 22 Feb 2016 09:31:51 +0900 Subject: perf tools: Update srcline/file if needed Normally the hist entry's srcline and/or srcfile is set during sorting. However, sometimes a hist entry's srcline is still not set after the sorting. This happens when the entry is unique enough that the other sort keys already make it distinct, so the srcline/file sort never gets a chance to be called during the sorting. In that case it has a NULL srcline/srcfile field and shows nothing. Before: $ perf report -s comm,sym,srcline ... Overhead Command Symbol ----------------------------------------------------------------- 34.42% swapper [k] intel_idle intel_idle.c:0 2.44% perf [.] 
__poll_nocancel .:0 1.70% gnome-shell [k] fw_domains_get fw_domains_get+42 1.04% Xorg [k] sock_poll socket.c:0 Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1456101111-14400-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 64 ++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7daea71691df..6f4605b5beb5 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -286,35 +286,34 @@ struct sort_entry sort_sym = { /* --sort srcline */ +static char *hist_entry__get_srcline(struct hist_entry *he) +{ + struct map *map = he->ms.map; + + if (!map) + return SRCLINE_UNKNOWN; + + return get_srcline(map->dso, map__rip_2objdump(map, he->ip), + he->ms.sym, true); +} + static int64_t sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->srcline) { - if (!left->ms.map) - left->srcline = SRCLINE_UNKNOWN; - else { - struct map *map = left->ms.map; - left->srcline = get_srcline(map->dso, - map__rip_2objdump(map, left->ip), - left->ms.sym, true); - } - } - if (!right->srcline) { - if (!right->ms.map) - right->srcline = SRCLINE_UNKNOWN; - else { - struct map *map = right->ms.map; - right->srcline = get_srcline(map->dso, - map__rip_2objdump(map, right->ip), - right->ms.sym, true); - } - } + if (!left->srcline) + left->srcline = hist_entry__get_srcline(left); + if (!right->srcline) + right->srcline = hist_entry__get_srcline(right); + return strcmp(right->srcline, left->srcline); } static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { + if (!he->srcline) + he->srcline = hist_entry__get_srcline(he); + return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline); } @@ -329,11 +328,14 
@@ struct sort_entry sort_srcline = { static char no_srcfile[1]; -static char *get_srcfile(struct hist_entry *e) +static char *hist_entry__get_srcfile(struct hist_entry *e) { char *sf, *p; struct map *map = e->ms.map; + if (!map) + return no_srcfile; + sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), e->ms.sym, false, true); if (!strcmp(sf, SRCLINE_UNKNOWN)) @@ -350,24 +352,20 @@ static char *get_srcfile(struct hist_entry *e) static int64_t sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->srcfile) { - if (!left->ms.map) - left->srcfile = no_srcfile; - else - left->srcfile = get_srcfile(left); - } - if (!right->srcfile) { - if (!right->ms.map) - right->srcfile = no_srcfile; - else - right->srcfile = get_srcfile(right); - } + if (!left->srcfile) + left->srcfile = hist_entry__get_srcfile(left); + if (!right->srcfile) + right->srcfile = hist_entry__get_srcfile(right); + return strcmp(right->srcfile, left->srcfile); } static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { + if (!he->srcfile) + he->srcfile = hist_entry__get_srcfile(he); + return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile); } -- cgit v1.2.3 From 2960ed6f8d6794dcb39ba48c3e515e5be18ee9e1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 22 Feb 2016 09:32:33 +0900 Subject: perf tools: Fix alignment on some sort keys The srcline, srcfile and trace sort keys can have long entries. With commit 89fee7094323 ("perf hists: Do column alignment on the format iterator"), it now aligns output with hist_entry__snprintf_alignment(). So each (possibly long) sort entries don't need to do it themselves. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1456101153-14519-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6f4605b5beb5..a7d73e503b1b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -314,7 +314,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf, if (!he->srcline) he->srcline = hist_entry__get_srcline(he); - return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline); + return repsep_snprintf(bf, size, "%-.*s", width, he->srcline); } struct sort_entry sort_srcline = { @@ -366,7 +366,7 @@ static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf, if (!he->srcfile) he->srcfile = hist_entry__get_srcfile(he); - return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile); + return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile); } struct sort_entry sort_srcfile = { @@ -496,11 +496,11 @@ static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf, evsel = hists_to_evsel(he->hists); if (evsel->attr.type != PERF_TYPE_TRACEPOINT) - return scnprintf(bf, size, "%-*.*s", width, width, "N/A"); + return scnprintf(bf, size, "%-.*s", width, "N/A"); if (he->trace_output == NULL) he->trace_output = get_trace_output(he); - return repsep_snprintf(bf, size, "%-*.*s", width, width, he->trace_output); + return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output); } struct sort_entry sort_trace = { -- cgit v1.2.3 From 0c0af78d472f96efe04daaaccede7522b2394b76 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 21 Feb 2016 23:22:38 +0900 Subject: perf tools: Fix column width setting on 'trace' sort key It missed to update column length of the 'trace' sort key in the hists__calc_col_len() so it might 
truncate the output. It calculated the column length in the ->cmp() callback originally but it doesn't guarantee it's called always. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1456064558-13086-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 3 +++ tools/perf/util/sort.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 827c6cbcd05d..017eb5c42c37 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -179,6 +179,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) if (h->transaction) hists__new_col_len(hists, HISTC_TRANSACTION, hist_entry__transaction_len()); + + if (h->trace_output) + hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output)); } void hists__output_recalc_col_len(struct hists *hists, int max_rows) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a7d73e503b1b..6d0f85894f38 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -483,9 +483,6 @@ sort__trace_cmp(struct hist_entry *left, struct hist_entry *right) if (right->trace_output == NULL) right->trace_output = get_trace_output(right); - hists__new_col_len(left->hists, HISTC_TRACE, strlen(left->trace_output)); - hists__new_col_len(right->hists, HISTC_TRACE, strlen(right->trace_output)); - return strcmp(right->trace_output, left->trace_output); } -- cgit v1.2.3 From dd42baf1f64d7257258fa4f20064aee5160df369 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 21 Feb 2016 23:22:34 +0900 Subject: perf tools: Fix assertion failure on dynamic entry The dynamic entry is created for each field in a tracepoint event. Since they have no fixed hpp format index, it should skip when perf_hpp__reset_width() is called. This caused following assertion failure.. 
$ perf record -e sched:sched_switch -a sleep 1 $ perf report -s comm,next_pid --stdio perf: ui/hist.c:651: perf_hpp__reset_width: Assertion `!(fmt->idx >= PERF_HPP__MAX_INDEX)' failed. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1456064558-13086-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 1ba4117d9c2d..12223d791e9f 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -645,6 +645,9 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) if (perf_hpp__is_sort_entry(fmt)) return perf_hpp__reset_sort_width(fmt, hists); + if (perf_hpp__is_dynamic_entry(fmt)) + return; + BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX); switch (fmt->idx) { -- cgit v1.2.3 From 066dacbf2a32defb4de23ea4c1af9e77578b5ac2 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 22 Feb 2016 09:10:30 +0000 Subject: perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. 
Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. 
[1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 276 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/bpf-loader.h | 38 ++++++ 2 files changed, 314 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 0bdccf423b27..caeef9ec0124 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -739,6 +739,261 @@ int bpf__foreach_tev(struct bpf_object *obj, return 0; } +enum bpf_map_op_type { + BPF_MAP_OP_SET_VALUE, +}; + +enum bpf_map_key_type { + BPF_MAP_KEY_ALL, +}; + +struct bpf_map_op { + struct list_head list; + enum bpf_map_op_type op_type; + enum bpf_map_key_type key_type; + union { + u64 value; + } v; +}; + +struct bpf_map_priv { + struct list_head ops_list; +}; + +static void +bpf_map_op__delete(struct bpf_map_op *op) +{ + if (!list_empty(&op->list)) + list_del(&op->list); + free(op); +} + +static void +bpf_map_priv__purge(struct bpf_map_priv *priv) +{ + struct bpf_map_op *pos, *n; + + list_for_each_entry_safe(pos, n, &priv->ops_list, list) { + list_del_init(&pos->list); + bpf_map_op__delete(pos); + } +} + +static void +bpf_map_priv__clear(struct bpf_map *map __maybe_unused, + void *_priv) +{ + struct bpf_map_priv *priv = _priv; + + bpf_map_priv__purge(priv); + free(priv); +} + +static struct bpf_map_op * +bpf_map_op__new(void) +{ + struct bpf_map_op *op; + + op = zalloc(sizeof(*op)); + if (!op) { + pr_debug("Failed to alloc 
bpf_map_op\n"); + return ERR_PTR(-ENOMEM); + } + INIT_LIST_HEAD(&op->list); + + op->key_type = BPF_MAP_KEY_ALL; + return op; +} + +static int +bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) +{ + struct bpf_map_priv *priv; + const char *map_name; + int err; + + map_name = bpf_map__get_name(map); + err = bpf_map__get_private(map, (void **)&priv); + if (err) { + pr_debug("Failed to get private from map %s\n", map_name); + return err; + } + + if (!priv) { + priv = zalloc(sizeof(*priv)); + if (!priv) { + pr_debug("No enough memory to alloc map private\n"); + return -ENOMEM; + } + INIT_LIST_HEAD(&priv->ops_list); + + if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) { + free(priv); + return -BPF_LOADER_ERRNO__INTERNAL; + } + } + + list_add_tail(&op->list, &priv->ops_list); + return 0; +} + +static int +__bpf_map__config_value(struct bpf_map *map, + struct parse_events_term *term) +{ + struct bpf_map_def def; + struct bpf_map_op *op; + const char *map_name; + int err; + + map_name = bpf_map__get_name(map); + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("Unable to get map definition from '%s'\n", + map_name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (def.type != BPF_MAP_TYPE_ARRAY) { + pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", + map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; + } + if (def.key_size < sizeof(unsigned int)) { + pr_debug("Map %s has incorrect key size\n", map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; + } + switch (def.value_size) { + case 1: + case 2: + case 4: + case 8: + break; + default: + pr_debug("Map %s has incorrect value size\n", map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; + } + + op = bpf_map_op__new(); + if (IS_ERR(op)) + return PTR_ERR(op); + op->op_type = BPF_MAP_OP_SET_VALUE; + op->v.value = term->val.num; + + err = bpf_map__add_op(map, op); + if (err) + bpf_map_op__delete(op); + return err; +} + +static int +bpf_map__config_value(struct bpf_map *map, 
+ struct parse_events_term *term, + struct perf_evlist *evlist __maybe_unused) +{ + if (!term->err_val) { + pr_debug("Config value not set\n"); + return -BPF_LOADER_ERRNO__OBJCONF_CONF; + } + + if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) { + pr_debug("ERROR: wrong value type\n"); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; + } + + return __bpf_map__config_value(map, term); +} + +struct bpf_obj_config__map_func { + const char *config_opt; + int (*config_func)(struct bpf_map *, struct parse_events_term *, + struct perf_evlist *); +}; + +struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = { + {"value", bpf_map__config_value}, +}; + +static int +bpf__obj_config_map(struct bpf_object *obj, + struct parse_events_term *term, + struct perf_evlist *evlist, + int *key_scan_pos) +{ + /* key is "map:." */ + char *map_name = strdup(term->config + sizeof("map:") - 1); + struct bpf_map *map; + int err = -BPF_LOADER_ERRNO__OBJCONF_OPT; + char *map_opt; + size_t i; + + if (!map_name) + return -ENOMEM; + + map_opt = strchr(map_name, '.'); + if (!map_opt) { + pr_debug("ERROR: Invalid map config: %s\n", map_name); + goto out; + } + + *map_opt++ = '\0'; + if (*map_opt == '\0') { + pr_debug("ERROR: Invalid map option: %s\n", term->config); + goto out; + } + + map = bpf_object__get_map_by_name(obj, map_name); + if (!map) { + pr_debug("ERROR: Map %s doesn't exist\n", map_name); + err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST; + goto out; + } + + *key_scan_pos += map_opt - map_name; + for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) { + struct bpf_obj_config__map_func *func = + &bpf_obj_config__map_funcs[i]; + + if (strcmp(map_opt, func->config_opt) == 0) { + err = func->config_func(map, term, evlist); + goto out; + } + } + + pr_debug("ERROR: Invalid map config option '%s'\n", map_opt); + err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT; +out: + free(map_name); + if (!err) + key_scan_pos += strlen(map_opt); + return err; +} + +int bpf__config_obj(struct bpf_object 
*obj, + struct parse_events_term *term, + struct perf_evlist *evlist, + int *error_pos) +{ + int key_scan_pos = 0; + int err; + + if (!obj || !term || !term->config) + return -EINVAL; + + if (!prefixcmp(term->config, "map:")) { + key_scan_pos = sizeof("map:") - 1; + err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos); + goto out; + } + err = -BPF_LOADER_ERRNO__OBJCONF_OPT; +out: + if (error_pos) + *error_pos = key_scan_pos; + return err; + +} + #define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) #define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c) #define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START) @@ -753,6 +1008,14 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = { [ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue", [ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program", [ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue", + [ERRCODE_OFFSET(OBJCONF_OPT)] = "Invalid object config option", + [ERRCODE_OFFSET(OBJCONF_CONF)] = "Config value not set (missing '=')", + [ERRCODE_OFFSET(OBJCONF_MAP_OPT)] = "Invalid object map config option", + [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)] = "Target map doesn't exist", + [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)] = "Incorrect value type for map", + [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type", + [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size", + [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size", }; static int @@ -872,3 +1135,16 @@ int bpf__strerror_load(struct bpf_object *obj, bpf__strerror_end(buf, size); return 0; } + +int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, + struct parse_events_term *term __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int *error_pos __maybe_unused, int err, + char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, + "Can't use this config term with this map type"); + 
bpf__strerror_end(buf, size); + return 0; +} diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 6fdc0457e2b6..cc46a07b1a62 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -10,6 +10,7 @@ #include #include #include "probe-event.h" +#include "evlist.h" #include "debug.h" enum bpf_loader_errno { @@ -24,10 +25,19 @@ enum bpf_loader_errno { BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */ BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */ BPF_LOADER_ERRNO__PROLOGUEOOB, /* Offset out of bound for prologue */ + BPF_LOADER_ERRNO__OBJCONF_OPT, /* Invalid object config option */ + BPF_LOADER_ERRNO__OBJCONF_CONF, /* Config value not set (lost '=')) */ + BPF_LOADER_ERRNO__OBJCONF_MAP_OPT, /* Invalid object map config option */ + BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map not exist */ + BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE, /* Incorrect value type for map */ + BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */ + BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */ + BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */ __BPF_LOADER_ERRNO__END, }; struct bpf_object; +struct parse_events_term; #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev, @@ -53,6 +63,14 @@ int bpf__strerror_load(struct bpf_object *obj, int err, char *buf, size_t size); int bpf__foreach_tev(struct bpf_object *obj, bpf_prog_iter_callback_t func, void *arg); + +int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term, + struct perf_evlist *evlist, int *error_pos); +int bpf__strerror_config_obj(struct bpf_object *obj, + struct parse_events_term *term, + struct perf_evlist *evlist, + int *error_pos, int err, char *buf, + size_t size); #else static inline struct bpf_object * bpf__prepare_load(const char *filename __maybe_unused, @@ -83,6 +101,15 @@ bpf__foreach_tev(struct bpf_object *obj 
__maybe_unused, return 0; } +static inline int +bpf__config_obj(struct bpf_object *obj __maybe_unused, + struct parse_events_term *term __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int *error_pos __maybe_unused) +{ + return 0; +} + static inline int __bpf_strerror(char *buf, size_t size) { @@ -118,5 +145,16 @@ static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused, { return __bpf_strerror(buf, size); } + +static inline int +bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, + struct parse_events_term *term __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int *error_pos __maybe_unused, + int err __maybe_unused, + char *buf, size_t size) +{ + return __bpf_strerror(buf, size); +} #endif #endif -- cgit v1.2.3 From a34f3be70cdf986850552e62b9f22d659bfbcef3 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 22 Feb 2016 09:10:31 +0000 Subject: perf tools: Enable BPF object configure syntax This patch adds the final step for BPF map configuration. A new syntax is added to the parser so that the user can configure BPF objects through config terms enclosed in '/' '/'. After this patch, the following syntax is available: # perf record -e ./test_bpf_map_1.c/map:channel.value=10/ ... It takes effect after applying the following commits. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) 
= (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ - Normal case: # ./perf record -e './test_bpf_map_1.c/map:channel.value=10/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] - Error case: # ./perf record -e './test_bpf_map_1.c/map:channel.value/' usleep 10 event syntax error: '..ps:channel:value/' \___ Config value not set (missing '=') Hint: Valid config term: map:[]:value=[value] (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. 
use 'perf list' to list available events # ./perf record -e './test_bpf_map_1.c/xmap:channel.value=10/' usleep 10 event syntax error: '..pf_map_1.c/xmap:channel.value=10/' \___ Invalid object config option [SNIP] # ./perf record -e './test_bpf_map_1.c/map:xchannel.value=10/' usleep 10 event syntax error: '..p_1.c/map:xchannel.value=10/' \___ Target map not exist [SNIP] # ./perf record -e './test_bpf_map_1.c/map:channel.xvalue=10/' usleep 10 event syntax error: '..ps:channel.xvalue=10/' \___ Invalid object map config option [SNIP] # ./perf record -e './test_bpf_map_1.c/map:channel.value=x10/' usleep 10 event syntax error: '..nnel.value=x10/' \___ Incorrect value type for map [SNIP] Change BPF_MAP_TYPE_ARRAY to '1' in test_bpf_map_1.c: # ./perf record -e './test_bpf_map_1.c/map:channel.value=10/' usleep 10 event syntax error: '..ps:channel.value=10/' \___ Can't use this config term to this type of map Hint: Valid config term: map:[].value=[value] (add -v to see detail) Signed-off-by: Wang Nan [for parser part] Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-5-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 55 +++++++++++++++++++++++++++++++++++++++--- tools/perf/util/parse-events.h | 3 ++- tools/perf/util/parse-events.l | 2 +- tools/perf/util/parse-events.y | 10 +++++--- 4 files changed, 61 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b0b329539db5..a5dd6703a56b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -672,17 +672,63 @@ errout: return err; } +static int 
+parse_events_config_bpf(struct parse_events_evlist *data, + struct bpf_object *obj, + struct list_head *head_config) +{ + struct parse_events_term *term; + int error_pos; + + if (!head_config || list_empty(head_config)) + return 0; + + list_for_each_entry(term, head_config, list) { + char errbuf[BUFSIZ]; + int err; + + if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { + snprintf(errbuf, sizeof(errbuf), + "Invalid config term for BPF object"); + errbuf[BUFSIZ - 1] = '\0'; + + data->error->idx = term->err_term; + data->error->str = strdup(errbuf); + return -EINVAL; + } + + err = bpf__config_obj(obj, term, NULL, &error_pos); + if (err) { + bpf__strerror_config_obj(obj, term, NULL, + &error_pos, err, errbuf, + sizeof(errbuf)); + data->error->help = strdup( +"Hint:\tValid config term:\n" +" \tmap:[].value=[value]\n" +" \t(add -v to see detail)"); + data->error->str = strdup(errbuf); + if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) + data->error->idx = term->err_val; + else + data->error->idx = term->err_term + error_pos; + return err; + } + } + return 0; +} + int parse_events_load_bpf(struct parse_events_evlist *data, struct list_head *list, char *bpf_file_name, - bool source) + bool source, + struct list_head *head_config) { struct bpf_object *obj; + int err; obj = bpf__prepare_load(bpf_file_name, source); if (IS_ERR(obj)) { char errbuf[BUFSIZ]; - int err; err = PTR_ERR(obj); @@ -700,7 +746,10 @@ int parse_events_load_bpf(struct parse_events_evlist *data, return err; } - return parse_events_load_bpf_obj(data, list, obj); + err = parse_events_load_bpf_obj(data, list, obj); + if (err) + return err; + return parse_events_config_bpf(data, obj, head_config); } static int diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d5eb2af78826..c48377ad4e82 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -129,7 +129,8 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, int 
parse_events_load_bpf(struct parse_events_evlist *data, struct list_head *list, char *bpf_file_name, - bool source); + bool source, + struct list_head *head_config); /* Provide this function for perf test */ struct bpf_object; int parse_events_load_bpf_obj(struct parse_events_evlist *data, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 99486e6a8b97..0cc6b84a740a 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -122,7 +122,7 @@ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* +name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* /* If you add a modifier you need to update check_modifier() */ modifier_event [ukhpPGHSDI]+ modifier_bp [rwx]{1,3} diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 6a2d006ea77f..0e2d433e4ffa 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -437,24 +437,26 @@ PE_RAW opt_event_config } event_bpf_file: -PE_BPF_OBJECT +PE_BPF_OBJECT opt_event_config { struct parse_events_evlist *data = _data; struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_load_bpf(data, list, $1, false)); + ABORT_ON(parse_events_load_bpf(data, list, $1, false, $2)); + parse_events_terms__delete($2); $$ = list; } | -PE_BPF_SOURCE +PE_BPF_SOURCE opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_load_bpf(data, list, $1, true)); + ABORT_ON(parse_events_load_bpf(data, list, $1, true, $2)); + parse_events_terms__delete($2); $$ = list; } -- cgit v1.2.3 From 8690a2a773703e4ad2a07a7f3912ea6b131307cc Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 22 Feb 2016 09:10:32 +0000 Subject: perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all 
BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_ARRAY maps by inserting the value stored in the map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patches will put events created by perf into BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, and those events do not exist until perf_evsel__open(). bpf_map_config_foreach_key() is introduced to iterate over each key that needs to be configured. This function will be extended to support more map types and different key settings. In perf record, before recording starts, call bpf__apply_obj_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) 
= (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 
2394831.057840: : 101 Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 11 +++ tools/perf/util/bpf-loader.c | 184 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/bpf-loader.h | 15 ++++ 3 files changed, 210 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index cf3a28d83066..7d11162b6c41 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -32,6 +32,7 @@ #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" #include "util/llvm-utils.h" +#include "util/bpf-loader.h" #include #include @@ -536,6 +537,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + err = bpf__apply_obj_config(); + if (err) { + char errbuf[BUFSIZ]; + + bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); + pr_err("ERROR: Apply config to BPF failed: %s\n", + errbuf); + goto out_child; + } + /* * Normally perf_session__new would do this, but it doesn't have the * evlist. 
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index caeef9ec0124..dbbd17ca6d6f 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include "perf.h" @@ -994,6 +995,182 @@ out: } +typedef int (*map_config_func_t)(const char *name, int map_fd, + struct bpf_map_def *pdef, + struct bpf_map_op *op, + void *pkey, void *arg); + +static int +foreach_key_array_all(map_config_func_t func, + void *arg, const char *name, + int map_fd, struct bpf_map_def *pdef, + struct bpf_map_op *op) +{ + unsigned int i; + int err; + + for (i = 0; i < pdef->max_entries; i++) { + err = func(name, map_fd, pdef, op, &i, arg); + if (err) { + pr_debug("ERROR: failed to insert value to %s[%u]\n", + name, i); + return err; + } + } + return 0; +} + +static int +bpf_map_config_foreach_key(struct bpf_map *map, + map_config_func_t func, + void *arg) +{ + int err, map_fd; + const char *name; + struct bpf_map_op *op; + struct bpf_map_def def; + struct bpf_map_priv *priv; + + name = bpf_map__get_name(map); + + err = bpf_map__get_private(map, (void **)&priv); + if (err) { + pr_debug("ERROR: failed to get private from map %s\n", name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + if (!priv || list_empty(&priv->ops_list)) { + pr_debug("INFO: nothing to config for map %s\n", name); + return 0; + } + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("ERROR: failed to get definition from map %s\n", name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + map_fd = bpf_map__get_fd(map); + if (map_fd < 0) { + pr_debug("ERROR: failed to get fd from map %s\n", name); + return map_fd; + } + + list_for_each_entry(op, &priv->ops_list, list) { + switch (def.type) { + case BPF_MAP_TYPE_ARRAY: + switch (op->key_type) { + case BPF_MAP_KEY_ALL: + err = foreach_key_array_all(func, arg, name, + map_fd, &def, op); + if (err) + return err; + break; + default: + pr_debug("ERROR: keytype for map '%s' invalid\n", + 
name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + break; + default: + pr_debug("ERROR: type of '%s' incorrect\n", name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; + } + } + + return 0; +} + +static int +apply_config_value_for_key(int map_fd, void *pkey, + size_t val_size, u64 val) +{ + int err = 0; + + switch (val_size) { + case 1: { + u8 _val = (u8)(val); + err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); + break; + } + case 2: { + u16 _val = (u16)(val); + err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); + break; + } + case 4: { + u32 _val = (u32)(val); + err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); + break; + } + case 8: { + err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY); + break; + } + default: + pr_debug("ERROR: invalid value size\n"); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; + } + if (err && errno) + err = -errno; + return err; +} + +static int +apply_obj_config_map_for_key(const char *name, int map_fd, + struct bpf_map_def *pdef __maybe_unused, + struct bpf_map_op *op, + void *pkey, void *arg __maybe_unused) +{ + int err; + + switch (op->op_type) { + case BPF_MAP_OP_SET_VALUE: + err = apply_config_value_for_key(map_fd, pkey, + pdef->value_size, + op->v.value); + break; + default: + pr_debug("ERROR: unknown value type for '%s'\n", name); + err = -BPF_LOADER_ERRNO__INTERNAL; + } + return err; +} + +static int +apply_obj_config_map(struct bpf_map *map) +{ + return bpf_map_config_foreach_key(map, + apply_obj_config_map_for_key, + NULL); +} + +static int +apply_obj_config_object(struct bpf_object *obj) +{ + struct bpf_map *map; + int err; + + bpf_map__for_each(map, obj) { + err = apply_obj_config_map(map); + if (err) + return err; + } + return 0; +} + +int bpf__apply_obj_config(void) +{ + struct bpf_object *obj, *tmp; + int err; + + bpf_object__for_each_safe(obj, tmp) { + err = apply_obj_config_object(obj); + if (err) + return err; + } + + return 0; +} + #define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) 
#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c) #define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START) @@ -1148,3 +1325,10 @@ int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, bpf__strerror_end(buf, size); return 0; } + +int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_end(buf, size); + return 0; +} diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index cc46a07b1a62..5d3b931f9baa 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -71,6 +71,8 @@ int bpf__strerror_config_obj(struct bpf_object *obj, struct perf_evlist *evlist, int *error_pos, int err, char *buf, size_t size); +int bpf__apply_obj_config(void); +int bpf__strerror_apply_obj_config(int err, char *buf, size_t size); #else static inline struct bpf_object * bpf__prepare_load(const char *filename __maybe_unused, @@ -110,6 +112,12 @@ bpf__config_obj(struct bpf_object *obj __maybe_unused, return 0; } +static inline int +bpf__apply_obj_config(void) +{ + return 0; +} + static inline int __bpf_strerror(char *buf, size_t size) { @@ -156,5 +164,12 @@ bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, { return __bpf_strerror(buf, size); } + +static inline int +bpf__strerror_apply_obj_config(int err __maybe_unused, + char *buf, size_t size) +{ + return __bpf_strerror(buf, size); +} #endif #endif -- cgit v1.2.3 From 7630b3e28dd827fffad13cc0aada14b00ec524d9 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 22 Feb 2016 09:10:33 +0000 Subject: perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). 
Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 
938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 
2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[].value=[value] map:[].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 163 +++++++++++++++++++++++++++++++++++++++-- tools/perf/util/bpf-loader.h | 5 ++ tools/perf/util/evlist.c | 16 ++++ tools/perf/util/evlist.h | 3 + tools/perf/util/parse-events.c | 15 ++-- tools/perf/util/parse-events.h | 1 + 6 files changed, 190 insertions(+), 13 deletions(-) (limited to 'tools') diff --git 
a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index dbbd17ca6d6f..deacb95f27ec 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -742,6 +742,7 @@ int bpf__foreach_tev(struct bpf_object *obj, enum bpf_map_op_type { BPF_MAP_OP_SET_VALUE, + BPF_MAP_OP_SET_EVSEL, }; enum bpf_map_key_type { @@ -754,6 +755,7 @@ struct bpf_map_op { enum bpf_map_key_type key_type; union { u64 value; + struct perf_evsel *evsel; } v; }; @@ -838,6 +840,24 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) return 0; } +static struct bpf_map_op * +bpf_map__add_newop(struct bpf_map *map) +{ + struct bpf_map_op *op; + int err; + + op = bpf_map_op__new(); + if (IS_ERR(op)) + return op; + + err = bpf_map__add_op(map, op); + if (err) { + bpf_map_op__delete(op); + return ERR_PTR(err); + } + return op; +} + static int __bpf_map__config_value(struct bpf_map *map, struct parse_events_term *term) @@ -876,16 +896,12 @@ __bpf_map__config_value(struct bpf_map *map, return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; } - op = bpf_map_op__new(); + op = bpf_map__add_newop(map); if (IS_ERR(op)) return PTR_ERR(op); op->op_type = BPF_MAP_OP_SET_VALUE; op->v.value = term->val.num; - - err = bpf_map__add_op(map, op); - if (err) - bpf_map_op__delete(op); - return err; + return 0; } static int @@ -899,13 +915,75 @@ bpf_map__config_value(struct bpf_map *map, } if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) { - pr_debug("ERROR: wrong value type\n"); + pr_debug("ERROR: wrong value type for 'value'\n"); return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; } return __bpf_map__config_value(map, term); } +static int +__bpf_map__config_event(struct bpf_map *map, + struct parse_events_term *term, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + struct bpf_map_def def; + struct bpf_map_op *op; + const char *map_name; + int err; + + map_name = bpf_map__get_name(map); + evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str); + if (!evsel) { + pr_debug("Event 
(for '%s') '%s' doesn't exist\n", + map_name, term->val.str); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; + } + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("Unable to get map definition from '%s'\n", + map_name); + return err; + } + + /* + * No need to check key_size and value_size: + * kernel has already checked them. + */ + if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", + map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; + } + + op = bpf_map__add_newop(map); + if (IS_ERR(op)) + return PTR_ERR(op); + op->op_type = BPF_MAP_OP_SET_EVSEL; + op->v.evsel = evsel; + return 0; +} + +static int +bpf_map__config_event(struct bpf_map *map, + struct parse_events_term *term, + struct perf_evlist *evlist) +{ + if (!term->err_val) { + pr_debug("Config value not set\n"); + return -BPF_LOADER_ERRNO__OBJCONF_CONF; + } + + if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) { + pr_debug("ERROR: wrong value type for 'event'\n"); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; + } + + return __bpf_map__config_event(map, term, evlist); +} + struct bpf_obj_config__map_func { const char *config_opt; int (*config_func)(struct bpf_map *, struct parse_events_term *, @@ -914,6 +992,7 @@ struct bpf_obj_config__map_func { struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = { {"value", bpf_map__config_value}, + {"event", bpf_map__config_event}, }; static int @@ -1057,6 +1136,7 @@ bpf_map_config_foreach_key(struct bpf_map *map, list_for_each_entry(op, &priv->ops_list, list) { switch (def.type) { case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: switch (op->key_type) { case BPF_MAP_KEY_ALL: err = foreach_key_array_all(func, arg, name, @@ -1114,6 +1194,60 @@ apply_config_value_for_key(int map_fd, void *pkey, return err; } +static int +apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, + struct perf_evsel *evsel) +{ + struct xyarray *xy = evsel->fd; + struct 
perf_event_attr *attr; + unsigned int key, events; + bool check_pass = false; + int *evt_fd; + int err; + + if (!xy) { + pr_debug("ERROR: evsel not ready for map %s\n", name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (xy->row_size / xy->entry_size != 1) { + pr_debug("ERROR: Dimension of target event is incorrect for map %s\n", + name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM; + } + + attr = &evsel->attr; + if (attr->inherit) { + pr_debug("ERROR: Can't put inherit event into map %s\n", name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH; + } + + if (attr->type == PERF_TYPE_RAW) + check_pass = true; + if (attr->type == PERF_TYPE_HARDWARE) + check_pass = true; + if (attr->type == PERF_TYPE_SOFTWARE && + attr->config == PERF_COUNT_SW_BPF_OUTPUT) + check_pass = true; + if (!check_pass) { + pr_debug("ERROR: Event type is wrong for map %s\n", name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE; + } + + events = xy->entries / (xy->row_size / xy->entry_size); + key = *((unsigned int *)pkey); + if (key >= events) { + pr_debug("ERROR: there is no event %d for map %s\n", + key, name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE; + } + evt_fd = xyarray__entry(xy, key, 0); + err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY); + if (err && errno) + err = -errno; + return err; +} + static int apply_obj_config_map_for_key(const char *name, int map_fd, struct bpf_map_def *pdef __maybe_unused, @@ -1128,6 +1262,10 @@ apply_obj_config_map_for_key(const char *name, int map_fd, pdef->value_size, op->v.value); break; + case BPF_MAP_OP_SET_EVSEL: + err = apply_config_evsel_for_key(name, map_fd, pkey, + op->v.evsel); + break; default: pr_debug("ERROR: unknown value type for '%s'\n", name); err = -BPF_LOADER_ERRNO__INTERNAL; @@ -1193,6 +1331,11 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = { [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type", [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size", 
[ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size", + [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting", + [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting", + [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large", + [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event", + [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map", }; static int @@ -1329,6 +1472,12 @@ int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) { bpf__strerror_head(err, buf, size); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, + "Cannot set event to BPF map in multi-thread tracing"); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, + "%s (Hint: use -i to turn off inherit)", emsg); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, + "Can only put raw, hardware and BPF output event into a BPF map"); bpf__strerror_end(buf, size); return 0; } diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 5d3b931f9baa..7c7689f800cf 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -33,6 +33,11 @@ enum bpf_loader_errno { BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */ BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */ BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */ + BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT, /* Event not found for map setting */ + BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE, /* Invalid map size for event setting */ + BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */ + BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */ + BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */ __BPF_LOADER_ERRNO__END, }; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 0f577162c699..c42e1967e970 100644 --- 
a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1741,3 +1741,19 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist, tracking_evsel->tracking = true; } + +struct perf_evsel * +perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, + const char *str) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (!evsel->name) + continue; + if (strcmp(str, evsel->name) == 0) + return evsel; + } + + return NULL; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 7c4d9a206776..a0d15221db6e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -294,4 +294,7 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); + +struct perf_evsel * +perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a5dd6703a56b..5909fd2825d5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -697,14 +697,16 @@ parse_events_config_bpf(struct parse_events_evlist *data, return -EINVAL; } - err = bpf__config_obj(obj, term, NULL, &error_pos); + err = bpf__config_obj(obj, term, data->evlist, &error_pos); if (err) { - bpf__strerror_config_obj(obj, term, NULL, + bpf__strerror_config_obj(obj, term, data->evlist, &error_pos, err, errbuf, sizeof(errbuf)); data->error->help = strdup( -"Hint:\tValid config term:\n" +"Hint:\tValid config terms:\n" " \tmap:[].value=[value]\n" +" \tmap:[].event=[event]\n" +"\n" " \t(add -v to see detail)"); data->error->str = strdup(errbuf); if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) @@ -1530,9 +1532,10 @@ int parse_events(struct perf_evlist *evlist, const char *str, struct parse_events_error *err) { struct parse_events_evlist data = { - .list = LIST_HEAD_INIT(data.list), - .idx = evlist->nr_entries, - 
.error = err, + .list = LIST_HEAD_INIT(data.list), + .idx = evlist->nr_entries, + .error = err, + .evlist = evlist, }; int ret; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index c48377ad4e82..e0369695870f 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -99,6 +99,7 @@ struct parse_events_evlist { int idx; int nr_groups; struct parse_events_error *error; + struct perf_evlist *evlist; }; struct parse_events_terms { -- cgit v1.2.3 From 2d055bf253c0d606c5de3fe7749e3188080780ad Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 22 Feb 2016 09:10:34 +0000 Subject: perf tools: Support setting different slots in a BPF map separately This patch introduces basic facilities to support config different slots in a BPF map one by one. array.nr_ranges and array.ranges are introduced into 'struct parse_events_term', where ranges is an array of indices range (start, length) which will be configured by this config term. nr_ranges is the size of the array. The array is passed to 'struct bpf_map_priv'. To indicate the new type of configuration, BPF_MAP_KEY_RANGES is added as a new key type. bpf_map_config_foreach_key() is extended to iterate over those indices instead of all possible keys. Code in this commit will be enabled by following commit which enables the indices syntax for array configuration. 
Signed-off-by: Wang Nan Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-8-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 128 ++++++++++++++++++++++++++++++++++++++--- tools/perf/util/bpf-loader.h | 1 + tools/perf/util/parse-events.c | 7 +++ tools/perf/util/parse-events.h | 10 ++++ 4 files changed, 137 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index deacb95f27ec..44824e3eeaed 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -17,6 +17,7 @@ #include "llvm-utils.h" #include "probe-event.h" #include "probe-finder.h" // for MAX_PROBES +#include "parse-events.h" #include "llvm-utils.h" #define DEFINE_PRINT_FN(name, level) \ @@ -747,12 +748,16 @@ enum bpf_map_op_type { enum bpf_map_key_type { BPF_MAP_KEY_ALL, + BPF_MAP_KEY_RANGES, }; struct bpf_map_op { struct list_head list; enum bpf_map_op_type op_type; enum bpf_map_key_type key_type; + union { + struct parse_events_array array; + } k; union { u64 value; struct perf_evsel *evsel; @@ -768,6 +773,8 @@ bpf_map_op__delete(struct bpf_map_op *op) { if (!list_empty(&op->list)) list_del(&op->list); + if (op->key_type == BPF_MAP_KEY_RANGES) + parse_events__clear_array(&op->k.array); free(op); } @@ -792,10 +799,33 @@ bpf_map_priv__clear(struct bpf_map *map __maybe_unused, free(priv); } +static int +bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term) +{ + op->key_type = BPF_MAP_KEY_ALL; + if (!term) + return 0; + + if (term->array.nr_ranges) { + size_t memsz = term->array.nr_ranges * + sizeof(op->k.array.ranges[0]); + + op->k.array.ranges = memdup(term->array.ranges, memsz); + if 
(!op->k.array.ranges) { + pr_debug("No enough memory to alloc indices for map\n"); + return -ENOMEM; + } + op->key_type = BPF_MAP_KEY_RANGES; + op->k.array.nr_ranges = term->array.nr_ranges; + } + return 0; +} + static struct bpf_map_op * -bpf_map_op__new(void) +bpf_map_op__new(struct parse_events_term *term) { struct bpf_map_op *op; + int err; op = zalloc(sizeof(*op)); if (!op) { @@ -804,7 +834,11 @@ bpf_map_op__new(void) } INIT_LIST_HEAD(&op->list); - op->key_type = BPF_MAP_KEY_ALL; + err = bpf_map_op_setkey(op, term); + if (err) { + free(op); + return ERR_PTR(err); + } return op; } @@ -841,12 +875,12 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) } static struct bpf_map_op * -bpf_map__add_newop(struct bpf_map *map) +bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term) { struct bpf_map_op *op; int err; - op = bpf_map_op__new(); + op = bpf_map_op__new(term); if (IS_ERR(op)) return op; @@ -896,7 +930,7 @@ __bpf_map__config_value(struct bpf_map *map, return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; } - op = bpf_map__add_newop(map); + op = bpf_map__add_newop(map, term); if (IS_ERR(op)) return PTR_ERR(op); op->op_type = BPF_MAP_OP_SET_VALUE; @@ -958,7 +992,7 @@ __bpf_map__config_event(struct bpf_map *map, return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; } - op = bpf_map__add_newop(map); + op = bpf_map__add_newop(map, term); if (IS_ERR(op)) return PTR_ERR(op); op->op_type = BPF_MAP_OP_SET_EVSEL; @@ -995,6 +1029,44 @@ struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = { {"event", bpf_map__config_event}, }; +static int +config_map_indices_range_check(struct parse_events_term *term, + struct bpf_map *map, + const char *map_name) +{ + struct parse_events_array *array = &term->array; + struct bpf_map_def def; + unsigned int i; + int err; + + if (!array->nr_ranges) + return 0; + if (!array->ranges) { + pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n", + map_name, (int)array->nr_ranges); + return 
-BPF_LOADER_ERRNO__INTERNAL; + } + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("ERROR: Unable to get map definition from '%s'\n", + map_name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + for (i = 0; i < array->nr_ranges; i++) { + unsigned int start = array->ranges[i].start; + size_t length = array->ranges[i].length; + unsigned int idx = start + length - 1; + + if (idx >= def.max_entries) { + pr_debug("ERROR: index %d too large\n", idx); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; + } + } + return 0; +} + static int bpf__obj_config_map(struct bpf_object *obj, struct parse_events_term *term, @@ -1030,7 +1102,12 @@ bpf__obj_config_map(struct bpf_object *obj, goto out; } - *key_scan_pos += map_opt - map_name; + *key_scan_pos += strlen(map_opt); + err = config_map_indices_range_check(term, map, map_name); + if (err) + goto out; + *key_scan_pos -= strlen(map_opt); + for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) { struct bpf_obj_config__map_func *func = &bpf_obj_config__map_funcs[i]; @@ -1099,6 +1176,33 @@ foreach_key_array_all(map_config_func_t func, return 0; } +static int +foreach_key_array_ranges(map_config_func_t func, void *arg, + const char *name, int map_fd, + struct bpf_map_def *pdef, + struct bpf_map_op *op) +{ + unsigned int i, j; + int err; + + for (i = 0; i < op->k.array.nr_ranges; i++) { + unsigned int start = op->k.array.ranges[i].start; + size_t length = op->k.array.ranges[i].length; + + for (j = 0; j < length; j++) { + unsigned int idx = start + j; + + err = func(name, map_fd, pdef, op, &idx, arg); + if (err) { + pr_debug("ERROR: failed to insert value to %s[%u]\n", + name, idx); + return err; + } + } + } + return 0; +} + static int bpf_map_config_foreach_key(struct bpf_map *map, map_config_func_t func, @@ -1141,14 +1245,19 @@ bpf_map_config_foreach_key(struct bpf_map *map, case BPF_MAP_KEY_ALL: err = foreach_key_array_all(func, arg, name, map_fd, &def, op); - if (err) - return err; + break; + case BPF_MAP_KEY_RANGES: 
+ err = foreach_key_array_ranges(func, arg, name, + map_fd, &def, + op); break; default: pr_debug("ERROR: keytype for map '%s' invalid\n", name); return -BPF_LOADER_ERRNO__INTERNAL; } + if (err) + return err; break; default: pr_debug("ERROR: type of '%s' incorrect\n", name); @@ -1336,6 +1445,7 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = { [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large", [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event", [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map", + [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)] = "Index too large", }; static int diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 7c7689f800cf..be4311944e3d 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -38,6 +38,7 @@ enum bpf_loader_errno { BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */ + BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */ __BPF_LOADER_ERRNO__END, }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5909fd2825d5..697d3506c584 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2211,6 +2211,8 @@ void parse_events_terms__purge(struct list_head *terms) struct parse_events_term *term, *h; list_for_each_entry_safe(term, h, terms, list) { + if (term->array.nr_ranges) + free(term->array.ranges); list_del_init(&term->list); free(term); } @@ -2224,6 +2226,11 @@ void parse_events_terms__delete(struct list_head *terms) free(terms); } +void parse_events__clear_array(struct parse_events_array *a) +{ + free(a->ranges); +} + void parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str) { diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 
e0369695870f..e4456221f52d 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -72,8 +72,17 @@ enum { __PARSE_EVENTS__TERM_TYPE_NR, }; +struct parse_events_array { + size_t nr_ranges; + struct { + unsigned int start; + size_t length; + } *ranges; +}; + struct parse_events_term { char *config; + struct parse_events_array array; union { char *str; u64 num; @@ -120,6 +129,7 @@ int parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term); void parse_events_terms__delete(struct list_head *terms); void parse_events_terms__purge(struct list_head *terms); +void parse_events__clear_array(struct parse_events_array *a); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); -- cgit v1.2.3 From e571e029bdbf59f485fe67740b7a4ef421e1d55d Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 22 Feb 2016 09:10:35 +0000 Subject: perf tools: Enable indices setting syntax for BPF map This patch introduces a new syntax to perf event parser: # perf record -e './test_bpf_map_3.c/map:channel.value[0,1,2,3...5]=101/' usleep 2 By utilizing the basic facilities in bpf-loader.c which allow setting different slots in a BPF map separately, the newly introduced syntax allows perf to control specific elements in a BPF map. Test result: # cat ./test_bpf_map_3.c /************************ BEGIN **************************/ #include #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) 
= (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(unsigned char), .max_entries = 100, }; SEC("func=hrtimer_nanosleep rqtp->tv_nsec") int func(void *ctx, int err, long nsec) { char fmt[] = "%ld\n"; long usec = nsec * 0x10624dd3 >> 38; // nsec / 1000 int key = (int)usec; unsigned char *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), (unsigned char)*pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_3.c/map:channel.value[0,1,2,3...5]=101/' usleep 2 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep usleep usleep-405 [004] d... 2745423.547822: : 101 # ./perf record -e './test_bpf_map_3.c/map:channel.value[0...9,20...29]=102,map:channel.value[10...19]=103/' usleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # ./perf record -e './test_bpf_map_3.c/map:channel.value[0...9,20...29]=102,map:channel.value[10...19]=103/' usleep 15 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep usleep usleep-405 [004] d... 2745423.547822: : 101 usleep-655 [006] d... 2745434.122814: : 102 usleep-904 [006] d... 2745439.916264: : 103 # ./perf record -e './test_bpf_map_3.c/map:channel.value[all]=104/' usleep 99 # cat /sys/kernel/debug/tracing/trace | grep usleep usleep-405 [004] d... 2745423.547822: : 101 usleep-655 [006] d... 2745434.122814: : 102 usleep-904 [006] d... 2745439.916264: : 103 usleep-1537 [003] d... 
2745538.053737: : 104 Error case: # ./perf record -e './test_bpf_map_3.c/map:channel.value[10...1000]=104/' usleep 99 event syntax error: '..annel.value[10...1000]=104/' \___ Index too large Hint: Valid config terms: map:[<arraymap>].value<indices>=[value] map:[<eventmap>].event<indices>=[event] where <indices> is something like [0,3...5] or [all] (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events Signed-off-by: Wang Nan Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-9-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 5 ++- tools/perf/util/parse-events.l | 13 ++++++- tools/perf/util/parse-events.y | 85 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 697d3506c584..6e2f20334379 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -704,9 +704,10 @@ parse_events_config_bpf(struct parse_events_evlist *data, sizeof(errbuf)); data->error->help = strdup( "Hint:\tValid config terms:\n" -" \tmap:[].value=[value]\n" -" \tmap:[].event=[event]\n" +" \tmap:[].value=[value]\n" +" \tmap:[].event=[event]\n" "\n" +" \twhere is something like [0,3...5] or [all]\n" " \t(add -v to see detail)"); data->error->str = strdup(errbuf); if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 0cc6b84a740a..fb85d0311d28 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -9,8
+9,8 @@ %{ #include #include "../perf.h" -#include "parse-events-bison.h" #include "parse-events.h" +#include "parse-events-bison.h" char *parse_events_get_text(yyscan_t yyscanner); YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); @@ -111,6 +111,7 @@ do { \ %x mem %s config %x event +%x array group [^,{}/]*[{][^}]*[}][^,{}/]* event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* @@ -176,6 +177,14 @@ modifier_bp [rwx]{1,3} } +{ +"]" { BEGIN(config); return ']'; } +{num_dec} { return value(yyscanner, 10); } +{num_hex} { return value(yyscanner, 16); } +, { return ','; } +"\.\.\." { return PE_ARRAY_RANGE; } +} + { /* * Please update config_term_names when new static term is added. @@ -195,6 +204,8 @@ no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } +\[all\] { return PE_ARRAY_ALL; } +"[" { BEGIN(array); return '['; } } { diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 0e2d433e4ffa..d1fbcabbe70d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -48,6 +48,7 @@ static inc_group_count(struct list_head *list, %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%token PE_ARRAY_ALL PE_ARRAY_RANGE %type PE_VALUE %type PE_VALUE_SYM_HW %type PE_VALUE_SYM_SW @@ -83,6 +84,9 @@ static inc_group_count(struct list_head *list, %type group_def %type group %type groups +%type array +%type array_term +%type array_terms %union { @@ -94,6 +98,7 @@ static inc_group_count(struct list_head *list, char *sys; char *event; } tracepoint_name; + struct parse_events_array array; } %% @@ -572,6 +577,86 @@ PE_TERM ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL)); $$ = term; } +| +PE_NAME array '=' PE_NAME +{ + struct parse_events_term *term; + int i; + + ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, + 
$1, $4, &@1, &@4)); + + term->array = $2; + $$ = term; +} +| +PE_NAME array '=' PE_VALUE +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $4, &@1, &@4)); + term->array = $2; + $$ = term; +} + +array: +'[' array_terms ']' +{ + $$ = $2; +} +| +PE_ARRAY_ALL +{ + $$.nr_ranges = 0; + $$.ranges = NULL; +} + +array_terms: +array_terms ',' array_term +{ + struct parse_events_array new_array; + + new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges; + new_array.ranges = malloc(sizeof(new_array.ranges[0]) * + new_array.nr_ranges); + ABORT_ON(!new_array.ranges); + memcpy(&new_array.ranges[0], $1.ranges, + $1.nr_ranges * sizeof(new_array.ranges[0])); + memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges, + $3.nr_ranges * sizeof(new_array.ranges[0])); + free($1.ranges); + free($3.ranges); + $$ = new_array; +} +| +array_term + +array_term: +PE_VALUE +{ + struct parse_events_array array; + + array.nr_ranges = 1; + array.ranges = malloc(sizeof(array.ranges[0])); + ABORT_ON(!array.ranges); + array.ranges[0].start = $1; + array.ranges[0].length = 1; + $$ = array; +} +| +PE_VALUE PE_ARRAY_RANGE PE_VALUE +{ + struct parse_events_array array; + + ABORT_ON($3 < $1); + array.nr_ranges = 1; + array.ranges = malloc(sizeof(array.ranges[0])); + ABORT_ON(!array.ranges); + array.ranges[0].start = $1; + array.ranges[0].length = $3 - $1 + 1; + $$ = array; +} sep_dc: ':' | -- cgit v1.2.3 From 95088a591e197610bd03f4059f5fdbe9e376425b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 22 Feb 2016 09:10:36 +0000 Subject: perf tools: Apply tracepoint event definition options to BPF script Users can pass options to tracepoints defined in the BPF script. 
For example: # perf record -e ./test.c/no-inherit/ bash # dd if=/dev/zero of=/dev/null count=10000 # exit [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.022 MB perf.data (139 samples) ] (no-inherit works, only the sys_read issued by bash are captured, at least 10000 sys_read issued by dd are skipped.) test.c: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("func=sys_read") int bpf_func__sys_read(void *ctx) { return 1; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; no-inherit is applied to the kprobe event defined in test.c. Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-10-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 2 +- tools/perf/util/parse-events.c | 56 +++++++++++++++++++++++++++++++++++++----- tools/perf/util/parse-events.h | 3 ++- 3 files changed, 53 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 4aed5cb4ac2d..199501c71e27 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -112,7 +112,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), parse_evlist.error = &parse_error; INIT_LIST_HEAD(&parse_evlist.list); - err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj); + err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj, NULL); if (err || list_empty(&parse_evlist.list)) { pr_debug("Failed to add events selected by BPF\n"); return TEST_FAIL; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6e2f20334379..4c19d5e79d8c 100644 --- 
a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -581,6 +581,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, struct __add_bpf_event_param { struct parse_events_evlist *data; struct list_head *list; + struct list_head *head_config; }; static int add_bpf_event(struct probe_trace_event *tev, int fd, @@ -597,7 +598,8 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd, tev->group, tev->event, fd); err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group, - tev->event, evlist->error, NULL); + tev->event, evlist->error, + param->head_config); if (err) { struct perf_evsel *evsel, *tmp; @@ -622,11 +624,12 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd, int parse_events_load_bpf_obj(struct parse_events_evlist *data, struct list_head *list, - struct bpf_object *obj) + struct bpf_object *obj, + struct list_head *head_config) { int err; char errbuf[BUFSIZ]; - struct __add_bpf_event_param param = {data, list}; + struct __add_bpf_event_param param = {data, list, head_config}; static bool registered_unprobe_atexit = false; if (IS_ERR(obj) || !obj) { @@ -720,14 +723,47 @@ parse_events_config_bpf(struct parse_events_evlist *data, return 0; } +/* + * Split config terms: + * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ... + * 'call-graph=fp' is 'evt config', should be applied to each + * event in bpf.c. + * 'map:array.value[0]=1' is 'obj config', should be processed + * with parse_events_config_bpf. + * + * Move object config terms from the first list to obj_head_config. + */ +static void +split_bpf_config_terms(struct list_head *evt_head_config, + struct list_head *obj_head_config) +{ + struct parse_events_term *term, *temp; + + /* + * Currently, all possible user config terms + * belong to bpf object. parse_events__is_hardcoded_term() + * happens to be a good flag. + * + * See parse_events_config_bpf() and + * config_term_tracepoint().
+ */ + list_for_each_entry_safe(term, temp, evt_head_config, list) + if (!parse_events__is_hardcoded_term(term)) + list_move_tail(&term->list, obj_head_config); +} + int parse_events_load_bpf(struct parse_events_evlist *data, struct list_head *list, char *bpf_file_name, bool source, struct list_head *head_config) { - struct bpf_object *obj; int err; + struct bpf_object *obj; + LIST_HEAD(obj_head_config); + + if (head_config) + split_bpf_config_terms(head_config, &obj_head_config); obj = bpf__prepare_load(bpf_file_name, source); if (IS_ERR(obj)) { @@ -749,10 +785,18 @@ int parse_events_load_bpf(struct parse_events_evlist *data, return err; } - err = parse_events_load_bpf_obj(data, list, obj); + err = parse_events_load_bpf_obj(data, list, obj, head_config); if (err) return err; - return parse_events_config_bpf(data, obj, head_config); + err = parse_events_config_bpf(data, obj, &obj_head_config); + + /* + * Caller doesn't know anything about obj_head_config, + * so combine them together again before returning.
+ */ + if (head_config) + list_splice_tail(&obj_head_config, head_config); + return err; } static int diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index e4456221f52d..67e493088e81 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -146,7 +146,8 @@ int parse_events_load_bpf(struct parse_events_evlist *data, struct bpf_object; int parse_events_load_bpf_obj(struct parse_events_evlist *data, struct list_head *list, - struct bpf_object *obj); + struct bpf_object *obj, + struct list_head *head_config); int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, -- cgit v1.2.3 From 03e0a7df3efd959e40cd7ff40b1fabddc234ec5a Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 22 Feb 2016 09:10:37 +0000 Subject: perf tools: Introduce bpf-output event Commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adds a helper to enable a BPF program to output data to a perf ring buffer through a new type of perf event, PERF_COUNT_SW_BPF_OUTPUT. This patch enables perf to create events of that type. Now a perf user can use the following cmdline to receive output data from BPF programs: # perf record -a -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output.c/map:channel.event=evt/ ls / # perf script perf 1560 [004] 347747.086295: evt: ffffffff811fd201 sys_write ... perf 1560 [004] 347747.086300: evt: ffffffff811fd201 sys_write ... perf 1560 [004] 347747.086315: evt: ffffffff811fd201 sys_write ... ... Test result: # cat test_bpf_output.c /************************ BEGIN **************************/ #include struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) static u64 (*ktime_get_ns)(void) = (void *)BPF_FUNC_ktime_get_ns; static int (*trace_printk)(const char *fmt, int fmt_size, ...) 
= (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { struct { u64 ktime; int cpuid; } __attribute__((packed)) output_data; char error_data[] = "Error: failed to output: %d\n"; output_data.cpuid = get_smp_processor_id(); output_data.ktime = ktime_get_ns(); int err = perf_event_output(ctx, &channel, get_smp_processor_id(), &output_data, sizeof(output_data)); if (err) trace_printk(error_data, sizeof(error_data), err); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************ END ***************************/ # perf record -a -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output.c/map:channel.event=evt/ ls / # perf script | grep ls ls 2242 [003] 347851.557563: evt: ffffffff811fd201 sys_write ... ls 2242 [003] 347851.557571: evt: ffffffff811fd201 sys_write ... 
Signed-off-by: Wang Nan Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Cody P Schafer Cc: He Kuang Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kirill Smelkov Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-11-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 5 ++--- tools/perf/util/evsel.c | 5 +++++ tools/perf/util/evsel.h | 8 ++++++++ tools/perf/util/parse-events.l | 1 + 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 44824e3eeaed..0967ce601931 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1331,13 +1331,12 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH; } + if (perf_evsel__is_bpf_output(evsel)) + check_pass = true; if (attr->type == PERF_TYPE_RAW) check_pass = true; if (attr->type == PERF_TYPE_HARDWARE) check_pass = true; - if (attr->type == PERF_TYPE_SOFTWARE && - attr->config == PERF_COUNT_SW_BPF_OUTPUT) - check_pass = true; if (!check_pass) { pr_debug("ERROR: Event type is wrong for map %s\n", name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6ae20d0056de..0902fe418754 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -225,6 +225,11 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) if (evsel != NULL) perf_evsel__init(evsel, attr, idx); + if (perf_evsel__is_bpf_output(evsel)) { + evsel->attr.sample_type |= PERF_SAMPLE_RAW; + evsel->attr.sample_period = 1; + } + return evsel; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8e75434bd01c..efad78f811ad 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -364,6 +364,14 @@ static 
inline bool perf_evsel__is_function_event(struct perf_evsel *evsel) #undef FUNCTION_EVENT } +static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel) +{ + struct perf_event_attr *attr = &evsel->attr; + + return (attr->config == PERF_COUNT_SW_BPF_OUTPUT) && + (attr->type == PERF_TYPE_SOFTWARE); +} + struct perf_attr_details { bool freq; bool verbose; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index fb85d0311d28..1477fbc78993 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -248,6 +248,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } +bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } /* * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately. -- cgit v1.2.3 From 0e47b38dcd24c78d0699b42f28d5986154d2aa11 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 22 Feb 2016 14:08:22 -0300 Subject: tools lib traceevent: Implement '%' operation The operation '%' is not implemented in event-parse.c, causing an error when parsing events with the '%' operation in their printk format. For example, # perf record -e sched:sched_deadline_yield ~/yield-test Warning: [sched:sched_deadline_yield] unknown op '%' .... # perf script Warning: [sched:sched_deadline_yield] unknown op '%' test 1641 [006] 3364.109319: sched:sched_deadline_yield: \ [FAILED TO PARSE] now=3364109314595 \ deadline=3364139295135 runtime=19975597 This patch implements the '%' operation. 
With this patch, we see the correct output: # perf record -e sched:sched_deadline_yield ~/yield-test No Warning # perf script yield-test 4005 [001] 4623.650978: sched:sched_deadline_yield: \ now=4623.650974050 \ deadline=4623.680957364 remaining_runtime=19979611 Signed-off-by: Daniel Bristot de Oliveira Reviewed-by: Steven Rostedt Cc: Juri Lelli Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-rt-users Link: http://lkml.kernel.org/r/5c96a395c56cea6d3d13d949051bdece86cc26e0.1456157869.git.bristot@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index c3bd294a63d1..575e75174087 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1951,6 +1951,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) strcmp(token, "*") == 0 || strcmp(token, "^") == 0 || strcmp(token, "/") == 0 || + strcmp(token, "%") == 0 || strcmp(token, "<") == 0 || strcmp(token, ">") == 0 || strcmp(token, "<=") == 0 || @@ -3689,6 +3690,9 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg case '/': val = left / right; break; + case '%': + val = left % right; + break; case '*': val = left * right; break; -- cgit v1.2.3 From e95cf700b1b267d912ee779c3ab36e582111a52d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Feb 2016 09:34:32 +0100 Subject: perf tools: Make cl_address global It'll be used in following patches. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1455525293-8671-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 6 ------ tools/perf/util/sort.h | 5 +++++ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6d0f85894f38..5f94ee740c5b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1015,12 +1015,6 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*s", width, out); } -static inline u64 cl_address(u64 address) -{ - /* return the cacheline of the address */ - return (address & ~(cacheline_size - 1)); -} - static int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 89a1273fd2da..46f159f9d947 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -162,6 +162,11 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he) return period * 100.0 / total_period; } +static inline u64 cl_address(u64 address) +{ + /* return the cacheline of the address */ + return (address & ~(cacheline_size - 1)); +} enum sort_mode { SORT_MODE__NORMAL, -- cgit v1.2.3 From d392711095f12942a61e2963f5ab0076ac651e73 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Feb 2016 09:34:33 +0100 Subject: perf tools: Introduce cl_offset function It'll be used in following patches. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1455525293-8671-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 46f159f9d947..5b9c6246de6d 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -168,6 +168,12 @@ static inline u64 cl_address(u64 address) return (address & ~(cacheline_size - 1)); } +static inline u64 cl_offset(u64 address) +{ + /* return the offset of the address within its cacheline */ + return (address & (cacheline_size - 1)); +} + enum sort_mode { SORT_MODE__NORMAL, SORT_MODE__BRANCH, -- cgit v1.2.3 From acbe613e0c03d59cab21aec3565cdb28c7df98c3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Feb 2016 09:34:34 +0100 Subject: perf tools: Add monitored events array It will ease up configuration of memory events and addition of other memory events in following patches. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1455525293-8671-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 17 +++++++++-------- tools/perf/util/Build | 1 + tools/perf/util/mem-events.c | 10 ++++++++++ tools/perf/util/mem-events.h | 19 +++++++++++++++++++ 4 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 tools/perf/util/mem-events.c create mode 100644 tools/perf/util/mem-events.h (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 390170041696..36c56a4b778b 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -6,6 +6,7 @@ #include "util/tool.h" #include "util/session.h" #include "util/data.h" +#include "util/mem-events.h" #define MEM_OPERATION_LOAD 0x1 #define MEM_OPERATION_STORE 0x2 @@ -34,20 +35,20 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; - if (mem->operation & MEM_OPERATION_LOAD) + if (mem->operation & MEM_OPERATION_LOAD) { + perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; rec_argv[i++] = "-W"; + } rec_argv[i++] = "-d"; - if (mem->operation & MEM_OPERATION_LOAD) { - rec_argv[i++] = "-e"; - rec_argv[i++] = "cpu/mem-loads/pp"; - } + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + if (!perf_mem_events[j].record) + continue; - if (mem->operation & MEM_OPERATION_STORE) { rec_argv[i++] = "-e"; - rec_argv[i++] = "cpu/mem-stores/pp"; - } + rec_argv[i++] = perf_mem_events[j].name; + }; for (j = 1; j < argc; j++, i++) rec_argv[i] = argv[j]; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index a34752d28488..df2b690970ac 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -82,6 +82,7 @@ libperf-y += parse-branch-options.o libperf-y += parse-regs-options.o libperf-y += term.o libperf-y += help-unknown-cmd.o +libperf-y += mem-events.o 
libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c new file mode 100644 index 000000000000..c6ba0a19ec9a --- /dev/null +++ b/tools/perf/util/mem-events.c @@ -0,0 +1,10 @@ +#include "mem-events.h" + +#define E(n) { .name = n } + +struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { + E("cpu/mem-loads,ldlat=30/P"), + E("cpu/mem-stores/P"), +}; + +#undef E diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h new file mode 100644 index 000000000000..c97b21465fa9 --- /dev/null +++ b/tools/perf/util/mem-events.h @@ -0,0 +1,19 @@ +#ifndef __PERF_MEM_EVENTS_H +#define __PERF_MEM_EVENTS_H + +#include + +struct perf_mem_event { + bool record; + const char *name; +}; + +enum { + PERF_MEM_EVENTS__LOAD, + PERF_MEM_EVENTS__STORE, + PERF_MEM_EVENTS__MAX, +}; + +extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; + +#endif /* __PERF_MEM_EVENTS_H */ -- cgit v1.2.3 From ce1e22b08f0728e840614d3d0fc43fd1d6b7f7a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Feb 2016 09:34:35 +0100 Subject: perf mem: Add -e record option Adding -e option for perf mem record command, to be able to specify memory event directly. 
Get list of available events: $ perf mem record -e list ldlat-loads ldlat-stores Monitor ldlat-loads: $ perf mem record -e ldlat-loads true Committer notes: Further testing: # perf mem record -e ldlat-loads true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (10 samples) ] # perf evlist cpu/mem-loads,ldlat=30/P # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1455525293-8671-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 63 +++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/mem-events.c | 47 ++++++++++++++++++++++++++++++--- tools/perf/util/mem-events.h | 3 +++ 3 files changed, 106 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 36c56a4b778b..b3f8a89ede40 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -7,6 +7,7 @@ #include "util/session.h" #include "util/data.h" #include "util/mem-events.h" +#include "util/debug.h" #define MEM_OPERATION_LOAD 0x1 #define MEM_OPERATION_STORE 0x2 @@ -22,11 +23,55 @@ struct perf_mem { DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; +static int parse_record_events(const struct option *opt, + const char *str, int unset __maybe_unused) +{ + struct perf_mem *mem = *(struct perf_mem **)opt->value; + int j; + + if (strcmp(str, "list")) { + if (!perf_mem_events__parse(str)) { + mem->operation = 0; + return 0; + } + exit(-1); + } + + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + struct perf_mem_event *e = &perf_mem_events[j]; + + fprintf(stderr, "%-20s%s", + e->tag, verbose ? 
"" : "\n"); + if (verbose) + fprintf(stderr, " [%s]\n", e->name); + } + exit(0); +} + +static const char * const __usage[] = { + "perf mem record [] []", + "perf mem record [] -- []", + NULL +}; + +static const char * const *record_mem_usage = __usage; + static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) { int rec_argc, i = 0, j; const char **rec_argv; int ret; + struct option options[] = { + OPT_CALLBACK('e', "event", &mem, "event", + "event selector. use 'perf mem record -e list' to list available events", + parse_record_events), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), + OPT_END() + }; + + argc = parse_options(argc, argv, options, record_mem_usage, + PARSE_OPT_STOP_AT_NON_OPTION); rec_argc = argc + 7; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); @@ -35,10 +80,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; - if (mem->operation & MEM_OPERATION_LOAD) { + if (mem->operation & MEM_OPERATION_LOAD) perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + + if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) rec_argv[i++] = "-W"; - } rec_argv[i++] = "-d"; @@ -50,9 +96,19 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = perf_mem_events[j].name; }; - for (j = 1; j < argc; j++, i++) + for (j = 0; j < argc; j++, i++) rec_argv[i] = argv[j]; + if (verbose > 0) { + pr_debug("calling: record "); + + while (rec_argv[j]) { + pr_debug("%s ", rec_argv[j]); + j++; + } + pr_debug("\n"); + } + ret = cmd_record(i, rec_argv, NULL); free(rec_argv); return ret; @@ -299,7 +355,6 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; - argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 
c6ba0a19ec9a..b1507c04b257 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -1,10 +1,51 @@ +#include +#include +#include +#include #include "mem-events.h" +#include "debug.h" -#define E(n) { .name = n } +#define E(t, n) { .tag = t, .name = n } struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { - E("cpu/mem-loads,ldlat=30/P"), - E("cpu/mem-stores/P"), + E("ldlat-loads", "cpu/mem-loads,ldlat=30/P"), + E("ldlat-stores", "cpu/mem-stores/P"), }; #undef E + +int perf_mem_events__parse(const char *str) +{ + char *tok, *saveptr = NULL; + bool found = false; + char *buf; + int j; + + /* We need buffer that we know we can write to. */ + buf = malloc(strlen(str) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, str); + + tok = strtok_r((char *)buf, ",", &saveptr); + + while (tok) { + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + struct perf_mem_event *e = &perf_mem_events[j]; + + if (strstr(e->tag, tok)) + e->record = found = true; + } + + tok = strtok_r(NULL, ",", &saveptr); + } + + free(buf); + + if (found) + return 0; + + pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str); + return -1; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index c97b21465fa9..2995bae6ac33 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -5,6 +5,7 @@ struct perf_mem_event { bool record; + const char *tag; const char *name; }; @@ -16,4 +17,6 @@ enum { extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; +int perf_mem_events__parse(const char *str); + #endif /* __PERF_MEM_EVENTS_H */ -- cgit v1.2.3 From b19a1b6a233ede3ffc379b49e3653d6ce80dd743 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Feb 2016 09:34:42 +0100 Subject: perf tools: Use ARRAY_SIZE in mem sort display functions There's no need to define extra macros for that. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1455525293-8671-13-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5f94ee740c5b..5388f7940474 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -838,7 +838,6 @@ static const char * const tlb_access[] = { "Walker", "Fault", }; -#define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *)) static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) @@ -860,7 +859,7 @@ static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf, /* already taken care of */ m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS); - for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) { + for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) { if (!(m & 0x1)) continue; if (l) { @@ -915,7 +914,6 @@ static const char * const mem_lvl[] = { "I/O", "Uncached", }; -#define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *)) static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) @@ -937,7 +935,7 @@ static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf, /* already taken care of */ m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); - for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) { + for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { if (!(m & 0x1)) continue; if (l) { @@ -983,7 +981,6 @@ static const char * const snoop_access[] = { "Hit", "HitM", }; -#define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *)) static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) @@ -998,7 +995,7 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, if (he->mem_info) m = 
he->mem_info->data_src.mem_snoop; - for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) { + for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) { if (!(m & 0x1)) continue; if (l) { -- cgit v1.2.3 From 94ddddfab521423d94d6066879b514b9431e5cae Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Feb 2016 09:34:51 +0100 Subject: perf script: Add data_src and weight column definitions Adding data_src and weight column definitions, so it's displayed for related sample types. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1455525293-8671-22-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c691214d820f..b7f1e8e22dcf 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -58,6 +58,8 @@ enum perf_output_field { PERF_OUTPUT_IREGS = 1U << 14, PERF_OUTPUT_BRSTACK = 1U << 15, PERF_OUTPUT_BRSTACKSYM = 1U << 16, + PERF_OUTPUT_DATA_SRC = 1U << 17, + PERF_OUTPUT_WEIGHT = 1U << 18, }; struct output_option { @@ -81,6 +83,8 @@ struct output_option { {.str = "iregs", .field = PERF_OUTPUT_IREGS}, {.str = "brstack", .field = PERF_OUTPUT_BRSTACK}, {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM}, + {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, + {.str = "weight", .field = PERF_OUTPUT_WEIGHT}, }; /* default set to maintain compatibility with current format */ @@ -242,6 +246,16 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_ADDR, allow_user_set)) return -EINVAL; + if (PRINT_FIELD(DATA_SRC) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", + PERF_OUTPUT_DATA_SRC)) + return -EINVAL; + + if (PRINT_FIELD(WEIGHT) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", + 
PERF_OUTPUT_WEIGHT)) + return -EINVAL; + if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { pr_err("Display of symbols requested but neither sample IP nor " "sample address\nis selected. Hence, no addresses to convert " @@ -673,6 +687,12 @@ static void process_event(struct perf_script *script, union perf_event *event, if (PRINT_FIELD(ADDR)) print_sample_addr(event, sample, thread, attr); + if (PRINT_FIELD(DATA_SRC)) + printf("%16" PRIx64, sample->data_src); + + if (PRINT_FIELD(WEIGHT)) + printf("%16" PRIu64, sample->weight); + if (PRINT_FIELD(IP)) { if (!symbol_conf.use_callchain) printf(" "); -- cgit v1.2.3 From ff7b191583c368612fde88bf3cff6e3f3b0d73d5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Feb 2016 09:34:52 +0100 Subject: perf script: Display addr/data_src/weight columns for raw events Adding addr/data_src/weight columns for raw events. Example: $ perf script ... true 11883 322960.489590: ... ffff8801aa0b8400 68501042 246 ffffffff813b2cd true 11883 322960.489600: ... ffff8800b90b38d8 68501042 251 ffffffff811d0b7 true 11883 322960.489612: ... ffff880196893130 6a100142 94 ffffffff8177fb8 true 11883 322960.489637: ... ffff880164277b40 68100842 101 ffffffff813b2cd true 11883 322960.489683: ... ffff880035d3d818 68501042 201 ffffffff811d0b7 true 11883 322960.489733: ... 7fb9616efcf0 68100242 199 7fb961aaba9 true 11883 322960.489818: ... 
ffffea000481c39c 6a100142 122 ffffffff811b634 ^^^^^^^^^^^^^^^^ ^^^^^^^^ ^^^ addr data_src weight Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1455525293-8671-23-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b7f1e8e22dcf..f4caf4898245 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -135,7 +135,8 @@ static struct { PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD, + PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | + PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT, .invalid_fields = PERF_OUTPUT_TRACE, }, -- cgit v1.2.3 From 940db6dcd3f4659303fdf6befe7416adc4d24118 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 17 Feb 2016 14:44:55 -0800 Subject: perf tools: Dont stop PMU parsing on alias parse error When an error happens during alias parsing currently the complete parsing of all attributes of the PMU is stopped. This breaks old perf on a newer kernel that may have not-yet-known alias attributes (such as .scale or .per-pkg). Continue when some attribute is unparseable. This is IMHO a stable candidate and should be backported to older versions to avoid problems with newer kernels. v2: Print warnings when something goes wrong. 
v3: Change warning to debug output Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: stable@vger.kernel.org # v3.6+ Link: http://lkml.kernel.org/r/1455749095-18358-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index cf59fbaee491..ce61f79dbaae 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -284,13 +284,12 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) { struct dirent *evt_ent; DIR *event_dir; - int ret = 0; event_dir = opendir(dir); if (!event_dir) return -EINVAL; - while (!ret && (evt_ent = readdir(event_dir))) { + while ((evt_ent = readdir(event_dir))) { char path[PATH_MAX]; char *name = evt_ent->d_name; FILE *file; @@ -306,17 +305,19 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) snprintf(path, PATH_MAX, "%s/%s", dir, name); - ret = -EINVAL; file = fopen(path, "r"); - if (!file) - break; + if (!file) { + pr_debug("Cannot open %s\n", path); + continue; + } - ret = perf_pmu__new_alias(head, dir, name, file); + if (perf_pmu__new_alias(head, dir, name, file) < 0) + pr_debug("Cannot set up %s\n", name); fclose(file); } closedir(event_dir); - return ret; + return 0; } /* -- cgit v1.2.3 From d1130686f463e6feff19196475c3c15b1923c525 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2016 16:18:37 -0300 Subject: perf help: No need to use strbuf_remove() It is the only user of this function, just use the strlen() to skip the prefix. 
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-blao710l5cd5hmwrhy51ftgq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-help.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 96c1a4cfbbbf..f4dd2b48f90f 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -86,8 +86,7 @@ static int check_emacsclient_version(void) return -1; } - strbuf_remove(&buffer, 0, strlen("emacsclient")); - version = atoi(buffer.buf); + version = atoi(buffer.buf + strlen("emacsclient")); if (version < 22) { fprintf(stderr, -- cgit v1.2.3 From bea2400621836b028d82c3d6a74053921d70dbd7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2016 16:21:04 -0300 Subject: perf tools: Remove strbuf_{remove,splice}() No users, nuke them. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-kfv2wo8xann8t97wdalttcx7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/strbuf.c | 24 ------------------------ tools/perf/util/strbuf.h | 2 -- 2 files changed, 26 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 25671fa16618..d3d279275432 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -51,30 +51,6 @@ void strbuf_grow(struct strbuf *sb, size_t extra) ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); } -static void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, - const void *data, size_t dlen) -{ - if (pos + len < pos) - die("you want to use way too much memory"); - if (pos > sb->len) - die("`pos' is too far after the end of the buffer"); - if (pos + len > sb->len) - die("`pos + len' is too far after the end of the buffer"); - - if (dlen >= len) - strbuf_grow(sb, dlen - len); - memmove(sb->buf + pos + dlen, - sb->buf + pos + len, - sb->len - 
pos - len); - memcpy(sb->buf + pos, data, dlen); - strbuf_setlen(sb, sb->len + dlen - len); -} - -void strbuf_remove(struct strbuf *sb, size_t pos, size_t len) -{ - strbuf_splice(sb, pos, len, NULL, 0); -} - void strbuf_add(struct strbuf *sb, const void *data, size_t len) { strbuf_grow(sb, len); diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 529f2f035249..7a32c838884d 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -77,8 +77,6 @@ static inline void strbuf_addch(struct strbuf *sb, int c) { sb->buf[sb->len] = '\0'; } -extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); - extern void strbuf_add(struct strbuf *, const void *, size_t); static inline void strbuf_addstr(struct strbuf *sb, const char *s) { strbuf_add(sb, s, strlen(s)); -- cgit v1.2.3 From 54fbad54ebcde9db9c7459e9e379f2350c25e1f1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:42 +0100 Subject: perf mem record: Check for memory events support Check if current kernel support available memory events and display the status within -e list option: $ perf mem record -e list ldlat-loads : available ldlat-stores : available Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 20 ++++++++++++++++---- tools/perf/util/mem-events.c | 35 ++++++++++++++++++++++++++++++++--- tools/perf/util/mem-events.h | 3 +++ 3 files changed, 51 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index b3f8a89ede40..f1fa7b8d1f69 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -40,10 +40,11 @@ static int parse_record_events(const struct option *opt, for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = 
&perf_mem_events[j]; - fprintf(stderr, "%-20s%s", - e->tag, verbose ? "" : "\n"); - if (verbose) - fprintf(stderr, " [%s]\n", e->name); + fprintf(stderr, "%-13s%-*s%s\n", + e->tag, + verbose ? 25 : 0, + verbose ? e->name : "", + e->supported ? ": available" : ""); } exit(0); } @@ -92,6 +93,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (!perf_mem_events[j].record) continue; + if (!perf_mem_events[j].supported) { + pr_err("failed: event '%s' not supported\n", + perf_mem_events[j].name); + return -1; + } + rec_argv[i++] = "-e"; rec_argv[i++] = perf_mem_events[j].name; }; @@ -355,6 +362,11 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; + if (perf_mem_events__init()) { + pr_err("failed: memory events not supported\n"); + return -1; + } + argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index b1507c04b257..e21853fe1312 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -2,15 +2,20 @@ #include #include #include +#include +#include +#include +#include #include "mem-events.h" #include "debug.h" -#define E(t, n) { .tag = t, .name = n } +#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { - E("ldlat-loads", "cpu/mem-loads,ldlat=30/P"), - E("ldlat-stores", "cpu/mem-stores/P"), + E("ldlat-loads", "cpu/mem-loads,ldlat=30/P", "mem-loads"), + E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"), }; +#undef E #undef E @@ -49,3 +54,27 @@ int perf_mem_events__parse(const char *str) pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str); return -1; } + +int perf_mem_events__init(void) +{ + const char *mnt = sysfs__mount(); + bool found = false; + int j; + + if (!mnt) + return -ENOENT; + + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + 
char path[PATH_MAX]; + struct perf_mem_event *e = &perf_mem_events[j]; + struct stat st; + + scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s", + mnt, e->sysfs_name); + + if (!stat(path, &st)) + e->supported = found = true; + } + + return found ? 0 : -ENOENT; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 2995bae6ac33..75c1660bda62 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -5,8 +5,10 @@ struct perf_mem_event { bool record; + bool supported; const char *tag; const char *name; + const char *sysfs_name; }; enum { @@ -18,5 +20,6 @@ enum { extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; int perf_mem_events__parse(const char *str); +int perf_mem_events__init(void); #endif /* __PERF_MEM_EVENTS_H */ -- cgit v1.2.3 From 2ba7ac5814a6952aad647ce31696b893772cbe83 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:43 +0100 Subject: perf mem: Introduce perf_mem_events__name function Wrap perf_mem_events[].name into perf_mem_events__name() so we could alter the events name if needed. This will be handy when changing latency settings for loads event in following patch. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 6 +++--- tools/perf/util/mem-events.c | 5 +++++ tools/perf/util/mem-events.h | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index f1fa7b8d1f69..88aeac9aa1da 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -43,7 +43,7 @@ static int parse_record_events(const struct option *opt, fprintf(stderr, "%-13s%-*s%s\n", e->tag, verbose ? 25 : 0, - verbose ? e->name : "", + verbose ? perf_mem_events__name(j) : "", e->supported ? 
": available" : ""); } exit(0); @@ -95,12 +95,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (!perf_mem_events[j].supported) { pr_err("failed: event '%s' not supported\n", - perf_mem_events[j].name); + perf_mem_events__name(j)); return -1; } rec_argv[i++] = "-e"; - rec_argv[i++] = perf_mem_events[j].name; + rec_argv[i++] = perf_mem_events__name(j); }; for (j = 0; j < argc; j++, i++) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index e21853fe1312..2330db510976 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -19,6 +19,11 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { #undef E +char *perf_mem_events__name(int i) +{ + return (char *)perf_mem_events[i].name; +} + int perf_mem_events__parse(const char *str) { char *tok, *saveptr = NULL; diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 75c1660bda62..2a91b959de62 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -22,4 +22,5 @@ extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; int perf_mem_events__parse(const char *str); int perf_mem_events__init(void); +char *perf_mem_events__name(int i); #endif /* __PERF_MEM_EVENTS_H */ -- cgit v1.2.3 From 0c877d759d3a62a01d75dc6de4a923a686bb285a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:46 +0100 Subject: perf tools: Introduce perf_mem__tlb_scnprintf function Move meminfo's tlb display function into mem-events.c object, so it could be reused later from script code. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/mem-events.h | 3 +++ tools/perf/util/sort.c | 44 ++--------------------------------------- 3 files changed, 52 insertions(+), 42 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 2330db510976..4be3eb74001b 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -8,6 +8,7 @@ #include #include "mem-events.h" #include "debug.h" +#include "symbol.h" #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } @@ -83,3 +84,49 @@ int perf_mem_events__init(void) return found ? 0 : -ENOENT; } + +static const char * const tlb_access[] = { + "N/A", + "HIT", + "MISS", + "L1", + "L2", + "Walker", + "Fault", +}; + +void perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t l = 0, i; + u64 m = PERF_MEM_TLB_NA; + u64 hit, miss; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + if (mem_info) + m = mem_info->data_src.mem_dtlb; + + hit = m & PERF_MEM_TLB_HIT; + miss = m & PERF_MEM_TLB_MISS; + + /* already taken care of */ + m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS); + + for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (l) { + strcat(out, " or "); + l += 4; + } + strncat(out, tlb_access[i], sz - l); + l += strlen(tlb_access[i]); + } + if (*out == '\0') + strcpy(out, "N/A"); + if (hit) + strncat(out, " hit", sz - l); + if (miss) + strncat(out, " miss", sz - l); +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 2a91b959de62..d8fb8e18418c 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -23,4 +23,7 @@ int 
perf_mem_events__parse(const char *str); int perf_mem_events__init(void); char *perf_mem_events__name(int i); + +struct mem_info; +void perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5388f7940474..160df202c34f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -6,6 +6,7 @@ #include "evsel.h" #include "evlist.h" #include +#include "mem-events.h" regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; @@ -829,53 +830,12 @@ sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb); } -static const char * const tlb_access[] = { - "N/A", - "HIT", - "MISS", - "L1", - "L2", - "Walker", - "Fault", -}; - static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; - size_t sz = sizeof(out) - 1; /* -1 for null termination */ - size_t l = 0, i; - u64 m = PERF_MEM_TLB_NA; - u64 hit, miss; - - out[0] = '\0'; - - if (he->mem_info) - m = he->mem_info->data_src.mem_dtlb; - - hit = m & PERF_MEM_TLB_HIT; - miss = m & PERF_MEM_TLB_MISS; - - /* already taken care of */ - m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS); - - for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (l) { - strcat(out, " or "); - l += 4; - } - strncat(out, tlb_access[i], sz - l); - l += strlen(tlb_access[i]); - } - if (*out == '\0') - strcpy(out, "N/A"); - if (hit) - strncat(out, " hit", sz - l); - if (miss) - strncat(out, " miss", sz - l); + perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%-*s", width, out); } -- cgit v1.2.3 From 071e9a1e12dceaec6f9d3ffe6e77ee68364166d6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:47 +0100 Subject: perf tools: Introduce perf_mem__lvl_scnprintf function Move meminfo's lvl display 
function into mem-events.c object, so it could be reused later from script code. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/mem-events.h | 1 + tools/perf/util/sort.c | 50 +---------------------------------------- 3 files changed, 55 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 4be3eb74001b..bddb1217d129 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -130,3 +130,56 @@ void perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) if (miss) strncat(out, " miss", sz - l); } + +static const char * const mem_lvl[] = { + "N/A", + "HIT", + "MISS", + "L1", + "LFB", + "L2", + "L3", + "Local RAM", + "Remote RAM (1 hop)", + "Remote RAM (2 hops)", + "Remote Cache (1 hop)", + "Remote Cache (2 hops)", + "I/O", + "Uncached", +}; + +void perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t i, l = 0; + u64 m = PERF_MEM_LVL_NA; + u64 hit, miss; + + if (mem_info) + m = mem_info->data_src.mem_lvl; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + hit = m & PERF_MEM_LVL_HIT; + miss = m & PERF_MEM_LVL_MISS; + + /* already taken care of */ + m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); + + for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (l) { + strcat(out, " or "); + l += 4; + } + strncat(out, mem_lvl[i], sz - l); + l += strlen(mem_lvl[i]); + } + if (*out == '\0') + strcpy(out, "N/A"); + if (hit) + strncat(out, " hit", sz - l); + if (miss) + strncat(out, " miss", sz - l); +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index d8fb8e18418c..bd0d7f7ae113 
100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -26,4 +26,5 @@ char *perf_mem_events__name(int i); struct mem_info; void perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +void perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 160df202c34f..d894759c47f0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -858,60 +858,12 @@ sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl); } -static const char * const mem_lvl[] = { - "N/A", - "HIT", - "MISS", - "L1", - "LFB", - "L2", - "L3", - "Local RAM", - "Remote RAM (1 hop)", - "Remote RAM (2 hops)", - "Remote Cache (1 hop)", - "Remote Cache (2 hops)", - "I/O", - "Uncached", -}; - static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; - size_t sz = sizeof(out) - 1; /* -1 for null termination */ - size_t i, l = 0; - u64 m = PERF_MEM_LVL_NA; - u64 hit, miss; - - if (he->mem_info) - m = he->mem_info->data_src.mem_lvl; - - out[0] = '\0'; - - hit = m & PERF_MEM_LVL_HIT; - miss = m & PERF_MEM_LVL_MISS; - - /* already taken care of */ - m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); - - for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (l) { - strcat(out, " or "); - l += 4; - } - strncat(out, mem_lvl[i], sz - l); - l += strlen(mem_lvl[i]); - } - if (*out == '\0') - strcpy(out, "N/A"); - if (hit) - strncat(out, " hit", sz - l); - if (miss) - strncat(out, " miss", sz - l); + perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%-*s", width, out); } -- cgit v1.2.3 From 2c07af13dcd4d971578041b50598f1269b33e68a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:48 +0100 Subject: perf tools: Introduce 
perf_mem__snp_scnprintf function Move meminfo's snp display function into mem-events.c object, so it could be reused later from script code. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 34 ++++++++++++++++++++++++++++++++++ tools/perf/util/mem-events.h | 1 + tools/perf/util/sort.c | 31 +------------------------------ 3 files changed, 36 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index bddb1217d129..d03edc2ec607 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -183,3 +183,37 @@ void perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) if (miss) strncat(out, " miss", sz - l); } + +static const char * const snoop_access[] = { + "N/A", + "None", + "Miss", + "Hit", + "HitM", +}; + +void perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t i, l = 0; + u64 m = PERF_MEM_SNOOP_NA; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + if (mem_info) + m = mem_info->data_src.mem_snoop; + + for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (l) { + strcat(out, " or "); + l += 4; + } + strncat(out, snoop_access[i], sz - l); + l += strlen(snoop_access[i]); + } + + if (*out == '\0') + strcpy(out, "N/A"); +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index bd0d7f7ae113..6efdd6fcdb01 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -27,4 +27,5 @@ char *perf_mem_events__name(int i); struct mem_info; void perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); void perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +void 
perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index d894759c47f0..2007c3b683f3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -886,41 +886,12 @@ sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop); } -static const char * const snoop_access[] = { - "N/A", - "None", - "Miss", - "Hit", - "HitM", -}; - static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; - size_t sz = sizeof(out) - 1; /* -1 for null termination */ - size_t i, l = 0; - u64 m = PERF_MEM_SNOOP_NA; - - out[0] = '\0'; - - if (he->mem_info) - m = he->mem_info->data_src.mem_snoop; - - for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (l) { - strcat(out, " or "); - l += 4; - } - strncat(out, snoop_access[i], sz - l); - l += strlen(snoop_access[i]); - } - - if (*out == '\0') - strcpy(out, "N/A"); + perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%-*s", width, out); } -- cgit v1.2.3 From 69a77275926ccd0c08fde103de52b59f18370f5a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:49 +0100 Subject: perf tools: Introduce perf_mem__lck_scnprintf function Move meminfo's lck display function into mem-events.c object, so it could be reused later from script code. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-9-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 16 ++++++++++++++++ tools/perf/util/mem-events.h | 2 ++ tools/perf/util/sort.c | 14 ++------------ 3 files changed, 20 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index d03edc2ec607..9844e3e36c1d 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -217,3 +217,19 @@ void perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) if (*out == '\0') strcpy(out, "N/A"); } + +void perf_mem__lck_scnprintf(char *out, size_t sz __maybe_unused, + struct mem_info *mem_info) +{ + u64 mask = PERF_MEM_LOCK_NA; + + if (mem_info) + mask = mem_info->data_src.mem_lock; + + if (mask & PERF_MEM_LOCK_NA) + strncat(out, "N/A", 3); + else if (mask & PERF_MEM_LOCK_LOCKED) + strncat(out, "Yes", 3); + else + strncat(out, "No", 2); +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 6efdd6fcdb01..99678b51c747 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -28,4 +28,6 @@ struct mem_info; void perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); void perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); void perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +void perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); + #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2007c3b683f3..4175b2944ff9 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -795,19 +795,9 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__locked_snprintf(struct hist_entry *he, char 
*bf, size_t size, unsigned int width) { - const char *out; - u64 mask = PERF_MEM_LOCK_NA; - - if (he->mem_info) - mask = he->mem_info->data_src.mem_lock; - - if (mask & PERF_MEM_LOCK_NA) - out = "N/A"; - else if (mask & PERF_MEM_LOCK_LOCKED) - out = "Yes"; - else - out = "No"; + char out[10]; + perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%.*s", width, out); } -- cgit v1.2.3 From b1a5fbea3d69511e445b8d9efe6dc605edb508c8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:50 +0100 Subject: perf tools: Change perf_mem__tlb_scnprintf to return nb of displayed bytes Moving strncat/strcpy calls into scnprintf to easily track number of displayed bytes. It will be used in following patch. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-10-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 13 +++++++------ tools/perf/util/mem-events.h | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 9844e3e36c1d..b58d32e1c80a 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -95,7 +95,7 @@ static const char * const tlb_access[] = { "Fault", }; -void perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t l = 0, i; u64 m = PERF_MEM_TLB_NA; @@ -120,15 +120,16 @@ void perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) strcat(out, " or "); l += 4; } - strncat(out, tlb_access[i], sz - l); - l += strlen(tlb_access[i]); + l += scnprintf(out + l, sz - l, tlb_access[i]); } if (*out == '\0') - strcpy(out, "N/A"); + l += scnprintf(out, sz - l, "N/A"); if (hit) - strncat(out, " hit", sz - l); + l += scnprintf(out + 
l, sz - l, " hit"); if (miss) - strncat(out, " miss", sz - l); + l += scnprintf(out + l, sz - l, " miss"); + + return l; } static const char * const mem_lvl[] = { diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 99678b51c747..4141df6850e4 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -25,7 +25,7 @@ int perf_mem_events__init(void); char *perf_mem_events__name(int i); struct mem_info; -void perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); void perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); void perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); void perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -- cgit v1.2.3 From 969075630e3abd1c740ac4f3183949cbf54b410d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:51 +0100 Subject: perf tools: Change perf_mem__lvl_scnprintf to return nb of displayed bytes Moving strncat/strcpy calls into scnprintf to easily track number of displayed bytes. It will be used in following patch. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-11-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 13 +++++++------ tools/perf/util/mem-events.h | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index b58d32e1c80a..249250f60481 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -149,7 +149,7 @@ static const char * const mem_lvl[] = { "Uncached", }; -void perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t i, l = 0; u64 m = PERF_MEM_LVL_NA; @@ -174,15 +174,16 @@ void perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) strcat(out, " or "); l += 4; } - strncat(out, mem_lvl[i], sz - l); - l += strlen(mem_lvl[i]); + l += scnprintf(out + l, sz - l, mem_lvl[i]); } if (*out == '\0') - strcpy(out, "N/A"); + l += scnprintf(out, sz - l, "N/A"); if (hit) - strncat(out, " hit", sz - l); + l += scnprintf(out + l, sz - l, " hit"); if (miss) - strncat(out, " miss", sz - l); + l += scnprintf(out + l, sz - l, " miss"); + + return l; } static const char * const snoop_access[] = { diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 4141df6850e4..0467f6775dc2 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -26,7 +26,7 @@ char *perf_mem_events__name(int i); struct mem_info; int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -void perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); void perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); void 
perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -- cgit v1.2.3 From 149d75076778d3b14e13b79d683b4f4fdd4fdb01 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:52 +0100 Subject: perf tools: Change perf_mem__snp_scnprintf to return nb of displayed bytes Moving strncat/strcpy calls into scnprintf to easily track number of displayed bytes. It will be used in following patch. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-12-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 9 +++++---- tools/perf/util/mem-events.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 249250f60481..de981ddcd1d3 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -194,7 +194,7 @@ static const char * const snoop_access[] = { "HitM", }; -void perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t i, l = 0; u64 m = PERF_MEM_SNOOP_NA; @@ -212,12 +212,13 @@ void perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) strcat(out, " or "); l += 4; } - strncat(out, snoop_access[i], sz - l); - l += strlen(snoop_access[i]); + l += scnprintf(out + l, sz - l, snoop_access[i]); } if (*out == '\0') - strcpy(out, "N/A"); + l += scnprintf(out, sz - l, "N/A"); + + return l; } void perf_mem__lck_scnprintf(char *out, size_t sz __maybe_unused, diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 0467f6775dc2..84c79a452f80 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -27,7 +27,7 @@ char *perf_mem_events__name(int i); struct mem_info; int perf_mem__tlb_scnprintf(char *out, size_t sz, struct 
mem_info *mem_info); int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -void perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); void perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); #endif /* __PERF_MEM_EVENTS_H */ -- cgit v1.2.3 From 8b0819c8a3c97279b815581b606407c0387cc26f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:53 +0100 Subject: perf tools: Change perf_mem__lck_scnprintf to return nb of displayed bytes Moving strncat call into scnprintf to easily track number of displayed bytes. It will be used in following patch. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-13-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 12 +++++++----- tools/perf/util/mem-events.h | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index de981ddcd1d3..eadb83d16367 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -221,18 +221,20 @@ int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) return l; } -void perf_mem__lck_scnprintf(char *out, size_t sz __maybe_unused, - struct mem_info *mem_info) +int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { u64 mask = PERF_MEM_LOCK_NA; + int l; if (mem_info) mask = mem_info->data_src.mem_lock; if (mask & PERF_MEM_LOCK_NA) - strncat(out, "N/A", 3); + l = scnprintf(out, sz, "N/A"); else if (mask & PERF_MEM_LOCK_LOCKED) - strncat(out, "Yes", 3); + l = scnprintf(out, sz, "Yes"); else - strncat(out, "No", 2); + l = scnprintf(out, sz, "No"); + + return l; } diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h 
index 84c79a452f80..87c44ff75dc8 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -28,6 +28,6 @@ struct mem_info; int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -void perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); #endif /* __PERF_MEM_EVENTS_H */ -- cgit v1.2.3 From c19ac91245a2f8d26aafd7f23256f3b76314d5d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2016 09:46:54 +0100 Subject: perf script: Display data_src values Adding support to display data_src values, for events with data_src data in sample. Example: $ perf script ... rcuos/3 32 [002] ... 68501042 Local RAM hit|SNP None or Hit|TLB L1 or L2 hit|LCK No ... rcuos/3 32 [002] ... 68100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK No ... swapper 0 [002] ... 68100242 LFB hit|SNP None|TLB L1 or L2 hit|LCK No ... swapper 0 [000] ... 68100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK No ... swapper 0 [000] ... 50100142 L1 hit|SNP None|TLB L2 miss|LCK No ... rcuos/3 32 [002] ... 68100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK No ... plugin-containe 16538 [000] ... 6a100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK Yes ... gkrellm 1736 [000] ... 68100242 LFB hit|SNP None|TLB L1 or L2 hit|LCK No ... gkrellm 1736 [000] ... 6a100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK Yes ... 
^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ data_src value data_src translation Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456303616-26926-14-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 20 +++++++++++++++++++- tools/perf/util/mem-events.c | 15 +++++++++++++++ tools/perf/util/mem-events.h | 2 ++ 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f4caf4898245..8ff5ff0fe38c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -23,6 +23,7 @@ #include "util/stat.h" #include #include "asm/bug.h" +#include "util/mem-events.h" static char const *script_name; static char const *generate_script_lang; @@ -649,6 +650,23 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist) return max; } +static size_t data_src__printf(u64 data_src) +{ + struct mem_info mi = { .data_src.val = data_src }; + char decode[100]; + char out[100]; + static int maxlen; + int len; + + perf_script__meminfo_scnprintf(decode, 100, &mi); + + len = scnprintf(out, 100, "%16" PRIx64 " %s", data_src, decode); + if (maxlen < len) + maxlen = len; + + return printf("%-*s", maxlen, out); +} + static void process_event(struct perf_script *script, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) @@ -689,7 +707,7 @@ static void process_event(struct perf_script *script, union perf_event *event, print_sample_addr(event, sample, thread, attr); if (PRINT_FIELD(DATA_SRC)) - printf("%16" PRIx64, sample->data_src); + data_src__printf(sample->data_src); if (PRINT_FIELD(WEIGHT)) printf("%16" PRIu64, sample->weight); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index eadb83d16367..75465f89a413 100644 --- 
a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -238,3 +238,18 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info) return l; } + +int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + int i = 0; + + i += perf_mem__lvl_scnprintf(out, sz, mem_info); + i += scnprintf(out + i, sz - i, "|SNP "); + i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info); + i += scnprintf(out + i, sz - i, "|TLB "); + i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info); + i += scnprintf(out + i, sz - i, "|LCK "); + i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info); + + return i; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 87c44ff75dc8..5d6d93066a6e 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -30,4 +30,6 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); + #endif /* __PERF_MEM_EVENTS_H */ -- cgit v1.2.3 From c339b1a90e6cd638a1d99cbbf49d870ce233198e Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 24 Feb 2016 11:20:44 +0000 Subject: perf tools: Make binary data printer code in trace_event public available Move the code printing binary data from trace_event() to util.c and allow passing a different printer. Further commits will use this logic to print bpf output events. 
Signed-off-by: Wang Nan Cc: Brendan Gregg Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456312845-111583-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 75 +++++++++++++++++++++++++++++++------------------ tools/perf/util/util.c | 37 ++++++++++++++++++++++++ tools/perf/util/util.h | 20 +++++++++++++ 3 files changed, 105 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index ff7e86ad1b06..8c4212abd19b 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -106,40 +106,61 @@ int dump_printf(const char *fmt, ...) return ret; } +static void trace_event_printer(enum binary_printer_ops op, + unsigned int val, void *extra) +{ + const char *color = PERF_COLOR_BLUE; + union perf_event *event = (union perf_event *)extra; + unsigned char ch = (unsigned char)val; + + switch (op) { + case BINARY_PRINT_DATA_BEGIN: + printf("."); + color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", + event->header.size); + break; + case BINARY_PRINT_LINE_BEGIN: + printf("."); + break; + case BINARY_PRINT_ADDR: + color_fprintf(stdout, color, " %04x: ", val); + break; + case BINARY_PRINT_NUM_DATA: + color_fprintf(stdout, color, " %02x", val); + break; + case BINARY_PRINT_NUM_PAD: + color_fprintf(stdout, color, " "); + break; + case BINARY_PRINT_SEP: + color_fprintf(stdout, color, " "); + break; + case BINARY_PRINT_CHAR_DATA: + color_fprintf(stdout, color, "%c", + isprint(ch) ? 
ch : '.'); + break; + case BINARY_PRINT_CHAR_PAD: + color_fprintf(stdout, color, " "); + break; + case BINARY_PRINT_LINE_END: + color_fprintf(stdout, color, "\n"); + break; + case BINARY_PRINT_DATA_END: + printf("\n"); + break; + default: + break; + } +} + void trace_event(union perf_event *event) { unsigned char *raw_event = (void *)event; - const char *color = PERF_COLOR_BLUE; - int i, j; if (!dump_trace) return; - printf("."); - color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", - event->header.size); - - for (i = 0; i < event->header.size; i++) { - if ((i & 15) == 0) { - printf("."); - color_fprintf(stdout, color, " %04x: ", i); - } - - color_fprintf(stdout, color, " %02x", raw_event[i]); - - if (((i & 15) == 15) || i == event->header.size-1) { - color_fprintf(stdout, color, " "); - for (j = 0; j < 15-(i & 15); j++) - color_fprintf(stdout, color, " "); - for (j = i & ~15; j <= i; j++) { - color_fprintf(stdout, color, "%c", - isprint(raw_event[j]) ? - raw_event[j] : '.'); - } - color_fprintf(stdout, color, "\n"); - } - } - printf(".\n"); + print_binary(raw_event, event->header.size, 16, + trace_event_printer, event); } static struct debug_variable { diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 35b20dd454de..b7766c577b01 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "callchain.h" #include "strlist.h" @@ -670,3 +671,39 @@ int fetch_current_timestamp(char *buf, size_t sz) return 0; } + +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra) +{ + size_t i, j, mask; + + if (!printer) + return; + + bytes_per_line = roundup_pow_of_two(bytes_per_line); + mask = bytes_per_line - 1; + + printer(BINARY_PRINT_DATA_BEGIN, 0, extra); + for (i = 0; i < len; i++) { + if ((i & mask) == 0) { + printer(BINARY_PRINT_LINE_BEGIN, -1, extra); + printer(BINARY_PRINT_ADDR, i, extra); + } + + 
printer(BINARY_PRINT_NUM_DATA, data[i], extra); + + if (((i & mask) == mask) || i == len - 1) { + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_NUM_PAD, -1, extra); + + printer(BINARY_PRINT_SEP, i, extra); + for (j = i & ~mask; j <= i; j++) + printer(BINARY_PRINT_CHAR_DATA, data[j], extra); + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_CHAR_PAD, i, extra); + printer(BINARY_PRINT_LINE_END, -1, extra); + } + } + printer(BINARY_PRINT_DATA_END, -1, extra); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 3dd04089e8be..7015019ee5fb 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -345,4 +345,24 @@ const char *perf_tip(const char *dirpath); bool is_regular_file(const char *file); int fetch_current_timestamp(char *buf, size_t sz); +enum binary_printer_ops { + BINARY_PRINT_DATA_BEGIN, + BINARY_PRINT_LINE_BEGIN, + BINARY_PRINT_ADDR, + BINARY_PRINT_NUM_DATA, + BINARY_PRINT_NUM_PAD, + BINARY_PRINT_SEP, + BINARY_PRINT_CHAR_DATA, + BINARY_PRINT_CHAR_PAD, + BINARY_PRINT_LINE_END, + BINARY_PRINT_DATA_END, +}; + +typedef void (*print_binary_t)(enum binary_printer_ops, + unsigned int val, + void *extra); + +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From 30372f04c9dc159f99f1f09c61e5e0dbe4c91251 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 24 Feb 2016 11:20:45 +0000 Subject: perf script: Print bpf-output events in 'perf script' This patch allows 'perf script' output messages from BPF program. For example, use test_bpf_output_3.c at the end of this commit message, # ./perf record -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output_3.c/map:channel.event=evt/ \ usleep 100000 # ./perf script usleep 4882 21384.532523: evt: ffffffff810e97d1 sys_nanosleep ([kernel.kallsyms]) BPF output: 0000: 52 61 69 73 65 20 61 20 Raise a 0008: 42 50 46 20 65 76 65 6e BPF even 0010: 74 21 00 00 t!.. 
BPF string: "Raise a BPF event!" usleep 4882 21384.632606: evt: ffffffff8105c609 kretprobe_trampoline_holder ([kernel.kallsyms BPF output: 0000: 52 61 69 73 65 20 61 20 Raise a 0008: 42 50 46 20 65 76 65 6e BPF even 0010: 74 21 00 00 t!.. BPF string: "Raise a BPF event!" Two samples from BPF output are printed by both binary and string format. If BPF program output something unprintable, string format is suppressed. /************************ BEGIN **************************/ #include struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) static u64 (*ktime_get_ns)(void) = (void *)BPF_FUNC_ktime_get_ns; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; static inline int __attribute__((always_inline)) func(void *ctx, int type) { char output_str[] = "Raise a BPF event!"; perf_event_output(ctx, &channel, get_smp_processor_id(), &output_str, sizeof(output_str)); return 0; } SEC("func_begin=sys_nanosleep") int func_begin(void *ctx) {return func(ctx, 1);} SEC("func_end=sys_nanosleep%return") int func_end(void *ctx) { return func(ctx, 2);} char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Brendan Gregg Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456312845-111583-3-git-send-email-wangnan0@huawei.com 
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 93 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8ff5ff0fe38c..ec4fbd410a4b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -61,6 +61,7 @@ enum perf_output_field { PERF_OUTPUT_BRSTACKSYM = 1U << 16, PERF_OUTPUT_DATA_SRC = 1U << 17, PERF_OUTPUT_WEIGHT = 1U << 18, + PERF_OUTPUT_BPF_OUTPUT = 1U << 19, }; struct output_option { @@ -86,6 +87,7 @@ struct output_option { {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM}, {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, {.str = "weight", .field = PERF_OUTPUT_WEIGHT}, + {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, }; /* default set to maintain compatibility with current format */ @@ -106,7 +108,7 @@ static struct { PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, - .invalid_fields = PERF_OUTPUT_TRACE, + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, [PERF_TYPE_SOFTWARE] = { @@ -116,7 +118,7 @@ static struct { PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD, + PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT, .invalid_fields = PERF_OUTPUT_TRACE, }, @@ -126,7 +128,7 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | - PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE, + PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE }, [PERF_TYPE_RAW] = { @@ -139,7 +141,7 @@ static struct { PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT, - .invalid_fields = PERF_OUTPUT_TRACE, + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, [PERF_TYPE_BREAKPOINT] = { @@ -151,7 +153,7 @@ static struct { PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, - .invalid_fields = PERF_OUTPUT_TRACE, + 
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, }; @@ -624,6 +626,84 @@ static void print_sample_flags(u32 flags) printf(" %-4s ", str); } +struct printer_data { + int line_no; + bool hit_nul; + bool is_printable; +}; + +static void +print_sample_bpf_output_printer(enum binary_printer_ops op, + unsigned int val, + void *extra) +{ + unsigned char ch = (unsigned char)val; + struct printer_data *printer_data = extra; + + switch (op) { + case BINARY_PRINT_DATA_BEGIN: + printf("\n"); + break; + case BINARY_PRINT_LINE_BEGIN: + printf("%17s", !printer_data->line_no ? "BPF output:" : + " "); + break; + case BINARY_PRINT_ADDR: + printf(" %04x:", val); + break; + case BINARY_PRINT_NUM_DATA: + printf(" %02x", val); + break; + case BINARY_PRINT_NUM_PAD: + printf(" "); + break; + case BINARY_PRINT_SEP: + printf(" "); + break; + case BINARY_PRINT_CHAR_DATA: + if (printer_data->hit_nul && ch) + printer_data->is_printable = false; + + if (!isprint(ch)) { + printf("%c", '.'); + + if (!printer_data->is_printable) + break; + + if (ch == '\0') + printer_data->hit_nul = true; + else + printer_data->is_printable = false; + } else { + printf("%c", ch); + } + break; + case BINARY_PRINT_CHAR_PAD: + printf(" "); + break; + case BINARY_PRINT_LINE_END: + printf("\n"); + printer_data->line_no++; + break; + case BINARY_PRINT_DATA_END: + default: + break; + } +} + +static void print_sample_bpf_output(struct perf_sample *sample) +{ + unsigned int nr_bytes = sample->raw_size; + struct printer_data printer_data = {0, false, true}; + + print_binary(sample->raw_data, nr_bytes, 8, + print_sample_bpf_output_printer, &printer_data); + + if (printer_data.is_printable && printer_data.hit_nul) + printf("%17s \"%s\"\n", "BPF string:", + (char *)(sample->raw_data)); +} + struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -731,6 +811,9 @@ static void process_event(struct perf_script *script, union perf_event *event, else if (PRINT_FIELD(BRSTACKSYM)) 
print_sample_brstacksym(event, sample, thread, attr); + if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) + print_sample_bpf_output(sample); + printf("\n"); } -- cgit v1.2.3 From a9c6e46c04ba38925e94c4c2fa9217460338db43 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:33 +0900 Subject: perf tools: Add helper functions for some sort keys The 'trace', 'srcline' and 'srcfile' sort keys updates hist entry's field later. With the hierarchy mode, those fields are passed to a matching entry so it needs to identify the sort keys. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.h | 3 +++ tools/perf/util/sort.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 97baa1d6ae5f..044419b3be7c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -301,6 +301,9 @@ void perf_hpp__append_sort_keys(struct perf_hpp_list *list); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format); bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists); +bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt); static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format, struct hists *hists) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4175b2944ff9..358035c2c327 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1391,6 +1391,39 @@ bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) return format->header == __sort__hpp_header; } +bool 
perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt) +{ + struct hpp_sort_entry *hse; + + if (!perf_hpp__is_sort_entry(fmt)) + return false; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + return hse->se == &sort_trace; +} + +bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt) +{ + struct hpp_sort_entry *hse; + + if (!perf_hpp__is_sort_entry(fmt)) + return false; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + return hse->se == &sort_srcline; +} + +bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt) +{ + struct hpp_sort_entry *hse; + + if (!perf_hpp__is_sort_entry(fmt)) + return false; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + return hse->se == &sort_srcfile; +} + static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) { struct hpp_sort_entry *hse_a; -- cgit v1.2.3 From aef810ec4e6b638facb6c81803c019906f34f014 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:34 +0900 Subject: perf hists: Basic support of hierarchical report view In the hierarchical view, entries will be grouped and sorted on the first key, and then on the second key, and so on. Add the he->hroot_{in,out} fields to keep the lower level entries. Actually this can share space, in a union, with callchain's 'sorted_root' since the hroots are only used by non-leaf entries and callchain is only used by leaf entries. It also adds the 'parent_he' and 'depth' fields which can be used by browsers. This patch only implements collapsing part which creates internal entries for each sort key. These need to be sorted by output_sort stage and to be displayed properly in the later patch(es). 
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 13 +++++- tools/perf/util/symbol.h | 3 +- 3 files changed, 128 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 017eb5c42c37..881452450959 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -396,6 +396,9 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, } INIT_LIST_HEAD(&he->pairs.node); thread__get(he->thread); + + if (!symbol_conf.report_hierarchy) + he->leaf = true; } return he; @@ -1049,6 +1052,114 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, * collapse the histogram */ +static void hists__apply_filters(struct hists *hists, struct hist_entry *he); + +static struct hist_entry *hierarchy_insert_entry(struct hists *hists, + struct rb_root *root, + struct hist_entry *he, + struct perf_hpp_fmt *fmt) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter, *new; + int64_t cmp; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node_in); + + cmp = fmt->collapse(fmt, iter, he); + if (!cmp) { + he_stat__add_stat(&iter->stat, &he->stat); + return iter; + } + + if (cmp < 0) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + + new = hist_entry__new(he, true); + if (new == NULL) + return NULL; + + hists__apply_filters(hists, new); + hists->nr_entries++; + + /* save related format for output */ + new->fmt = fmt; + + /* some fields are now passed to 'new' */ + if (perf_hpp__is_trace_entry(fmt)) + he->trace_output = NULL; + else + new->trace_output = NULL; + + if 
(perf_hpp__is_srcline_entry(fmt)) + he->srcline = NULL; + else + new->srcline = NULL; + + if (perf_hpp__is_srcfile_entry(fmt)) + he->srcfile = NULL; + else + new->srcfile = NULL; + + rb_link_node(&new->rb_node_in, parent, p); + rb_insert_color(&new->rb_node_in, root); + return new; +} + +static int hists__hierarchy_insert_entry(struct hists *hists, + struct rb_root *root, + struct hist_entry *he) +{ + struct perf_hpp_fmt *fmt; + struct hist_entry *new_he = NULL; + struct hist_entry *parent = NULL; + int depth = 0; + int ret = 0; + + hists__for_each_sort_list(hists, fmt) { + if (!perf_hpp__is_sort_entry(fmt) && + !perf_hpp__is_dynamic_entry(fmt)) + continue; + if (perf_hpp__should_skip(fmt, hists)) + continue; + + /* insert copy of 'he' for each fmt into the hierarchy */ + new_he = hierarchy_insert_entry(hists, root, he, fmt); + if (new_he == NULL) { + ret = -1; + break; + } + + root = &new_he->hroot_in; + new_he->parent_he = parent; + new_he->depth = depth++; + parent = new_he; + } + + if (new_he) { + new_he->leaf = true; + + if (symbol_conf.use_callchain) { + callchain_cursor_reset(&callchain_cursor); + if (callchain_merge(&callchain_cursor, + new_he->callchain, + he->callchain) < 0) + ret = -1; + } + } + + /* 'he' is no longer used */ + hist_entry__delete(he); + + /* return 0 (or -1) since it already applied filters */ + return ret; +} + int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he) { @@ -1057,6 +1168,9 @@ int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *iter; int64_t cmp; + if (symbol_conf.report_hierarchy) + return hists__hierarchy_insert_entry(hists, root, he); + while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node_in); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 5b9c6246de6d..10315e02adf5 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -96,9 +96,11 @@ struct hist_entry { s32 socket; s32 
cpu; u8 cpumode; + u8 depth; /* We are added by hists__add_dummy_entry. */ bool dummy; + bool leaf; char level; u8 filtered; @@ -120,13 +122,22 @@ struct hist_entry { char *srcline; char *srcfile; struct symbol *parent; - struct rb_root sorted_chain; struct branch_info *branch_info; struct hists *hists; struct mem_info *mem_info; void *raw_data; u32 raw_size; void *trace_output; + struct perf_hpp_fmt *fmt; + struct hist_entry *parent_he; + union { + /* this is for hierarchical entry structure */ + struct { + struct rb_root hroot_in; + struct rb_root hroot_out; + }; /* non-leaf entries */ + struct rb_root sorted_chain; /* leaf entry has callchains */ + }; struct callchain_root callchain[0]; /* must be last member */ }; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ccd1caa40e11..a937053a0ae0 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -110,7 +110,8 @@ struct symbol_conf { has_filter, show_ref_callgraph, hide_unresolved, - raw_trace; + raw_trace, + report_hierarchy; const char *vmlinux_name, *kallsyms_name, *source_prefix, -- cgit v1.2.3 From 1a3906a7e6b9cbfaf2a3d00c310aed8af8e10d92 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:35 +0900 Subject: perf hists: Resort hist entries with hierarchy For hierarchical output, each entry must be sorted in their rbtree (hroot) properly. Add hists__hierarchy_output_resort() to do the job. Note that those hierarchy entries share the period counts, it'd be important to update the hists->stats only once (for leaves). 
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 881452450959..6ddac2fb29b5 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1318,6 +1318,86 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) hists->stats.total_period += h->stat.period; } +static void hierarchy_insert_output_entry(struct rb_root *root, + struct hist_entry *he) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + if (hist_entry__sort(he, iter) > 0) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, root); +} + +static void hists__hierarchy_output_resort(struct hists *hists, + struct ui_progress *prog, + struct rb_root *root_in, + struct rb_root *root_out, + u64 min_callchain_hits, + bool use_callchain) +{ + struct rb_node *node; + struct hist_entry *he; + + *root_out = RB_ROOT; + node = rb_first(root_in); + + while (node) { + he = rb_entry(node, struct hist_entry, rb_node_in); + node = rb_next(node); + + hierarchy_insert_output_entry(root_out, he); + + if (prog) + ui_progress__update(prog, 1); + + if (!he->leaf) { + hists__hierarchy_output_resort(hists, prog, + &he->hroot_in, + &he->hroot_out, + min_callchain_hits, + use_callchain); + hists->nr_entries++; + if (!he->filtered) { + hists->nr_non_filtered_entries++; + hists__calc_col_len(hists, he); + } + + continue; + } + + /* only update stat for 
leaf entries to avoid duplication */ + hists__inc_stats(hists, he); + if (!he->filtered) + hists__calc_col_len(hists, he); + + if (!use_callchain) + continue; + + if (callchain_param.mode == CHAIN_GRAPH_REL) { + u64 total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (callchain_param.min_percent / 100); + } + + callchain_param.sort(&he->sorted_chain, he->callchain, + min_callchain_hits, &callchain_param); + } +} + static void __hists__insert_output_entry(struct rb_root *entries, struct hist_entry *he, u64 min_callchain_hits, @@ -1369,6 +1449,17 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, min_callchain_hits = callchain_total * (callchain_param.min_percent / 100); + hists__reset_stats(hists); + hists__reset_col_len(hists); + + if (symbol_conf.report_hierarchy) { + return hists__hierarchy_output_resort(hists, prog, + &hists->entries_collapsed, + &hists->entries, + min_callchain_hits, + use_callchain); + } + if (sort__need_collapse) root = &hists->entries_collapsed; else @@ -1377,9 +1468,6 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, next = rb_first(root); hists->entries = RB_ROOT; - hists__reset_stats(hists); - hists__reset_col_len(hists); - while (next) { n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); -- cgit v1.2.3 From 8c01872fe3c17fde1ce74eecf523d6d7fce5ffec Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:36 +0900 Subject: perf hists: Add helper functions for hierarchy mode The rb_hierarchy_{next,prev,last} functions are to traverse all hist entries in a hierarchy. They will be used by various function which supports hierarchy output. As the rb_hierarchy_next() is used to traverse the whole hierarchy, it sometime needs to visit entries regardless of current folding state. So add enum hierarchy_move_dir and pass it to __rb_hierarchy_next() for those cases. 
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/hist.h | 16 +++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6ddac2fb29b5..358af7ed07af 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1500,6 +1500,62 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) output_resort(hists, prog, symbol_conf.use_callchain); } +static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd) +{ + if (he->leaf || hmd == HMD_FORCE_SIBLING) + return false; + + if (he->unfolded || hmd == HMD_FORCE_CHILD) + return true; + + return false; +} + +struct rb_node *rb_hierarchy_last(struct rb_node *node) +{ + struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); + + while (can_goto_child(he, HMD_NORMAL)) { + node = rb_last(&he->hroot_out); + he = rb_entry(node, struct hist_entry, rb_node); + } + return node; +} + +struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd) +{ + struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); + + if (can_goto_child(he, hmd)) + node = rb_first(&he->hroot_out); + else + node = rb_next(node); + + while (node == NULL) { + he = he->parent_he; + if (he == NULL) + break; + + node = rb_next(&he->rb_node); + } + return node; +} + +struct rb_node *rb_hierarchy_prev(struct rb_node *node) +{ + struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); + + node = rb_prev(node); + if (node) + return rb_hierarchy_last(node); + + he = he->parent_he; + if (he == NULL) + return NULL; + + return &he->rb_node; +} + static void 
hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 044419b3be7c..5690906c154c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -418,4 +418,20 @@ int perf_hist_config(const char *var, const char *value); void perf_hpp_list__init(struct perf_hpp_list *list); +enum hierarchy_move_dir { + HMD_NORMAL, + HMD_FORCE_SIBLING, + HMD_FORCE_CHILD, +}; + +struct rb_node *rb_hierarchy_last(struct rb_node *node); +struct rb_node *__rb_hierarchy_next(struct rb_node *node, + enum hierarchy_move_dir hmd); +struct rb_node *rb_hierarchy_prev(struct rb_node *node); + +static inline struct rb_node *rb_hierarchy_next(struct rb_node *node) +{ + return __rb_hierarchy_next(node, HMD_NORMAL); +} + #endif /* __PERF_HIST_H */ -- cgit v1.2.3 From 54430101d2af260dba2d129cc9d9b7c7e60087b0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:37 +0900 Subject: perf hists: Introduce hist_entry__filter() The hist_entry__filter() function is to filter hist entries using sort key related info. This is needed to support hierarchy mode since each hist entry will be associated with a hpp fmt which has a sort key. So each entry should compare to only matching type of filters. To do that, add the ->se_filter callback field to struct sort_entry. This callback takes 'type' argument which determines whether it's matching sort key or not. It returns -1 for non-matching type, 0 for filtered entry and 1 for not filtered entries. 
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-6-git-send-email-namhyung@kernel.org [ 'socket' is reserved in sys/socket.h, so replace it with 'sk' ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.h | 2 + tools/perf/util/sort.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 1 + 3 files changed, 116 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5690906c154c..480d2eb26001 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -305,6 +305,8 @@ bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt); +int hist_entry__filter(struct hist_entry *he, int type, const void *arg); + static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format, struct hists *hists) { diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 358035c2c327..6bee8bdfb91b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -90,10 +90,21 @@ static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf, width, width, comm ?: ""); } +static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg) +{ + const struct thread *th = arg; + + if (type != HIST_FILTER__THREAD) + return -1; + + return th && he->thread != th; +} + struct sort_entry sort_thread = { .se_header = " Pid:Command", .se_cmp = sort__thread_cmp, .se_snprintf = hist_entry__thread_snprintf, + .se_filter = hist_entry__thread_filter, .se_width_idx = HISTC_THREAD, }; @@ -131,6 +142,7 @@ struct sort_entry sort_comm = { .se_collapse = sort__comm_collapse, .se_sort = sort__comm_sort, .se_snprintf = hist_entry__comm_snprintf, + .se_filter = hist_entry__thread_filter, .se_width_idx = HISTC_COMM, }; @@ 
-180,10 +192,21 @@ static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf, return _hist_entry__dso_snprintf(he->ms.map, bf, size, width); } +static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg) +{ + const struct dso *dso = arg; + + if (type != HIST_FILTER__DSO) + return -1; + + return dso && (!he->ms.map || he->ms.map->dso != dso); +} + struct sort_entry sort_dso = { .se_header = "Shared Object", .se_cmp = sort__dso_cmp, .se_snprintf = hist_entry__dso_snprintf, + .se_filter = hist_entry__dso_filter, .se_width_idx = HISTC_DSO, }; @@ -277,11 +300,22 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, he->level, bf, size, width); } +static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg) +{ + const char *sym = arg; + + if (type != HIST_FILTER__SYMBOL) + return -1; + + return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym)); +} + struct sort_entry sort_sym = { .se_header = "Symbol", .se_cmp = sort__sym_cmp, .se_sort = sort__sym_sort, .se_snprintf = hist_entry__sym_snprintf, + .se_filter = hist_entry__sym_filter, .se_width_idx = HISTC_SYMBOL, }; @@ -440,10 +474,21 @@ static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket); } +static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg) +{ + int sk = *(const int *)arg; + + if (type != HIST_FILTER__SOCKET) + return -1; + + return sk >= 0 && he->socket != sk; +} + struct sort_entry sort_socket = { .se_header = "Socket", .se_cmp = sort__socket_cmp, .se_snprintf = hist_entry__socket_snprintf, + .se_filter = hist_entry__socket_filter, .se_width_idx = HISTC_SOCKET, }; @@ -530,6 +575,18 @@ static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); } +static int hist_entry__dso_from_filter(struct hist_entry *he, int type, + const void *arg) +{ 
+ const struct dso *dso = arg; + + if (type != HIST_FILTER__DSO) + return -1; + + return dso && (!he->branch_info || !he->branch_info->from.map || + he->branch_info->from.map->dso != dso); +} + static int64_t sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -550,6 +607,18 @@ static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); } +static int hist_entry__dso_to_filter(struct hist_entry *he, int type, + const void *arg) +{ + const struct dso *dso = arg; + + if (type != HIST_FILTER__DSO) + return -1; + + return dso && (!he->branch_info || !he->branch_info->to.map || + he->branch_info->to.map->dso != dso); +} + static int64_t sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -611,10 +680,35 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); } +static int hist_entry__sym_from_filter(struct hist_entry *he, int type, + const void *arg) +{ + const char *sym = arg; + + if (type != HIST_FILTER__SYMBOL) + return -1; + + return sym && !(he->branch_info && he->branch_info->from.sym && + strstr(he->branch_info->from.sym->name, sym)); +} + +static int hist_entry__sym_to_filter(struct hist_entry *he, int type, + const void *arg) +{ + const char *sym = arg; + + if (type != HIST_FILTER__SYMBOL) + return -1; + + return sym && !(he->branch_info && he->branch_info->to.sym && + strstr(he->branch_info->to.sym->name, sym)); +} + struct sort_entry sort_dso_from = { .se_header = "Source Shared Object", .se_cmp = sort__dso_from_cmp, .se_snprintf = hist_entry__dso_from_snprintf, + .se_filter = hist_entry__dso_from_filter, .se_width_idx = HISTC_DSO_FROM, }; @@ -622,6 +716,7 @@ struct sort_entry sort_dso_to = { .se_header = "Target Shared Object", .se_cmp = sort__dso_to_cmp, .se_snprintf = hist_entry__dso_to_snprintf, + .se_filter = hist_entry__dso_to_filter, .se_width_idx = 
HISTC_DSO_TO, }; @@ -629,6 +724,7 @@ struct sort_entry sort_sym_from = { .se_header = "Source Symbol", .se_cmp = sort__sym_from_cmp, .se_snprintf = hist_entry__sym_from_snprintf, + .se_filter = hist_entry__sym_from_filter, .se_width_idx = HISTC_SYMBOL_FROM, }; @@ -636,6 +732,7 @@ struct sort_entry sort_sym_to = { .se_header = "Target Symbol", .se_cmp = sort__sym_to_cmp, .se_snprintf = hist_entry__sym_to_snprintf, + .se_filter = hist_entry__sym_to_filter, .se_width_idx = HISTC_SYMBOL_TO, }; @@ -1498,6 +1595,22 @@ static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd) return fmt; } +int hist_entry__filter(struct hist_entry *he, int type, const void *arg) +{ + struct perf_hpp_fmt *fmt; + struct hpp_sort_entry *hse; + + fmt = he->fmt; + if (fmt == NULL || !perf_hpp__is_sort_entry(fmt)) + return -1; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + if (hse->se->se_filter == NULL) + return -1; + + return hse->se->se_filter(he, type, arg); +} + static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd) { struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 10315e02adf5..a8d53ffe0916 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -245,6 +245,7 @@ struct sort_entry { int64_t (*se_sort)(struct hist_entry *, struct hist_entry *); int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size, unsigned int width); + int (*se_filter)(struct hist_entry *he, int type, const void *arg); u8 se_width_idx; }; -- cgit v1.2.3 From 155e9afff77916931f615a394cef187b342530dc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:38 +0900 Subject: perf hists: Support filtering in hierarchy mode The hists__filter_hierarchy() function implements filtering in hierarchy mode. Now we have hist_entry__filter() so use it for entries in the hierarchy. It returns 3 kind of values. A negative value means that it's not filtered by this type. 
It marks the current entry as filtered tentatively, so if a lower level entry removes the filter it is also removed from all of its parents so that we can find the entry in the output. A positive value means it's filtered out by this type. Zero means it's not filtered, so it removes the filter and shows the entry in the output. In these cases, it moves to the next entry since a lower level entry won't match by this type of filter anymore. Thus all children will be filtered or not together. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 101 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 358af7ed07af..dbab977f3a44 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1560,6 +1560,27 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h enum hist_filter filter) { h->filtered &= ~(1 << filter); + + if (symbol_conf.report_hierarchy) { + struct hist_entry *parent = h->parent_he; + + while (parent) { + he_stat__add_stat(&parent->stat, &h->stat); + + parent->filtered &= ~(1 << filter); + + if (parent->filtered) + goto next; + + /* force fold unfiltered entry for simplicity */ + parent->unfolded = false; + parent->row_offset = 0; + parent->nr_rows = 0; +next: + parent = parent->parent_he; + } + } + if (h->filtered) return; @@ -1645,28 +1666,92 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil } } +static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg) +{ + struct rb_node *nd; + + hists->stats.nr_non_filtered_samples = 0; + + hists__reset_filter_stats(hists); + hists__reset_col_len(hists); + + nd =
rb_first(&hists->entries); + while (nd) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + int ret; + + ret = hist_entry__filter(h, type, arg); + + /* + * case 1. non-matching type + * zero out the period, set filter marker and move to child + */ + if (ret < 0) { + memset(&h->stat, 0, sizeof(h->stat)); + h->filtered |= (1 << type); + + nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD); + } + /* + * case 2. matched type (filter out) + * set filter marker and move to next + */ + else if (ret == 1) { + h->filtered |= (1 << type); + + nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING); + } + /* + * case 3. ok (not filtered) + * add period to hists and parents, erase the filter marker + * and move to next sibling + */ + else { + hists__remove_entry_filter(hists, h, type); + + nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING); + } + } +} + void hists__filter_by_thread(struct hists *hists) { - hists__filter_by_type(hists, HIST_FILTER__THREAD, - hists__filter_entry_by_thread); + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__THREAD, + hists->thread_filter); + else + hists__filter_by_type(hists, HIST_FILTER__THREAD, + hists__filter_entry_by_thread); } void hists__filter_by_dso(struct hists *hists) { - hists__filter_by_type(hists, HIST_FILTER__DSO, - hists__filter_entry_by_dso); + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__DSO, + hists->dso_filter); + else + hists__filter_by_type(hists, HIST_FILTER__DSO, + hists__filter_entry_by_dso); } void hists__filter_by_symbol(struct hists *hists) { - hists__filter_by_type(hists, HIST_FILTER__SYMBOL, - hists__filter_entry_by_symbol); + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL, + hists->symbol_filter_str); + else + hists__filter_by_type(hists, HIST_FILTER__SYMBOL, + hists__filter_entry_by_symbol); } void hists__filter_by_socket(struct hists *hists) { - hists__filter_by_type(hists, 
HIST_FILTER__SOCKET, - hists__filter_entry_by_socket); + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__SOCKET, + &hists->socket_filter); + else + hists__filter_by_type(hists, HIST_FILTER__SOCKET, + hists__filter_entry_by_socket); } void events_stats__inc(struct events_stats *stats, u32 type) -- cgit v1.2.3 From 70642850fa581df219d7bc03cd7aca6e1956968c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:39 +0900 Subject: perf hists: Resort after filtering hierarchy In hierarchy mode, a filter can affect periods of entries in upper hierarchy. So it needs to resort the hists after filter. For example, let's look at the following example: Overhead Command / Shared Object / Symbol ------------ -------------------------------- 30.00% perf 20.00% perf 10.00% main 5.00% pr_debug 5.00% memcpy 10.00% [kernel.vmlinux] 8.00% memset 2.00% cpu_idle If we apply a symbol filter for 'mem' it should look like this 13.00% perf 8.00% [kernel.vmlinux] 8.00% memset 5.00% perf 5.00% memcpy Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index dbab977f3a44..a44bf5ae6acb 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1666,9 +1666,47 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil } } +static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + struct rb_root new_root = RB_ROOT; + struct rb_node *nd; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct
hist_entry, rb_node); + + if (hist_entry__sort(he, iter) > 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, root); + + if (he->leaf || he->filtered) + return; + + nd = rb_first(&he->hroot_out); + while (nd) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + nd = rb_next(nd); + rb_erase(&h->rb_node, &he->hroot_out); + + resort_filtered_entry(&new_root, h); + } + + he->hroot_out = new_root; +} + static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg) { struct rb_node *nd; + struct rb_root new_root = RB_ROOT; hists->stats.nr_non_filtered_samples = 0; @@ -1712,6 +1750,22 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING); } } + + /* + * resort output after applying a new filter since filter in a lower + * hierarchy can change periods in a upper hierarchy. + */ + nd = rb_first(&hists->entries); + while (nd) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + nd = rb_next(nd); + rb_erase(&h->rb_node, &hists->entries); + + resort_filtered_entry(&new_root, h); + } + + hists->entries = new_root; } void hists__filter_by_thread(struct hists *hists) -- cgit v1.2.3 From 1f2d72cf3258eacd667cd1920e64c9b64b9984d5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:40 +0900 Subject: perf hists: Count number of sort keys It'll be used for hierarchy output mode to indent entries properly. 
Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-9-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 3 +++ tools/perf/util/hist.h | 1 + 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 12223d791e9f..edbf854e8e1c 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -514,6 +514,9 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list, void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, struct perf_hpp_fmt *format) { + if (perf_hpp__is_sort_entry(format) || perf_hpp__is_dynamic_entry(format)) + list->nr_sort_keys++; + list_add_tail(&format->sort_list, &list->sorts); } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 480d2eb26001..d08e4f36f193 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -237,6 +237,7 @@ struct perf_hpp_fmt { struct perf_hpp_list { struct list_head fields; struct list_head sorts; + int nr_sort_keys; }; extern struct perf_hpp_list perf_hpp_list; -- cgit v1.2.3 From ef86d68a088c324e4bd85f82387d1f9a571affd0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:41 +0900 Subject: perf ui/stdio: Implement hierarchy output mode The hierarchy output mode is to group entries for each level so that user can see higher level picture more easily. It also helps to find out which component is most costly. 
The output will look like below: 15.11% swapper 14.97% [kernel.vmlinux] 0.09% [libahci] 0.05% [iwlwifi] 10.29% irq/33-iwlwifi 6.45% [kernel.vmlinux] 1.41% [mac80211] 1.15% [iwldvm] 1.14% [iwlwifi] 0.14% [cfg80211] 4.81% firefox 3.92% libxul.so 0.34% [kernel.vmlinux] Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-10-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 79 +++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/hist.h | 2 ++ 2 files changed, 80 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 87b022ff03d8..90b86776f964 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -410,6 +410,76 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) return hpp->buf - start; } +static int hist_entry__hierarchy_fprintf(struct hist_entry *he, + struct perf_hpp *hpp, + int nr_sort_key, struct hists *hists, + FILE *fp) +{ + const char *sep = symbol_conf.field_sep; + struct perf_hpp_fmt *fmt; + char *buf = hpp->buf; + int ret, printed = 0; + bool first = true; + + if (symbol_conf.exclude_other && !he->parent) + return 0; + + ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, ""); + advance_hpp(hpp, ret); + + hists__for_each_format(he->hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) + break; + + /* + * If there's no field_sep, we still need + * to display initial ' '. 
+ */ + if (!sep || !first) { + ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " "); + advance_hpp(hpp, ret); + } else + first = false; + + if (perf_hpp__use_color() && fmt->color) + ret = fmt->color(fmt, hpp, he); + else + ret = fmt->entry(fmt, hpp, he); + + ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret); + advance_hpp(hpp, ret); + } + + if (sep) + ret = scnprintf(hpp->buf, hpp->size, "%s", sep); + else + ret = scnprintf(hpp->buf, hpp->size, "%*s", + (nr_sort_key - 1) * HIERARCHY_INDENT + 2, ""); + advance_hpp(hpp, ret); + + /* + * No need to call hist_entry__snprintf_alignment() since this + * fmt is always the last column in the hierarchy mode. + */ + fmt = he->fmt; + if (perf_hpp__use_color() && fmt->color) + fmt->color(fmt, hpp, he); + else + fmt->entry(fmt, hpp, he); + + printed += fprintf(fp, "%s\n", buf); + + if (symbol_conf.use_callchain && he->leaf) { + u64 total = hists__total_period(hists); + + printed += hist_entry_callchain__fprintf(he, total, 0, fp); + goto out; + } + +out: + return printed; +} + static int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, char *bf, size_t bfsz, FILE *fp) @@ -424,6 +494,13 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, if (size == 0 || size > bfsz) size = hpp.size = bfsz; + if (symbol_conf.report_hierarchy) { + int nr_sort = hists->hpp_list->nr_sort_keys; + + return hist_entry__hierarchy_fprintf(he, &hpp, nr_sort, + hists, fp); + } + hist_entry__snprintf(he, &hpp); ret = fprintf(fp, "%s\n", bf); @@ -522,7 +599,7 @@ print_entries: goto out; } - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d08e4f36f193..722aa447f705 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -437,4 +437,6 @@ static 
inline struct rb_node *rb_hierarchy_next(struct rb_node *node) return __rb_hierarchy_next(node, HMD_NORMAL); } +#define HIERARCHY_INDENT 3 + #endif /* __PERF_HIST_H */ -- cgit v1.2.3 From 8e2fc44f46ba4d3d1ee8b6ba0839a282a9f3fdd7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:42 +0900 Subject: perf ui/stdio: Align column header for hierarchy output The hierarchy output mode is to group entries so the existing columns won't fit to the new output. Treat all sort keys as a single column and separate headers by "/". # Overhead Command / Shared Object # ........... ................................ # 15.11% swapper 14.97% [kernel.vmlinux] 0.09% [libahci] 0.05% [iwlwifi] ... Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-11-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 105 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/ctype.c | 9 ++++ tools/perf/util/util.h | 2 + 3 files changed, 116 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 90b86776f964..435eaaaf2f1d 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -511,6 +511,106 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, return ret; } +static int print_hierarchy_indent(const char *sep, int nr_sort, + const char *line, FILE *fp) +{ + if (sep != NULL || nr_sort < 1) + return 0; + + return fprintf(fp, "%-.*s", (nr_sort - 1) * HIERARCHY_INDENT, line); +} + +static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, + const char *sep, FILE *fp) +{ + bool first = true; + int nr_sort; + unsigned width = 0; + unsigned header_width = 0; + struct perf_hpp_fmt *fmt; + + nr_sort = hists->hpp_list->nr_sort_keys; + + /* preserve max indent depth for column headers 
*/ + print_hierarchy_indent(sep, nr_sort, spaces, fp); + + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) + break; + + if (!first) + fprintf(fp, "%s", sep ?: " "); + else + first = false; + + fmt->header(fmt, hpp, hists_to_evsel(hists)); + fprintf(fp, "%s", hpp->buf); + } + + /* combine sort headers with ' / ' */ + first = true; + hists__for_each_format(hists, fmt) { + if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) + continue; + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first) + header_width += fprintf(fp, " / "); + else { + header_width += fprintf(fp, "%s", sep ?: " "); + first = false; + } + + fmt->header(fmt, hpp, hists_to_evsel(hists)); + rtrim(hpp->buf); + + header_width += fprintf(fp, "%s", hpp->buf); + } + + /* preserve max indent depth for combined sort headers */ + print_hierarchy_indent(sep, nr_sort, spaces, fp); + + fprintf(fp, "\n# "); + + /* preserve max indent depth for initial dots */ + print_hierarchy_indent(sep, nr_sort, dots, fp); + + first = true; + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) + break; + + if (!first) + fprintf(fp, "%s", sep ?: " "); + else + first = false; + + width = fmt->width(fmt, hpp, hists_to_evsel(hists)); + fprintf(fp, "%.*s", width, dots); + } + + hists__for_each_format(hists, fmt) { + if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) + continue; + if (perf_hpp__should_skip(fmt, hists)) + continue; + + width = fmt->width(fmt, hpp, hists_to_evsel(hists)); + if (width > header_width) + header_width = width; + } + + fprintf(fp, "%s%-.*s", sep ?: " ", header_width, dots); + + /* preserve max indent depth for dots under sort headers */ + print_hierarchy_indent(sep, nr_sort, dots, fp); + + fprintf(fp, "\n#\n"); + + return 2; +} + size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp) 
{ @@ -542,6 +642,11 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); + if (symbol_conf.report_hierarchy) { + nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp); + goto print_entries; + } + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index aada3ac5e891..d4a5a21c2a7e 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -31,9 +31,18 @@ unsigned char sane_ctype[256] = { }; const char *graph_line = + "_____________________________________________________________________" "_____________________________________________________________________" "_____________________________________________________________________"; const char *graph_dotted_line = "---------------------------------------------------------------------" "---------------------------------------------------------------------" "---------------------------------------------------------------------"; +const char *spaces = + " " + " " + " "; +const char *dots = + "....................................................................." + "....................................................................." + "....................................................................."; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7015019ee5fb..d0d50cef8b2a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -82,6 +82,8 @@ extern const char *graph_line; extern const char *graph_dotted_line; +extern const char *spaces; +extern const char *dots; extern char buildid_dir[]; /* On most systems would have given us this, but -- cgit v1.2.3 From f5b763feebe9770c3e6b01f1e19860e95f24b623 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:43 +0900 Subject: perf hists browser: Count number of hierarchy entries Add nr_hierarchy_entries field to keep current number of (unfolded) hist entries. 
And the hist_entry->nr_rows carries number of direct children. But in the hierarchy mode, entry can have grand children and callchains. So update the number properly using hierarchy_count_rows() when toggling the folded state (by pressing ENTER key). Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-12-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 85 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 1819771243f9..de1d6f0df8a7 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -32,6 +32,7 @@ struct hist_browser { bool show_headers; float min_pcnt; u64 nr_non_filtered_entries; + u64 nr_hierarchy_entries; u64 nr_callchain_rows; }; @@ -58,11 +59,11 @@ static int hist_browser__get_folding(struct hist_browser *browser) for (nd = rb_first(&hists->entries); (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; - nd = rb_next(nd)) { + nd = rb_hierarchy_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); - if (he->unfolded) + if (he->leaf && he->unfolded) unfolded_rows += he->nr_rows; } return unfolded_rows; @@ -72,7 +73,9 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb) { u32 nr_entries; - if (hist_browser__has_filter(hb)) + if (symbol_conf.report_hierarchy) + nr_entries = hb->nr_hierarchy_entries; + else if (hist_browser__has_filter(hb)) nr_entries = hb->nr_non_filtered_entries; else nr_entries = hb->hists->nr_entries; @@ -247,6 +250,35 @@ static int callchain__count_rows(struct rb_root *chain) return n; } +static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he, + bool include_children) +{ + int count = 0; + struct 
rb_node *node; + struct hist_entry *child; + + if (he->leaf) + return callchain__count_rows(&he->sorted_chain); + + node = rb_first(&he->hroot_out); + while (node) { + float percent; + + child = rb_entry(node, struct hist_entry, rb_node); + percent = hist_entry__get_percent_limit(child); + + if (!child->filtered && percent >= hb->min_pcnt) { + count++; + + if (include_children && child->unfolded) + count += hierarchy_count_rows(hb, child, true); + } + + node = rb_next(node); + } + return count; +} + static bool hist_entry__toggle_fold(struct hist_entry *he) { if (!he) @@ -326,11 +358,17 @@ static void callchain__init_have_children(struct rb_root *root) static void hist_entry__init_have_children(struct hist_entry *he) { - if (!he->init_have_children) { + if (he->init_have_children) + return; + + if (he->leaf) { he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain); callchain__init_have_children(&he->sorted_chain); - he->init_have_children = true; + } else { + he->has_children = !RB_EMPTY_ROOT(&he->hroot_out); } + + he->init_have_children = true; } static bool hist_browser__toggle_fold(struct hist_browser *browser) @@ -349,17 +387,41 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser) has_children = callchain_list__toggle_fold(cl); if (has_children) { + int child_rows = 0; + hist_entry__init_have_children(he); browser->b.nr_entries -= he->nr_rows; - browser->nr_callchain_rows -= he->nr_rows; - if (he->unfolded) - he->nr_rows = callchain__count_rows(&he->sorted_chain); + if (he->leaf) + browser->nr_callchain_rows -= he->nr_rows; else + browser->nr_hierarchy_entries -= he->nr_rows; + + if (symbol_conf.report_hierarchy) + child_rows = hierarchy_count_rows(browser, he, true); + + if (he->unfolded) { + if (he->leaf) + he->nr_rows = callchain__count_rows(&he->sorted_chain); + else + he->nr_rows = hierarchy_count_rows(browser, he, false); + + /* account grand children */ + if (symbol_conf.report_hierarchy) + browser->b.nr_entries += child_rows - 
he->nr_rows; + } else { + if (symbol_conf.report_hierarchy) + browser->b.nr_entries -= child_rows - he->nr_rows; + he->nr_rows = 0; + } browser->b.nr_entries += he->nr_rows; - browser->nr_callchain_rows += he->nr_rows; + + if (he->leaf) + browser->nr_callchain_rows += he->nr_rows; + else + browser->nr_hierarchy_entries += he->nr_rows; return true; } @@ -2025,17 +2087,18 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb) u64 nr_entries = 0; struct rb_node *nd = rb_first(&hb->hists->entries); - if (hb->min_pcnt == 0) { + if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) { hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries; return; } while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { nr_entries++; - nd = rb_next(nd); + nd = rb_hierarchy_next(nd); } hb->nr_non_filtered_entries = nr_entries; + hb->nr_hierarchy_entries = nr_entries; } static void hist_browser__update_percent_limit(struct hist_browser *hb, -- cgit v1.2.3 From 492b1010606e9222690992ad8e4898a88a696856 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:44 +0900 Subject: perf hists browser: Support collapsing/expanding whole entries in hierarchy The 'C' and 'E' keys are to collapse/expand all hist entries. Update nr_hierarchy_entries properly in this case. 
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-13-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 58 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index de1d6f0df8a7..857b9beb0aab 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -484,13 +484,38 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold) return n; } -static void hist_entry__set_folding(struct hist_entry *he, bool unfold) +static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he, + bool unfold __maybe_unused) +{ + float percent; + struct rb_node *nd; + struct hist_entry *child; + int n = 0; + + for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) { + child = rb_entry(nd, struct hist_entry, rb_node); + percent = hist_entry__get_percent_limit(child); + if (!child->filtered && percent >= hb->min_pcnt) + n++; + } + + return n; +} + +static void hist_entry__set_folding(struct hist_entry *he, + struct hist_browser *hb, bool unfold) { hist_entry__init_have_children(he); he->unfolded = unfold ? he->has_children : false; if (he->has_children) { - int n = callchain__set_folding(&he->sorted_chain, unfold); + int n; + + if (he->leaf) + n = callchain__set_folding(&he->sorted_chain, unfold); + else + n = hierarchy_set_folding(hb, he, unfold); + he->nr_rows = unfold ? 
n : 0; } else he->nr_rows = 0; @@ -500,19 +525,32 @@ static void __hist_browser__set_folding(struct hist_browser *browser, bool unfold) { struct rb_node *nd; - struct hists *hists = browser->hists; + struct hist_entry *he; + double percent; - for (nd = rb_first(&hists->entries); - (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; - nd = rb_next(nd)) { - struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); - hist_entry__set_folding(he, unfold); - browser->nr_callchain_rows += he->nr_rows; + nd = rb_first(&browser->hists->entries); + while (nd) { + he = rb_entry(nd, struct hist_entry, rb_node); + + /* set folding state even if it's currently folded */ + nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); + + hist_entry__set_folding(he, browser, unfold); + + percent = hist_entry__get_percent_limit(he); + if (he->filtered || percent < browser->min_pcnt) + continue; + + if (!he->depth || unfold) + browser->nr_hierarchy_entries++; + if (he->leaf) + browser->nr_callchain_rows += he->nr_rows; } } static void hist_browser__set_folding(struct hist_browser *browser, bool unfold) { + browser->nr_hierarchy_entries = 0; browser->nr_callchain_rows = 0; __hist_browser__set_folding(browser, unfold); @@ -2131,7 +2169,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, /* force to re-evaluate folding state of callchains */ he->init_have_children = false; - hist_entry__set_folding(he, false); + hist_entry__set_folding(he, hb, false); nd = rb_next(nd); } -- cgit v1.2.3 From d0506edbec7d04dcca632fddfc162faa78d5527a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:45 +0900 Subject: perf hists browser: Implement hierarchy output Implement hierarchy mode in TUI. The output looks like the stdio output, but it also supports folding/unfolding children dynamically.
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-14-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 290 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 268 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 857b9beb0aab..2bccf68ce5f1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1260,6 +1260,158 @@ static int hist_browser__show_entry(struct hist_browser *browser, return printed; } +static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, + struct hist_entry *entry, + unsigned short row, + int level, int nr_sort_keys) +{ + int printed = 0; + int width = browser->b.width; + char folded_sign = ' '; + bool current_entry = ui_browser__is_current_entry(&browser->b, row); + off_t row_offset = entry->row_offset; + bool first = true; + struct perf_hpp_fmt *fmt; + struct hpp_arg arg = { + .b = &browser->b, + .current_entry = current_entry, + }; + int column = 0; + int hierarchy_indent = (nr_sort_keys - 1) * HIERARCHY_INDENT; + + if (current_entry) { + browser->he_selection = entry; + browser->selection = &entry->ms; + } + + hist_entry__init_have_children(entry); + folded_sign = hist_entry__folded(entry); + arg.folded_sign = folded_sign; + + if (entry->leaf && row_offset) { + row_offset--; + goto show_callchain; + } + + hist_browser__gotorc(browser, row, 0); + + if (current_entry && browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); + else + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + + ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT); + width -= level * HIERARCHY_INDENT; + + hists__for_each_format(entry->hists, fmt) { + char s[2048]; + struct 
perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, + }; + + if (perf_hpp__should_skip(fmt, entry->hists) || + column++ < browser->b.horiz_scroll) + continue; + + if (perf_hpp__is_sort_entry(fmt) || + perf_hpp__is_dynamic_entry(fmt)) + break; + + if (current_entry && browser->b.navkeypressed) { + ui_browser__set_color(&browser->b, + HE_COLORSET_SELECTED); + } else { + ui_browser__set_color(&browser->b, + HE_COLORSET_NORMAL); + } + + if (first) { + ui_browser__printf(&browser->b, "%c", folded_sign); + width--; + first = false; + } else { + ui_browser__printf(&browser->b, " "); + width -= 2; + } + + if (fmt->color) { + int ret = fmt->color(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + /* + * fmt->color() already used ui_browser to + * print the non alignment bits, skip it (+ret): + */ + ui_browser__printf(&browser->b, "%s", s + ret); + } else { + int ret = fmt->entry(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + ui_browser__printf(&browser->b, "%s", s); + } + width -= hpp.buf - s; + } + + ui_browser__write_nstring(&browser->b, "", hierarchy_indent); + width -= hierarchy_indent; + + if (column >= browser->b.horiz_scroll) { + char s[2048]; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, + }; + + if (current_entry && browser->b.navkeypressed) { + ui_browser__set_color(&browser->b, + HE_COLORSET_SELECTED); + } else { + ui_browser__set_color(&browser->b, + HE_COLORSET_NORMAL); + } + + ui_browser__write_nstring(&browser->b, "", 2); + width -= 2; + + /* + * No need to call hist_entry__snprintf_alignment() + * since this fmt is always the last column in the + * hierarchy mode. 
+ */ + fmt = entry->fmt; + if (fmt->color) { + width -= fmt->color(fmt, &hpp, entry); + } else { + width -= fmt->entry(fmt, &hpp, entry); + ui_browser__printf(&browser->b, "%s", s); + } + } + + /* The scroll bar isn't being used */ + if (!browser->b.navkeypressed) + width += 1; + + ui_browser__write_nstring(&browser->b, "", width); + + ++row; + ++printed; + +show_callchain: + if (entry->leaf && folded_sign == '-' && row != browser->b.rows) { + struct callchain_print_arg carg = { + .row_offset = row_offset, + }; + + printed += hist_browser__show_callchain(browser, entry, + level + 1, row, + hist_browser__show_callchain_entry, &carg, + hist_browser__check_output_full); + } + + return printed; +} + static int advance_hpp_check(struct perf_hpp *hpp, int inc) { advance_hpp(hpp, inc); @@ -1325,6 +1477,7 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) u16 header_offset = 0; struct rb_node *nd; struct hist_browser *hb = container_of(browser, struct hist_browser, b); + int nr_sort = hb->hists->hpp_list->nr_sort_keys; if (hb->show_headers) { hist_browser__show_headers(hb); @@ -1335,18 +1488,28 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) hb->he_selection = NULL; hb->selection = NULL; - for (nd = browser->top; nd; nd = rb_next(nd)) { + for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent; - if (h->filtered) + if (h->filtered) { + /* let it move to sibling */ + h->unfolded = false; continue; + } percent = hist_entry__get_percent_limit(h); if (percent < hb->min_pcnt) continue; - row += hist_browser__show_entry(hb, h, row); + if (symbol_conf.report_hierarchy) { + row += hist_browser__show_hierarchy_entry(hb, h, row, + h->depth, + nr_sort); + } else { + row += hist_browser__show_entry(hb, h, row); + } + if (row == browser->rows) break; } @@ -1364,7 +1527,14 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, if (!h->filtered && 
percent >= min_pcnt) return nd; - nd = rb_next(nd); + /* + * If it's filtered, its all children also were filtered. + * So move to sibling node. + */ + if (rb_next(nd)) + nd = rb_next(nd); + else + nd = rb_hierarchy_next(nd); } return NULL; @@ -1380,7 +1550,7 @@ static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, if (!h->filtered && percent >= min_pcnt) return nd; - nd = rb_prev(nd); + nd = rb_hierarchy_prev(nd); } return NULL; @@ -1410,8 +1580,8 @@ static void ui_browser__hists_seek(struct ui_browser *browser, nd = browser->top; goto do_offset; case SEEK_END: - nd = hists__filter_prev_entries(rb_last(browser->entries), - hb->min_pcnt); + nd = rb_hierarchy_last(rb_last(browser->entries)); + nd = hists__filter_prev_entries(nd, hb->min_pcnt); first = false; break; default: @@ -1445,7 +1615,7 @@ do_offset: if (offset > 0) { do { h = rb_entry(nd, struct hist_entry, rb_node); - if (h->unfolded) { + if (h->unfolded && h->leaf) { u16 remaining = h->nr_rows - h->row_offset; if (offset > remaining) { offset -= remaining; @@ -1457,7 +1627,8 @@ do_offset: break; } } - nd = hists__filter_entries(rb_next(nd), hb->min_pcnt); + nd = hists__filter_entries(rb_hierarchy_next(nd), + hb->min_pcnt); if (nd == NULL) break; --offset; @@ -1466,7 +1637,7 @@ do_offset: } else if (offset < 0) { while (1) { h = rb_entry(nd, struct hist_entry, rb_node); - if (h->unfolded) { + if (h->unfolded && h->leaf) { if (first) { if (-offset > h->row_offset) { offset += h->row_offset; @@ -1490,7 +1661,7 @@ do_offset: } } - nd = hists__filter_prev_entries(rb_prev(nd), + nd = hists__filter_prev_entries(rb_hierarchy_prev(nd), hb->min_pcnt); if (nd == NULL) break; @@ -1503,7 +1674,7 @@ do_offset: * row_offset at its last entry. 
*/ h = rb_entry(nd, struct hist_entry, rb_node); - if (h->unfolded) + if (h->unfolded && h->leaf) h->row_offset = h->nr_rows; break; } @@ -1517,13 +1688,14 @@ do_offset: } static int hist_browser__fprintf_callchain(struct hist_browser *browser, - struct hist_entry *he, FILE *fp) + struct hist_entry *he, FILE *fp, + int level) { struct callchain_print_arg arg = { .fp = fp, }; - hist_browser__show_callchain(browser, he, 1, 0, + hist_browser__show_callchain(browser, he, level, 0, hist_browser__fprintf_callchain_entry, &arg, hist_browser__check_dump_full); return arg.printed; @@ -1566,7 +1738,65 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, printed += fprintf(fp, "%s\n", s); if (folded_sign == '-') - printed += hist_browser__fprintf_callchain(browser, he, fp); + printed += hist_browser__fprintf_callchain(browser, he, fp, 1); + + return printed; +} + + +static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, + struct hist_entry *he, + FILE *fp, int level, + int nr_sort_keys) +{ + char s[8192]; + int printed = 0; + char folded_sign = ' '; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + }; + struct perf_hpp_fmt *fmt; + bool first = true; + int ret; + int hierarchy_indent = (nr_sort_keys + 1) * HIERARCHY_INDENT; + + printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, ""); + + folded_sign = hist_entry__folded(he); + printed += fprintf(fp, "%c", folded_sign); + + hists__for_each_format(he->hists, fmt) { + if (perf_hpp__should_skip(fmt, he->hists)) + continue; + + if (perf_hpp__is_sort_entry(fmt) || + perf_hpp__is_dynamic_entry(fmt)) + break; + + if (!first) { + ret = scnprintf(hpp.buf, hpp.size, " "); + advance_hpp(&hpp, ret); + } else + first = false; + + ret = fmt->entry(fmt, &hpp, he); + advance_hpp(&hpp, ret); + } + + ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, ""); + advance_hpp(&hpp, ret); + + fmt = he->fmt; + ret = fmt->entry(fmt, &hpp, he); + advance_hpp(&hpp, ret); + + printed += 
fprintf(fp, "%s\n", rtrim(s)); + + if (he->leaf && folded_sign == '-') { + printed += hist_browser__fprintf_callchain(browser, he, fp, + he->depth + 1); + } return printed; } @@ -1576,12 +1806,22 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), browser->min_pcnt); int printed = 0; + int nr_sort = browser->hists->hpp_list->nr_sort_keys; while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - printed += hist_browser__fprintf_entry(browser, h, fp); - nd = hists__filter_entries(rb_next(nd), browser->min_pcnt); + if (symbol_conf.report_hierarchy) { + printed += hist_browser__fprintf_hierarchy_entry(browser, + h, fp, + h->depth, + nr_sort); + } else { + printed += hist_browser__fprintf_entry(browser, h, fp); + } + + nd = hists__filter_entries(rb_hierarchy_next(nd), + browser->min_pcnt); } return printed; @@ -2149,12 +2389,12 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, hb->min_pcnt = callchain_param.min_percent = percent; - if (!symbol_conf.use_callchain) - return; - while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { he = rb_entry(nd, struct hist_entry, rb_node); + if (!he->leaf || !symbol_conf.use_callchain) + goto next; + if (callchain_param.mode == CHAIN_GRAPH_REL) { total = he->stat.period; @@ -2167,11 +2407,17 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); +next: + /* + * Tentatively set unfolded so that the rb_hierarchy_next() + * can toggle children of folded entries too. 
+ */ + he->unfolded = he->has_children; + nd = rb_hierarchy_next(nd); + /* force to re-evaluate folding state of callchains */ he->init_have_children = false; hist_entry__set_folding(he, hb, false); - - nd = rb_next(nd); } } -- cgit v1.2.3 From d8b92400d3ba6bb9a310c42b7518a81eb90f83be Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:46 +0900 Subject: perf hists browser: Align column header in hierarchy mode Like in stdio, fit column header to hierarchy output. Merge column headers with "/" as a separator. Overhead Command / Shared Object / Symbol ... + 0.09% dwm + 0.06% emacs - 0.05% perf - 0.05% [kernel.vmlinux] + 0.03% [k] memcpy_orig + 0.01% [k] unmap_single_vma + 0.01% [k] smp_call_function_single + 0.00% [k] native_irq_return_iret + 0.00% [k] arch_trigger_all_cpu_backtrace_handler + 0.00% [k] native_write_msr_safe Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-15-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 71 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 2bccf68ce5f1..6bcd7670ce5f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1451,11 +1451,80 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * return ret; } +static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size) +{ + struct hists *hists = browser->hists; + struct perf_hpp dummy_hpp = { + .buf = buf, + .size = size, + }; + struct perf_hpp_fmt *fmt; + size_t ret = 0; + int column = 0; + int nr_sort_keys = hists->hpp_list->nr_sort_keys; + bool first = true; + + ret = scnprintf(buf, size, " "); + if 
(advance_hpp_check(&dummy_hpp, ret)) + return ret; + + hists__for_each_format(hists, fmt) { + if (column++ < browser->b.horiz_scroll) + continue; + + if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) + break; + + ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " "); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", + (nr_sort_keys - 1) * HIERARCHY_INDENT, ""); + if (advance_hpp_check(&dummy_hpp, ret)) + return ret; + + hists__for_each_format(hists, fmt) { + if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) + continue; + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (first) { + first = false; + } else { + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / "); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + + ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + dummy_hpp.buf[ret] = '\0'; + rtrim(dummy_hpp.buf); + + ret = strlen(dummy_hpp.buf); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + + return ret; +} + static void hist_browser__show_headers(struct hist_browser *browser) { char headers[1024]; - hists_browser__scnprintf_headers(browser, headers, sizeof(headers)); + if (symbol_conf.report_hierarchy) + hists_browser__scnprintf_hierarchy_headers(browser, headers, + sizeof(headers)); + else + hists_browser__scnprintf_headers(browser, headers, + sizeof(headers)); ui_browser__gotorc(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); -- cgit v1.2.3 From e311ec1e5df6d5de377e67f704bca43fb023ca5e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:47 +0900 Subject: perf ui/gtk: Implement hierarchy output mode The hierarchy output mode is to group entries for each level so that user can see higher level picture 
more easily. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-16-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/hists.c | 163 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 32cc38a5b57f..7f343339eae7 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -396,6 +396,164 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, gtk_container_add(GTK_CONTAINER(window), view); } +static void perf_gtk__add_hierarchy_entries(struct hists *hists, + struct rb_root *root, + GtkTreeStore *store, + GtkTreeIter *parent, + struct perf_hpp *hpp, + float min_pcnt) +{ + int col_idx = 0; + struct rb_node *node; + struct hist_entry *he; + struct perf_hpp_fmt *fmt; + u64 total = hists__total_period(hists); + + for (node = rb_first(root); node; node = rb_next(node)) { + GtkTreeIter iter; + float percent; + + he = rb_entry(node, struct hist_entry, rb_node); + if (he->filtered) + continue; + + percent = hist_entry__get_percent_limit(he); + if (percent < min_pcnt) + continue; + + gtk_tree_store_append(store, &iter, parent); + + col_idx = 0; + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || + perf_hpp__is_dynamic_entry(fmt)) + break; + + if (fmt->color) + fmt->color(fmt, hpp, he); + else + fmt->entry(fmt, hpp, he); + + gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1); + } + + fmt = he->fmt; + if (fmt->color) + fmt->color(fmt, hpp, he); + else + fmt->entry(fmt, hpp, he); + + gtk_tree_store_set(store, &iter, col_idx, rtrim(hpp->buf), -1); + + if (!he->leaf) { + perf_gtk__add_hierarchy_entries(hists, &he->hroot_out, + store, &iter, hpp, + min_pcnt); + } + + if 
(symbol_conf.use_callchain && he->leaf) { + if (callchain_param.mode == CHAIN_GRAPH_REL) + total = symbol_conf.cumulate_callchain ? + he->stat_acc->period : he->stat.period; + + perf_gtk__add_callchain(&he->sorted_chain, store, &iter, + col_idx, total); + } + } + +} + +static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, + float min_pcnt) +{ + struct perf_hpp_fmt *fmt; + GType col_types[MAX_COLUMNS]; + GtkCellRenderer *renderer; + GtkTreeStore *store; + GtkWidget *view; + int col_idx; + int nr_cols = 0; + char s[512]; + char buf[512]; + bool first = true; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + }; + + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || + perf_hpp__is_dynamic_entry(fmt)) + break; + + col_types[nr_cols++] = G_TYPE_STRING; + } + col_types[nr_cols++] = G_TYPE_STRING; + + store = gtk_tree_store_newv(nr_cols, col_types); + view = gtk_tree_view_new(); + renderer = gtk_cell_renderer_text_new(); + + col_idx = 0; + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || + perf_hpp__is_dynamic_entry(fmt)) + break; + + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, fmt->name, + renderer, "markup", + col_idx++, NULL); + } + + /* construct merged column header since sort keys share single column */ + buf[0] = '\0'; + hists__for_each_format(hists ,fmt) { + if (!perf_hpp__is_sort_entry(fmt) && + !perf_hpp__is_dynamic_entry(fmt)) + continue; + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (first) + first = false; + else + strcat(buf, " / "); + + fmt->header(fmt, &hpp, hists_to_evsel(hists)); + strcat(buf, rtrim(hpp.buf)); + } + + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, buf, + renderer, "markup", + col_idx++, NULL); + + for (col_idx = 0; col_idx < nr_cols; col_idx++) { + GtkTreeViewColumn *column; + + column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx); + gtk_tree_view_column_set_resizable(column, 
TRUE); + + if (col_idx == 0) { + gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view), + column); + } + } + + gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); + g_object_unref(GTK_TREE_MODEL(store)); + + perf_gtk__add_hierarchy_entries(hists, &hists->entries, store, + NULL, &hpp, min_pcnt); + + gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE); + + g_signal_connect(view, "row-activated", + G_CALLBACK(on_row_activated), NULL); + gtk_container_add(GTK_CONTAINER(window), view); +} + int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt __maybe_unused, @@ -463,7 +621,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, GTK_POLICY_AUTOMATIC, GTK_POLICY_AUTOMATIC); - perf_gtk__show_hists(scrolled_window, hists, min_pcnt); + if (symbol_conf.report_hierarchy) + perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt); + else + perf_gtk__show_hists(scrolled_window, hists, min_pcnt); tab_label = gtk_label_new(evname); -- cgit v1.2.3 From 4251446d778e669db5ac9f86b02d38064bdbbf9a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:48 +0900 Subject: perf report: Add --hierarchy option The --hierarchy option is to show output in hierarchy mode. It extends folding/unfolding in the TUI and GTK browsers to support sort items as well as callchains. Users can toggle the items to see the performance result at wanted level. $ perf report --hierarchy --tui Overhead Command / Shared Object / Symbol -------------------------------------------------- + 32.96% gnome-shell - 15.11% swapper - 14.97% [kernel.vmlinux] 6.82% [k] intel_idle 0.66% [k] menu_select 0.43% [k] __hrtimer_start_range_ns ... 
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-17-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 3 +++ tools/perf/Documentation/tips.txt | 1 + tools/perf/builtin-report.c | 17 +++++++++++++++++ 3 files changed, 21 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 89cab84e92fd..12113992ac9d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -401,6 +401,9 @@ include::itrace.txt[] --raw-trace:: When displaying traceevent output, do not use print fmt or plugins. +--hierarchy:: + Enable hierarchical output. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index e0ce9573b79b..5950b5a24efd 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -27,3 +27,4 @@ Skip collecing build-id when recording: perf record -B To change sampling frequency to 100 Hz: perf record -F 100 See assembly instructions with percentage: perf annotate If you prefer Intel style assembly, try: perf annotate -M intel +For hierarchical output, try: perf report --hierarchy diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 760e886ca9d9..f4d8244449ca 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -811,6 +811,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "only show processor socket that match with this filter"), OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace, "Show raw trace event output (do not use print fmt or plugins)"), + OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, + "Show entries in a hierarchy"), 
OPT_END() }; struct perf_data_file file = { @@ -920,6 +922,21 @@ repeat: symbol_conf.cumulate_callchain = false; } + if (symbol_conf.report_hierarchy) { + /* disable incompatible options */ + symbol_conf.event_group = false; + symbol_conf.cumulate_callchain = false; + + if (field_order) { + pr_err("Error: --hierarchy and --fields options cannot be used together\n"); + parse_options_usage(report_usage, options, "F", 1); + parse_options_usage(NULL, options, "hierarchy", 0); + goto error; + } + + sort__need_collapse = true; + } + /* Force tty output for header output and per-thread stat. */ if (report.header || report.header_only || report.show_threads) use_browser = 0; -- cgit v1.2.3 From 5d8200ae67724960f7761b3a2216a1ca651fcc65 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:49 +0900 Subject: perf hists: Support decaying in hierarchy mode In the hierarchy mode, hist entries should decay their children too. Also update hists__delete_entry() to be able to free child entries. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-18-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a44bf5ae6acb..1c530428e087 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -248,6 +248,8 @@ static void he_stat__decay(struct he_stat *he_stat) /* XXX need decay for weight too? 
*/ } +static void hists__delete_entry(struct hists *hists, struct hist_entry *he); + static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) { u64 prev_period = he->stat.period; @@ -263,21 +265,45 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) diff = prev_period - he->stat.period; - hists->stats.total_period -= diff; - if (!he->filtered) - hists->stats.total_non_filtered_period -= diff; + if (!he->depth) { + hists->stats.total_period -= diff; + if (!he->filtered) + hists->stats.total_non_filtered_period -= diff; + } + + if (!he->leaf) { + struct hist_entry *child; + struct rb_node *node = rb_first(&he->hroot_out); + while (node) { + child = rb_entry(node, struct hist_entry, rb_node); + node = rb_next(node); + + if (hists__decay_entry(hists, child)) + hists__delete_entry(hists, child); + } + } return he->stat.period == 0; } static void hists__delete_entry(struct hists *hists, struct hist_entry *he) { - rb_erase(&he->rb_node, &hists->entries); + struct rb_root *root_in; + struct rb_root *root_out; - if (sort__need_collapse) - rb_erase(&he->rb_node_in, &hists->entries_collapsed); - else - rb_erase(&he->rb_node_in, hists->entries_in); + if (he->parent_he) { + root_in = &he->parent_he->hroot_in; + root_out = &he->parent_he->hroot_out; + } else { + if (sort__need_collapse) + root_in = &hists->entries_collapsed; + else + root_in = hists->entries_in; + root_out = &hists->entries; + } + + rb_erase(&he->rb_node_in, root_in); + rb_erase(&he->rb_node, root_out); --hists->nr_entries; if (!he->filtered) -- cgit v1.2.3 From c92fcfde3486fb4b9e476ee5ad5995a62e401cce Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2016 00:13:50 +0900 Subject: perf top: Add --hierarchy option Support hierarchy output for perf-top using --hierarchy option. 
Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456326830-30456-19-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 3 +++ tools/perf/builtin-top.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index b0e60e17db38..19f046f027cd 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -233,6 +233,9 @@ Default is to monitor all CPUS. --raw-trace:: When displaying traceevent output, do not use print fmt or plugins. +--hierarchy:: + Enable hierarchy output. + INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a75de3940b97..b86b623e8799 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1214,6 +1214,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) parse_branch_stack), OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace, "Show raw trace event output (do not use print fmt or plugins)"), + OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, + "Show entries in a hierarchy"), OPT_END() }; const char * const top_usage[] = { @@ -1241,6 +1243,19 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) goto out_delete_evlist; } + if (symbol_conf.report_hierarchy) { + /* disable incompatible options */ + symbol_conf.event_group = false; + symbol_conf.cumulate_callchain = false; + + if (field_order) { + pr_err("Error: --hierarchy and --fields options cannot be used together\n"); + parse_options_usage(top_usage, options, "fields", 0); + parse_options_usage(NULL, options, "hierarchy", 0); + goto out_delete_evlist; + } + } + sort__mode = SORT_MODE__TOP; /* display thread 
wants entries to be collapsed in a different tree */ sort__need_collapse = 1; -- cgit v1.2.3 From 5104ffb229c357d9672344126040721e5dc4cc7b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Feb 2016 10:14:50 -0300 Subject: perf tools: Use asprintf() for simple string formatting/allocation No need to use strbuf there, it's just a simple alloc+formatting, which asprintf does just fine. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-6q6cxfhk8c8ypg3tfpo0i2iy@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 144047c396f0..f6321194937f 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -454,11 +454,12 @@ static void handle_internal_command(int argc, const char **argv) static void execv_dashed_external(const char **argv) { - struct strbuf cmd = STRBUF_INIT; + char *cmd; const char *tmp; int status; - strbuf_addf(&cmd, "perf-%s", argv[0]); + if (asprintf(&cmd, "perf-%s", argv[0]) < 0) + goto do_die; /* * argv[0] must be the perf command, but the argv array @@ -467,7 +468,7 @@ static void execv_dashed_external(const char **argv) * restore it on error. 
*/ tmp = argv[0]; - argv[0] = cmd.buf; + argv[0] = cmd; /* * if we fail because the command is not found, it is @@ -475,15 +476,16 @@ static void execv_dashed_external(const char **argv) */ status = run_command_v_opt(argv, 0); if (status != -ERR_RUN_COMMAND_EXEC) { - if (IS_RUN_COMMAND_ERR(status)) + if (IS_RUN_COMMAND_ERR(status)) { +do_die: die("unable to run '%s'", argv[0]); + } exit(-status); } errno = ENOENT; /* as if we called execvp */ argv[0] = tmp; - - strbuf_release(&cmd); + zfree(&cmd); } static int run_argv(int *argcp, const char ***argv) -- cgit v1.2.3 From 7e9551bc72201838ebaf388224ff43ddf2b0d853 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 25 Feb 2016 06:27:36 +0100 Subject: perf jvmti: improve error message in Makefile This patch improves the error message given by jvmti Makefile when the alternatives command cannot be found. It now suggests the user locates the root of their Java installation and pass it with JDIR= Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1456378056-18812-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/Makefile | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile index 0277a64b391b..5ce61a1bda9c 100644 --- a/tools/perf/jvmti/Makefile +++ b/tools/perf/jvmti/Makefile @@ -35,12 +35,21 @@ SOLIBEXT=so # The following works at least on fedora 23, you may need the next # line for other distros. 
-ifeq (,$(wildcard /usr/sbin/update-java-alternatives)) -JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') -else +ifneq (,$(wildcard /usr/sbin/update-java-alternatives)) JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +else + ifneq (,$(wildcard /usr/sbin/alternatives)) + JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + endif endif - +ifndef JDIR +$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory) +else + ifeq (,$(wildcard $(JDIR)/include/jvmti.h)) + $(error the openjdk development package appears to me missing, install and try again) + endif +endif +$(info Using Java from $(JDIR)) # -lrt required in 32-bit mode for clock_gettime() LIBS=-lelf -lrt INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux -- cgit v1.2.3 From bb109acc4adeae425147ca87b84d312ea40f24f1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Feb 2016 10:56:21 -0300 Subject: perf tools: Fix parsing of pmu events with empty list of modifiers In 1d55e8ef340d ("perf tools: Introduce opt_event_config nonterminal") I removed the unconditional "'/' '/'" for pmu events such as "intel_pt//" but forgot to use opt_event_config where it expected some event_config, oops. Fix it. Noticed when trying to use: # perf record -e intel_pt// -a sleep 1 event syntax error: 'intel_pt//' \___ parser error Run 'perf list' for a list of valid events Usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. 
use 'perf list' to list available events # Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 1d55e8ef340d ("perf tools: Introduce opt_event_config nonterminal") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d1fbcabbe70d..85c44ba79cad 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -217,14 +217,14 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME '/' event_config '/' +PE_NAME opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, $3)); - parse_events_terms__delete($3); + ABORT_ON(parse_events_add_pmu(data, list, $1, $2)); + parse_events_terms__delete($2); $$ = list; } | -- cgit v1.2.3 From 8579aca3f9a8f890d6d94ccaed7cf5fd54a0c3bd Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 26 Feb 2016 00:12:59 +0900 Subject: perf script: Exception handling when the print fmt is empty After collecting samples for events 'syscalls:', perf-script with python script doesn't occasionally work generating a segmentation fault. The reason is that the print fmt is empty and a value of event->print_fmt.args is NULL, so dereferencing the null pointer results in a segmentation fault i.e.: # perf record -e syscalls:* # perf script -g python # perf script -s perf-script.py in trace_begin syscalls__sys_enter_brk 3 79841.832099154 3777 test.sh syscall_nr=12, brk=0 ... (omitted) ... Segmentation fault (core dumped) For example, a format of sys_enter_getuid() hasn't print fmt as below. 
# cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_getuid/format name: sys_enter_getuid ID: 188 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:int syscall_nr; offset:8; size:4; signed:1; print fmt: "" So add exception handling to avoid this problem. Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/1456413179-12331-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-perl.c | 3 +++ tools/perf/util/scripting-engines/trace-event-python.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 544509c159ce..b3aabc0d4eb0 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -187,6 +187,9 @@ static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { + if (args == NULL) + return; + switch (args->type) { case PRINT_NULL: break; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d72fafc1c800..309d90fa7698 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -205,6 +205,9 @@ static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { + if (args == NULL) + return; + switch (args->type) { case PRINT_NULL: break; -- cgit v1.2.3 From 8560bae02a948876b26d1d86423cf5e0bb04a815 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 26 Feb 2016 00:13:10 
+0900 Subject: perf script: Remove duplicated code and needless script_spec__findnew() script_spec_register() called two functions: script_spec__find() and script_spec__findnew(). But this way script_spec__find() gets called two times, directly and via script_spec__findnew(). So remove script_spec__findnew() and make script_spec_register() only call once script_spec__find(). Signed-off-by: Taeung Song Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1456413190-12378-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ec4fbd410a4b..57f9a7e7f7d3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1212,23 +1212,6 @@ static struct script_spec *script_spec__find(const char *spec) return NULL; } -static struct script_spec *script_spec__findnew(const char *spec, - struct scripting_ops *ops) -{ - struct script_spec *s = script_spec__find(spec); - - if (s) - return s; - - s = script_spec__new(spec, ops); - if (!s) - return NULL; - - script_spec__add(s); - - return s; -} - int script_spec_register(const char *spec, struct scripting_ops *ops) { struct script_spec *s; @@ -1237,9 +1220,11 @@ int script_spec_register(const char *spec, struct scripting_ops *ops) if (s) return -1; - s = script_spec__findnew(spec, ops); + s = script_spec__new(spec, ops); if (!s) return -1; + else + script_spec__add(s); return 0; } -- cgit v1.2.3 From a7b5895b91fb97f2b0dcc2e3ce47413c18d19ca5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:16 +0900 Subject: perf hists: Add more helper functions for the hierarchy mode The hists__overhead_width() is to calculate width occupied by the overhead (and others) columns before the sort columns. 
The hist_entry__has_hierarchy_children() is to check whether an entry has lower entries (children) in the hierarchy to be shown in the output. This means the children must not be filtered out and must be above the percent limit. These two functions will be used to show information when all children of an entry are omitted by the percent limit (or filter). Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 22 ++++++++++++++++++++++ tools/perf/util/hist.c | 25 +++++++++++++++++++++++++ tools/perf/util/hist.h | 3 +++ 3 files changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index edbf854e8e1c..7c0585c146e1 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -643,6 +643,28 @@ unsigned int hists__sort_list_width(struct hists *hists) return ret; } +unsigned int hists__overhead_width(struct hists *hists) +{ + struct perf_hpp_fmt *fmt; + int ret = 0; + bool first = true; + struct perf_hpp dummy_hpp; + + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) + break; + + if (first) + first = false; + else + ret += 2; + + ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); + } + + return ret; +} + void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) { if (perf_hpp__is_sort_entry(fmt)) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1c530428e087..e71691977a95 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1582,6 +1582,31 @@ struct rb_node *rb_hierarchy_prev(struct rb_node *node) return &he->rb_node; } +bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit) +{ + struct rb_node *node; + struct hist_entry *child; + float percent; + + if (he->leaf) + return
false; + + node = rb_first(&he->hroot_out); + child = rb_entry(node, struct hist_entry, rb_node); + + while (node && child->filtered) { + node = rb_next(node); + child = rb_entry(node, struct hist_entry, rb_node); + } + + if (node) + percent = hist_entry__get_percent_limit(child); + else + percent = 0; + + return node && percent >= limit; +} + static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 722aa447f705..da3e7b6e4615 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -410,6 +410,7 @@ static inline int script_browse(const char *script_opt __maybe_unused) #endif unsigned int hists__sort_list_width(struct hists *hists); +unsigned int hists__overhead_width(struct hists *hists); void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode); @@ -439,4 +440,6 @@ static inline struct rb_node *rb_hierarchy_next(struct rb_node *node) #define HIERARCHY_INDENT 3 +bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit); + #endif /* __PERF_HIST_H */ -- cgit v1.2.3 From bd4abd39db92225dde8335c37d6f4efb319f9cf2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:17 +0900 Subject: perf report: Show message for percent limit on stdio When the hierarchy mode is used, some entries might be omitted due to a percent limit or filter. In this case the output hierarchy is different than other entries. Add an informative message to users about this. For example, when a percent limit of 4% is applied: Before: # Overhead Command / Shared Object / Symbol # .............. .......................................... # 49.09% swapper 48.67% [kernel.vmlinux] 34.42% [k] intel_idle 11.51% firefox 8.87% libpthread-2.22.so 6.60% [.] __GI___libc_recvmsg 10.49% gnome-shell 4.74% libc-2.22.so 10.08% Xorg 6.11% libc-2.22.so 5.27% [.]
__memcpy_sse2_unaligned 6.15% perf Note that, gnome-shell/libc has no symbols and perf has no dso/symbols. With that patch the output will look like below: After: # Overhead Command / Shared Object / Symbol # .............. .......................................... # 49.09% swapper 48.67% [kernel.vmlinux] 34.42% [k] intel_idle 11.51% firefox 8.87% libpthread-2.22.so 6.60% [.] __GI___libc_recvmsg 10.49% gnome-shell 4.74% libc-2.22.so no entry >= 4.00% 10.08% Xorg 6.11% libc-2.22.so 5.27% [.] __memcpy_sse2_unaligned 6.15% perf no entry >= 4.00% Suggested-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 435eaaaf2f1d..b3bdfcb245f9 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -628,6 +628,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, bool first = true; size_t linesz; char *line = NULL; + unsigned indent; init_rem_hits(); @@ -704,6 +705,8 @@ print_entries: goto out; } + indent = hists__overhead_width(hists) + 4; + for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent; @@ -720,6 +723,20 @@ print_entries: if (max_rows && ++nr_rows >= max_rows) break; + /* + * If all children are filtered out or percent-limited, + * display "no entry >= x.xx%" message. 
+ */ + if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { + int nr_sort = hists->hpp_list->nr_sort_keys; + + print_hierarchy_indent(sep, nr_sort + h->depth + 1, spaces, fp); + fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); + + if (max_rows && ++nr_rows >= max_rows) + break; + } + if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(h->thread->mg, MAP__FUNCTION, fp); -- cgit v1.2.3 From 201fde73b111e7c31fdc0e9fa6bc4b73dfef699d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:18 +0900 Subject: perf hists browser: Cleanup hist_browser__update_percent_limit() The previous patch introduced the __rb_hierarchy_next() function with various move directions like HMD_FORCE_CHILD, but missed converting one place to use it. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 6bcd7670ce5f..904eaa719eb3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2477,12 +2477,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, min_callchain_hits, &callchain_param); next: - /* - * Tentatively set unfolded so that the rb_hierarchy_next() - * can toggle children of folded entries too.
- */ - he->unfolded = he->has_children; - nd = rb_hierarchy_next(nd); + nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); /* force to re-evaluate folding state of callchains */ he->init_have_children = false; -- cgit v1.2.3 From 79dded8776c2dc4d6e1229de69f4027e84d63673 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:19 +0900 Subject: perf hists browser: Show message for percent limit Like the stdio, it should show messages about omitted hierarchy entries. Please refer the previous commit for more details. As it needs to check an entry is omitted or not multiple times, add the has_no_entry field in the hist entry. Suggested-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 99 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/hist.c | 2 + tools/perf/util/sort.h | 1 + 3 files changed, 102 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 904eaa719eb3..71c6d510390f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -260,6 +260,9 @@ static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he, if (he->leaf) return callchain__count_rows(&he->sorted_chain); + if (he->has_no_entry) + return 1; + node = rb_first(&he->hroot_out); while (node) { float percent; @@ -409,10 +412,18 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser) /* account grand children */ if (symbol_conf.report_hierarchy) browser->b.nr_entries += child_rows - he->nr_rows; + + if (!he->leaf && he->nr_rows == 0) { + he->has_no_entry = true; + he->nr_rows = 1; + } } else { if (symbol_conf.report_hierarchy) browser->b.nr_entries -= child_rows - he->nr_rows; + if 
(he->has_no_entry) + he->has_no_entry = false; + he->nr_rows = 0; } @@ -545,6 +556,12 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) browser->nr_hierarchy_entries++; if (he->leaf) browser->nr_callchain_rows += he->nr_rows; + else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) { + browser->nr_hierarchy_entries++; + he->has_no_entry = true; + he->nr_rows = 1; + } else + he->has_no_entry = false; } } @@ -1412,6 +1429,75 @@ show_callchain: return printed; } +static int hist_browser__show_no_entry(struct hist_browser *browser, + unsigned short row, + int level, int nr_sort_keys) +{ + int width = browser->b.width; + bool current_entry = ui_browser__is_current_entry(&browser->b, row); + bool first = true; + int column = 0; + int ret; + struct perf_hpp_fmt *fmt; + + if (current_entry) { + browser->he_selection = NULL; + browser->selection = NULL; + } + + hist_browser__gotorc(browser, row, 0); + + if (current_entry && browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); + else + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + + ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT); + width -= level * HIERARCHY_INDENT; + + hists__for_each_format(browser->hists, fmt) { + if (perf_hpp__should_skip(fmt, browser->hists) || + column++ < browser->b.horiz_scroll) + continue; + + if (perf_hpp__is_sort_entry(fmt) || + perf_hpp__is_dynamic_entry(fmt)) + break; + + ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists)); + + if (first) { + /* for folded sign */ + first = false; + ret++; + } else { + /* space between columns */ + ret += 2; + } + + ui_browser__write_nstring(&browser->b, "", ret); + width -= ret; + } + + ui_browser__write_nstring(&browser->b, "", nr_sort_keys * HIERARCHY_INDENT); + width -= nr_sort_keys * HIERARCHY_INDENT; + + if (column >= browser->b.horiz_scroll) { + char buf[32]; + + ret = snprintf(buf, sizeof(buf), "no entry >= %.2f%%", 
browser->min_pcnt); + ui_browser__printf(&browser->b, " %s", buf); + width -= ret + 2; + } + + /* The scroll bar isn't being used */ + if (!browser->b.navkeypressed) + width += 1; + + ui_browser__write_nstring(&browser->b, "", width); + return 1; +} + static int advance_hpp_check(struct perf_hpp *hpp, int inc) { advance_hpp(hpp, inc); @@ -1575,6 +1661,14 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) row += hist_browser__show_hierarchy_entry(hb, h, row, h->depth, nr_sort); + if (row == browser->rows) + break; + + if (h->has_no_entry) { + hist_browser__show_no_entry(hb, row, h->depth, + nr_sort); + row++; + } } else { row += hist_browser__show_entry(hb, h, row); } @@ -2461,6 +2555,11 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { he = rb_entry(nd, struct hist_entry, rb_node); + if (he->has_no_entry) { + he->has_no_entry = false; + he->nr_rows = 0; + } + if (!he->leaf || !symbol_conf.use_callchain) goto next; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e71691977a95..75dc41d2dca9 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1625,6 +1625,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h /* force fold unfiltered entry for simplicity */ parent->unfolded = false; + parent->has_no_entry = false; parent->row_offset = 0; parent->nr_rows = 0; next: @@ -1637,6 +1638,7 @@ next: /* force fold unfiltered entry for simplicity */ h->unfolded = false; + h->has_no_entry = false; h->row_offset = 0; h->nr_rows = 0; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index a8d53ffe0916..25a5529a94e4 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -117,6 +117,7 @@ struct hist_entry { bool init_have_children; bool unfolded; bool has_children; + bool has_no_entry; }; }; char *srcline; -- cgit v1.2.3 From 2ddda792373065577eb9207370cb7a28395caffa Mon Sep 17 
00:00:00 2001 From: Namhyung Kim Date: Fri, 26 Feb 2016 21:13:20 +0900 Subject: perf report: Show message for percent limit on gtk Like the stdio, it should show messages about omitted hierarchy entries. Please refer the previous commit for more details. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456488800-28124-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/hists.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 7f343339eae7..a5758fdfbe1f 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -449,6 +449,17 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, perf_gtk__add_hierarchy_entries(hists, &he->hroot_out, store, &iter, hpp, min_pcnt); + + if (!hist_entry__has_hierarchy_children(he, min_pcnt)) { + char buf[32]; + GtkTreeIter child; + + snprintf(buf, sizeof(buf), "no entry >= %.2f%%", + min_pcnt); + + gtk_tree_store_append(store, &child, &iter); + gtk_tree_store_set(store, &child, col_idx, buf, -1); + } } if (symbol_conf.use_callchain && he->leaf) { -- cgit v1.2.3 From 84b6ee8ea36ff797afa13c297a86ed0144482bee Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:43 +0900 Subject: perf hists: Fix comparing of dynamic entries When hist_entry__cmp() and hist_entry__collapse() are called, they should check if the dynamic entry is comparing matching hists only. Otherwise it might access different hists resulting in incorrect output. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 75dc41d2dca9..cc849d326211 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1002,6 +1002,10 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) int64_t cmp = 0; hists__for_each_sort_list(hists, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + continue; + cmp = fmt->cmp(fmt, left, right); if (cmp) break; @@ -1018,6 +1022,10 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) int64_t cmp = 0; hists__for_each_sort_list(hists, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + continue; + cmp = fmt->collapse(fmt, left, right); if (cmp) break; -- cgit v1.2.3 From d3a72fd8187b7fa0014394c9dec95ba349b3301e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:44 +0900 Subject: perf report: Fix indentation of dynamic entries in hierarchy When dynamic entries are used in the hierarchy mode with multiple events, the output might not be aligned properly. In the hierarchy mode, the each sort column is indented using total number of sort keys. So it keeps track of number of sort keys when adding them. However a dynamic sort key can be added more than once when multiple events have same field names. This results in unnecessarily long indentation in the output. 
For example perf kmem records following events: $ perf evlist --trace-fields -i perf.data.kmem kmem:kmalloc: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags kmem:kmalloc_node: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags,node kmem:kfree: trace_fields: call_site,ptr kmem:kmem_cache_alloc: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags kmem:kmem_cache_alloc_node: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags,node kmem:kmem_cache_free: trace_fields: call_site,ptr kmem:mm_page_alloc: trace_fields: page,order,gfp_flags,migratetype kmem:mm_page_free: trace_fields: page,order As you can see, many field names shared between kmem events. So adding 'ptr' dynamic sort key alone will set nr_sort_keys to 6. And this adds many unnecessary spaces between columns. Before: $ perf report -i perf.data.kmem --hierarchy -s ptr -g none --stdio ... # Overhead ptr # ....................... ................................... # 99.89% 0xffff8803ffb79720 0.06% 0xffff8803d228a000 0.03% 0xffff8803f7678f00 0.00% 0xffff880401dc5280 0.00% 0xffff880406172380 0.00% 0xffff8803ffac3a00 0.00% 0xffff8803ffac1600 After: # Overhead ptr # ........ .................... 
# 99.89% 0xffff8803ffb79720 0.06% 0xffff8803d228a000 0.03% 0xffff8803f7678f00 0.00% 0xffff880401dc5280 0.00% 0xffff880406172380 0.00% 0xffff8803ffac3a00 0.00% 0xffff8803ffac1600 Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 6 +++--- tools/perf/ui/stdio/hist.c | 6 +++--- tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 19 +++++++++++++++++++ 4 files changed, 26 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 71c6d510390f..5f74c6723c53 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1547,7 +1547,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows struct perf_hpp_fmt *fmt; size_t ret = 0; int column = 0; - int nr_sort_keys = hists->hpp_list->nr_sort_keys; + int nr_sort_keys = hists->nr_sort_keys; bool first = true; ret = scnprintf(buf, size, " "); @@ -1632,7 +1632,7 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) u16 header_offset = 0; struct rb_node *nd; struct hist_browser *hb = container_of(browser, struct hist_browser, b); - int nr_sort = hb->hists->hpp_list->nr_sort_keys; + int nr_sort = hb->hists->nr_sort_keys; if (hb->show_headers) { hist_browser__show_headers(hb); @@ -1969,7 +1969,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), browser->min_pcnt); int printed = 0; - int nr_sort = browser->hists->hpp_list->nr_sort_keys; + int nr_sort = browser->hists->nr_sort_keys; while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); diff --git a/tools/perf/ui/stdio/hist.c 
b/tools/perf/ui/stdio/hist.c index b3bdfcb245f9..5733d6c196be 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -495,7 +495,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, size = hpp.size = bfsz; if (symbol_conf.report_hierarchy) { - int nr_sort = hists->hpp_list->nr_sort_keys; + int nr_sort = hists->nr_sort_keys; return hist_entry__hierarchy_fprintf(he, &hpp, nr_sort, hists, fp); @@ -529,7 +529,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, unsigned header_width = 0; struct perf_hpp_fmt *fmt; - nr_sort = hists->hpp_list->nr_sort_keys; + nr_sort = hists->nr_sort_keys; /* preserve max indent depth for column headers */ print_hierarchy_indent(sep, nr_sort, spaces, fp); @@ -728,7 +728,7 @@ print_entries: * display "no entry >= x.xx%" message. */ if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { - int nr_sort = hists->hpp_list->nr_sort_keys; + int nr_sort = hists->nr_sort_keys; print_hierarchy_indent(sep, nr_sort + h->depth + 1, spaces, fp); fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index da3e7b6e4615..da5e50586bfd 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -78,6 +78,7 @@ struct hists { u16 col_len[HISTC_NR_COLS]; int socket_filter; struct perf_hpp_list *hpp_list; + int nr_sort_keys; }; struct hist_entry_iter; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6bee8bdfb91b..2beb7a6360a4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2633,6 +2633,9 @@ out: int setup_sorting(struct perf_evlist *evlist) { int err; + struct hists *hists; + struct perf_evsel *evsel; + struct perf_hpp_fmt *fmt; err = __setup_sorting(evlist); if (err < 0) @@ -2644,6 +2647,22 @@ int setup_sorting(struct perf_evlist *evlist) return err; } + evlist__for_each(evlist, evsel) { + hists = evsel__hists(evsel); + hists->nr_sort_keys = 
perf_hpp_list.nr_sort_keys; + + /* + * If dynamic entries were used, it might add multiple + * entries to each evsel for a single field name. Set + * actual number of sort keys for each hists. + */ + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + hists->nr_sort_keys--; + } + } + reset_dimensions(); /* -- cgit v1.2.3 From cb1fab917206f822d1f905cbc45971eefdef361d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:45 +0900 Subject: perf report: Left align dynamic entries in hierarchy The dynamic entries are right-aligned unlike other entries since it usually has numeric value. But for the hierarchy mode, left alignment is more appropriate IMHO. Also trim spaces on the left so that we can easily identify the hierarchy. Before: $ perf report --hierarchy -i perf.data.kmem -s gfp_flags,ptr,bytes_req --stdio -g none ... # # Overhead gfp_flags / ptr / bytes_req # .............. ................................................................................................. # 91.67% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 37.50% 0xffff8803f7669400 37.50% 448 8.33% 0xffff8803f766be00 8.33% 96 4.17% 0xffff8800d156dc00 4.17% 704 After: # Overhead gfp_flags / ptr / bytes_req # .............. .................................... 
# 91.67% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 37.50% 0xffff8803f7669400 37.50% 448 8.33% 0xffff8803f766be00 8.33% 96 4.17% 0xffff8800d156dc00 4.17% 704 Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 16 ++++++++++++++-- tools/perf/ui/stdio/hist.c | 28 +++++++++++++++++++--------- 2 files changed, 33 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 5f74c6723c53..5ffffcb1e3c5 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1400,8 +1400,13 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, if (fmt->color) { width -= fmt->color(fmt, &hpp, entry); } else { + int i = 0; + width -= fmt->entry(fmt, &hpp, entry); - ui_browser__printf(&browser->b, "%s", s); + ui_browser__printf(&browser->b, "%s", ltrim(s)); + + while (isspace(s[i++])) + width++; } } @@ -1576,6 +1581,8 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows return ret; hists__for_each_format(hists, fmt) { + char *start; + if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) continue; if (perf_hpp__should_skip(fmt, hists)) @@ -1593,7 +1600,12 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows dummy_hpp.buf[ret] = '\0'; rtrim(dummy_hpp.buf); - ret = strlen(dummy_hpp.buf); + start = ltrim(dummy_hpp.buf); + ret = strlen(start); + + if (start != dummy_hpp.buf) + memmove(dummy_hpp.buf, start, ret + 1); + if (advance_hpp_check(&dummy_hpp, ret)) break; } diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 5733d6c196be..6d06fbb365b6 100644 --- a/tools/perf/ui/stdio/hist.c +++ 
b/tools/perf/ui/stdio/hist.c @@ -418,6 +418,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; char *buf = hpp->buf; + size_t size = hpp->size; int ret, printed = 0; bool first = true; @@ -457,6 +458,11 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, (nr_sort_key - 1) * HIERARCHY_INDENT + 2, ""); advance_hpp(hpp, ret); + printed += fprintf(fp, "%s", buf); + + hpp->buf = buf; + hpp->size = size; + /* * No need to call hist_entry__snprintf_alignment() since this * fmt is always the last column in the hierarchy mode. @@ -467,7 +473,11 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, else fmt->entry(fmt, hpp, he); - printed += fprintf(fp, "%s\n", buf); + /* + * dynamic entries are right-aligned but we want left-aligned + * in the hierarchy mode + */ + printed += fprintf(fp, "%s\n", ltrim(buf)); if (symbol_conf.use_callchain && he->leaf) { u64 total = hists__total_period(hists); @@ -525,6 +535,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, { bool first = true; int nr_sort; + int depth; unsigned width = 0; unsigned header_width = 0; struct perf_hpp_fmt *fmt; @@ -558,19 +569,16 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, if (!first) header_width += fprintf(fp, " / "); else { - header_width += fprintf(fp, "%s", sep ?: " "); + fprintf(fp, "%s", sep ?: " "); first = false; } fmt->header(fmt, hpp, hists_to_evsel(hists)); rtrim(hpp->buf); - header_width += fprintf(fp, "%s", hpp->buf); + header_width += fprintf(fp, "%s", ltrim(hpp->buf)); } - /* preserve max indent depth for combined sort headers */ - print_hierarchy_indent(sep, nr_sort, spaces, fp); - fprintf(fp, "\n# "); /* preserve max indent depth for initial dots */ @@ -590,6 +598,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, fprintf(fp, "%.*s", width, dots); } + depth = 0; 
hists__for_each_format(hists, fmt) { if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) continue; @@ -597,15 +606,16 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, continue; width = fmt->width(fmt, hpp, hists_to_evsel(hists)); + width += depth * HIERARCHY_INDENT; + if (width > header_width) header_width = width; + + depth++; } fprintf(fp, "%s%-.*s", sep ?: " ", header_width, dots); - /* preserve max indent depth for dots under sort headers */ - print_hierarchy_indent(sep, nr_sort, dots, fp); - fprintf(fp, "\n#\n"); return 2; -- cgit v1.2.3 From e049d4a3fa194c8aa0d3ca29a9b11b32387ca6e3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:46 +0900 Subject: perf hists: Fix dynamic entry display in hierarchy When dynamic sort key is used it might not show pretty printed output. This is because the trace output was not set only for the first dynamic sort key. During hierarchy_insert_entry() it missed to pass the trace_output to dynamic entries. Also even if it did, only first entry will have it. Subsequent entries might set it during collapsing stage but it's not guaranteed. Before: $ perf report --hierarchy --stdio -s ptr,bytes_req,gfp_flags -g none # # Overhead ptr / bytes_req / gfp_flags # .............. .......................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% 66080 10.42% 0xffff8803f766be00 8.33% 96 8.33% 66080 2.08% 512 2.08% 67280 After: # # Overhead ptr / bytes_req / gfp_flags # .............. .......................................... 
# 37.50% 0xffff8803f7669400 37.50% 448 37.50% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 10.42% 0xffff8803f766be00 8.33% 96 8.33% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 2.08% 512 2.08% GFP_KERNEL|GFP_NOWARN|GFP_REPEAT|GFP Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 2 +- tools/perf/util/sort.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index cc849d326211..9b3f582867d6 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1125,7 +1125,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, new->fmt = fmt; /* some fields are now passed to 'new' */ - if (perf_hpp__is_trace_entry(fmt)) + if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) he->trace_output = NULL; else new->trace_output = NULL; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2beb7a6360a4..d26c6b9fe348 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1764,6 +1764,9 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, if (hde->raw_trace) goto raw_field; + if (!he->trace_output) + he->trace_output = get_trace_output(he); + field = hde->field; namelen = strlen(field->name); str = he->trace_output; -- cgit v1.2.3 From abab5e7fcec16e526968f8a5448cd81c635705ce Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Feb 2016 03:52:47 +0900 Subject: perf report: Update column width of dynamic entries The column width of dynamic entries is updated when comparing hist entries. However some unique entries can miss the chance to update. So move the update to output resort stage to make sure every entry will get called before display. 
To do that, abuse ->sort callback to update the width when the third argument is NULL. When resorting entries in normal path, it never be NULL so it should be fine IMHO. Before: # Overhead ptr / bytes_req / gfp_flags # .............. .......................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 10.42% 0xffff8803f766be00 8.33% 96 8.33% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 2.08% 512 2.08% GFP_KERNEL|GFP_NOWARN|GFP_REPEAT|GFP <-- here After: # Overhead ptr / bytes_req / gfp_flags # .............. ..................................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 10.42% 0xffff8803f766be00 8.33% 96 8.33% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 2.08% 512 2.08% GFP_KERNEL|GFP_NOWARN|GFP_REPEAT|GFP_NOMEMALLOC Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1456512767-1164-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 11 +++++++++++ tools/perf/util/sort.c | 8 +++++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 9b3f582867d6..4b8b67bc0cd8 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1371,6 +1371,10 @@ static void hierarchy_insert_output_entry(struct rb_root *root, rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, root); + + /* update column width of dynamic entry */ + if (perf_hpp__is_dynamic_entry(he->fmt)) + he->fmt->sort(he->fmt, he, NULL); } static void hists__hierarchy_output_resort(struct hists *hists, @@ -1440,6 +1444,7 @@ static void __hists__insert_output_entry(struct rb_root *entries, struct rb_node **p = &entries->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; + struct 
perf_hpp_fmt *fmt; if (use_callchain) { if (callchain_param.mode == CHAIN_GRAPH_REL) { @@ -1466,6 +1471,12 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, entries); + + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + perf_hpp__defined_dynamic_entry(fmt, he->hists)) + fmt->sort(fmt, he, NULL); /* update column width */ + } } static void output_resort(struct hists *hists, struct ui_progress *prog, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index d26c6b9fe348..5888bfe9a193 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1816,6 +1816,11 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + if (b == NULL) { + update_dynamic_len(hde, a); + return 0; + } + field = hde->field; if (field->flags & FIELD_IS_DYNAMIC) { unsigned long long dyn; @@ -1830,9 +1835,6 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, } else { offset = field->offset; size = field->size; - - update_dynamic_len(hde, a); - update_dynamic_len(hde, b); } return memcmp(a->raw_data + offset, b->raw_data + offset, size); -- cgit v1.2.3 From b8cbb349061edda648463b086cfa869a7ab583af Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:51 +0000 Subject: perf config: Bring perf_default_config to the very beginning at main() Before this patch each subcommand calls perf_config() by itself, reading the default configuration together with subcommand specific options. If a subcommand doesn't have its own options, it needs to call 'perf_config(perf_default_config, NULL)' to ensure .perfconfig is loaded. This patch brings perf_config(perf_default_config, NULL) to the very start of main(), so subcommands don't need to do it. After this patch, 'llvm.clang-path' works for 'perf trace'.
Signed-off-by: Wang Nan Suggested-and-Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-4-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 2 -- tools/perf/builtin-help.c | 2 +- tools/perf/builtin-kmem.c | 4 ++-- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 4 ++-- tools/perf/perf.c | 2 ++ tools/perf/tests/llvm.c | 8 -------- tools/perf/util/color.c | 5 +++-- tools/perf/util/data-convert-bt.c | 2 +- tools/perf/util/help-unknown-cmd.c | 5 +++-- 10 files changed, 15 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 36ccc2b8827f..4d72359fd15a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1264,8 +1264,6 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) if (ret < 0) return ret; - perf_config(perf_default_config, NULL); - argc = parse_options(argc, argv, options, diff_usage, 0); if (symbol__init(NULL) < 0) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index f4dd2b48f90f..49d55e21b1b0 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -272,7 +272,7 @@ static int perf_help_config(const char *var, const char *value, void *cb) if (!prefixcmp(var, "man.")) return add_man_viewer_info(var, value); - return perf_default_config(var, value, cb); + return 0; } static struct cmdnames main_cmds, other_cmds; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 118010553d0c..4d3340cce9a0 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1834,7 +1834,7 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -static int kmem_config(const char *var, const char *value, void *cb) +static int kmem_config(const char *var, 
const char *value, void *cb __maybe_unused) { if (!strcmp(var, "kmem.default")) { if (!strcmp(value, "slab")) @@ -1847,7 +1847,7 @@ static int kmem_config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f4d8244449ca..7eea49f9ed46 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -90,7 +90,7 @@ static int report__config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } static int hist_iter__report_callback(struct hist_entry_iter *iter, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b86b623e8799..94af190f6843 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1065,7 +1065,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) return parse_callchain_top_opt(arg); } -static int perf_top_config(const char *var, const char *value, void *cb) +static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "top.call-graph")) var = "call-graph.record-mode"; /* fall-through */ @@ -1074,7 +1074,7 @@ static int perf_top_config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } static int diff --git a/tools/perf/perf.c b/tools/perf/perf.c index f6321194937f..aaee0a782747 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -548,6 +548,8 @@ int main(int argc, const char **argv) srandom(time(NULL)); + perf_config(perf_default_config, NULL); + /* get debugfs/tracefs mount point from /proc/mounts */ tracing_path_mount(); diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 70edcdfa5672..cff564fb4b66 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ 
-6,12 +6,6 @@ #include "tests.h" #include "debug.h" -static int perf_config_cb(const char *var, const char *val, - void *arg __maybe_unused) -{ - return perf_default_config(var, val, arg); -} - #ifdef HAVE_LIBBPF_SUPPORT static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { @@ -77,8 +71,6 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf, if (should_load_fail) *should_load_fail = bpf_source_table[idx].should_load_fail; - perf_config(perf_config_cb, NULL); - /* * Skip this test if user's .perfconfig doesn't set [llvm] section * and clang is not found in $PATH, and this is not perf test -v diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index e5fb88bab9e1..43e84aa27e4a 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -32,14 +32,15 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) return 0; } -int perf_color_default_config(const char *var, const char *value, void *cb) +int perf_color_default_config(const char *var, const char *value, + void *cb __maybe_unused) { if (!strcmp(var, "color.ui")) { perf_use_color_default = perf_config_colorbool(var, value, -1); return 0; } - return perf_default_config(var, value, cb); + return 0; } static int __color_vsnprintf(char *bf, size_t size, const char *color, diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index b722e57d5a87..6729f4d9df7c 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1117,7 +1117,7 @@ static int convert__config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } int bt_convert__perf2ctf(const char *input, const char *path, bool force) diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index dc1e41c9b054..43a98a4dc1e1 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -6,7 +6,8 @@ static int autocorrect; static struct 
cmdnames aliases; -static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) +static int perf_unknown_cmd_config(const char *var, const char *value, + void *cb __maybe_unused) { if (!strcmp(var, "help.autocorrect")) autocorrect = perf_config_int(var,value); @@ -14,7 +15,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) if (!prefixcmp(var, "alias.")) add_cmdname(&aliases, var + 6, strlen(var + 6)); - return perf_default_config(var, value, cb); + return 0; } static int levenshtein_compare(const void *p1, const void *p2) -- cgit v1.2.3 From fdf14720fbd02d406ac2c1c50444774b4c7eed9a Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:53 +0000 Subject: perf tools: Only set filter for tracepoints events perf_evlist__set_filter() tries to set filter to every evsel linked in the evlist. However, since filters can only be applied to tracepoints, checking type of evsel before calling perf_evsel__set_filter() would be better. 
Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Li Zefan Cc: Peter Zijlstra Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-6-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c42e1967e970..86a03836a83f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1223,6 +1223,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) int err = 0; evlist__for_each(evlist, evsel) { + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + continue; + err = perf_evsel__set_filter(evsel, filter); if (err) break; -- cgit v1.2.3 From ba50423530200659d4deb703a8f72d3b69bc13ce Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:54 +0000 Subject: perf trace: Call bpf__apply_obj_config in 'perf trace' Without this patch BPF map configuration is not applied. A command like this: # ./perf trace --ev bpf-output/no-inherit,name=evt/ \ --ev ./test_bpf_trace.c/map:channel.event=evt/ \ usleep 100000 loads BPF files without error, but since map:channel.event=evt is not applied, the bpf-output event does not work. This patch allows 'perf trace' to load and run BPF scripts.
Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Li Zefan Cc: Peter Zijlstra Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 20916dd77aac..254149ca5e1b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -33,6 +33,7 @@ #include "util/stat.h" #include "trace-event.h" #include "util/parse-events.h" +#include "util/bpf-loader.h" #include #include @@ -2586,6 +2587,16 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; + err = bpf__apply_obj_config(); + if (err) { + char errbuf[BUFSIZ]; + + bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); + pr_err("ERROR: Apply config to BPF failed: %s\n", + errbuf); + goto out_error_open; + } + /* * Better not use !target__has_task() here because we need to cover the * case where no threads were specified in the command line, but a -- cgit v1.2.3 From 1d6c9407d45dd622b277ca9f725da3cc9e95b5de Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:55 +0000 Subject: perf trace: Print content of bpf-output event With this patch the content of the BPF output event is printed by 'perf trace'. For example: # ./perf trace -a --ev bpf-output/no-inherit,name=evt/ \ --ev ./test_bpf_trace.c/map:channel.event=evt/ \ usleep 100000 ... 1.787 ( 0.004 ms): usleep/3832 nanosleep(rqtp: 0x7ffc78b18980 ) ... 1.787 ( ): evt:Raise a BPF event!..) 1.788 ( ): perf_bpf_probe:func_begin:(ffffffff810e97d0)) ... 101.866 (87.038 ms): gmain/1654 poll(ufds: 0x7f57a80008c0, nfds: 2, timeout_msecs: 1000 ) ... 101.866 ( ): evt:Raise a BPF event!..) 101.867 ( ): perf_bpf_probe:func_end:(ffffffff810e97d0 <- ffffffff81796173)) 101.869 (100.087 ms): usleep/3832 ...
[continued]: nanosleep()) = 0 ... (There is an extra ')' at the end of several lines. However, it is another problem, unrelated to this commit.) Where test_bpf_trace.c is: /************************ BEGIN **************************/ #include struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) static u64 (*ktime_get_ns)(void) = (void *)BPF_FUNC_ktime_get_ns; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; static inline int __attribute__((always_inline)) func(void *ctx, int type) { char output_str[] = "Raise a BPF event!"; char err_str[] = "BAD %d\n"; int err; err = perf_event_output(ctx, &channel, get_smp_processor_id(), &output_str, sizeof(output_str)); if (err) trace_printk(err_str, sizeof(err_str), err); return 1; } SEC("func_begin=sys_nanosleep") int func_begin(void *ctx) {return func(ctx, 1);} SEC("func_end=sys_nanosleep%return") int func_end(void *ctx) { return func(ctx, 2);} char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Li Zefan Cc: Peter Zijlstra Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-8-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'tools') diff --git 
a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 254149ca5e1b..26a337f939d8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2178,6 +2178,37 @@ out_dump: return 0; } +static void bpf_output__printer(enum binary_printer_ops op, + unsigned int val, void *extra) +{ + FILE *output = extra; + unsigned char ch = (unsigned char)val; + + switch (op) { + case BINARY_PRINT_CHAR_DATA: + fprintf(output, "%c", isprint(ch) ? ch : '.'); + break; + case BINARY_PRINT_DATA_BEGIN: + case BINARY_PRINT_LINE_BEGIN: + case BINARY_PRINT_ADDR: + case BINARY_PRINT_NUM_DATA: + case BINARY_PRINT_NUM_PAD: + case BINARY_PRINT_SEP: + case BINARY_PRINT_CHAR_PAD: + case BINARY_PRINT_LINE_END: + case BINARY_PRINT_DATA_END: + default: + break; + } +} + +static void bpf_output__fprintf(struct trace *trace, + struct perf_sample *sample) +{ + print_binary(sample->raw_data, sample->raw_size, 8, + bpf_output__printer, trace->output); +} + static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2190,7 +2221,9 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, fprintf(trace->output, "%s:", evsel->name); - if (evsel->tp_format) { + if (perf_evsel__is_bpf_output(evsel)) { + bpf_output__fprintf(trace, sample); + } else if (evsel->tp_format) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, trace->output); -- cgit v1.2.3 From 67d5268908283c187e0a460048a423256c2fb288 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Feb 2016 21:21:12 +0100 Subject: perf tools: Fix python extension build The util/python-ext-sources file contains source files required to build the python extension relative to $(srctree)/tools/perf, Such a file path $(FILE).c is handed over to the python extension build system, which builds the final object in the $(PYTHON_EXTBUILD)/tmp/$(FILE).o path. 
After the build is done all files from $(PYTHON_EXTBUILD)lib/ are carried as the result binaries. Above system fails when we add source file relative to ../lib, which we do for: ../lib/bitmap.c ../lib/find_bit.c ../lib/hweight.c ../lib/rbtree.c All above objects will be built like: $(PYTHON_EXTBUILD)/tmp/../lib/bitmap.c $(PYTHON_EXTBUILD)/tmp/../lib/find_bit.c $(PYTHON_EXTBUILD)/tmp/../lib/hweight.c $(PYTHON_EXTBUILD)/tmp/../lib/rbtree.c which accidentally happens to be final library path: $(PYTHON_EXTBUILD)/lib/ Changing setup.py to pass full paths of source files to Extension build class and thus keep all built objects under $(PYTHON_EXTBUILD)tmp directory. Reported-by: Jeff Bastian Signed-off-by: Jiri Olsa Tested-by: Josh Boyer Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org # v4.2+ Link: http://lkml.kernel.org/r/20160227201350.GB28494@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 1833103768cb..c8680984d2d6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') @@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] +# use full paths with source files +ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources) + perf = Extension('perf', sources = ext_sources, include_dirs = ['util/include'], -- cgit v1.2.3 From c42de706dad3f39c1f65e473a1d165ea33f8b6e8 Mon Sep 17 00:00:00 2001 
From: Taeung Song Date: Fri, 26 Feb 2016 22:14:25 +0900 Subject: perf trace: Check and discard not only 'nr' but also '__syscall_nr' Format fields of a syscall have the first variable '__syscall_nr' or 'nr' that means the syscall number. But it isn't relevant here so drop it. 'nr' among fields of syscall was renamed '__syscall_nr'. So add exception handling to drop '__syscall_nr' and modify the comment for this exception handling. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1456492465-5946-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 26a337f939d8..8dc98c598b1a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1725,8 +1725,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->args = sc->tp_format->format.fields; sc->nr_args = sc->tp_format->format.nr_fields; - /* drop nr field - not relevant here; does not exist on older kernels */ - if (sc->args && strcmp(sc->args->name, "nr") == 0) { + /* + * We need to check and discard the first variable '__syscall_nr' + * or 'nr' that mean the syscall number. It is needless here. + * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. + */ + if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { sc->args = sc->args->next; --sc->nr_args; } -- cgit v1.2.3 From a6745330789f25103e67011bcfeec908fcc3b341 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Feb 2016 09:01:28 -0500 Subject: tools lib traceevent: Split pevent_print_event() into specific functionality functions Currently there's a single function that is used to display a record's data in human readable format.
That's pevent_print_event(). Unfortunately, this gives little room for adding other output within the line without updating that function call. I've decided to split that function into 3 parts. pevent_print_event_task() which prints the task comm, pid and the CPU pevent_print_event_time() which outputs the record's timestamp pevent_print_event_data() which outputs the rest of the event data. pevent_print_event() now simply calls these three functions. To save time from doing the search for event from the record's type, I created a new helper function called pevent_find_event_by_record(), which returns the record's event, and this event has to be passed to the above functions. Signed-off-by: Steven Rostedt Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20160229090128.43a56704@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 136 ++++++++++++++++++++++++++++--------- tools/lib/traceevent/event-parse.h | 13 ++++ 2 files changed, 117 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 575e75174087..9a1e48afcf89 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5339,41 +5339,45 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) return false; } -void pevent_print_event(struct pevent *pevent, struct trace_seq *s, - struct pevent_record *record, bool use_trace_clock) +/** + * pevent_find_event_by_record - return the event from a given record + * @pevent: a handle to the pevent + * @record: The record to get the event from + * + * Returns the associated event for a given record, or NULL if non is + * is found. 
+ */ +struct event_format * +pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record) { - static const char *spaces = " "; /* 20 spaces */ - struct event_format *event; - unsigned long secs; - unsigned long usecs; - unsigned long nsecs; - const char *comm; - void *data = record->data; int type; - int pid; - int len; - int p; - bool use_usec_format; - - use_usec_format = is_timestamp_in_us(pevent->trace_clock, - use_trace_clock); - if (use_usec_format) { - secs = record->ts / NSECS_PER_SEC; - nsecs = record->ts - secs * NSECS_PER_SEC; - } if (record->size < 0) { do_warning("ug! negative record size %d", record->size); - return; + return NULL; } - type = trace_parse_common_type(pevent, data); + type = trace_parse_common_type(pevent, record->data); - event = pevent_find_event(pevent, type); - if (!event) { - do_warning("ug! no event found for type %d", type); - return; - } + return pevent_find_event(pevent, type); +} + +/** + * pevent_print_event_task - Write the event task comm, pid and CPU + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the tasks comm, pid and CPU to @s. 
+ */ +void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record) +{ + void *data = record->data; + const char *comm; + int pid; pid = parse_common_pid(pevent, data); comm = find_cmdline(pevent, pid); @@ -5381,9 +5385,43 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, if (pevent->latency_format) { trace_seq_printf(s, "%8.8s-%-5d %3d", comm, pid, record->cpu); - pevent_data_lat_fmt(pevent, s, record); } else trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); +} + +/** + * pevent_print_event_time - Write the event timestamp + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * @use_trace_clock: Set to parse according to the @pevent->trace_clock + * + * Writes the timestamp of the record into @s. + */ +void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record, + bool use_trace_clock) +{ + unsigned long secs; + unsigned long usecs; + unsigned long nsecs; + int p; + bool use_usec_format; + + use_usec_format = is_timestamp_in_us(pevent->trace_clock, + use_trace_clock); + if (use_usec_format) { + secs = record->ts / NSECS_PER_SEC; + nsecs = record->ts - secs * NSECS_PER_SEC; + } + + if (pevent->latency_format) { + trace_seq_printf(s, " %3d", record->cpu); + pevent_data_lat_fmt(pevent, s, record); + } else + trace_seq_printf(s, " [%03d]", record->cpu); if (use_usec_format) { if (pevent->flags & PEVENT_NSEC_OUTPUT) { @@ -5394,11 +5432,28 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, p = 6; } - trace_seq_printf(s, " %5lu.%0*lu: %s: ", - secs, p, usecs, event->name); + trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs); } else - trace_seq_printf(s, " %12llu: %s: ", - record->ts, event->name); + trace_seq_printf(s, " %12llu:", record->ts); +} + +/** + * 
pevent_print_event_data - Write the event data section + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the parsing of the record's data to @s. + */ +void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record) +{ + static const char *spaces = " "; /* 20 spaces */ + int len; + + trace_seq_printf(s, " %s: ", event->name); /* Space out the event names evenly. */ len = strlen(event->name); @@ -5408,6 +5463,23 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, pevent_event_info(s, event, record); } +void pevent_print_event(struct pevent *pevent, struct trace_seq *s, + struct pevent_record *record, bool use_trace_clock) +{ + struct event_format *event; + + event = pevent_find_event_by_record(pevent, record); + if (!event) { + do_warning("ug! no event found for type %d", + trace_parse_common_type(pevent, record->data)); + return; + } + + pevent_print_event_task(pevent, s, event, record); + pevent_print_event_time(pevent, s, event, record, use_trace_clock); + pevent_print_event_data(pevent, s, event, record); +} + static int events_id_cmp(const void *a, const void *b) { struct event_format * const * ea = a; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 706d9bc24066..9ffde377e89d 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -628,6 +628,16 @@ int pevent_register_print_string(struct pevent *pevent, const char *fmt, unsigned long long addr); int pevent_pid_is_registered(struct pevent *pevent, int pid); +void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record); +void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record 
*record, + bool use_trace_clock); +void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record); void pevent_print_event(struct pevent *pevent, struct trace_seq *s, struct pevent_record *record, bool use_trace_clock); @@ -694,6 +704,9 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id); struct event_format * pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name); +struct event_format * +pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record); + void pevent_data_lat_fmt(struct pevent *pevent, struct trace_seq *s, struct pevent_record *record); int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); -- cgit v1.2.3 From f9a5978ac4ede901fa73d7c28ae1c5d89bc2a46a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Mar 2016 10:53:48 +0100 Subject: perf tools: Fix locale handling in pmu parsing Ingo reported a regression in the display format of big numbers, which are missing separators (in default perf stat output). triton:~/tip> perf stat -a sleep 1 ... 127008602 cycles # 0.011 GHz 279538533 stalled-cycles-frontend # 220.09% frontend cycles idle 119213269 instructions # 0.94 insn per cycle This is caused by a recent change: perf stat: Check existence of frontend/backed stalled cycles that added a call to pmu_have_event, which subsequently calls perf_pmu__parse_scale, which has a bug in locale handling. The lc string returned from setlocale, which we use to store the old locale value, may be allocated in static storage. Get a dynamic copy to make it survive another setlocale call. $ perf stat ls ... 2,360,602 cycles # 3.080 GHz 2,703,090 instructions # 1.15 insn per cycle 546,031 branches # 712.511 M/sec Committer note: Since the patch introducing the regression didn't make it to perf/core, move it to just before where the regression was introduced, so that we don't break bisection for this feature. 
Reported-by: Ingo Molnar Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160303095348.GA24511@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ce61f79dbaae..d8cd038baed2 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -123,6 +123,17 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * */ lc = setlocale(LC_NUMERIC, NULL); + /* + * The lc string may be allocated in static storage, + * so get a dynamic copy to make it survive setlocale + * call below. + */ + lc = strdup(lc); + if (!lc) { + ret = -ENOMEM; + goto error; + } + /* * force to C locale to ensure kernel * scale string is converted correctly. @@ -135,6 +146,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * /* restore locale */ setlocale(LC_NUMERIC, lc); + free((char *) lc); + ret = 0; error: close(fd); -- cgit v1.2.3 From 9dec4473abe7967c204fe700baf5344ade34e9c8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 26 Feb 2016 16:27:56 -0800 Subject: perf stat: Check existence of frontend/backed stalled cycles Only put the frontend/backend stalled cycles into the default perf stat events when the CPU actually supports them. This avoids empty columns with --metric-only on newer Intel CPUs. 
Committer note: Before: $ perf stat ls Performance counter stats for 'ls': 1.080893 task-clock (msec) # 0.619 CPUs utilized 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.000 K/sec 97 page-faults # 0.090 M/sec 3,327,741 cycles # 3.079 GHz stalled-cycles-frontend stalled-cycles-backend 1,609,544 instructions # 0.48 insn per cycle 319,117 branches # 295.235 M/sec 12,246 branch-misses # 3.84% of all branches 0.001746508 seconds time elapsed $ After: $ perf stat ls Performance counter stats for 'ls': 0.693948 task-clock (msec) # 0.662 CPUs utilized 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.000 K/sec 95 page-faults # 0.137 M/sec 1,792,509 cycles # 2.583 GHz 1,599,047 instructions # 0.89 insn per cycle 316,328 branches # 455.838 M/sec 12,453 branch-misses # 3.94% of all branches 0.001048987 seconds time elapsed $ Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456532881-26621-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8c0bc0fe5179..24f222dd2a8a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1441,7 +1441,7 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) */ static int add_default_attributes(void) { - struct perf_event_attr default_attrs[] = { + struct perf_event_attr default_attrs0[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, @@ -1449,8 +1449,14 @@ static int add_default_attributes(void) { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, +}; + struct perf_event_attr frontend_attrs[] = { { .type = 
PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, +}; + struct perf_event_attr backend_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, +}; + struct perf_event_attr default_attrs1[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, @@ -1567,7 +1573,19 @@ static int add_default_attributes(void) } if (!evsel_list->nr_entries) { - if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) + return -1; + if (pmu_have_event("cpu", "stalled-cycles-frontend")) { + if (perf_evlist__add_default_attrs(evsel_list, + frontend_attrs) < 0) + return -1; + } + if (pmu_have_event("cpu", "stalled-cycles-backend")) { + if (perf_evlist__add_default_attrs(evsel_list, + backend_attrs) < 0) + return -1; + } + if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; } -- cgit v1.2.3 From 6122d57e9f7c6cb0f0aa276fbd3a12e3af826ef2 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:56 +0000 Subject: perf data: Support converting data from bpf_perf_event_output() bpf_perf_event_output() outputs data through sample->raw_data. This patch adds support to convert those data into CTF. A python script then can be used to process output data from BPF programs. Test result: # cat ./test_bpf_output_2.c /************************ BEGIN **************************/ #include struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) static u64 (*ktime_get_ns)(void) = (void *)BPF_FUNC_ktime_get_ns; static int (*trace_printk)(const char *fmt, int fmt_size, ...) 
= (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; static inline int __attribute__((always_inline)) func(void *ctx, int type) { struct { u64 ktime; int type; } __attribute__((packed)) output_data; char error_data[] = "Error: failed to output\n"; int err; output_data.type = type; output_data.ktime = ktime_get_ns(); err = perf_event_output(ctx, &channel, get_smp_processor_id(), &output_data, sizeof(output_data)); if (err) trace_printk(error_data, sizeof(error_data)); return 0; } SEC("func_begin=sys_nanosleep") int func_begin(void *ctx) {return func(ctx, 1);} SEC("func_end=sys_nanosleep%return") int func_end(void *ctx) { return func(ctx, 2);} char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # ./perf record -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output_2.c/map:channel.event=evt/ \ usleep 100000 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data (2 samples) ] # ./perf script usleep 14942 92503.198504: evt: ffffffff810e0ba1 sys_nanosleep (/lib/modules/4.3.0.... usleep 14942 92503.298562: evt: ffffffff810585e9 kretprobe_trampoline_holder (/lib.... # ./perf data convert --to-ctf ./out.ctf [ perf data convert: Converted 'perf.data' into CTF data './out.ctf' ] [ perf data convert: Converted and wrote 0.000 MB (2 samples) ] # babeltrace ./out.ctf [01:41:43.198504134] (+?.?????????) 
evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810E0BA1, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x32C0C07B, [1] = 0x5421, [2] = 0x1 ] } [01:41:43.298562257] (+0.100058123) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810585E9, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x38B77FAA, [1] = 0x5421, [2] = 0x2 ] } # cat ./test_bpf_output_2.py from babeltrace import TraceCollection tc = TraceCollection() tc.add_trace('./out.ctf', 'ctf') d = {1:[], 2:[]} for event in tc.events: if not event.name.startswith('evt'): continue raw_data = event['raw_data'] (time, type) = ((raw_data[0] + (raw_data[1] << 32)), raw_data[2]) d[type].append(time) print(list(map(lambda i: d[2][i] - d[1][i], range(len(d[1]))))); # python3 ./test_bpf_output_2.py [100056879] Committer note: Make sure you have python3-devel installed, not python-devel, which may be for python2, which will lead to some "PyInstance_Type" errors. Also make sure that you use the right libbabeltrace, because it is shipped in Fedora, for instance, but an older version. To build libbabeltrace's python binding one also needs to use: ./configure --enable-python-bindings And then set PYTHONPATH=/usr/local/lib64/python3.4/site-packages/. 
Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-9-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 112 +++++++++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 6729f4d9df7c..1f608a6e2c14 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -352,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw, return ret; } +static int +add_bpf_output_values(struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_sample *sample) +{ + struct bt_ctf_field_type *len_type, *seq_type; + struct bt_ctf_field *len_field, *seq_field; + unsigned int raw_size = sample->raw_size; + unsigned int nr_elements = raw_size / sizeof(u32); + unsigned int i; + int ret; + + if (nr_elements * sizeof(u32) != raw_size) + pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n", + raw_size, nr_elements * sizeof(u32) - raw_size); + + len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len"); + len_field = bt_ctf_field_create(len_type); + if (!len_field) { + pr_err("failed to create 'raw_len' for bpf output event\n"); + ret = -1; + goto put_len_type; + } + + ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements); + if (ret) { + pr_err("failed to set field value for raw_len\n"); + goto put_len_field; + } + ret = bt_ctf_event_set_payload(event, "raw_len", len_field); + if (ret) { + pr_err("failed to set payload to raw_len\n"); + goto put_len_field; + } + + seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data"); + seq_field = 
bt_ctf_field_create(seq_type); + if (!seq_field) { + pr_err("failed to create 'raw_data' for bpf output event\n"); + ret = -1; + goto put_seq_type; + } + + ret = bt_ctf_field_sequence_set_length(seq_field, len_field); + if (ret) { + pr_err("failed to set length of 'raw_data'\n"); + goto put_seq_field; + } + + for (i = 0; i < nr_elements; i++) { + struct bt_ctf_field *elem_field = + bt_ctf_field_sequence_get_field(seq_field, i); + + ret = bt_ctf_field_unsigned_integer_set_value(elem_field, + ((u32 *)(sample->raw_data))[i]); + + bt_ctf_field_put(elem_field); + if (ret) { + pr_err("failed to set raw_data[%d]\n", i); + goto put_seq_field; + } + } + + ret = bt_ctf_event_set_payload(event, "raw_data", seq_field); + if (ret) + pr_err("failed to set payload for raw_data\n"); + +put_seq_field: + bt_ctf_field_put(seq_field); +put_seq_type: + bt_ctf_field_type_put(seq_type); +put_len_field: + bt_ctf_field_put(len_field); +put_len_type: + bt_ctf_field_type_put(len_type); + return ret; +} + static int add_generic_values(struct ctf_writer *cw, struct bt_ctf_event *event, struct perf_evsel *evsel, @@ -597,6 +675,12 @@ static int process_sample_event(struct perf_tool *tool, return -1; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_values(event_class, event, sample); + if (ret) + return -1; + } + cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel)); if (cs) { if (is_flush_needed(cs)) @@ -744,6 +828,25 @@ static int add_tracepoint_types(struct ctf_writer *cw, return ret; } +static int add_bpf_output_types(struct ctf_writer *cw, + struct bt_ctf_event_class *class) +{ + struct bt_ctf_field_type *len_type = cw->data.u32; + struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex; + struct bt_ctf_field_type *seq_type; + int ret; + + ret = bt_ctf_event_class_add_field(class, len_type, "raw_len"); + if (ret) + return ret; + + seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len"); + if (!seq_type) + return -1; + + return 
bt_ctf_event_class_add_field(class, seq_type, "raw_data"); +} + static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, struct bt_ctf_event_class *event_class) { @@ -755,7 +858,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, * ctf event header * PERF_SAMPLE_READ - TODO * PERF_SAMPLE_CALLCHAIN - TODO - * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_RAW - tracepoint fields and BPF output + * are handled separately * PERF_SAMPLE_BRANCH_STACK - TODO * PERF_SAMPLE_REGS_USER - TODO * PERF_SAMPLE_STACK_USER - TODO @@ -824,6 +928,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) goto err; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_types(cw, event_class); + if (ret) + goto err; + } + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); if (ret) { pr("Failed to add event class into stream.\n"); -- cgit v1.2.3 From f8dd2d5ff953bc498d682ae8022439c940a7d5c4 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:57 +0000 Subject: perf data: Explicitly set byte order for integer types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After babeltrace commit 5cec03e402aa ("ir: copy variants and sequences when setting a field path"), 'perf data convert' gets incorrect result if there's bpf output data. For example: # perf data convert --to-ctf ./out.ctf # babeltrace ./out.ctf [10:44:31.186045346] (+?.?????????) 
evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810E7DD1, perf_tid = 23819, perf_pid = 23819, perf_id = 518, raw_len = 3, raw_data = [ [0] = 0xC028E32F, [1] = 0x815D0100, [2] = 0x1000000 ] } [10:44:31.286101003] (+0.100055657) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF8105B609, perf_tid = 23819, perf_pid = 23819, perf_id = 518, raw_len = 3, raw_data = [ [0] = 0x35D9F1EB, [1] = 0x15D81, [2] = 0x2 ] } The expected result of the first sample should be: raw_data = [ [0] = 0x2FE328C0, [1] = 0x15D81, [2] = 0x1 ] } however, 'perf data convert' outputs big-endian values to the resulting CTF file. The reason is an internal change (or a bug?) in babeltrace. Before this patch, at the first add_bpf_output_values(), the byte order of all integer types is uncertain (is 0, neither 1234 (le) nor 4321 (be)). It would be fixed by: perf_evlist__deliver_sample -> process_sample_event -> ctf_stream ... ->bt_ctf_trace_add_stream_class ->bt_ctf_field_type_structure_set_byte_order ->bt_ctf_field_type_integer_set_byte_order while creating the stream. However, the babeltrace commit mentioned above duplicates types in a sequence to prevent a potential conflict in the following call stack and links the newly allocated type into the 'raw_data' sequence: perf_evlist__deliver_sample -> process_sample_event -> ctf_stream ... -> bt_ctf_trace_add_stream_class -> bt_ctf_stream_class_resolve_types ... -> bt_ctf_field_type_sequence_copy ->bt_ctf_field_type_integer_copy This happens before the byte order is set, so only the newly allocated type is initialized; the byte order of the original type that perf chose to create the first raw_data is still uncertain. Byte order in CTF output is not related to byte order in perf.data. Setting it to anything other than BT_CTF_BYTE_ORDER_NATIVE solves this problem (only BT_CTF_BYTE_ORDER_NATIVE needs to be fixed). To minimize behavior changes, set the byte order according to the compile options. 
Signed-off-by: Wang Nan Cc: Jeremie Galarneau Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Jiri Olsa Cc: Jérémie Galarneau Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-10-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 1f608a6e2c14..811af89ce0bb 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1080,6 +1080,12 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) goto err; +#if __BYTE_ORDER == __BIG_ENDIAN + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); +#else + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); +#endif + pr2("Created type: INTEGER %d-bit %ssigned %s\n", size, sign ? "un" : "", hex ? "hex" : ""); return type; -- cgit v1.2.3 From d8871ea71281ed689dc3303d1b50eb00c5d06141 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:32:06 +0000 Subject: perf record: Use WARN_ONCE to replace 'if' condition Commits in a BPF patchkit will extract kernel and module synthesizing code into a separated function and call it multiple times. This patch replace 'if (err < 0)' using WARN_ONCE, makes sure the error message show one time. 
Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-19-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7d11162b6c41..9dec7e529832 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -33,6 +33,7 @@ #include "util/parse-regs-options.h" #include "util/llvm-utils.h" #include "util/bpf-loader.h" +#include "asm/bug.h" #include #include @@ -615,17 +616,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, machine); - if (err < 0) - pr_err("Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); err = perf_event__synthesize_modules(tool, process_synthesized_event, machine); - if (err < 0) - pr_err("Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/modules permission or run as root.\n"); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" + "Check /proc/modules permission or run as root.\n"); if (perf_guest) { machines__process_guests(&session->machines, -- cgit v1.2.3 From c45c86eb70964615bd13b4c1e647bf9ee60c3db9 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:32:07 +0000 Subject: perf record: Extract synthesize code to record__synthesize() Create record__synthesize(). It can be used to create tracking events for each perf.data after perf supporting splitting into multiple outputs. Signed-off-by: He Kuang Cc: Alexei Starovoitov Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-20-git-send-email-wangnan0@huawei.com Signed-off-by: Wang Nan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 125 +++++++++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 55 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9dec7e529832..cb583b49a175 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -486,6 +486,74 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); +static int record__synthesize(struct record *rec) +{ + struct perf_session *session = rec->session; + struct machine *machine = &session->machines.host; + struct perf_data_file *file = &rec->file; + struct record_opts *opts = &rec->opts; + struct perf_tool *tool = &rec->tool; + int fd = perf_data_file__fd(file); + int err = 0; + + if (file->is_pipe) { + err = perf_event__synthesize_attrs(tool, session, + process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + goto out; + } + + if (have_tracepoints(&rec->evlist->entries)) { + /* + * FIXME err <= 0 here actually means that + * there were no tracepoints so its not really + * an error, just that we don't need to + * synthesize anything. 
We really have to + * return this more properly and also + * propagate errors that now are calling die() + */ + err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, + process_synthesized_event); + if (err <= 0) { + pr_err("Couldn't record tracing data.\n"); + goto out; + } + rec->bytes_written += err; + } + } + + if (rec->opts.full_auxtrace) { + err = perf_event__synthesize_auxtrace_info(rec->itr, tool, + session, process_synthesized_event); + if (err) + goto out; + } + + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); + + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + + if (perf_guest) { + machines__process_guests(&session->machines, + perf_event__synthesize_guest_os, tool); + } + + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, + process_synthesized_event, opts->sample_address, + opts->proc_map_timeout); +out: + return err; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -580,61 +648,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) machine = &session->machines.host; - if (file->is_pipe) { - err = perf_event__synthesize_attrs(tool, session, - process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - goto out_child; - } - - if (have_tracepoints(&rec->evlist->entries)) { - /* - * FIXME err <= 0 here actually means that - * there were no tracepoints so its not really - * an error, just that we don't need to - * synthesize anything. 
We really have to - * return this more properly and also - * propagate errors that now are calling die() - */ - err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, - process_synthesized_event); - if (err <= 0) { - pr_err("Couldn't record tracing data.\n"); - goto out_child; - } - rec->bytes_written += err; - } - } - - if (rec->opts.full_auxtrace) { - err = perf_event__synthesize_auxtrace_info(rec->itr, tool, - session, process_synthesized_event); - if (err) - goto out_delete_session; - } - - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); - - err = perf_event__synthesize_modules(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/modules permission or run as root.\n"); - - if (perf_guest) { - machines__process_guests(&session->machines, - perf_event__synthesize_guest_os, tool); - } - - err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, - process_synthesized_event, opts->sample_address, - opts->proc_map_timeout); - if (err != 0) + err = record__synthesize(rec); + if (err < 0) goto out_child; if (rec->realtime_prio) { -- cgit v1.2.3 From e1ab48ba63ee6b2494a67bb60bf99692ecdaca7c Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:32:10 +0000 Subject: perf record: Introduce record__finish_output() to finish a perf.data Move code for finalizing 'perf.data' to record__finish_output(). It will be used by following commits to split output to multiple files. 
Signed-off-by: He Kuang Cc: Alexei Starovoitov Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-23-git-send-email-wangnan0@huawei.com Signed-off-by: Wang Nan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index cb583b49a175..46e2772f838e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -468,6 +468,29 @@ static void record__init_features(struct record *rec) perf_header__clear_feat(&session->header, HEADER_STAT); } +static void +record__finish_output(struct record *rec) +{ + struct perf_data_file *file = &rec->file; + int fd = perf_data_file__fd(file); + + if (file->is_pipe) + return; + + rec->session->header.data_size += rec->bytes_written; + file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); + + if (!rec->no_buildid) { + process_buildids(rec); + + if (rec->buildid_all) + dsos__hit_all(rec->session); + } + perf_session__write_header(rec->session, rec->evlist, fd, true); + + return; +} + static volatile int workload_exec_errno; /* @@ -785,18 +808,8 @@ out_child: /* this will be recalculated during process_buildids() */ rec->samples = 0; - if (!err && !file->is_pipe) { - rec->session->header.data_size += rec->bytes_written; - file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); - - if (!rec->no_buildid) { - process_buildids(rec); - - if (rec->buildid_all) - dsos__hit_all(rec->session); - } - perf_session__write_header(rec->session, rec->evlist, fd, true); - } + if (!err) + record__finish_output(rec); if (!err && !quiet) { char samples[128]; -- cgit v1.2.3 From 95c365617aa37878592f2f1c6c64e1abb19f0d4a Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:32:17 +0000 Subject: perf 
record: Ensure return non-zero rc when mmap fail perf_evlist__mmap_ex() can fail without setting errno (for example, when a condition check fails; in that case every syscall has succeeded). If this happens, record__open() incorrectly returns 0. Forcing rc to be set is a quick way to avoid this problem; otherwise we would have to follow every possible code path in perf_evlist__mmap_ex() to make sure there's at least one system call before returning an error. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-30-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 46e2772f838e..515510ecc76a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -324,7 +324,10 @@ try_again: } else { pr_err("failed to mmap with %d (%s)\n", errno, strerror_r(errno, msg, sizeof(msg))); - rc = -errno; + if (errno) + rc = -errno; + else + rc = -EINVAL; } goto out; } -- cgit v1.2.3 From 92a61f6412d3a09d6462252a522fa79c9290f405 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 29 Feb 2016 14:36:21 -0800 Subject: perf stat: Implement CSV metrics output Now support CSV output for metrics. With the new output callbacks this is relatively straightforward by creating new callbacks. This allows metrics to be plotted easily from CSV files. 
The new line callback needs to know the number of fields to skip them correctly Example output before: % perf stat -x, true 0.200687,,task-clock,200687,100.00 0,,context-switches,200687,100.00 0,,cpu-migrations,200687,100.00 40,,page-faults,200687,100.00 730871,,cycles,203601,100.00 551056,,stalled-cycles-frontend,203601,100.00 ,,stalled-cycles-backend,0,100.00 385523,,instructions,203601,100.00 78028,,branches,203601,100.00 3946,,branch-misses,203601,100.00 After: % perf stat -x, true .502457,,task-clock,502457,100.00,0.485,CPUs utilized 0,,context-switches,502457,100.00,0.000,K/sec 0,,cpu-migrations,502457,100.00,0.000,K/sec 45,,page-faults,502457,100.00,0.090,M/sec 644692,,cycles,509102,100.00,1.283,GHz 423470,,stalled-cycles-frontend,509102,100.00,65.69,frontend cycles idle ,,stalled-cycles-backend,0,100.00,,,, 492701,,instructions,509102,100.00,0.76,insn per cycle ,,,,,0.86,stalled cycles per insn 97767,,branches,509102,100.00,194.578,M/sec 4788,,branch-misses,509102,100.00,4.90,of all branches or easier readable $ perf stat -x, -o x.csv true $ column -s, -t x.csv 0.490635 task-clock 490635 100.00 0.489 CPUs utilized 0 context-switches 490635 100.00 0.000 K/sec 0 cpu-migrations 490635 100.00 0.000 K/sec 45 page-faults 490635 100.00 0.092 M/sec 629080 cycles 497698 100.00 1.282 GHz 409498 stalled-cycles-frontend 497698 100.00 65.09 frontend cycles idle stalled-cycles-backend 0 100.00 491424 instructions 497698 100.00 0.78 insn per cycle 0.83 stalled cycles per insn 97278 branches 497698 100.00 198.270 M/sec 4569 branch-misses 497698 100.00 4.70 of all branches Two new fields are added: metric value and metric name. v2: Split out function argument changes v3: Reenable metrics for real. v4: Fix wrong hunk from refactoring. v5: Remove extra "noise" printing (Jiri), but add it to the not counted case. Print empty metrics for not counted. v6: Avoid outputting metric on empty format. 
v7: Print metric at the end v8: Remove extra run, ena fields v9: Avoid extra new line for unsupported counters Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1456785386-19481-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 73 ++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/stat-shadow.c | 2 +- 2 files changed, 70 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 24f222dd2a8a..2ffb8221917a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -739,6 +739,7 @@ struct outstate { FILE *fh; bool newline; const char *prefix; + int nfields; }; #define METRIC_LEN 35 @@ -789,6 +790,43 @@ static void print_metric_std(void *ctx, const char *color, const char *fmt, fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); } +static void new_line_csv(void *ctx) +{ + struct outstate *os = ctx; + int i; + + fputc('\n', os->fh); + if (os->prefix) + fprintf(os->fh, "%s%s", os->prefix, csv_sep); + for (i = 0; i < os->nfields; i++) + fputs(csv_sep, os->fh); +} + +static void print_metric_csv(void *ctx, + const char *color __maybe_unused, + const char *fmt, const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); + return; + } + snprintf(buf, sizeof(buf), fmt, val); + vals = buf; + while (isspace(*vals)) + vals++; + ends = vals; + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + while (isspace(*unit)) + unit++; + fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -860,6 +898,22 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, nl = 
new_line_std; + if (csv_output) { + static int aggr_fields[] = { + [AGGR_GLOBAL] = 0, + [AGGR_THREAD] = 1, + [AGGR_NONE] = 1, + [AGGR_SOCKET] = 2, + [AGGR_CORE] = 2, + }; + + pm = print_metric_csv; + nl = new_line_csv; + os.nfields = 3; + os.nfields += aggr_fields[stat_config.aggr_mode]; + if (counter->cgrp) + os.nfields++; + } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { aggr_printout(counter, id, nr); @@ -880,7 +934,12 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, fprintf(stat_config.output, "%s%s", csv_sep, counter->cgrp->name); + if (!csv_output) + pm(&os, NULL, NULL, "", 0); + print_noise(counter, noise); print_running(run, ena); + if (csv_output) + pm(&os, NULL, NULL, "", 0); return; } @@ -893,14 +952,20 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, out.new_line = nl; out.ctx = &os; - if (!csv_output) - perf_stat__print_shadow_stats(counter, uval, + if (csv_output) { + print_noise(counter, noise); + print_running(run, ena); + } + + perf_stat__print_shadow_stats(counter, uval, stat_config.aggr_mode == AGGR_GLOBAL ? 
0 : cpu_map__id_to_cpu(id), &out); - print_noise(counter, noise); - print_running(run, ena); + if (!csv_output) { + print_noise(counter, noise); + print_running(run, ena); + } } static void print_aggr(char *prefix) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 4d8f18581b9b..367e220e93d5 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -310,8 +310,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); - out->new_line(ctxp); if (total && avg) { + out->new_line(ctxp); ratio = total / avg; print_metric(ctxp, NULL, "%7.2f ", "stalled cycles per insn", -- cgit v1.2.3 From 44d49a6002595ccb95712e86ad2857cd55207602 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 29 Feb 2016 14:36:22 -0800 Subject: perf stat: Support metrics in --per-core/socket mode Enable metrics printing in --per-core / --per-socket mode. We need to save the shadow metrics in a unique place. Always use the first CPU in the aggregation. Then use the same CPU to retrieve the shadow value later. 
Example output: % perf stat --per-core -a ./BC1s Performance counter stats for 'system wide': S0-C0 2 2966.020381 task-clock (msec) # 2.004 CPUs utilized (100.00%) S0-C0 2 49 context-switches # 0.017 K/sec (100.00%) S0-C0 2 4 cpu-migrations # 0.001 K/sec (100.00%) S0-C0 2 467 page-faults # 0.157 K/sec S0-C0 2 4,599,061,773 cycles # 1.551 GHz (100.00%) S0-C0 2 9,755,886,883 instructions # 2.12 insn per cycle (100.00%) S0-C0 2 1,906,272,125 branches # 642.704 M/sec (100.00%) S0-C0 2 81,180,867 branch-misses # 4.26% of all branches S0-C1 2 2965.995373 task-clock (msec) # 2.003 CPUs utilized (100.00%) S0-C1 2 62 context-switches # 0.021 K/sec (100.00%) S0-C1 2 8 cpu-migrations # 0.003 K/sec (100.00%) S0-C1 2 281 page-faults # 0.095 K/sec S0-C1 2 6,347,290 cycles # 0.002 GHz (100.00%) S0-C1 2 4,654,156 instructions # 0.73 insn per cycle (100.00%) S0-C1 2 947,121 branches # 0.319 M/sec (100.00%) S0-C1 2 37,322 branch-misses # 3.94% of all branches 1.480409747 seconds time elapsed v2: Rebase to older patches v3: Document shadow cpus. Fix aggr_get_id argument. 
Fix -A shadows (Jiri) Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1456785386-19481-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 64 +++++++++++++++++++++++++++++++++++++------ tools/perf/util/stat-shadow.c | 7 +++++ 2 files changed, 63 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2ffb8221917a..9b5089c5dffe 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -740,6 +740,8 @@ struct outstate { bool newline; const char *prefix; int nfields; + int id, nr; + struct perf_evsel *evsel; }; #define METRIC_LEN 35 @@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); if (stat_config.aggr_mode == AGGR_NONE) fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_CORE) - fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_SOCKET) - fprintf(os->fh, " "); fprintf(os->fh, " "); } @@ -798,6 +797,7 @@ static void new_line_csv(void *ctx) fputc('\n', os->fh); if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(csv_sep, os->fh); } @@ -855,6 +855,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; + + if (!aggr_get_id) + return 0; + + if (stat_config.aggr_mode == AGGR_NONE) + return id; + + if (stat_config.aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = 
stat_config.output; @@ -891,7 +913,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, struct perf_stat_output_ctx out; struct outstate os = { .fh = stat_config.output, - .prefix = prefix ? prefix : "" + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, }; print_metric_t pm = print_metric_std; void (*nl)(void *); @@ -958,16 +983,37 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, } perf_stat__print_shadow_stats(counter, uval, - stat_config.aggr_mode == AGGR_GLOBAL ? 0 : - cpu_map__id_to_cpu(id), + first_shadow_cpu(counter, id), &out); - if (!csv_output) { print_noise(counter, noise); print_running(run, ena); } } +static void aggr_update_shadow(void) +{ + int cpu, s2, id, s; + u64 val; + struct perf_evsel *counter; + + for (s = 0; s < aggr_map->nr; s++) { + id = aggr_map->map[s]; + evlist__for_each(evsel_list, counter) { + val = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = aggr_get_id(evsel_list->cpus, cpu); + if (s2 != id) + continue; + val += perf_counts(counter->counts, cpu, 0)->val; + } + val = val * counter->scale; + perf_stat__update_shadow_stats(counter, &val, + first_shadow_cpu(counter, id)); + } + } +} + static void print_aggr(char *prefix) { FILE *output = stat_config.output; @@ -979,6 +1025,8 @@ static void print_aggr(char *prefix) if (!(aggr_map || aggr_get_id)) return; + aggr_update_shadow(); + for (s = 0; s < aggr_map->nr; s++) { id = aggr_map->map[s]; evlist__for_each(evsel_list, counter) { diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 367e220e93d5..5e2d2e34e1bc 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -14,6 +14,13 @@ enum { #define NUM_CTX CTX_BIT_MAX +/* + * AGGR_GLOBAL: Use CPU 0 + * AGGR_SOCKET: Use first CPU of socket + * AGGR_CORE: Use first CPU of core + * AGGR_NONE: Use matching CPU + * AGGR_THREAD: Not supported? 
+ */ static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -- cgit v1.2.3 From 676787939ef8ccfcf8039104f766ebe5ebe23924 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 1 Feb 2016 02:59:00 +0900 Subject: tools build: Use .s extension for preprocessed assembler code The "man gcc" says .i extension represents the file is C source code that should not be preprocessed. Here, .s should be used. For clarification, .c ---(preprocess)---> .i .S ---(preprocess)---> .s Signed-off-by: Masahiro Yamada Acked-by: Jiri Olsa Cc: Aaro Koskinen Cc: Adrian Hunter Cc: Lukas Wunner Link: http://lkml.kernel.org/r/1454263140-19670-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 4a96473b180f..ee566e8bd1cf 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -85,7 +85,7 @@ $(OUTPUT)%.i: %.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_i_c) -$(OUTPUT)%.i: %.S FORCE +$(OUTPUT)%.s: %.S FORCE $(call rule_mkdir) $(call if_changed_dep,cc_i_c) -- cgit v1.2.3 From 979ac257b00c53aacec3eacf86142e8c00bee889 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 1 Mar 2016 23:46:20 +0000 Subject: perf script: Fix double free on command_line The 'command_line' variable is free'd twice if db_export__branch_types() fails. To avoid this, defer the free'ing of 'command_line' to after this call so that the error return path will just free 'command_line' once. 
Signed-off-by: Colin Ian King Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: He Kuang Cc: Javi Merino Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1456875980-25606-1-git-send-email-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 309d90fa7698..fbd05242b4e5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1094,8 +1094,6 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } - free(command_line); - set_table_handlers(tables); if (tables->db_export_mode) { @@ -1104,6 +1102,8 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } + free(command_line); + return err; error: Py_Finalize(); -- cgit v1.2.3 From 21a30100453516004905d4d5f0806ebaffa95131 Mon Sep 17 00:00:00 2001 From: "Chaos.Chen" Date: Tue, 9 Feb 2016 15:40:14 -0500 Subject: tools lib traceevent: Fix time stamp rounding issue When rounding to microseconds, if the timestamp subsecond is between .999999500 and .999999999, it is rounded to .1000000, when it should instead increment the second counter due to the overflow. 
For example, if the timestamp is 1234.999999501 instead of seeing: 1235.000000 we see: 1234.1000000 Signed-off-by: Chaos.Chen Cc: Andrew Morton Link: http://lkml.kernel.org/r/20160209204236.824426460@goodmis.org [ fixed incrementing "secs" instead of decrementing it ] Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 9a1e48afcf89..ce59f4891fa2 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5429,6 +5429,11 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, p = 9; } else { usecs = (nsecs + 500) / NSECS_PER_USEC; + /* To avoid usecs larger than 1 sec */ + if (usecs >= 1000000) { + usecs -= 1000000; + secs++; + } p = 6; } -- cgit v1.2.3 From 9ec72eafee61f68cd57310a99db129ffb71302f3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 9 Feb 2016 15:40:16 -0500 Subject: tools lib traceevent: Set int_array fields to NULL if freeing from error Had a bug where on error of parsing __print_array() where the fields are freed after they were allocated, but since they were not set to NULL, the freeing of the arg also tried to free the already freed fields causing a double free. Fix process_hex() while at it. 
Signed-off-by: Steven Rostedt Cc: Andrew Morton Link: http://lkml.kernel.org/r/20160209204237.188327674@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index ce59f4891fa2..fb790aa757eb 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2635,6 +2635,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok) free_field: free_arg(arg->hex.field); + arg->hex.field = NULL; out: *tok = NULL; return EVENT_ERROR; @@ -2659,8 +2660,10 @@ process_int_array(struct event_format *event, struct print_arg *arg, char **tok) free_size: free_arg(arg->int_array.count); + arg->int_array.count = NULL; free_field: free_arg(arg->int_array.field); + arg->int_array.field = NULL; out: *tok = NULL; return EVENT_ERROR; -- cgit v1.2.3 From a66673a07e260807f570db8f08a9c1207932c665 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 9 Feb 2016 15:40:17 -0500 Subject: tools lib traceevent: Fix output of %llu for 64 bit values read on 32 bit machines When a long value is read on 32 bit machines for 64 bit output, the parsing needs to change "%lu" into "%llu", as the value is read natively. Unfortunately, if "%llu" is already there, the code will add another "l" to it and fail to parse it properly. 
Signed-off-by: Steven Rostedt Cc: Andrew Morton Link: http://lkml.kernel.org/r/20160209204237.337024613@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index fb790aa757eb..865dea55454b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4978,7 +4978,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event break; } } - if (pevent->long_size == 8 && ls && + if (pevent->long_size == 8 && ls == 1 && sizeof(long) != 8) { char *p; -- cgit v1.2.3 From 9b240637eb9b9677a9e9bc2dc568f5e0811e04d6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 2 Mar 2016 09:58:00 -0300 Subject: perf test: Fix hists related entries That got broken by d3a72fd8187b ("perf report: Fix indentation of dynamic entries in hierarchy"), by using the evlist in setup_sorting() without checking if it is NULL, as done in some 'perf test' entries: $ find tools/ -name "*.c" | xargs grep 'setup_sorting(NULL);' tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_cumulate.c: setup_sorting(NULL); tools/perf/tests/hists_cumulate.c: setup_sorting(NULL); tools/perf/tests/hists_cumulate.c: setup_sorting(NULL); tools/perf/tests/hists_cumulate.c: setup_sorting(NULL); $ Fix it. Before: [root@jouet ~]# perf test 15: Test matching and linking multiple hists : FAILED! 
16: Try 'import perf' in python, checking link problems : Ok 17: Test breakpoint overflow signal handler : Ok 18: Test breakpoint overflow sampling : Ok 19: Test number of exit event of a simple workload : Ok 20: Test software clock events have valid period values : Ok 21: Test object code reading : Ok 22: Test sample parsing : Ok 23: Test using a dummy software event to keep tracking : Ok 24: Test parsing with no sample_id_all bit set : Ok 25: Test filtering hist entries : FAILED! 26: Test mmap thread lookup : Ok 27: Test thread mg sharing : Ok 28: Test output sorting of hist entries : FAILED! 29: Test cumulation of child hist entries : FAILED! After the patch the above failed tests complete successfully. Acked-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Wang Nan Fixes: d3a72fd8187b ("perf report: Fix indentation of dynamic entries in hierarchy") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5888bfe9a193..4380a2858802 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2635,25 +2635,14 @@ out: return ret; } -int setup_sorting(struct perf_evlist *evlist) +static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist) { - int err; - struct hists *hists; struct perf_evsel *evsel; - struct perf_hpp_fmt *fmt; - - err = __setup_sorting(evlist); - if (err < 0) - return err; - - if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add("parent", evlist); - if (err < 0) - return err; - } evlist__for_each(evlist, evsel) { - hists = evsel__hists(evsel); + struct perf_hpp_fmt *fmt; + struct hists *hists = evsel__hists(evsel); + hists->nr_sort_keys = perf_hpp_list.nr_sort_keys; /* @@ -2667,6 +2656,24 @@ int setup_sorting(struct perf_evlist *evlist) hists->nr_sort_keys--; } } +} + +int setup_sorting(struct perf_evlist *evlist) +{ 
+ int err; + + err = __setup_sorting(evlist); + if (err < 0) + return err; + + if (parent_pattern != default_parent_pattern) { + err = sort_dimension__add("parent", evlist); + if (err < 0) + return err; + } + + if (evlist != NULL) + evlist__set_hists_nr_sort_keys(evlist); reset_dimensions(); -- cgit v1.2.3 From e17a0e16ca3a63d1bafbcba313586cf137418f45 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 2 Mar 2016 12:55:22 +0000 Subject: perf tests: Initialize sa.sa_flags The sa_flags field is not being initialized, so a garbage value is being passed to sigaction. Initialize it to zero. Signed-off-by: Colin Ian King Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1456923322-29697-1-git-send-email-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/rdpmc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 7bb0d13c235f..7945462851a4 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -103,6 +103,7 @@ static int __test__rdpmc(void) sigfillset(&sa.sa_mask); sa.sa_sigaction = segfault_handler; + sa.sa_flags = 0; sigaction(SIGSEGV, &sa, NULL); fd = sys_perf_event_open(&attr, 0, -1, -1, -- cgit v1.2.3 From 1b69317d2dc80bc8e1d005e1a771c4f5bff3dabe Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 2 Mar 2016 13:50:25 +0000 Subject: tools/power turbostat: fix various build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc 6 we're getting various build warnings that just require some trivial function declaration and call fixes: turbostat.c: In function ‘dump_cstate_pstate_config_info’: turbostat.c:1973:1: warning: type of ‘family’ defaults to ‘int’ dump_cstate_pstate_config_info(family, model) turbostat.c:1973:1: warning: type of ‘model’ defaults to ‘int’ turbostat.c: In function ‘get_tdp’: turbostat.c:2145:8: 
warning: type of ‘model’ defaults to ‘int’ double get_tdp(model) turbostat.c: In function ‘perf_limit_reasons_probe’: turbostat.c:2259:6: warning: type of ‘family’ defaults to ‘int’ void perf_limit_reasons_probe(family, model) turbostat.c:2259:6: warning: type of ‘model’ defaults to ‘int’ Signed-off-by: Colin Ian King Cc: Matt Fleming Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wbicer8n0s9qe6ql8h9x478e@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/power/x86/turbostat/turbostat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dac7e05a6ac..3fa94e291d16 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1970,7 +1970,7 @@ int has_config_tdp(unsigned int family, unsigned int model) } static void -dump_cstate_pstate_config_info(family, model) +dump_cstate_pstate_config_info(unsigned int family, unsigned int model) { if (!do_nhm_platform_info) return; @@ -2142,7 +2142,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp(model) +double get_tdp(unsigned int model) { unsigned long long msr; @@ -2256,7 +2256,7 @@ void rapl_probe(unsigned int family, unsigned int model) return; } -void perf_limit_reasons_probe(family, model) +void perf_limit_reasons_probe(unsigned int family, unsigned int model) { if (!genuine_intel) return; @@ -2792,7 +2792,7 @@ void process_cpuid() perf_limit_reasons_probe(family, model); if (debug) - dump_cstate_pstate_config_info(); + dump_cstate_pstate_config_info(family, model); if (has_skl_msrs(family, model)) calculate_tsc_tweak(); -- cgit v1.2.3 From fb4605ba47e772ff9d62d1d54218a832ec8b3e1d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 1 Mar 2016 10:57:52 
-0800 Subject: perf stat: Check for frontend stalled for metrics Add an extra check for frontend stalled in the metrics. This avoids an extra column for the --metric-only case when the CPU does not support frontend stalled. v2: Add separate init function Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1456858672-21594-8-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + tools/perf/util/stat-shadow.c | 9 ++++++++- tools/perf/util/stat.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9b5089c5dffe..baa82078c148 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1966,6 +1966,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, (const char **) stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); + perf_stat__init_shadow_stats(); if (csv_sep) { csv_output = true; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 5e2d2e34e1bc..b33ffb2af2cf 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -2,6 +2,7 @@ #include "evsel.h" #include "stat.h" #include "color.h" +#include "pmu.h" enum { CTX_BIT_USER = 1 << 0, @@ -35,9 +36,15 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; +static bool have_frontend_stalled; struct stats walltime_nsecs_stats; +void perf_stat__init_shadow_stats(void) +{ + have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); +} + static int evsel_context(struct perf_evsel *evsel) { int ctx = 0; @@ -323,7 +330,7 @@ void perf_stat__print_shadow_stats(struct 
perf_evsel *evsel, print_metric(ctxp, NULL, "%7.2f ", "stalled cycles per insn", ratio); - } else { + } else if (have_frontend_stalled) { print_metric(ctxp, NULL, NULL, "stalled cycles per insn", 0); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f02af68adc04..0150e786ccc7 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -72,6 +72,7 @@ typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val); typedef void (*new_line_t )(void *ctx); +void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, int cpu); -- cgit v1.2.3 From 07ef7574458369cb0345facc748e964af68a75f4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 7 Mar 2016 16:44:37 -0300 Subject: perf tools: Explicitly declare inc_group_count as a void function The return type is not defined, so it defaults to int, however, the function is not returning anything, so this is clearly not correct. Make it a void function. 
Signed-off-by: Colin Ian King Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: He Kuang Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/r/1457008214-14393-1-git-send-email-colin.king@canonical.com Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 85c44ba79cad..5be4a5f216d6 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -28,7 +28,7 @@ do { \ INIT_LIST_HEAD(list); \ } while (0) -static inc_group_count(struct list_head *list, +static void inc_group_count(struct list_head *list, struct parse_events_evlist *data) { /* Count groups only have more than 1 members */ -- cgit v1.2.3 From 640dad47988ec4b734d71934be103bb6e931279f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Mar 2016 16:44:38 -0300 Subject: perf inject: Hit all DSOs for AUX data in JIT and other cases Currently, when injecting build ids, if there is AUX data then 'perf inject' hits all DSOs because it is not known which DSOs the trace data would hit. That needs to be done for JIT injection also, and in fact there is no reason to distinguish what kind of injection is being done. That is, any time there is AUX data and the HEADER_BUILD_ID feature flag is set, and the AUX data is not being processed, then hit all DSOs. This patch does that.
Signed-off-by: Adrian Hunter Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1457005856-6143-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-inject.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b38445f08c2f..c6a4f2f94ab1 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -679,12 +679,16 @@ static int __cmd_inject(struct perf_inject *inject) ret = perf_session__process_events(session); if (!file_out->is_pipe) { - if (inject->build_ids) { + if (inject->build_ids) perf_header__set_feat(&session->header, HEADER_BUILD_ID); - if (inject->have_auxtrace) - dsos__hit_all(session); - } + /* + * Keep all buildids when there is unprocessed AUX data because + * it is not known which ones the AUX trace hits. + */ + if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) && + inject->have_auxtrace && !inject->itrace_synth_opts.set) + dsos__hit_all(session); /* * The AUX areas have been removed and replaced with * synthesized hardware events, so clear the feature flag and -- cgit v1.2.3 From 5fb0ac16c5091f48eecf1a77e461f6957a463d61 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Mar 2016 16:44:39 -0300 Subject: perf session: Simplify tool stubs Some of the stubs are identical so just have one function for them. 
Signed-off-by: Adrian Hunter Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1457005856-6143-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Ingo Molnar --- tools/perf/util/session.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 40b7a0d0905b..60b3593d210d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -240,14 +240,6 @@ static int process_event_stub(struct perf_tool *tool __maybe_unused, return 0; } -static int process_build_id_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe __maybe_unused) @@ -260,23 +252,6 @@ static int process_finished_round(struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); -static int process_id_index_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - static int skipn(int fd, off_t n) { char buf[4096]; @@ -303,10 +278,9 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, return event->auxtrace.size; } -static -int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - 
struct perf_session *session __maybe_unused) +static int process_event_op2_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -410,7 +384,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->tracing_data == NULL) tool->tracing_data = process_event_synth_tracing_data_stub; if (tool->build_id == NULL) - tool->build_id = process_build_id_stub; + tool->build_id = process_event_op2_stub; if (tool->finished_round == NULL) { if (tool->ordered_events) tool->finished_round = process_finished_round; @@ -418,13 +392,13 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->finished_round = process_finished_round_stub; } if (tool->id_index == NULL) - tool->id_index = process_id_index_stub; + tool->id_index = process_event_op2_stub; if (tool->auxtrace_info == NULL) - tool->auxtrace_info = process_event_auxtrace_info_stub; + tool->auxtrace_info = process_event_op2_stub; if (tool->auxtrace == NULL) tool->auxtrace = process_event_auxtrace_stub; if (tool->auxtrace_error == NULL) - tool->auxtrace_error = process_event_auxtrace_error_stub; + tool->auxtrace_error = process_event_op2_stub; if (tool->thread_map == NULL) tool->thread_map = process_event_thread_map_stub; if (tool->cpu_map == NULL) -- cgit v1.2.3 From 570735b33d122bcb259ef67c6aa63e5609af5752 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Mar 2016 16:44:40 -0300 Subject: perf jit: Let jit_process() return errors In preparation for moving clockid validation into jit_process(). Previously a return value of zero meant the processing had been done and non-zero meant either the processing was not done (i.e. not the jitdump file mmap event) or an error occurred. Change it so that zero means the processing was not done, one means the processing was done and successful, and negative values are an error. 
Signed-off-by: Adrian Hunter Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1457005856-6143-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-inject.c | 16 ++++++++++++---- tools/perf/util/jitdump.c | 6 ++++-- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index c6a4f2f94ab1..2512d71ca386 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -253,12 +253,16 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool, { struct perf_inject *inject = container_of(tool, struct perf_inject, tool); u64 n = 0; + int ret; /* * if jit marker, then inject jit mmaps and generate ELF images */ - if (!jit_process(inject->session, &inject->output, machine, - event->mmap.filename, sample->pid, &n)) { + ret = jit_process(inject->session, &inject->output, machine, + event->mmap.filename, sample->pid, &n); + if (ret < 0) + return ret; + if (ret) { inject->bytes_written += n; return 0; } @@ -287,12 +291,16 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, { struct perf_inject *inject = container_of(tool, struct perf_inject, tool); u64 n = 0; + int ret; /* * if jit marker, then inject jit mmaps and generate ELF images */ - if (!jit_process(inject->session, &inject->output, machine, - event->mmap2.filename, sample->pid, &n)) { + ret = jit_process(inject->session, &inject->output, machine, + event->mmap2.filename, sample->pid, &n); + if (ret < 0) + return ret; + if (ret) { inject->bytes_written += n; return 0; } diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 99fa5eee9fe0..bd9e44f9fff2 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -647,7 +647,7 @@ jit_process(struct perf_session *session, * first, detect 
marker mmap (i.e., the jitdump mmap) */ if (jit_detect(filename, pid)) - return -1; + return 0; memset(&jd, 0, sizeof(jd)); @@ -665,8 +665,10 @@ jit_process(struct perf_session *session, *nbytes = 0; ret = jit_inject(&jd, filename); - if (!ret) + if (!ret) { *nbytes = jd.bytes_written; + ret = 1; + } return ret; } -- cgit v1.2.3 From 4a018cc47932ef1e68a0600ce3ac100df70fab2a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Mar 2016 16:44:41 -0300 Subject: perf jit: Move clockid validation Move clockid validation into jit_process() so it can later be made conditional. Signed-off-by: Adrian Hunter Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1457005856-6143-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-inject.c | 24 ------------------------ tools/perf/util/jitdump.c | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 2512d71ca386..b2885776b602 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -729,23 +729,6 @@ static int __cmd_inject(struct perf_inject *inject) return ret; } -#ifdef HAVE_LIBELF_SUPPORT -static int -jit_validate_events(struct perf_session *session) -{ - struct perf_evsel *evsel; - - /* - * check that all events use CLOCK_MONOTONIC - */ - evlist__for_each(session->evlist, evsel) { - if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC) - return -1; - } - return 0; -} -#endif - int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_inject inject = { @@ -852,13 +835,6 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) } #ifdef HAVE_LIBELF_SUPPORT if (inject.jit_mode) { - /* - * validate event is using the 
correct clockid - */ - if (jit_validate_events(inject.session)) { - fprintf(stderr, "error, jitted code must be sampled with perf record -k 1\n"); - return -1; - } inject.tool.mmap2 = perf_event__jit_repipe_mmap2; inject.tool.mmap = perf_event__jit_repipe_mmap; inject.tool.ordered_events = true; diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index bd9e44f9fff2..cd272cc21e05 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -98,6 +98,21 @@ jit_close(struct jit_buf_desc *jd) jd->in = NULL; } +static int +jit_validate_events(struct perf_session *session) +{ + struct perf_evsel *evsel; + + /* + * check that all events use CLOCK_MONOTONIC + */ + evlist__for_each(session->evlist, evsel) { + if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC) + return -1; + } + return 0; +} + static int jit_open(struct jit_buf_desc *jd, const char *name) { @@ -157,6 +172,14 @@ jit_open(struct jit_buf_desc *jd, const char *name) goto error; } + /* + * validate event is using the correct clockid + */ + if (jit_validate_events(jd->session)) { + pr_err("error, jitted code must be sampled with perf record -k 1\n"); + goto error; + } + bs = header.total_size - sizeof(header); if (bs > bsz) { -- cgit v1.2.3 From a23f96ee4d51ebd50b83ce0dbb5d04898fb8e3cb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Mar 2016 16:44:42 -0300 Subject: perf tools: Use 64-bit shifts with (TSC) time conversion Commit b9511cd761fa ("perf/x86: Fix time_shift in perf_event_mmap_page") altered the time conversion algorithms documented in the perf_event.h header file, to use 64-bit shifts. That was done to make the code more future-proof (i.e. some time in the future a 32-bit shift could be allowed). Reflect those changes in perf tools. 
Signed-off-by: Adrian Hunter Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1457005856-6143-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Ingo Molnar --- tools/perf/arch/x86/tests/rdpmc.c | 2 +- tools/perf/util/tsc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 7945462851a4..72193f19d6d7 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -59,7 +59,7 @@ static u64 mmap_read_self(void *addr) u64 quot, rem; quot = (cyc >> time_shift); - rem = cyc & ((1 << time_shift) - 1); + rem = cyc & (((u64)1 << time_shift) - 1); delta = time_offset + quot * time_mult + ((rem * time_mult) >> time_shift); diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 4d4210d4e13d..1b741646eed0 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -19,7 +19,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) u64 quot, rem; quot = cyc >> tc->time_shift; - rem = cyc & ((1 << tc->time_shift) - 1); + rem = cyc & (((u64)1 << tc->time_shift) - 1); return tc->time_zero + quot * tc->time_mult + ((rem * tc->time_mult) >> tc->time_shift); } -- cgit v1.2.3 From 4b633eba14627bcb1ef5c7a498e7dc308cd6a5d6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Mar 2016 16:44:43 -0300 Subject: perf hists: Add level field to struct perf_hpp_fmt The level field is to distinguish levels in the hierarchy mode. Currently each column (perf_hpp_fmt) has a different level. 
Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/r/1457103582-28396-2-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 74 ++++++++++++++++++++++++++++---------------------- 2 files changed, 42 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index da5e50586bfd..f4ef513527ba 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -233,6 +233,7 @@ struct perf_hpp_fmt { int len; int user_len; int idx; + int level; }; struct perf_hpp_list { diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4380a2858802..ab6eb7ca8c60 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1544,7 +1544,7 @@ static void hse_free(struct perf_hpp_fmt *fmt) } static struct hpp_sort_entry * -__sort_dimension__alloc_hpp(struct sort_dimension *sd) +__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level) { struct hpp_sort_entry *hse; @@ -1572,6 +1572,7 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.elide = false; hse->hpp.len = 0; hse->hpp.user_len = 0; + hse->hpp.level = level; return hse; } @@ -1581,7 +1582,8 @@ static void hpp_free(struct perf_hpp_fmt *fmt) free(fmt); } -static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd) +static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd, + int level) { struct perf_hpp_fmt *fmt; @@ -1590,6 +1592,7 @@ static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd) INIT_LIST_HEAD(&fmt->list); INIT_LIST_HEAD(&fmt->sort_list); fmt->free = hpp_free; + fmt->level = level; } return fmt; @@ -1611,9 +1614,9 @@ int hist_entry__filter(struct hist_entry *he, int type, const void *arg) return 
hse->se->se_filter(he, type, arg); } -static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd) +static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, int level) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level); if (hse == NULL) return -1; @@ -1625,7 +1628,7 @@ static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd) static int __sort_dimension__add_hpp_output(struct perf_hpp_list *list, struct sort_dimension *sd) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0); if (hse == NULL) return -1; @@ -1868,7 +1871,8 @@ static void hde_free(struct perf_hpp_fmt *fmt) } static struct hpp_dynamic_entry * -__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field) +__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field, + int level) { struct hpp_dynamic_entry *hde; @@ -1899,6 +1903,7 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field) hde->hpp.elide = false; hde->hpp.len = 0; hde->hpp.user_len = 0; + hde->hpp.level = level; return hde; } @@ -1974,11 +1979,11 @@ static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_nam static int __dynamic_dimension__add(struct perf_evsel *evsel, struct format_field *field, - bool raw_trace) + bool raw_trace, int level) { struct hpp_dynamic_entry *hde; - hde = __alloc_dynamic_entry(evsel, field); + hde = __alloc_dynamic_entry(evsel, field, level); if (hde == NULL) return -ENOMEM; @@ -1988,14 +1993,14 @@ static int __dynamic_dimension__add(struct perf_evsel *evsel, return 0; } -static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace) +static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level) { int ret; struct format_field *field; field = evsel->tp_format->format.fields; while (field) { - ret = 
__dynamic_dimension__add(evsel, field, raw_trace); + ret = __dynamic_dimension__add(evsel, field, raw_trace, level); if (ret < 0) return ret; @@ -2004,7 +2009,8 @@ static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace) return 0; } -static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace) +static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace, + int level) { int ret; struct perf_evsel *evsel; @@ -2013,7 +2019,7 @@ static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace) if (evsel->attr.type != PERF_TYPE_TRACEPOINT) continue; - ret = add_evsel_fields(evsel, raw_trace); + ret = add_evsel_fields(evsel, raw_trace, level); if (ret < 0) return ret; } @@ -2021,7 +2027,7 @@ static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace) } static int add_all_matching_fields(struct perf_evlist *evlist, - char *field_name, bool raw_trace) + char *field_name, bool raw_trace, int level) { int ret = -ESRCH; struct perf_evsel *evsel; @@ -2035,14 +2041,15 @@ static int add_all_matching_fields(struct perf_evlist *evlist, if (field == NULL) continue; - ret = __dynamic_dimension__add(evsel, field, raw_trace); + ret = __dynamic_dimension__add(evsel, field, raw_trace, level); if (ret < 0) break; } return ret; } -static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok) +static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok, + int level) { char *str, *event_name, *field_name, *opt_name; struct perf_evsel *evsel; @@ -2072,12 +2079,12 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok) } if (!strcmp(field_name, "trace_fields")) { - ret = add_all_dynamic_fields(evlist, raw_trace); + ret = add_all_dynamic_fields(evlist, raw_trace, level); goto out; } if (event_name == NULL) { - ret = add_all_matching_fields(evlist, field_name, raw_trace); + ret = add_all_matching_fields(evlist, field_name, raw_trace, level); goto out; } @@ -2095,7 +2102,7 
@@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok) } if (!strcmp(field_name, "*")) { - ret = add_evsel_fields(evsel, raw_trace); + ret = add_evsel_fields(evsel, raw_trace, level); } else { field = pevent_find_any_field(evsel->tp_format, field_name); if (field == NULL) { @@ -2104,7 +2111,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok) return -ENOENT; } - ret = __dynamic_dimension__add(evsel, field, raw_trace); + ret = __dynamic_dimension__add(evsel, field, raw_trace, level); } out: @@ -2112,12 +2119,12 @@ out: return ret; } -static int __sort_dimension__add(struct sort_dimension *sd) +static int __sort_dimension__add(struct sort_dimension *sd, int level) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_sort(sd) < 0) + if (__sort_dimension__add_hpp_sort(sd, level) < 0) return -1; if (sd->entry->se_collapse) @@ -2128,14 +2135,14 @@ static int __sort_dimension__add(struct sort_dimension *sd) return 0; } -static int __hpp_dimension__add(struct hpp_dimension *hd) +static int __hpp_dimension__add(struct hpp_dimension *hd, int level) { struct perf_hpp_fmt *fmt; if (hd->taken) return 0; - fmt = __hpp_dimension__alloc_hpp(hd); + fmt = __hpp_dimension__alloc_hpp(hd, level); if (!fmt) return -1; @@ -2165,7 +2172,7 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list, if (hd->taken) return 0; - fmt = __hpp_dimension__alloc_hpp(hd); + fmt = __hpp_dimension__alloc_hpp(hd, 0); if (!fmt) return -1; @@ -2180,8 +2187,8 @@ int hpp_dimension__add_output(unsigned col) return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); } -static int sort_dimension__add(const char *tok, - struct perf_evlist *evlist __maybe_unused) +static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, + int level) { unsigned int i; @@ -2220,7 +2227,7 @@ static int sort_dimension__add(const char *tok, sort__has_thread = 1; } - return __sort_dimension__add(sd); + return 
__sort_dimension__add(sd, level); } for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { @@ -2229,7 +2236,7 @@ static int sort_dimension__add(const char *tok, if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __hpp_dimension__add(hd); + return __hpp_dimension__add(hd, level); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -2244,7 +2251,7 @@ static int sort_dimension__add(const char *tok, if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) sort__has_sym = 1; - __sort_dimension__add(sd); + __sort_dimension__add(sd, level); return 0; } @@ -2260,11 +2267,11 @@ static int sort_dimension__add(const char *tok, if (sd->entry == &sort_mem_daddr_sym) sort__has_sym = 1; - __sort_dimension__add(sd); + __sort_dimension__add(sd, level); return 0; } - if (!add_dynamic_entry(evlist, tok)) + if (!add_dynamic_entry(evlist, tok, level)) return 0; return -ESRCH; @@ -2274,10 +2281,11 @@ static int setup_sort_list(char *str, struct perf_evlist *evlist) { char *tmp, *tok; int ret = 0; + int level = 0; for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = sort_dimension__add(tok, evlist); + ret = sort_dimension__add(tok, evlist, level++); if (ret == -EINVAL) { error("Invalid --sort key: `%s'", tok); break; @@ -2667,7 +2675,7 @@ int setup_sorting(struct perf_evlist *evlist) return err; if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add("parent", evlist); + err = sort_dimension__add("parent", evlist, -1); if (err < 0) return err; } -- cgit v1.2.3 From f594bae08183fb6b57db55387794ece3e1edf6f6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 7 Mar 2016 16:44:44 -0300 Subject: perf stat: Document --detailed option I'm surprised this remained undocumented since at least 2011. And it is actually a very useful switch, as Steve and I came to realize recently. 
Add the text from 2cba3ffb9a9d ("perf stat: Add -d -d and -d -d -d options to show more CPU events") which added the incrementing aspect to -d. Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Borislav Petkov Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Davidlohr Bueso Cc: Jiri Olsa Cc: Mel Gorman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Fixes: 2cba3ffb9a9d ("perf stat: Add -d -d and -d -d -d options to show more CPU events") Link: http://lkml.kernel.org/r/1457347294-32546-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-stat.txt | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 52ef7a9d50aa..14d9e8ffaff7 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -69,6 +69,14 @@ report:: --scale:: scale/normalize counter values +-d:: +--detailed:: + print more detailed statistics, can be specified up to 3 times + + -d: detailed events, L1 and LLC data cache + -d -d: more detailed events, dTLB and iTLB events + -d -d -d: very detailed events, adding prefetch events + -r:: --repeat=:: repeat command and print average + stddev (max: 100). 0 means forever. -- cgit v1.2.3 From c3bc0c436899d01c3a09fddb308d487cc032fbd2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Mar 2016 16:44:45 -0300 Subject: perf hists: Introduce perf_hpp__setup_hists_formats() The perf_hpp__setup_hists_formats() is to build hists-specific output formats (and sort keys). Currently it's only used in order to build the output format in a hierarchy with same sort keys, but it could be used with different sort keys in non-hierarchy mode later. 
Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/r/1457361308-514-2-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- tools/perf/ui/hist.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/hist.c | 12 ++++++++++ tools/perf/util/hist.h | 11 +++++++++ tools/perf/util/sort.c | 32 +++++++++++++++++++++++++ 4 files changed, 118 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 7c0585c146e1..3a15e844f89a 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -5,6 +5,7 @@ #include "../util/util.h" #include "../util/sort.h" #include "../util/evsel.h" +#include "../util/evlist.h" /* hist period print (hpp) functions */ @@ -715,3 +716,65 @@ void perf_hpp__set_user_width(const char *width_list_str) break; } } + +static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt) +{ + struct perf_hpp_list_node *node = NULL; + struct perf_hpp_fmt *fmt_copy; + bool found = false; + + list_for_each_entry(node, &hists->hpp_formats, list) { + if (node->level == fmt->level) { + found = true; + break; + } + } + + if (!found) { + node = malloc(sizeof(*node)); + if (node == NULL) + return -1; + + node->level = fmt->level; + perf_hpp_list__init(&node->hpp); + + list_add_tail(&node->list, &hists->hpp_formats); + } + + fmt_copy = perf_hpp_fmt__dup(fmt); + if (fmt_copy == NULL) + return -1; + + list_add_tail(&fmt_copy->list, &node->hpp.fields); + list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts); + + return 0; +} + +int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + struct perf_hpp_fmt *fmt; + struct hists *hists; + int ret; + + if (!symbol_conf.report_hierarchy) + return 0; + + evlist__for_each(evlist, evsel) { + hists = 
evsel__hists(evsel); + + perf_hpp_list__for_each_sort_list(list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + continue; + + ret = add_hierarchy_fmt(hists, fmt); + if (ret < 0) + return ret; + } + } + + return 0; +} diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 4b8b67bc0cd8..fea92fcb6903 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2105,6 +2105,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) pthread_mutex_init(&hists->lock, NULL); hists->socket_filter = -1; hists->hpp_list = hpp_list; + INIT_LIST_HEAD(&hists->hpp_formats); return 0; } @@ -2133,8 +2134,19 @@ static void hists__delete_all_entries(struct hists *hists) static void hists_evsel__exit(struct perf_evsel *evsel) { struct hists *hists = evsel__hists(evsel); + struct perf_hpp_fmt *fmt, *pos; + struct perf_hpp_list_node *node, *tmp; hists__delete_all_entries(hists); + + list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { + list_del(&fmt->list); + free(fmt); + } + list_del(&node->list); + free(node); + } } static int hists_evsel__init(struct perf_evsel *evsel) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f4ef513527ba..3cab9dc20822 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -78,6 +78,7 @@ struct hists { u16 col_len[HISTC_NR_COLS]; int socket_filter; struct perf_hpp_list *hpp_list; + struct list_head hpp_formats; int nr_sort_keys; }; @@ -244,6 +245,12 @@ struct perf_hpp_list { extern struct perf_hpp_list perf_hpp_list; +struct perf_hpp_list_node { + struct list_head list; + struct perf_hpp_list hpp; + int level; +}; + void perf_hpp_list__column_register(struct perf_hpp_list *list, struct perf_hpp_fmt *format); void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, @@ -299,6 +306,8 @@ void perf_hpp__cancel_cumulate(void); void 
perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); void perf_hpp__append_sort_keys(struct perf_hpp_list *list); +int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, + struct perf_evlist *evlist); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); @@ -308,6 +317,8 @@ bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt); +struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt); + int hist_entry__filter(struct hist_entry *he, int type, const void *arg); static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ab6eb7ca8c60..71d45d147376 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1908,6 +1908,34 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field, return hde; } +struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt) +{ + struct perf_hpp_fmt *new_fmt = NULL; + + if (perf_hpp__is_sort_entry(fmt)) { + struct hpp_sort_entry *hse, *new_hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + new_hse = memdup(hse, sizeof(*hse)); + if (new_hse) + new_fmt = &new_hse->hpp; + } else if (perf_hpp__is_dynamic_entry(fmt)) { + struct hpp_dynamic_entry *hde, *new_hde; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + new_hde = memdup(hde, sizeof(*hde)); + if (new_hde) + new_fmt = &new_hde->hpp; + } else { + new_fmt = memdup(fmt, sizeof(*fmt)); + } + + INIT_LIST_HEAD(&new_fmt->list); + INIT_LIST_HEAD(&new_fmt->sort_list); + + return new_fmt; +} + static int parse_field_name(char *str, char **event, char **field, char **opt) { char *event_name, *field_name, *opt_name; @@ -2700,6 +2728,10 @@ int setup_sorting(struct perf_evlist *evlist) /* and then copy output fields to sort keys */ 
perf_hpp__append_sort_keys(&perf_hpp_list); + /* setup hists-specific output fields */ + if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0) + return -1; + return 0; } -- cgit v1.2.3 From 1b2dbbf41a0f4cf7a5662bccb9a18128d16e5ffb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Mar 2016 16:44:46 -0300 Subject: perf hists: Use own hpp_list for hierarchy mode Now each hists has its own hpp lists in hierarchy. So instead of having a pointer to a single perf_hpp_fmt in a hist entry, make it point the hpp_list for its level. This will be used to support multiple sort keys in a single hierarchy level. Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/r/1457361308-514-3-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- tools/perf/ui/browsers/hists.c | 45 +++++++++++++++++-------------- tools/perf/ui/gtk/hists.c | 20 +++++++++----- tools/perf/ui/hist.c | 5 ++++ tools/perf/ui/stdio/hist.c | 44 +++++++++++++++---------------- tools/perf/util/hist.c | 60 ++++++++++++++++++++++++------------------ tools/perf/util/hist.h | 1 + tools/perf/util/sort.h | 1 + 7 files changed, 103 insertions(+), 73 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 5ffffcb1e3c5..928b4825b752 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1388,25 +1388,26 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, HE_COLORSET_NORMAL); } - ui_browser__write_nstring(&browser->b, "", 2); - width -= 2; + perf_hpp_list__for_each_format(entry->hpp_list, fmt) { + ui_browser__write_nstring(&browser->b, "", 2); + width -= 2; - /* - * No need to call hist_entry__snprintf_alignment() - * since this fmt is always the last column in the - * hierarchy mode. 
- */ - fmt = entry->fmt; - if (fmt->color) { - width -= fmt->color(fmt, &hpp, entry); - } else { - int i = 0; + /* + * No need to call hist_entry__snprintf_alignment() + * since this fmt is always the last column in the + * hierarchy mode. + */ + if (fmt->color) { + width -= fmt->color(fmt, &hpp, entry); + } else { + int i = 0; - width -= fmt->entry(fmt, &hpp, entry); - ui_browser__printf(&browser->b, "%s", ltrim(s)); + width -= fmt->entry(fmt, &hpp, entry); + ui_browser__printf(&browser->b, "%s", ltrim(s)); - while (isspace(s[i++])) - width++; + while (isspace(s[i++])) + width++; + } } } @@ -1934,7 +1935,7 @@ static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, struct perf_hpp_fmt *fmt; bool first = true; int ret; - int hierarchy_indent = (nr_sort_keys + 1) * HIERARCHY_INDENT; + int hierarchy_indent = nr_sort_keys * HIERARCHY_INDENT; printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, ""); @@ -1962,9 +1963,13 @@ static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, ""); advance_hpp(&hpp, ret); - fmt = he->fmt; - ret = fmt->entry(fmt, &hpp, he); - advance_hpp(&hpp, ret); + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + ret = scnprintf(hpp.buf, hpp.size, " "); + advance_hpp(&hpp, ret); + + ret = fmt->entry(fmt, &hpp, he); + advance_hpp(&hpp, ret); + } printed += fprintf(fp, "%s\n", rtrim(s)); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index a5758fdfbe1f..4534e2d7669c 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -412,6 +412,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, for (node = rb_first(root); node; node = rb_next(node)) { GtkTreeIter iter; float percent; + char *bf; he = rb_entry(node, struct hist_entry, rb_node); if (he->filtered) @@ -437,13 +438,20 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, gtk_tree_store_set(store, &iter, col_idx++, 
hpp->buf, -1); } - fmt = he->fmt; - if (fmt->color) - fmt->color(fmt, hpp, he); - else - fmt->entry(fmt, hpp, he); + bf = hpp->buf; + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + int ret; + + if (fmt->color) + ret = fmt->color(fmt, hpp, he); + else + ret = fmt->entry(fmt, hpp, he); + + snprintf(hpp->buf + ret, hpp->size - ret, " "); + advance_hpp(hpp, ret + 2); + } - gtk_tree_store_set(store, &iter, col_idx, rtrim(hpp->buf), -1); + gtk_tree_store_set(store, &iter, col_idx, rtrim(bf), -1); if (!he->leaf) { perf_gtk__add_hierarchy_entries(hists, &he->hroot_out, diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 3a15e844f89a..95795ef4209b 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -722,6 +722,7 @@ static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt) struct perf_hpp_list_node *node = NULL; struct perf_hpp_fmt *fmt_copy; bool found = false; + bool skip = perf_hpp__should_skip(fmt, hists); list_for_each_entry(node, &hists->hpp_formats, list) { if (node->level == fmt->level) { @@ -735,6 +736,7 @@ static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt) if (node == NULL) return -1; + node->skip = skip; node->level = fmt->level; perf_hpp_list__init(&node->hpp); @@ -745,6 +747,9 @@ static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt) if (fmt_copy == NULL) return -1; + if (!skip) + node->skip = false; + list_add_tail(&fmt_copy->list, &node->hpp.fields); list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 6d06fbb365b6..073642a63cc9 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -451,33 +451,33 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, advance_hpp(hpp, ret); } - if (sep) - ret = scnprintf(hpp->buf, hpp->size, "%s", sep); - else + if (!sep) ret = scnprintf(hpp->buf, hpp->size, "%*s", - (nr_sort_key - 1) * HIERARCHY_INDENT + 2, ""); + 
(nr_sort_key - 1) * HIERARCHY_INDENT, ""); advance_hpp(hpp, ret); printed += fprintf(fp, "%s", buf); - hpp->buf = buf; - hpp->size = size; - - /* - * No need to call hist_entry__snprintf_alignment() since this - * fmt is always the last column in the hierarchy mode. - */ - fmt = he->fmt; - if (perf_hpp__use_color() && fmt->color) - fmt->color(fmt, hpp, he); - else - fmt->entry(fmt, hpp, he); - - /* - * dynamic entries are right-aligned but we want left-aligned - * in the hierarchy mode - */ - printed += fprintf(fp, "%s\n", ltrim(buf)); + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + hpp->buf = buf; + hpp->size = size; + + /* + * No need to call hist_entry__snprintf_alignment() since this + * fmt is always the last column in the hierarchy mode. + */ + if (perf_hpp__use_color() && fmt->color) + fmt->color(fmt, hpp, he); + else + fmt->entry(fmt, hpp, he); + + /* + * dynamic entries are right-aligned but we want left-aligned + * in the hierarchy mode + */ + printed += fprintf(fp, "%s%s", sep ?: " ", ltrim(buf)); + } + printed += putc('\n', fp); if (symbol_conf.use_callchain && he->leaf) { u64 total = hists__total_period(hists); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index fea92fcb6903..29da9e0d8db9 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1091,18 +1091,25 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he); static struct hist_entry *hierarchy_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he, - struct perf_hpp_fmt *fmt) + struct perf_hpp_list *hpp_list) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter, *new; + struct perf_hpp_fmt *fmt; int64_t cmp; while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node_in); - cmp = fmt->collapse(fmt, iter, he); + cmp = 0; + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { + cmp = fmt->collapse(fmt, iter, he); + if (cmp) + break; + } + if (!cmp) { 
he_stat__add_stat(&iter->stat, &he->stat); return iter; @@ -1121,24 +1128,26 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, hists__apply_filters(hists, new); hists->nr_entries++; - /* save related format for output */ - new->fmt = fmt; + /* save related format list for output */ + new->hpp_list = hpp_list; /* some fields are now passed to 'new' */ - if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) - he->trace_output = NULL; - else - new->trace_output = NULL; + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { + if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) + he->trace_output = NULL; + else + new->trace_output = NULL; - if (perf_hpp__is_srcline_entry(fmt)) - he->srcline = NULL; - else - new->srcline = NULL; + if (perf_hpp__is_srcline_entry(fmt)) + he->srcline = NULL; + else + new->srcline = NULL; - if (perf_hpp__is_srcfile_entry(fmt)) - he->srcfile = NULL; - else - new->srcfile = NULL; + if (perf_hpp__is_srcfile_entry(fmt)) + he->srcfile = NULL; + else + new->srcfile = NULL; + } rb_link_node(&new->rb_node_in, parent, p); rb_insert_color(&new->rb_node_in, root); @@ -1149,21 +1158,19 @@ static int hists__hierarchy_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he) { - struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *node; struct hist_entry *new_he = NULL; struct hist_entry *parent = NULL; int depth = 0; int ret = 0; - hists__for_each_sort_list(hists, fmt) { - if (!perf_hpp__is_sort_entry(fmt) && - !perf_hpp__is_dynamic_entry(fmt)) - continue; - if (perf_hpp__should_skip(fmt, hists)) + list_for_each_entry(node, &hists->hpp_formats, list) { + /* skip period (overhead) and elided columns */ + if (node->level == 0 || node->skip) continue; /* insert copy of 'he' for each fmt into the hierarchy */ - new_he = hierarchy_insert_entry(hists, root, he, fmt); + new_he = hierarchy_insert_entry(hists, root, he, &node->hpp); if (new_he == NULL) { ret = -1; break; @@ -1358,6 
+1365,7 @@ static void hierarchy_insert_output_entry(struct rb_root *root, struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; + struct perf_hpp_fmt *fmt; while (*p != NULL) { parent = *p; @@ -1373,8 +1381,10 @@ static void hierarchy_insert_output_entry(struct rb_root *root, rb_insert_color(&he->rb_node, root); /* update column width of dynamic entry */ - if (perf_hpp__is_dynamic_entry(he->fmt)) - he->fmt->sort(he->fmt, he, NULL); + perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt)) + fmt->sort(fmt, he, NULL); + } } static void hists__hierarchy_output_resort(struct hists *hists, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3cab9dc20822..2209188d729c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -249,6 +249,7 @@ struct perf_hpp_list_node { struct list_head list; struct perf_hpp_list hpp; int level; + bool skip; }; void perf_hpp_list__column_register(struct perf_hpp_list *list, diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 25a5529a94e4..ea1f722cffea 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -130,6 +130,7 @@ struct hist_entry { u32 raw_size; void *trace_output; struct perf_hpp_fmt *fmt; + struct perf_hpp_list *hpp_list; struct hist_entry *parent_he; union { /* this is for hierarchical entry structure */ -- cgit v1.2.3 From a23f37e864609f0887c1cb77c4d5b62586484a61 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Mar 2016 16:44:47 -0300 Subject: perf hists: Support multiple sort keys in a hierarchy level This implements having multiple sort keys in a single hierarchy level. Originally only single sort key is supported for each level, but now using the group syntax with '{ }', it can set more than one sort key in one level. Note that now it needs to quote in order to prevent shell interpretation. For example: $ perf report --hierarchy -s '{comm,dso},sym' ... 
# Overhead Command / Shared Object / Symbol # .............. .......................................... # 48.67% swapper [kernel.vmlinux] 34.42% [k] intel_idle 1.30% [k] __tick_nohz_idle_enter 1.03% [k] cpuidle_reflect 8.87% firefox libpthread-2.22.so 6.60% [.] __GI___libc_recvmsg 1.18% [.] pthread_cond_signal@@GLIBC_2.3.2 1.09% [.] 0x000000000000ff4b 6.11% Xorg libc-2.22.so 5.27% [.] __memcpy_sse2_unaligned In the above example, the command name and the shared object name are shown on the same line but the symbol name is on a different line. Since the first two are grouped by '{}', they are in the same level. Suggested-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/r/1457361308-514-4-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/sort.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 71d45d147376..041f236379e0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2310,18 +2310,40 @@ static int setup_sort_list(char *str, struct perf_evlist *evlist) char *tmp, *tok; int ret = 0; int level = 0; + int next_level = 1; + bool in_group = false; + + do { + tok = str; + tmp = strpbrk(str, "{}, "); + if (tmp) { + if (in_group) + next_level = level; + else + next_level = level + 1; + + if (*tmp == '{') + in_group = true; + else if (*tmp == '}') + in_group = false; + + *tmp = '\0'; + str = tmp + 1; + } - for (tok = strtok_r(str, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = sort_dimension__add(tok, evlist, level++); - if (ret == -EINVAL) { - error("Invalid --sort key: `%s'", tok); - break; - } else if (ret == -ESRCH) { - error("Unknown --sort 
key: `%s'", tok); - break; + if (*tok) { + ret = sort_dimension__add(tok, evlist, level); + if (ret == -EINVAL) { + error("Invalid --sort key: `%s'", tok); + break; + } else if (ret == -ESRCH) { + error("Unknown --sort key: `%s'", tok); + break; + } } - } + + level = next_level; + } while (tmp); return ret; } -- cgit v1.2.3 From 2dbbe9f26c082be5aa0e8ba5480e7bac43b2c4f0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Mar 2016 16:44:48 -0300 Subject: perf hists: Fix indent for multiple hierarchy sort key When multiple sort keys are used in a single hierarchy, it should indent using number of hierarchy levels instead of number of sort keys. Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/r/1457361308-514-5-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- tools/perf/ui/browsers/hists.c | 23 ++++++++++------------- tools/perf/ui/hist.c | 1 + tools/perf/ui/stdio/hist.c | 26 +++++++++++--------------- tools/perf/util/hist.h | 1 + 4 files changed, 23 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 928b4825b752..2f02ce79bd9d 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1280,7 +1280,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, struct hist_entry *entry, unsigned short row, - int level, int nr_sort_keys) + int level) { int printed = 0; int width = browser->b.width; @@ -1294,7 +1294,7 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, .current_entry = current_entry, }; int column = 0; - int hierarchy_indent = (nr_sort_keys - 1) * HIERARCHY_INDENT; + int hierarchy_indent = (entry->hists->nr_hpp_node - 2) * 
HIERARCHY_INDENT; if (current_entry) { browser->he_selection = entry; @@ -1436,8 +1436,7 @@ show_callchain: } static int hist_browser__show_no_entry(struct hist_browser *browser, - unsigned short row, - int level, int nr_sort_keys) + unsigned short row, int level) { int width = browser->b.width; bool current_entry = ui_browser__is_current_entry(&browser->b, row); @@ -1445,6 +1444,7 @@ static int hist_browser__show_no_entry(struct hist_browser *browser, int column = 0; int ret; struct perf_hpp_fmt *fmt; + int indent = browser->hists->nr_hpp_node - 2; if (current_entry) { browser->he_selection = NULL; @@ -1485,8 +1485,8 @@ static int hist_browser__show_no_entry(struct hist_browser *browser, width -= ret; } - ui_browser__write_nstring(&browser->b, "", nr_sort_keys * HIERARCHY_INDENT); - width -= nr_sort_keys * HIERARCHY_INDENT; + ui_browser__write_nstring(&browser->b, "", indent * HIERARCHY_INDENT); + width -= indent * HIERARCHY_INDENT; if (column >= browser->b.horiz_scroll) { char buf[32]; @@ -1553,7 +1553,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows struct perf_hpp_fmt *fmt; size_t ret = 0; int column = 0; - int nr_sort_keys = hists->nr_sort_keys; + int indent = hists->nr_hpp_node - 2; bool first = true; ret = scnprintf(buf, size, " "); @@ -1577,7 +1577,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows } ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", - (nr_sort_keys - 1) * HIERARCHY_INDENT, ""); + indent * HIERARCHY_INDENT, ""); if (advance_hpp_check(&dummy_hpp, ret)) return ret; @@ -1645,7 +1645,6 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) u16 header_offset = 0; struct rb_node *nd; struct hist_browser *hb = container_of(browser, struct hist_browser, b); - int nr_sort = hb->hists->nr_sort_keys; if (hb->show_headers) { hist_browser__show_headers(hb); @@ -1672,14 +1671,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) if 
(symbol_conf.report_hierarchy) { row += hist_browser__show_hierarchy_entry(hb, h, row, - h->depth, - nr_sort); + h->depth); if (row == browser->rows) break; if (h->has_no_entry) { - hist_browser__show_no_entry(hb, row, h->depth, - nr_sort); + hist_browser__show_no_entry(hb, row, h->depth); row++; } } else { diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 95795ef4209b..f03c4f70438f 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -740,6 +740,7 @@ static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt) node->level = fmt->level; perf_hpp_list__init(&node->hpp); + hists->nr_hpp_node++; list_add_tail(&node->list, &hists->hpp_formats); } diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 073642a63cc9..543d7137cc0c 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -412,7 +412,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) static int hist_entry__hierarchy_fprintf(struct hist_entry *he, struct perf_hpp *hpp, - int nr_sort_key, struct hists *hists, + struct hists *hists, FILE *fp) { const char *sep = symbol_conf.field_sep; @@ -453,7 +453,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, if (!sep) ret = scnprintf(hpp->buf, hpp->size, "%*s", - (nr_sort_key - 1) * HIERARCHY_INDENT, ""); + (hists->nr_hpp_node - 2) * HIERARCHY_INDENT, ""); advance_hpp(hpp, ret); printed += fprintf(fp, "%s", buf); @@ -504,12 +504,8 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, if (size == 0 || size > bfsz) size = hpp.size = bfsz; - if (symbol_conf.report_hierarchy) { - int nr_sort = hists->nr_sort_keys; - - return hist_entry__hierarchy_fprintf(he, &hpp, nr_sort, - hists, fp); - } + if (symbol_conf.report_hierarchy) + return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp); hist_entry__snprintf(he, &hpp); @@ -521,29 +517,29 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, return ret; } -static int 
print_hierarchy_indent(const char *sep, int nr_sort, +static int print_hierarchy_indent(const char *sep, int indent, const char *line, FILE *fp) { - if (sep != NULL || nr_sort < 1) + if (sep != NULL || indent < 2) return 0; - return fprintf(fp, "%-.*s", (nr_sort - 1) * HIERARCHY_INDENT, line); + return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line); } static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, const char *sep, FILE *fp) { bool first = true; - int nr_sort; + int indent; int depth; unsigned width = 0; unsigned header_width = 0; struct perf_hpp_fmt *fmt; - nr_sort = hists->nr_sort_keys; + indent = hists->nr_hpp_node; /* preserve max indent depth for column headers */ - print_hierarchy_indent(sep, nr_sort, spaces, fp); + print_hierarchy_indent(sep, indent, spaces, fp); hists__for_each_format(hists, fmt) { if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) @@ -582,7 +578,7 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, fprintf(fp, "\n# "); /* preserve max indent depth for initial dots */ - print_hierarchy_indent(sep, nr_sort, dots, fp); + print_hierarchy_indent(sep, indent, dots, fp); first = true; hists__for_each_format(hists, fmt) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2209188d729c..2cb017f28f9e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -80,6 +80,7 @@ struct hists { struct perf_hpp_list *hpp_list; struct list_head hpp_formats; int nr_sort_keys; + int nr_hpp_node; }; struct hist_entry_iter; -- cgit v1.2.3 From f58c95e344c26223c6503e6ecb0c1e11806d91e0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Mar 2016 16:44:49 -0300 Subject: perf report: Use hierarchy hpp list on stdio Now hpp formats are linked using perf_hpp_list_node when hierarchy is enabled. Use this info to print entries with multiple sort keys in a single hierarchy properly. For example, the below example shows using 4 sort keys with 2 levels. 
$ perf report --hierarchy -s '{prev_pid,prev_comm},{next_pid,next_comm}' \ --percent-limit 1 -i perf.data.sched ... # Overhead prev_pid+prev_comm / next_pid+next_comm # ........... ....................................... # 22.36% 0 swapper/0 9.48% 17773 transmission-gt 5.25% 109 kworker/0:1H 1.53% 6524 Xephyr 21.39% 17773 transmission-gt 9.52% 0 swapper/0 9.04% 0 swapper/2 1.78% 0 swapper/3 Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/r/1457361308-514-6-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- tools/perf/ui/stdio/hist.c | 103 +++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 543d7137cc0c..7aff5acf3265 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -417,6 +417,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, { const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; char *buf = hpp->buf; size_t size = hpp->size; int ret, printed = 0; @@ -428,10 +429,10 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, ""); advance_hpp(hpp, ret); - hists__for_each_format(he->hists, fmt) { - if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) - break; - + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { /* * If there's no field_sep, we still need * to display initial ' '. 
@@ -529,50 +530,49 @@ static int print_hierarchy_indent(const char *sep, int indent, static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, const char *sep, FILE *fp) { - bool first = true; + bool first_node, first_col; int indent; int depth; unsigned width = 0; unsigned header_width = 0; struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; indent = hists->nr_hpp_node; /* preserve max indent depth for column headers */ print_hierarchy_indent(sep, indent, spaces, fp); - hists__for_each_format(hists, fmt) { - if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) - break; - - if (!first) - fprintf(fp, "%s", sep ?: " "); - else - first = false; + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { fmt->header(fmt, hpp, hists_to_evsel(hists)); - fprintf(fp, "%s", hpp->buf); + fprintf(fp, "%s%s", hpp->buf, sep ?: " "); } /* combine sort headers with ' / ' */ - first = true; - hists__for_each_format(hists, fmt) { - if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) - continue; - if (perf_hpp__should_skip(fmt, hists)) - continue; - - if (!first) + first_node = true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) header_width += fprintf(fp, " / "); - else { - fprintf(fp, "%s", sep ?: " "); - first = false; - } + first_node = false; - fmt->header(fmt, hpp, hists_to_evsel(hists)); - rtrim(hpp->buf); + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) + header_width += fprintf(fp, "+"); + first_col = false; + + fmt->header(fmt, hpp, hists_to_evsel(hists)); + rtrim(hpp->buf); - header_width += fprintf(fp, "%s", ltrim(hpp->buf)); + header_width += fprintf(fp, "%s", ltrim(hpp->buf)); + } } fprintf(fp, "\n# "); @@ -580,29 +580,35 @@ 
static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, /* preserve max indent depth for initial dots */ print_hierarchy_indent(sep, indent, dots, fp); - first = true; - hists__for_each_format(hists, fmt) { - if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) - break; + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); - if (!first) - fprintf(fp, "%s", sep ?: " "); - else - first = false; + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (!first_col) + fprintf(fp, "%s", sep ?: ".."); + first_col = false; width = fmt->width(fmt, hpp, hists_to_evsel(hists)); fprintf(fp, "%.*s", width, dots); } depth = 0; - hists__for_each_format(hists, fmt) { - if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) - continue; - if (perf_hpp__should_skip(fmt, hists)) - continue; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + first_col = true; + width = depth * HIERARCHY_INDENT; - width = fmt->width(fmt, hpp, hists_to_evsel(hists)); - width += depth * HIERARCHY_INDENT; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) + width++; /* for '+' sign between column header */ + first_col = false; + + width += fmt->width(fmt, hpp, hists_to_evsel(hists)); + } if (width > header_width) header_width = width; @@ -621,6 +627,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp) { struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; struct rb_node *nd; size_t ret = 0; unsigned int width; @@ -650,6 +657,10 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); if (symbol_conf.report_hierarchy) { + list_for_each_entry(fmt_node, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format(&fmt_node->hpp, 
fmt) + perf_hpp__reset_width(fmt, hists); + } nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp); goto print_entries; } @@ -734,9 +745,9 @@ print_entries: * display "no entry >= x.xx%" message. */ if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { - int nr_sort = hists->nr_sort_keys; + int depth = hists->nr_hpp_node + h->depth + 1; - print_hierarchy_indent(sep, nr_sort + h->depth + 1, spaces, fp); + print_hierarchy_indent(sep, depth, spaces, fp); fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); if (max_rows && ++nr_rows >= max_rows) -- cgit v1.2.3 From a61a22f6845f9e86e0ca60d1d256a35ca12312ef Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Mar 2016 16:44:50 -0300 Subject: perf hists browser: Use hierarchy hpp list Now hpp formats are linked using perf_hpp_list_node when hierarchy is enabled. Like in stdio, use this info to print entries with multiple sort keys in a single hierarchy properly. Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/r/1457361308-514-7-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- tools/perf/ui/browsers/hists.c | 81 +++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 2f02ce79bd9d..e0e217ec856b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1289,6 +1289,7 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, off_t row_offset = entry->row_offset; bool first = true; struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; struct hpp_arg arg = { .b = &browser->b, .current_entry = current_entry, @@ -1320,7 +1321,10 @@ static int 
hist_browser__show_hierarchy_entry(struct hist_browser *browser, ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT); width -= level * HIERARCHY_INDENT; - hists__for_each_format(entry->hists, fmt) { + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&entry->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { char s[2048]; struct perf_hpp hpp = { .buf = s, @@ -1332,10 +1336,6 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, column++ < browser->b.horiz_scroll) continue; - if (perf_hpp__is_sort_entry(fmt) || - perf_hpp__is_dynamic_entry(fmt)) - break; - if (current_entry && browser->b.navkeypressed) { ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); @@ -1444,6 +1444,7 @@ static int hist_browser__show_no_entry(struct hist_browser *browser, int column = 0; int ret; struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; int indent = browser->hists->nr_hpp_node - 2; if (current_entry) { @@ -1461,15 +1462,14 @@ static int hist_browser__show_no_entry(struct hist_browser *browser, ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT); width -= level * HIERARCHY_INDENT; - hists__for_each_format(browser->hists, fmt) { + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&browser->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { if (perf_hpp__should_skip(fmt, browser->hists) || column++ < browser->b.horiz_scroll) continue; - if (perf_hpp__is_sort_entry(fmt) || - perf_hpp__is_dynamic_entry(fmt)) - break; - ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists)); if (first) { @@ -1551,22 +1551,23 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows .size = size, }; struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; size_t ret = 0; int column = 0; int indent = 
hists->nr_hpp_node - 2; - bool first = true; + bool first_node, first_col; ret = scnprintf(buf, size, " "); if (advance_hpp_check(&dummy_hpp, ret)) return ret; - hists__for_each_format(hists, fmt) { + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { if (column++ < browser->b.horiz_scroll) continue; - if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) - break; - ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); if (advance_hpp_check(&dummy_hpp, ret)) break; @@ -1581,34 +1582,42 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows if (advance_hpp_check(&dummy_hpp, ret)) return ret; - hists__for_each_format(hists, fmt) { - char *start; - - if (!perf_hpp__is_sort_entry(fmt) && !perf_hpp__is_dynamic_entry(fmt)) - continue; - if (perf_hpp__should_skip(fmt, hists)) - continue; - - if (first) { - first = false; - } else { + first_node = true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) { ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / "); if (advance_hpp_check(&dummy_hpp, ret)) break; } + first_node = false; - ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); - dummy_hpp.buf[ret] = '\0'; - rtrim(dummy_hpp.buf); + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + char *start; - start = ltrim(dummy_hpp.buf); - ret = strlen(start); + if (perf_hpp__should_skip(fmt, hists)) + continue; - if (start != dummy_hpp.buf) - memmove(dummy_hpp.buf, start, ret + 1); + if (!first_col) { + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "+"); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + first_col = false; - if (advance_hpp_check(&dummy_hpp, ret)) - break; + ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + dummy_hpp.buf[ret] = '\0'; + rtrim(dummy_hpp.buf); + + start = 
ltrim(dummy_hpp.buf); + ret = strlen(start); + + if (start != dummy_hpp.buf) + memmove(dummy_hpp.buf, start, ret + 1); + + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } } return ret; @@ -1676,7 +1685,7 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) break; if (h->has_no_entry) { - hist_browser__show_no_entry(hb, row, h->depth); + hist_browser__show_no_entry(hb, row, h->depth + 1); row++; } } else { -- cgit v1.2.3 From 58ecd33be90647724a78ce5e0b42f5bc482771fd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 7 Mar 2016 16:44:51 -0300 Subject: perf report: Use hierarchy hpp list on gtk Now hpp formats are linked using perf_hpp_list_node when hierarchy is enabled. Like in stdio, use this info to print entries with multiple sort keys in a single hierarchy properly. Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wang Nan Link: http://lkml.kernel.org/r/1457361308-514-8-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- tools/perf/ui/gtk/hists.c | 55 ++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 4534e2d7669c..bd9bf7e343b1 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -407,7 +407,9 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, struct rb_node *node; struct hist_entry *he; struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; u64 total = hists__total_period(hists); + int size; for (node = rb_first(root); node; node = rb_next(node)) { GtkTreeIter iter; @@ -425,11 +427,11 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, gtk_tree_store_append(store, &iter, parent); col_idx = 0; - hists__for_each_format(hists, fmt) { - if (perf_hpp__is_sort_entry(fmt) || - 
perf_hpp__is_dynamic_entry(fmt)) - break; + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { if (fmt->color) fmt->color(fmt, hpp, he); else @@ -439,6 +441,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, } bf = hpp->buf; + size = hpp->size; perf_hpp_list__for_each_format(he->hpp_list, fmt) { int ret; @@ -451,9 +454,12 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, advance_hpp(hpp, ret + 2); } - gtk_tree_store_set(store, &iter, col_idx, rtrim(bf), -1); + gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1); if (!he->leaf) { + hpp->buf = bf; + hpp->size = size; + perf_gtk__add_hierarchy_entries(hists, &he->hroot_out, store, &iter, hpp, min_pcnt); @@ -486,6 +492,7 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, float min_pcnt) { struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; GType col_types[MAX_COLUMNS]; GtkCellRenderer *renderer; GtkTreeStore *store; @@ -494,7 +501,7 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, int nr_cols = 0; char s[512]; char buf[512]; - bool first = true; + bool first_node, first_col; struct perf_hpp hpp = { .buf = s, .size = sizeof(s), @@ -514,11 +521,11 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, renderer = gtk_cell_renderer_text_new(); col_idx = 0; - hists__for_each_format(hists, fmt) { - if (perf_hpp__is_sort_entry(fmt) || - perf_hpp__is_dynamic_entry(fmt)) - break; + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), -1, fmt->name, renderer, "markup", @@ -527,20 +534,24 @@ static void perf_gtk__show_hierarchy(GtkWidget 
*window, struct hists *hists, /* construct merged column header since sort keys share single column */ buf[0] = '\0'; - hists__for_each_format(hists ,fmt) { - if (!perf_hpp__is_sort_entry(fmt) && - !perf_hpp__is_dynamic_entry(fmt)) - continue; - if (perf_hpp__should_skip(fmt, hists)) - continue; - - if (first) - first = false; - else + first_node = true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) strcat(buf, " / "); + first_node = false; - fmt->header(fmt, &hpp, hists_to_evsel(hists)); - strcat(buf, rtrim(hpp.buf)); + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp ,fmt) { + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) + strcat(buf, "+"); + first_col = false; + + fmt->header(fmt, &hpp, hists_to_evsel(hists)); + strcat(buf, ltrim(rtrim(hpp.buf))); + } } gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), -- cgit v1.2.3 From 3a99e6db539e53cc9c79282e80f8362b0cb96ac8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Mar 2016 10:40:01 +0100 Subject: perf bench mem: Prepare the x86-64 build for upstream memcpy_mcsafe() changes The following upcoming upstream commit: 92b0729c34ca ("x86/mm, x86/mce: Add memcpy_mcsafe()") Adds _ASM_EXTABLE_FAULT(), which is not available in user-space and breaks the build. We don't really need _ASM_EXTABLE_FAULT() in user-space, so simply wrap it to nothing. 
Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/bench/mem-memcpy-x86-64-asm.S | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index e4c2c30143b9..5c3cce082cb8 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,6 +1,11 @@ + +/* Various wrappers to make the kernel .S file build in user-space: */ + #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl +#define _ASM_EXTABLE_FAULT(x, y) + #include "../../../arch/x86/lib/memcpy_64.S" /* * We need to provide note.GNU-stack section, saying that we want -- cgit v1.2.3 From 46dad054a19297af65c417c97cb920aa5bdf7e8c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Mar 2016 18:48:45 -0300 Subject: perf jitdump: DWARF is also needed While building on a Docker container for ubuntu and installing package by package one ends up with: MKDIR /tmp/build/util/ CC /tmp/build/util/genelf.o util/genelf.c:22:19: fatal error: dwarf.h: No such file or directory #include <dwarf.h> ^ compilation terminated. mv: cannot stat '/tmp/build/util/.genelf.o.tmp': No such file or directory Because the jitdump code needs the DWARF related development packages to be installed. So make it dependent on that so that the build can succeed without jitdump support. 
Cc: Adrian Hunter Cc: Stephane Eranian Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-le498robnmxd40237wej3w62@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 10 +++++----- tools/perf/util/Build | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b2885776b602..e219ed458d97 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -73,7 +73,7 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } -#ifdef HAVE_LIBELF_SUPPORT +#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe __maybe_unused) @@ -245,7 +245,7 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } -#ifdef HAVE_LIBELF_SUPPORT +#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) static int perf_event__jit_repipe_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -283,7 +283,7 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool, return err; } -#ifdef HAVE_LIBELF_SUPPORT +#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -795,7 +795,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "perf inject []", NULL }; -#ifndef HAVE_LIBELF_SUPPORT +#if !defined(HAVE_LIBELF_SUPPORT) || !defined(HAVE_DWARF_SUPPORT) set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); #endif argc = parse_options(argc, argv, options, inject_usage, 0); @@ -833,7 +833,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) inject.tool.ordered_events = true; 
inject.tool.ordering_requires_timestamps = true; } -#ifdef HAVE_LIBELF_SUPPORT +#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) if (inject.jit_mode) { inject.tool.mmap2 = perf_event__jit_repipe_mmap2; inject.tool.mmap = perf_event__jit_repipe_mmap; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index df2b690970ac..f130ce240158 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -107,9 +107,12 @@ libperf-y += scripting-engines/ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o libperf-y += demangle-java.o + +ifdef CONFIG_DWARF libperf-$(CONFIG_LIBELF) += jitdump.o libperf-$(CONFIG_LIBELF) += genelf.o libperf-$(CONFIG_LIBELF) += genelf_debug.o +endif CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" # avoid compiler warnings in 32-bit mode -- cgit v1.2.3 From 616df645d7238e45d3b369933a30fee4e4e305e2 Mon Sep 17 00:00:00 2001 From: Chris Phlipot Date: Tue, 8 Mar 2016 21:11:54 -0800 Subject: perf tools: Fix perf script python database export crash Remove the union in evsel so that the database id and priv pointer can be used simultaneously without conflicting and crashing. Detailed Description for the fixed bug follows: perf script crashes with a segmentation fault on user space tool version 4.5.rc7.ge2857b when using the python database export API. It works properly in 4.4 and prior versions. the crash first appeared in: cfc8874a4859 ("perf script: Process cpu/threads maps") How to reproduce the bug: Remove any temporary files left over from a previous crash (if you have already attempted to reproduce the bug): $ rm -r test_db-perf-data $ dropdb test_db $ perf record timeout 1 yes >/dev/null $ perf script -s scripts/python/export-to-postgresql.py test_db Stack Trace: Program received signal SIGSEGV, Segmentation fault. __GI___libc_free (mem=0x1) at malloc.c:2929 2929 malloc.c: No such file or directory. 
(gdb) bt at util/stat.c:122 argv=, prefix=) at builtin-script.c:2231 argc=argc@entry=4, argv=argv@entry=0x7fffffffdf70) at perf.c:390 at perf.c:451 Signed-off-by: Chris Phlipot Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Peter Zijlstra Fixes: cfc8874a4859 ("perf script: Process cpu/threads maps") Link: http://lkml.kernel.org/r/1457500314-8912-1-git-send-email-cphlipot0@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index efad78f811ad..501ea6e565f1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -93,10 +93,8 @@ struct perf_evsel { const char *unit; struct event_format *tp_format; off_t id_offset; - union { - void *priv; - u64 db_id; - }; + void *priv; + u64 db_id; struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; -- cgit v1.2.3 From d7b617f51be4fffa3cbb5adf6d4258e616dce294 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 9 Mar 2016 11:04:17 +0100 Subject: perf tools: Pass perf_hpp_list all the way through setup_sort_list Pass perf_hpp_list all the way through setup_sort_list so that the sort entry can be added on the arbitrary list. 
Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160309100417.GA30910@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 041f236379e0..59a101e43457 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1614,19 +1614,21 @@ int hist_entry__filter(struct hist_entry *he, int type, const void *arg) return hse->se->se_filter(he, type, arg); } -static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, int level) +static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, + struct perf_hpp_list *list, + int level) { struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level); if (hse == NULL) return -1; - perf_hpp__register_sort_field(&hse->hpp); + perf_hpp_list__register_sort_field(list, &hse->hpp); return 0; } -static int __sort_dimension__add_hpp_output(struct perf_hpp_list *list, - struct sort_dimension *sd) +static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, + struct perf_hpp_list *list) { struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0); @@ -2147,12 +2149,14 @@ out: return ret; } -static int __sort_dimension__add(struct sort_dimension *sd, int level) +static int __sort_dimension__add(struct sort_dimension *sd, + struct perf_hpp_list *list, + int level) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_sort(sd, level) < 0) + if (__sort_dimension__add_hpp_sort(sd, list, level) < 0) return -1; if (sd->entry->se_collapse) @@ -2163,7 +2167,9 @@ static int __sort_dimension__add(struct sort_dimension *sd, int level) return 0; } -static int __hpp_dimension__add(struct hpp_dimension *hd, int level) +static int __hpp_dimension__add(struct hpp_dimension *hd, + struct perf_hpp_list *list, + int 
level) { struct perf_hpp_fmt *fmt; @@ -2175,7 +2181,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd, int level) return -1; hd->taken = 1; - perf_hpp__register_sort_field(fmt); + perf_hpp_list__register_sort_field(list, fmt); return 0; } @@ -2185,7 +2191,7 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list, if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(list, sd) < 0) + if (__sort_dimension__add_hpp_output(sd, list) < 0) return -1; sd->taken = 1; @@ -2215,7 +2221,8 @@ int hpp_dimension__add_output(unsigned col) return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); } -static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, +static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, + struct perf_evlist *evlist __maybe_unused, int level) { unsigned int i; @@ -2255,7 +2262,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, sort__has_thread = 1; } - return __sort_dimension__add(sd, level); + return __sort_dimension__add(sd, list, level); } for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { @@ -2264,7 +2271,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __hpp_dimension__add(hd, level); + return __hpp_dimension__add(hd, list, level); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -2279,7 +2286,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) sort__has_sym = 1; - __sort_dimension__add(sd, level); + __sort_dimension__add(sd, list, level); return 0; } @@ -2295,7 +2302,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, if (sd->entry == &sort_mem_daddr_sym) sort__has_sym = 1; - __sort_dimension__add(sd, level); + __sort_dimension__add(sd, list, level); return 0; } @@ -2305,7 +2312,8 @@ static int 
sort_dimension__add(const char *tok, struct perf_evlist *evlist, return -ESRCH; } -static int setup_sort_list(char *str, struct perf_evlist *evlist) +static int setup_sort_list(struct perf_hpp_list *list, char *str, + struct perf_evlist *evlist) { char *tmp, *tok; int ret = 0; @@ -2332,7 +2340,7 @@ static int setup_sort_list(char *str, struct perf_evlist *evlist) } if (*tok) { - ret = sort_dimension__add(tok, evlist, level); + ret = sort_dimension__add(list, tok, evlist, level); if (ret == -EINVAL) { error("Invalid --sort key: `%s'", tok); break; @@ -2480,7 +2488,7 @@ static int __setup_sorting(struct perf_evlist *evlist) } } - ret = setup_sort_list(str, evlist); + ret = setup_sort_list(&perf_hpp_list, str, evlist); free(str); return ret; @@ -2725,7 +2733,7 @@ int setup_sorting(struct perf_evlist *evlist) return err; if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add("parent", evlist, -1); + err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1); if (err < 0) return err; } -- cgit v1.2.3 From ea8f75f981918c5946fc4029acdc86707fa901c1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 8 Mar 2016 19:42:30 +0100 Subject: perf tools: Omit unnecessary cast in perf_pmu__parse_scale There's no need to use a const char pointer; we can use a char pointer from the beginning and omit the unnecessary cast. Reported-by: Ingo Molnar Signed-off-by: Jiri Olsa Cc: David Ahern Cc: H. 
Peter Anvin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160308184230.GB7897@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d8cd038baed2..adef23b1352e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -98,7 +98,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * char scale[128]; int fd, ret = -1; char path[PATH_MAX]; - const char *lc; + char *lc; snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); @@ -146,7 +146,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * /* restore locale */ setlocale(LC_NUMERIC, lc); - free((char *) lc); + free(lc); ret = 0; error: -- cgit v1.2.3 From 9eb42dee2b11635174c74a7996934b6ca18f2179 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 26 Feb 2016 18:13:28 -0500 Subject: tools lib traceevent: Add '~' operation within arg_num_eval() When evaluating values for print flags, if the value included a '~' operator, the parsing would fail. 
This broke kmalloc's parsing of: __print_flags(REC->gfp_flags, "|", {(unsigned long)((((((( gfp_t)(0x400000u|0x2000000u)) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( gfp_t)0x20000u)) | (( gfp_t)0x02u)) | (( gfp_t)0x08u)) | (( gfp_t)0x4000u) | (( gfp_t)0x10000u) | (( gfp_t)0x1000u) | (( gfp_t)0x200u)) & ~(( gfp_t)0x2000000u)) ^ | here Signed-off-by: Steven Rostedt Reported-by: Arnaldo Carvalho de Melo Tested-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20160226181328.22f47129@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 865dea55454b..190cc886ab91 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2398,6 +2398,12 @@ static int arg_num_eval(struct print_arg *arg, long long *val) break; *val = left + right; break; + case '~': + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + *val = ~right; + break; default: do_warning("unknown op '%s'", arg->op.op); ret = 0; -- cgit v1.2.3 From e12b202f8fb9b62a3997cad8e93401f85293390c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Mar 2016 17:41:13 +0100 Subject: perf jitdump: Build only on supported archs Build jitdump only on architectures defined in util/genelf.h file, to avoid breaking the build on such arches. 
Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Borislav Petkov Cc: Colin Ian King Cc: David Ahern Cc: Davidlohr Bueso Cc: He Kuang Cc: Mel Gorman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/r/20160310164113.GA11357@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/Makefile | 1 + tools/perf/arch/arm64/Makefile | 1 + tools/perf/arch/powerpc/Makefile | 1 + tools/perf/arch/x86/Makefile | 1 + tools/perf/builtin-inject.c | 12 +++++++----- tools/perf/config/Makefile | 7 +++++++ tools/perf/util/Build | 2 +- 7 files changed, 19 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 7fbca175099e..18b13518d8d8 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,3 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 7fbca175099e..18b13518d8d8 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,3 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 9f9cea3478fd..56e05f126ad8 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 09ba923debe8..269af2143735 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index e219ed458d97..7fa68663ed72 
100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -73,7 +73,7 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } -#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) +#ifdef HAVE_JITDUMP static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe __maybe_unused) @@ -245,7 +245,7 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } -#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) +#ifdef HAVE_JITDUMP static int perf_event__jit_repipe_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -283,7 +283,7 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool, return err; } -#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) +#ifdef HAVE_JITDUMP static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -778,7 +778,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, "Merge sched-stat and sched-switch for getting events " "where and how long tasks slept"), +#ifdef HAVE_JITDUMP OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), +#endif OPT_INCR('v', "verbose", &verbose, "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", @@ -795,7 +797,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "perf inject []", NULL }; -#if !defined(HAVE_LIBELF_SUPPORT) || !defined(HAVE_DWARF_SUPPORT) +#ifndef HAVE_JITDUMP set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); #endif argc = parse_options(argc, argv, options, inject_usage, 0); @@ -833,7 +835,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) inject.tool.ordered_events = 
true; inject.tool.ordering_requires_timestamps = true; } -#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) +#ifdef HAVE_JITDUMP if (inject.jit_mode) { inject.tool.mmap2 = perf_event__jit_repipe_mmap2; inject.tool.mmap = perf_event__jit_repipe_mmap; diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f7aeaf303f5a..eca6a912e8c2 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -328,6 +328,13 @@ ifndef NO_LIBELF endif # NO_LIBBPF endif # NO_LIBELF +ifdef PERF_HAVE_JITDUMP + ifndef NO_DWARF + $(call detected,CONFIG_JITDUMP) + CFLAGS += -DHAVE_JITDUMP + endif +endif + ifeq ($(ARCH),powerpc) ifndef NO_DWARF CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX diff --git a/tools/perf/util/Build b/tools/perf/util/Build index f130ce240158..eea25e2424e9 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -108,7 +108,7 @@ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o libperf-y += demangle-java.o -ifdef CONFIG_DWARF +ifdef CONFIG_JITDUMP libperf-$(CONFIG_LIBELF) += jitdump.o libperf-$(CONFIG_LIBELF) += genelf.o libperf-$(CONFIG_LIBELF) += genelf_debug.o -- cgit v1.2.3 From f4954cfb1cda4cf0abf36d23213c702e94666c3f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:46:56 +0900 Subject: perf tools: Fix hist_entry__filter() for hierarchy When hierarchy mode is enabled each output format is in a separate hpp list. So when applying a filter it should check all formats in the list. Currently it only checks a single ->fmt field which was not set properly. 
Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 59a101e43457..8a49a07ebea6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1602,16 +1602,30 @@ int hist_entry__filter(struct hist_entry *he, int type, const void *arg) { struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; + int ret = -1; + int r; - fmt = he->fmt; - if (fmt == NULL || !perf_hpp__is_sort_entry(fmt)) - return -1; + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; - hse = container_of(fmt, struct hpp_sort_entry, hpp); - if (hse->se->se_filter == NULL) - return -1; + hse = container_of(fmt, struct hpp_sort_entry, hpp); + if (hse->se->se_filter == NULL) + continue; - return hse->se->se_filter(he, type, arg); + /* + * hist entry is filtered if any of sort key in the hpp list + * is applied. But it should skip non-matched filter types. + */ + r = hse->se->se_filter(he, type, arg); + if (r >= 0) { + if (ret < 0) + ret = 0; + ret |= r; + } + } + + return ret; } static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, -- cgit v1.2.3 From 4945cf2aa1ed61994c158f22f26ea6101059a8d4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:46:57 +0900 Subject: perf tools: Add more sort entry check functions Those functions are for checking if a given perf_hpp_fmt is a filter-related sort entry. With hierarchy mode, it needs to check filters on the hist entries with its own hpp format list. 
Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.h | 4 ++++ tools/perf/util/sort.c | 50 +++++++++++++++++++------------------------------- 2 files changed, 23 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2cb017f28f9e..6870a1bfd762 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -318,6 +318,10 @@ bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *his bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt); struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 8a49a07ebea6..61c74022e47f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1488,38 +1488,26 @@ bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) return format->header == __sort__hpp_header; } -bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt) -{ - struct hpp_sort_entry *hse; +#define MK_SORT_ENTRY_CHK(key) \ +bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt) \ +{ \ + struct hpp_sort_entry *hse; \ + \ + if (!perf_hpp__is_sort_entry(fmt)) \ + return false; \ + \ + hse = container_of(fmt, struct hpp_sort_entry, hpp); \ + return hse->se == &sort_ ## key ; \ +} + +MK_SORT_ENTRY_CHK(trace) +MK_SORT_ENTRY_CHK(srcline) +MK_SORT_ENTRY_CHK(srcfile) +MK_SORT_ENTRY_CHK(thread) +MK_SORT_ENTRY_CHK(comm) 
+MK_SORT_ENTRY_CHK(dso) +MK_SORT_ENTRY_CHK(sym) - if (!perf_hpp__is_sort_entry(fmt)) - return false; - - hse = container_of(fmt, struct hpp_sort_entry, hpp); - return hse->se == &sort_trace; -} - -bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt) -{ - struct hpp_sort_entry *hse; - - if (!perf_hpp__is_sort_entry(fmt)) - return false; - - hse = container_of(fmt, struct hpp_sort_entry, hpp); - return hse->se == &sort_srcline; -} - -bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt) -{ - struct hpp_sort_entry *hse; - - if (!perf_hpp__is_sort_entry(fmt)) - return false; - - hse = container_of(fmt, struct hpp_sort_entry, hpp); - return hse->se == &sort_srcfile; -} static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) { -- cgit v1.2.3 From aec13a7ec78d9322a348fb26940097b0bdfef1bd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:46:58 +0900 Subject: perf tools: Fix command line filters in hierarchy mode When a command-line filter is applied in hierarchy mode, output is broken especially when filtering on lower level. The higher level entries don't show up so it's hard to see the results. Also it needs to handle multiple sort keys in a single hierarchy level. Before: $ perf report --hierarchy -s 'cpu,{dso,comm}' --comms swapper --stdio ... # Overhead CPU / Shared Object+Command # ........... ........................... # 13.79% [kernel.vmlinux] swapper 31.71% 000 13.80% [kernel.vmlinux] swapper 0.43% [e1000e] swapper 11.89% [kernel.vmlinux] swapper 9.18% [kernel.vmlinux] swapper After: # Overhead CPU / Shared Object+Command # ........... ............................... 
# 33.09% 003 13.79% [kernel.vmlinux] swapper 31.71% 000 13.80% [kernel.vmlinux] swapper 0.43% [e1000e] swapper 21.90% 002 11.89% [kernel.vmlinux] swapper 13.30% 001 9.18% [kernel.vmlinux] swapper Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 97 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 29da9e0d8db9..a98f9345f686 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1087,10 +1087,103 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, */ static void hists__apply_filters(struct hists *hists, struct hist_entry *he); +static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he, + enum hist_filter type); + +typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt); + +static bool check_thread_entry(struct perf_hpp_fmt *fmt) +{ + return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt); +} + +static void hist_entry__check_and_remove_filter(struct hist_entry *he, + enum hist_filter type, + fmt_chk_fn check) +{ + struct perf_hpp_fmt *fmt; + bool type_match = false; + struct hist_entry *parent = he->parent_he; + + switch (type) { + case HIST_FILTER__THREAD: + if (symbol_conf.comm_list == NULL && + symbol_conf.pid_list == NULL && + symbol_conf.tid_list == NULL) + return; + break; + case HIST_FILTER__DSO: + if (symbol_conf.dso_list == NULL) + return; + break; + case HIST_FILTER__SYMBOL: + if (symbol_conf.sym_list == NULL) + return; + break; + case HIST_FILTER__PARENT: + case HIST_FILTER__GUEST: + case HIST_FILTER__HOST: + case HIST_FILTER__SOCKET: + default: + return; + } + + /* if it's 
filtered by own fmt, it has to have filter bits */ + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + if (check(fmt)) { + type_match = true; + break; + } + } + + if (type_match) { + /* + * If the filter is for current level entry, propagate + * filter marker to parents. The marker bit was + * already set by default so it only needs to clear + * non-filtered entries. + */ + if (!(he->filtered & (1 << type))) { + while (parent) { + parent->filtered &= ~(1 << type); + parent = parent->parent_he; + } + } + } else { + /* + * If current entry doesn't have matching formats, set + * filter marker for upper level entries. it will be + * cleared if its lower level entries is not filtered. + * + * For lower-level entries, it inherits parent's + * filter bit so that lower level entries of a + * non-filtered entry won't set the filter marker. + */ + if (parent == NULL) + he->filtered |= (1 << type); + else + he->filtered |= (parent->filtered & (1 << type)); + } +} + +static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) +{ + hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD, + check_thread_entry); + + hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO, + perf_hpp__is_dso_entry); + + hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, + perf_hpp__is_sym_entry); + + hists__apply_filters(he->hists, he); +} static struct hist_entry *hierarchy_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he, + struct hist_entry *parent_he, struct perf_hpp_list *hpp_list) { struct rb_node **p = &root->rb_node; @@ -1125,11 +1218,13 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, if (new == NULL) return NULL; - hists__apply_filters(hists, new); hists->nr_entries++; /* save related format list for output */ new->hpp_list = hpp_list; + new->parent_he = parent_he; + + hist_entry__apply_hierarchy_filters(new); /* some fields are now passed to 'new' */ perf_hpp_list__for_each_sort_list(hpp_list, fmt) { @@ 
-1170,14 +1265,13 @@ static int hists__hierarchy_insert_entry(struct hists *hists, continue; /* insert copy of 'he' for each fmt into the hierarchy */ - new_he = hierarchy_insert_entry(hists, root, he, &node->hpp); + new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp); if (new_he == NULL) { ret = -1; break; } root = &new_he->hroot_in; - new_he->parent_he = parent; new_he->depth = depth++; parent = new_he; } -- cgit v1.2.3 From a515d8ff7085d5e9fde867f2048b8da36b95dc51 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:46:59 +0900 Subject: perf tools: Remove hist_entry->fmt field It's not used anymore and the output format is accessed by the hpp_list pointer instead when hierarchy is enabled. Let's get rid of it. Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.h | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ea1f722cffea..151afc1b6c2f 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -129,7 +129,6 @@ struct hist_entry { void *raw_data; u32 raw_size; void *trace_output; - struct perf_hpp_fmt *fmt; struct perf_hpp_list *hpp_list; struct hist_entry *parent_he; union { -- cgit v1.2.3 From 325a62834e81452d2a6e253444022cf493bbabfc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:47:00 +0900 Subject: perf hists browser: Cleanup hist_browser__fprintf_hierarchy_entry() The hist_browser__fprintf_hierarchy_entry() is to dump current output into a file so it needs to be sync-ed with the corresponding function hist_browser__show_hierarchy_entry(). So use hists->nr_hpp_node to compute the indent width and use first fmt_node to print overhead columns instead of checking whether it's a sort entry (or dynamic entry). 
Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e0e217ec856b..aed9c8f011f7 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1928,8 +1928,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, struct hist_entry *he, - FILE *fp, int level, - int nr_sort_keys) + FILE *fp, int level) { char s[8192]; int printed = 0; @@ -1939,23 +1938,20 @@ static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, .size = sizeof(s), }; struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; bool first = true; int ret; - int hierarchy_indent = nr_sort_keys * HIERARCHY_INDENT; + int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT; printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, ""); folded_sign = hist_entry__folded(he); printed += fprintf(fp, "%c", folded_sign); - hists__for_each_format(he->hists, fmt) { - if (perf_hpp__should_skip(fmt, he->hists)) - continue; - - if (perf_hpp__is_sort_entry(fmt) || - perf_hpp__is_dynamic_entry(fmt)) - break; - + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&he->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { if (!first) { ret = scnprintf(hpp.buf, hpp.size, " "); advance_hpp(&hpp, ret); @@ -1992,7 +1988,6 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) struct rb_node *nd = 
hists__filter_entries(rb_first(browser->b.entries), browser->min_pcnt); int printed = 0; - int nr_sort = browser->hists->nr_sort_keys; while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); @@ -2000,8 +1995,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) if (symbol_conf.report_hierarchy) { printed += hist_browser__fprintf_hierarchy_entry(browser, h, fp, - h->depth, - nr_sort); + h->depth); } else { printed += hist_browser__fprintf_entry(browser, h, fp); } -- cgit v1.2.3 From 86e3ee5224c17b7967aac39aa15539393c144de7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:47:01 +0900 Subject: perf tools: Remove nr_sort_keys field The nr_sort_keys field is to carry the number of sort entries in a hpp_list or hists to determine the depth of indentation of a hist entry. As it's only used in hierarchy mode and now we have used nr_hpp_node for this reason, there's no need to keep it anymore. Let's get rid of it. Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 3 --- tools/perf/util/hist.h | 2 -- tools/perf/util/sort.c | 26 -------------------------- 3 files changed, 31 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index f03c4f70438f..3baeaa6e71b5 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -515,9 +515,6 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list, void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, struct perf_hpp_fmt *format) { - if (perf_hpp__is_sort_entry(format) || perf_hpp__is_dynamic_entry(format)) - list->nr_sort_keys++; - list_add_tail(&format->sort_list, &list->sorts); } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 6870a1bfd762..ead18c82294f 
100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -79,7 +79,6 @@ struct hists { int socket_filter; struct perf_hpp_list *hpp_list; struct list_head hpp_formats; - int nr_sort_keys; int nr_hpp_node; }; @@ -241,7 +240,6 @@ struct perf_hpp_fmt { struct perf_hpp_list { struct list_head fields; struct list_head sorts; - int nr_sort_keys; }; extern struct perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 61c74022e47f..ced849e51e6b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2703,29 +2703,6 @@ out: return ret; } -static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - struct perf_hpp_fmt *fmt; - struct hists *hists = evsel__hists(evsel); - - hists->nr_sort_keys = perf_hpp_list.nr_sort_keys; - - /* - * If dynamic entries were used, it might add multiple - * entries to each evsel for a single field name. Set - * actual number of sort keys for each hists. - */ - perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { - if (perf_hpp__is_dynamic_entry(fmt) && - !perf_hpp__defined_dynamic_entry(fmt, hists)) - hists->nr_sort_keys--; - } - } -} - int setup_sorting(struct perf_evlist *evlist) { int err; @@ -2740,9 +2717,6 @@ int setup_sorting(struct perf_evlist *evlist) return err; } - if (evlist != NULL) - evlist__set_hists_nr_sort_keys(evlist); - reset_dimensions(); /* -- cgit v1.2.3 From f7fb538afea55383a9383dac5c56887c601af5f4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:47:02 +0900 Subject: perf tools: Recalc total periods using top-level entries in hierarchy When hierarchy mode is enabled, each entry in a hierarchy level shares the period. IOW an upper level entry's period is the sum of lower level entries. Thus perf uses only one of them to calculate the total period of hists. It was lowest-level (leaf) entries but it has a problem when it comes to filters. 
If a filter is applied, entries in the same level will be filtered or not. But upper level entries still have period of their sum including filtered one. So total sum of upper level entries will not be same as sum of lower level entries. This resulted in entries having more than 100% of overhead and it can be produced using perf top with filter(s). Reported-and-Tested-by: Jiri Olsa Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a98f9345f686..290b3cbf6877 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1453,6 +1453,31 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) hists->stats.total_period += h->stat.period; } +static void hierarchy_recalc_total_periods(struct hists *hists) +{ + struct rb_node *node; + struct hist_entry *he; + + node = rb_first(&hists->entries); + + hists->stats.total_period = 0; + hists->stats.total_non_filtered_period = 0; + + /* + * recalculate total period using top-level entries only + * since lower level entries only see non-filtered entries + * but upper level entries have sum of both entries. 
+ */ + while (node) { + he = rb_entry(node, struct hist_entry, rb_node); + node = rb_next(node); + + hists->stats.total_period += he->stat.period; + if (!he->filtered) + hists->stats.total_non_filtered_period += he->stat.period; + } +} + static void hierarchy_insert_output_entry(struct rb_root *root, struct hist_entry *he) { @@ -1518,11 +1543,6 @@ static void hists__hierarchy_output_resort(struct hists *hists, continue; } - /* only update stat for leaf entries to avoid duplication */ - hists__inc_stats(hists, he); - if (!he->filtered) - hists__calc_col_len(hists, he); - if (!use_callchain) continue; @@ -1602,11 +1622,13 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, hists__reset_col_len(hists); if (symbol_conf.report_hierarchy) { - return hists__hierarchy_output_resort(hists, prog, - &hists->entries_collapsed, - &hists->entries, - min_callchain_hits, - use_callchain); + hists__hierarchy_output_resort(hists, prog, + &hists->entries_collapsed, + &hists->entries, + min_callchain_hits, + use_callchain); + hierarchy_recalc_total_periods(hists); + return; } if (sort__need_collapse) @@ -1927,6 +1949,8 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a } } + hierarchy_recalc_total_periods(hists); + /* * resort output after applying a new filter since filter in a lower * hierarchy can change periods in a upper hierarchy. -- cgit v1.2.3 From 078b8d4a406fa8ce4a3c9d5145c27be1ed2b1dfd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 23:20:51 +0900 Subject: perf tools: Add sort__has_comm variable The sort__has_comm variable is to check whether the comm sort key is given. This is necessary to support thread filtering in the TUI hists browser later. 
Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1457533253-21419-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 3 +++ tools/perf/util/sort.h | 1 + 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ced849e51e6b..93fa136b0025 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -27,6 +27,7 @@ int sort__has_sym = 0; int sort__has_dso = 0; int sort__has_socket = 0; int sort__has_thread = 0; +int sort__has_comm = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; /* @@ -2262,6 +2263,8 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, sort__has_socket = 1; } else if (sd->entry == &sort_thread) { sort__has_thread = 1; + } else if (sd->entry == &sort_comm) { + sort__has_comm = 1; } return __sort_dimension__add(sd, list, level); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 151afc1b6c2f..3f4e35998119 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -37,6 +37,7 @@ extern int sort__has_parent; extern int sort__has_sym; extern int sort__has_socket; extern int sort__has_thread; +extern int sort__has_comm; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; -- cgit v1.2.3 From 6962ccb37b50366014074aec6fd14497cf719642 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Mar 2016 00:14:50 +0900 Subject: perf hists browser: Allow thread filtering for comm sort key The commit 2eafd410e669 ("perf hists browser: Only 'Zoom into thread' only when sort order has 'pid'") disabled thread filtering in hist browser for the default sort key. However the he->thread is still valid even if 'pid' sort key is not given. Only thing it should not use is the pid (or tid) of the thread. 
So allow to filter by thread when 'comm' sort key is given and show pid only if 'pid' sort key is given. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1457536490-24084-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index aed9c8f011f7..cb4191bf6cec 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2136,11 +2136,18 @@ static int hists__browser_title(struct hists *hists, if (hists->uid_filter_str) printed += snprintf(bf + printed, size - printed, ", UID: %s", hists->uid_filter_str); - if (thread) - printed += scnprintf(bf + printed, size - printed, + if (thread) { + if (sort__has_thread) { + printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", (thread->comm_set ? thread__comm_str(thread) : ""), thread->tid); + } else { + printed += scnprintf(bf + printed, size - printed, + ", Thread: %s", + (thread->comm_set ? thread__comm_str(thread) : "")); + } + } if (dso) printed += scnprintf(bf + printed, size - printed, ", DSO: %s", dso->short_name); @@ -2321,9 +2328,15 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) thread__zput(browser->hists->thread_filter); ui_helpline__pop(); } else { - ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", - thread->comm_set ? thread__comm_str(thread) : "", - thread->tid); + if (sort__has_thread) { + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", + thread->comm_set ? thread__comm_str(thread) : "", + thread->tid); + } else { + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"", + thread->comm_set ? 
thread__comm_str(thread) : ""); + } + browser->hists->thread_filter = thread__get(thread); perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(browser->pstack, &browser->hists->thread_filter); @@ -2338,13 +2351,22 @@ static int add_thread_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct thread *thread) { - if (!sort__has_thread || thread == NULL) + int ret; + + if ((!sort__has_thread && !sort__has_comm) || thread == NULL) return 0; - if (asprintf(optstr, "Zoom %s %s(%d) thread", - browser->hists->thread_filter ? "out of" : "into", - thread->comm_set ? thread__comm_str(thread) : "", - thread->tid) < 0) + if (sort__has_thread) { + ret = asprintf(optstr, "Zoom %s %s(%d) thread", + browser->hists->thread_filter ? "out of" : "into", + thread->comm_set ? thread__comm_str(thread) : "", + thread->tid); + } else { + ret = asprintf(optstr, "Zoom %s %s thread", + browser->hists->thread_filter ? "out of" : "into", + thread->comm_set ? thread__comm_str(thread) : ""); + } + if (ret < 0) return 0; act->thread = thread; -- cgit v1.2.3 From 599a2f38a989a79df99838f22cb607f5e2b5b56c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 23:20:53 +0900 Subject: perf hists browser: Check sort keys before hot key actions The context menu in TUI hists browser checks corresponding sort keys when creating the menu item. But hotkey actions lack these checks so they can filter using incorrect info. For example, default sort key of 'perf top' doesn't contain 'comm' or 'pid' sort key so each hist entry's thread info is not reliable. Thus it should prohibit using thread filter on 't' key. 
Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1457533253-21419-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cb4191bf6cec..4b9816555946 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2322,6 +2322,9 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) { struct thread *thread = act->thread; + if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + return 0; + if (browser->hists->thread_filter) { pstack__remove(browser->pstack, &browser->hists->thread_filter); perf_hpp__set_elide(HISTC_THREAD, false); @@ -2379,6 +2382,9 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { struct map *map = act->ms.map; + if (!sort__has_dso || map == NULL) + return 0; + if (browser->hists->dso_filter) { pstack__remove(browser->pstack, &browser->hists->dso_filter); perf_hpp__set_elide(HISTC_DSO, false); @@ -2530,6 +2536,9 @@ add_exit_opt(struct hist_browser *browser __maybe_unused, static int do_zoom_socket(struct hist_browser *browser, struct popup_action *act) { + if (!sort__has_socket || act->socket < 0) + return 0; + if (browser->hists->socket_filter > -1) { pstack__remove(browser->pstack, &browser->hists->socket_filter); browser->hists->socket_filter = -1; -- cgit v1.2.3 From 6b45f7b2a37b0e00693985fd0abfc8e0319f91ce Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 3 Mar 2016 15:57:35 -0800 Subject: perf stat: Document CSV format in manpage With all the recently added fields in the perf stat CSV output we should finally document them in the man page. Do this here. v2: Fix fields in documentation (Jiri) v3: fix order of fields again (Jiri) v4: Change order again. 
v5: Document more fields (Jiri) v6: Move time stamp first v7: More fixes (Jiri) Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1457049458-28956-5-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 14d9e8ffaff7..8812d7319edb 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -219,6 +219,29 @@ $ perf stat -- make -j Wall-clock time elapsed: 719.554352 msecs +CSV FORMAT +---------- + +With -x, perf stat is able to output a not-quite-CSV format output +Commas in the output are not put into "". To make it easy to parse +it is recommended to use a different character like -x \; + +The fields are in this order: + + - optional usec time stamp in fractions of second (with -I xxx) + - optional CPU, core, or socket identifier + - optional number of logical CPUs aggregated + - counter value + - unit of the counter value or empty + - event name + - run time of counter + - percentage of measurement time the counter was running + - optional variance if multiple values are collected with -r + - optional metric value + - optional unit of metric + +Additional metrics may be printed with all earlier fields being empty. + SEE ALSO -------- linkperf:perf-top[1], linkperf:perf-list[1] -- cgit v1.2.3 From 54b5091606c18f68a7fc8b4ab03ac4592c7d2922 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 3 Mar 2016 15:57:36 -0800 Subject: perf stat: Implement --metric-only mode Add a new mode to only print metrics. Sometimes we don't care about the raw values, just want the computed metrics. This allows more compact printing, so with -I each sample is only a single line. This also allows easier plotting and processing with other tools. 
The main target is with using --topdown, but it also works with -T and standard perf stat. A few metrics are not supported. To avoiding having to hardcode all the metrics in the code it uses a two pass approach: first compute dummy metrics and only print the headers in the print_metric callback. Then use the callback to print the actual values. There are some additional changes in the stat printout code to handle all metrics being on a single line. One issue is that the column code doesn't know in advance what events are not supported by the CPU, and it would be hard to find out as this could change based on dynamic conditions. That causes empty columns in some cases. The output can be fairly wide, often you may need more than 80 columns. Example: % perf stat -a -I 1000 --metric-only 1.001452803 frontend cycles idle insn per cycle stalled cycles per insn branch-misses of all branches 1.001452803 158.91% 0.66 2.39 2.92% 2.002192321 180.63% 0.76 2.08 2.96% 3.003088282 150.59% 0.62 2.57 2.84% 4.004369835 196.20% 0.98 1.62 3.79% 5.005227314 231.98% 0.84 1.90 4.71% v2: Lots of updates. v3: Use slightly narrower columns v4: Add comment Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1457049458-28956-6-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 + tools/perf/builtin-stat.c | 211 +++++++++++++++++++++++++++++++-- 2 files changed, 205 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 8812d7319edb..82f0951754dd 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -147,6 +147,10 @@ Print count deltas every N milliseconds (minimum: 10ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. 
example: 'perf stat -I 1000 -e cycles -a sleep 5' +--metric-only:: +Only print computed metrics. Print them in a single line. +Don't show any raw values. Not supported with -A or --per-thread. + --per-socket:: Aggregate counts per processor socket for system-wide mode measurements. This is a useful mode to detect imbalance between sockets. To enable this mode, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index baa82078c148..74508c9d0742 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -122,6 +122,7 @@ static bool sync_run = false; static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; +static bool metric_only = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -827,6 +828,99 @@ static void print_metric_csv(void *ctx, fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); } +#define METRIC_ONLY_LEN 20 + +/* Filter out some columns that don't work well in metrics only mode */ + +static bool valid_only_metric(const char *unit) +{ + if (!unit) + return false; + if (strstr(unit, "/sec") || + strstr(unit, "hz") || + strstr(unit, "Hz") || + strstr(unit, "CPUs utilized")) + return false; + return true; +} + +static const char *fixunit(char *buf, struct perf_evsel *evsel, + const char *unit) +{ + if (!strncmp(unit, "of all", 6)) { + snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + unit); + return buf; + } + return unit; +} + +static void print_metric_only(void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + char buf[1024]; + unsigned mlen = METRIC_ONLY_LEN; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(buf, os->evsel, unit); + if (color) + n = color_fprintf(out, color, fmt, val); + else + n = fprintf(out, fmt, val); + if (n > METRIC_ONLY_LEN) + n = METRIC_ONLY_LEN; + if (mlen < 
strlen(unit)) + mlen = strlen(unit) + 1; + fprintf(out, "%*s", mlen - n, ""); +} + +static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, + const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + snprintf(buf, sizeof buf, fmt, val); + vals = buf; + while (isspace(*vals)) + vals++; + ends = vals; + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + fprintf(out, "%s%s", vals, csv_sep); +} + +static void new_line_metric(void *ctx __maybe_unused) +{ +} + +static void print_metric_header(void *ctx, const char *color __maybe_unused, + const char *fmt __maybe_unused, + const char *unit, double val __maybe_unused) +{ + struct outstate *os = ctx; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + if (csv_output) + fprintf(os->fh, "%s%s", unit, csv_sep); + else + fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -921,9 +1015,16 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, print_metric_t pm = print_metric_std; void (*nl)(void *); - nl = new_line_std; + if (metric_only) { + nl = new_line_metric; + if (csv_output) + pm = print_metric_only_csv; + else + pm = print_metric_only; + } else + nl = new_line_std; - if (csv_output) { + if (csv_output && !metric_only) { static int aggr_fields[] = { [AGGR_GLOBAL] = 0, [AGGR_THREAD] = 1, @@ -940,6 +1041,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, os.nfields++; } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { + if (metric_only) { + pm(&os, NULL, "", "", 0); + return; + } aggr_printout(counter, id, nr); fprintf(stat_config.output, "%*s%s", @@ -968,7 +1073,9 @@ 
static void printout(int id, int nr, struct perf_evsel *counter, double uval, return; } - if (nsec_counter(counter)) + if (metric_only) + /* nothing */; + else if (nsec_counter(counter)) nsec_printout(id, nr, counter, uval); else abs_printout(id, nr, counter, uval); @@ -977,7 +1084,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, out.new_line = nl; out.ctx = &os; - if (csv_output) { + if (csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); } @@ -985,7 +1092,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, first_shadow_cpu(counter, id), &out); - if (!csv_output) { + if (!csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); } @@ -1021,14 +1128,23 @@ static void print_aggr(char *prefix) int cpu, s, s2, id, nr; double uval; u64 ena, run, val; + bool first; if (!(aggr_map || aggr_get_id)) return; aggr_update_shadow(); + /* + * With metric_only everything is on a single line. + * Without each counter has its own line. 
+ */ for (s = 0; s < aggr_map->nr; s++) { + if (prefix && metric_only) + fprintf(output, "%s", prefix); + id = aggr_map->map[s]; + first = true; evlist__for_each(evsel_list, counter) { val = ena = run = 0; nr = 0; @@ -1041,13 +1157,20 @@ static void print_aggr(char *prefix) run += perf_counts(counter->counts, cpu, 0)->run; nr++; } - if (prefix) + if (first && metric_only) { + first = false; + aggr_printout(counter, id, nr); + } + if (prefix && !metric_only) fprintf(output, "%s", prefix); uval = val * counter->scale; printout(id, nr, counter, uval, prefix, run, ena, 1.0); - fputc('\n', output); + if (!metric_only) + fputc('\n', output); } + if (metric_only) + fputc('\n', output); } } @@ -1092,12 +1215,13 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) avg_enabled = avg_stats(&ps->res_stats[1]); avg_running = avg_stats(&ps->res_stats[2]); - if (prefix) + if (prefix && !metric_only) fprintf(output, "%s", prefix); uval = avg * counter->scale; printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); - fprintf(output, "\n"); + if (!metric_only) + fprintf(output, "\n"); } /* @@ -1126,6 +1250,43 @@ static void print_counter(struct perf_evsel *counter, char *prefix) } } +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_SOCKET] = 12, + [AGGR_NONE] = 15, + [AGGR_THREAD] = 24, + [AGGR_GLOBAL] = 0, +}; + +static void print_metric_headers(char *prefix) +{ + struct perf_stat_output_ctx out; + struct perf_evsel *counter; + struct outstate os = { + .fh = stat_config.output + }; + + if (prefix) + fprintf(stat_config.output, "%s", prefix); + + if (!csv_output) + fprintf(stat_config.output, "%*s", + aggr_header_lens[stat_config.aggr_mode], ""); + + /* Print metrics headers only */ + evlist__for_each(evsel_list, counter) { + os.evsel = counter; + out.ctx = &os; + out.print_metric = print_metric_header; + out.new_line = new_line_metric; + os.evsel = counter; + perf_stat__print_shadow_stats(counter, 0, + 0, + &out); + } + fputc('\n', 
stat_config.output); +} + static void print_interval(char *prefix, struct timespec *ts) { FILE *output = stat_config.output; @@ -1133,7 +1294,7 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output) { + if (num_print_interval == 0 && !csv_output && !metric_only) { switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); @@ -1220,6 +1381,17 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) else print_header(argc, argv); + if (metric_only) { + static int num_print_iv; + + if (num_print_iv == 0) + print_metric_headers(prefix); + if (num_print_iv++ == 25) + num_print_iv = 0; + if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) + fprintf(stat_config.output, "%s", prefix); + } + switch (stat_config.aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: @@ -1232,6 +1404,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) print_counter_aggr(counter, prefix); + if (metric_only) + fputc('\n', stat_config.output); break; case AGGR_NONE: evlist__for_each(evsel_list, counter) @@ -1356,6 +1530,8 @@ static const struct option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), + OPT_BOOLEAN(0, "metric-only", &metric_only, + "Only print computed metrics. 
No raw values"), OPT_END() }; @@ -1997,6 +2173,21 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) goto out; } + if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { + fprintf(stderr, "--metric-only is not supported with --per-thread\n"); + goto out; + } + + if (metric_only && stat_config.aggr_mode == AGGR_NONE) { + fprintf(stderr, "--metric-only is not supported with -A\n"); + goto out; + } + + if (metric_only && run_count > 1) { + fprintf(stderr, "--metric-only is not supported with -r\n"); + goto out; + } + if (output_fd < 0) { fprintf(stderr, "argument to --log-fd must be a > 0\n"); parse_options_usage(stat_usage, stat_options, "log-fd", 0); -- cgit v1.2.3 From 206cab651d07563d766c7f4cb73f858c5df3dec5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 3 Mar 2016 15:57:37 -0800 Subject: perf stat: Add --metric-only support for -A Add metric only support for -A too. This requires a new print function that prints the metrics in the right order. v2: Fix manpage v3: Simplify nrcpus computation Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1457049458-28956-7-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 2 +- tools/perf/builtin-stat.c | 45 ++++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 82f0951754dd..04f23b404bbc 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -149,7 +149,7 @@ The overhead percentage could be high in some cases, for instance with small, su --metric-only:: Only print computed metrics. Print them in a single line. -Don't show any raw values. Not supported with -A or --per-thread. +Don't show any raw values. Not supported with --per-thread. 
--per-socket:: Aggregate counts per processor socket for system-wide mode measurements. This diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 74508c9d0742..1f19f2f999c8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1250,10 +1250,40 @@ static void print_counter(struct perf_evsel *counter, char *prefix) } } +static void print_no_aggr_metric(char *prefix) +{ + int cpu; + int nrcpus = 0; + struct perf_evsel *counter; + u64 ena, run, val; + double uval; + + nrcpus = evsel_list->cpus->nr; + for (cpu = 0; cpu < nrcpus; cpu++) { + bool first = true; + + if (prefix) + fputs(prefix, stat_config.output); + evlist__for_each(evsel_list, counter) { + if (first) { + aggr_printout(counter, cpu, 0); + first = false; + } + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; + + uval = val * counter->scale; + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + } + fputc('\n', stat_config.output); + } +} + static int aggr_header_lens[] = { [AGGR_CORE] = 18, [AGGR_SOCKET] = 12, - [AGGR_NONE] = 15, + [AGGR_NONE] = 6, [AGGR_THREAD] = 24, [AGGR_GLOBAL] = 0, }; @@ -1408,8 +1438,12 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) fputc('\n', stat_config.output); break; case AGGR_NONE: - evlist__for_each(evsel_list, counter) - print_counter(counter, prefix); + if (metric_only) + print_no_aggr_metric(prefix); + else { + evlist__for_each(evsel_list, counter) + print_counter(counter, prefix); + } break; case AGGR_UNSET: default: @@ -2178,11 +2212,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) goto out; } - if (metric_only && stat_config.aggr_mode == AGGR_NONE) { - fprintf(stderr, "--metric-only is not supported with -A\n"); - goto out; - } - if (metric_only && run_count > 1) { fprintf(stderr, "--metric-only is not supported with -r\n"); goto out; -- cgit v1.2.3