diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-17 11:47:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-17 11:47:46 -0700 |
commit | 9d9af1007bc08971953ae915d88dc9bb21344b53 (patch) | |
tree | 02090da0b271c31f19d58d80f4cff19c8ef89971 /tools/perf/builtin-diff.c | |
parent | a1e16bc7d5f7ca3599d8a7f061841c93a563665e (diff) | |
parent | 744aec4df2c5b4d12af26a57d8858af2f59ef3d0 (diff) | |
download | linux-9d9af1007bc08971953ae915d88dc9bb21344b53.tar.bz2 |
Merge tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo:
- cgroup improvements for 'perf stat', allowing for compact
specification of events and cgroups in the command line.
- Support per thread topdown metrics in 'perf stat'.
- Support sample-read topdown metric group in 'perf record'
- Show start of latency in addition to its end in 'perf sched
latency'.
- Add min, max to 'perf script' futex-contention output, in addition to
avg.
- Allow usage of 'perf_event_attr->exclusive' attribute via the new
':e' event modifier.
- Add 'snapshot' command to 'perf record --control', using it with
Intel PT.
- Support FIFO file names as alternative options to 'perf record
--control'.
- Introduce branch history "streams", to compare 'perf record' runs
with 'perf diff' based on branch records and report hot streams.
- Support PE executable symbol tables using libbfd, to profile, for
instance, wine binaries.
- Add filter support for option 'perf ftrace -F/--funcs'.
- Allow configuring the 'disassembler_style' 'perf annotate' knob via
'perf config'
- Update CascadelakeX and SkylakeX JSON vendor events files.
- Add support for parsing perchip/percore JSON vendor events.
- Add power9 hv_24x7 core level metric events.
- Add L2 prefetch, ITLB instruction fetch hits JSON events for AMD
zen1.
- Enable Family 19h users by matching Zen2 AMD vendor events.
- Use debuginfod in 'perf probe' when required debug files not found
locally.
- Display negative tid in non-sample events in 'perf script'.
- Make GTK2 support opt-in
- Add build test with GTK+
- Add missing -lzstd to the fast path feature detection
- Add scripts to auto generate 'mmap', 'mremap' string<->id tables for
use in 'perf trace'.
- Show python test script in verbose mode.
- Fix uncore metric expressions
- Msan uninitialized use fixes.
- Use condition variables in 'perf bench numa'
- Autodetect python3 binary in systems without python2.
- Support md5 build ids in addition to sha1.
- Add build id 'perf test' regression test.
- Fix printable strings in python3 scripts.
- Fix off by ones in 'perf trace' in arches using libaudit.
- Fix JSON event code for events referencing std arch events.
- Introduce 'perf test' shell script for Arm CoreSight testing.
- Add rdtsc() for Arm64 for use in the PERF_RECORD_TIME_CONV metadata
event and in 'perf test tsc'.
- 'perf c2c' improvements: Add "RMT Load Hit" metric, "Total Stores",
fixes and documentation update.
- Fix usage of reloc_sym in 'perf probe' when using both kallsyms and
debuginfo files.
- Do not print 'Metric Groups:' unnecessarily in 'perf list'
- Refcounting fixes in the event parsing code.
- Add expand cgroup event 'perf test' entry.
- Fix out of bounds CPU map access when handling armv8_pmu events in
'perf stat'.
- Add build-id injection 'perf bench' benchmark.
- Enter namespace when reading build-id in 'perf inject'.
- Do not load map/dso when injecting build-id speeding up the 'perf
inject' process.
- Add --buildid-all option to avoid processing all samples, so that only
the mmap metadata events are processed.
- Add feature test to check if libbfd has buildid support
- Add 'perf test' entry for PE binary format support.
- Fix typos in power8 PMU vendor events JSON files.
- Hide libtraceevent non API functions.
* tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (113 commits)
perf c2c: Update documentation for metrics reorganization
perf c2c: Add metrics "RMT Load Hit"
perf c2c: Correct LLC load hit metrics
perf c2c: Change header for LLC local hit
perf c2c: Use more explicit headers for HITM
perf c2c: Change header from "LLC Load Hitm" to "Load Hitm"
perf c2c: Organize metrics based on memory hierarchy
perf c2c: Display "Total Stores" as a standalone metrics
perf c2c: Display the total numbers continuously
perf bench: Use condition variables in numa.
perf jevents: Fix event code for events referencing std arch events
perf diff: Support hot streams comparison
perf streams: Report hot streams
perf streams: Calculate the sum of total streams hits
perf streams: Link stream pair
perf streams: Compare two streams
perf streams: Get the evsel_streams by evsel_idx
perf streams: Introduce branch history "streams"
perf intel-pt: Improve PT documentation slightly
perf tools: Add support for exclusive groups/events
...
Diffstat (limited to 'tools/perf/builtin-diff.c')
-rw-r--r-- | tools/perf/builtin-diff.c | 119 |
1 files changed, 106 insertions, 13 deletions
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f8c9bdd8269a..584e2e1a3793 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -25,6 +25,7 @@ #include "util/map.h" #include "util/spark.h" #include "util/block-info.h" +#include "util/stream.h" #include <linux/err.h> #include <linux/zalloc.h> #include <subcmd/pager.h> @@ -42,6 +43,7 @@ struct perf_diff { int range_size; int range_num; bool has_br_stack; + bool stream; }; /* Diff command specific HPP columns. */ @@ -72,6 +74,7 @@ struct data__file { struct perf_data data; int idx; struct hists *hists; + struct evlist_streams *evlist_streams; struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX]; }; @@ -106,6 +109,7 @@ enum { COMPUTE_DELTA_ABS, COMPUTE_CYCLES, COMPUTE_MAX, + COMPUTE_STREAM, /* After COMPUTE_MAX to avoid use current compute arrays */ }; const char *compute_names[COMPUTE_MAX] = { @@ -393,6 +397,11 @@ static int diff__process_sample_event(struct perf_tool *tool, struct perf_diff *pdiff = container_of(tool, struct perf_diff, tool); struct addr_location al; struct hists *hists = evsel__hists(evsel); + struct hist_entry_iter iter = { + .evsel = evsel, + .sample = sample, + .ops = &hist_iter_normal, + }; int ret = -1; if (perf_time__ranges_skip_sample(pdiff->ptime_range, pdiff->range_num, @@ -411,14 +420,8 @@ static int diff__process_sample_event(struct perf_tool *tool, goto out_put; } - if (compute != COMPUTE_CYCLES) { - if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, - true)) { - pr_warning("problem incrementing symbol period, " - "skipping event\n"); - goto out_put; - } - } else { + switch (compute) { + case COMPUTE_CYCLES: if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL, NULL, NULL, sample, true)) { pr_warning("problem incrementing symbol period, " @@ -428,6 +431,23 @@ static int diff__process_sample_event(struct perf_tool *tool, hist__account_cycles(sample->branch_stack, &al, sample, false, NULL); + break; + + case COMPUTE_STREAM: + if 
(hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + NULL)) { + pr_debug("problem adding hist entry, skipping event\n"); + goto out_put; + } + break; + + default: + if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, + true)) { + pr_warning("problem incrementing symbol period, " + "skipping event\n"); + goto out_put; + } } /* @@ -996,10 +1016,55 @@ static void data_process(void) } } +static int process_base_stream(struct data__file *data_base, + struct data__file *data_pair, + const char *title __maybe_unused) +{ + struct evlist *evlist_base = data_base->session->evlist; + struct evlist *evlist_pair = data_pair->session->evlist; + struct evsel *evsel_base, *evsel_pair; + struct evsel_streams *es_base, *es_pair; + + evlist__for_each_entry(evlist_base, evsel_base) { + evsel_pair = evsel_match(evsel_base, evlist_pair); + if (!evsel_pair) + continue; + + es_base = evsel_streams__entry(data_base->evlist_streams, + evsel_base->idx); + if (!es_base) + return -1; + + es_pair = evsel_streams__entry(data_pair->evlist_streams, + evsel_pair->idx); + if (!es_pair) + return -1; + + evsel_streams__match(es_base, es_pair); + evsel_streams__report(es_base, es_pair); + } + + return 0; +} + +static void stream_process(void) +{ + /* + * Stream comparison only supports two data files. + * perf.data.old and perf.data. data__files[0] is perf.data.old, + * data__files[1] is perf.data. 
+ */ + process_base_stream(&data__files[0], &data__files[1], + "# Output based on old perf data:\n#\n"); +} + static void data__free(struct data__file *d) { int col; + if (d->evlist_streams) + evlist_streams__delete(d->evlist_streams); + for (col = 0; col < PERF_HPP_DIFF__MAX_INDEX; col++) { struct diff_hpp_fmt *fmt = &d->fmt[col]; @@ -1153,9 +1218,19 @@ static int __cmd_diff(void) if (pdiff.ptime_range) zfree(&pdiff.ptime_range); + + if (compute == COMPUTE_STREAM) { + d->evlist_streams = evlist__create_streams( + d->session->evlist, 5); + if (!d->evlist_streams) + goto out_delete; + } } - data_process(); + if (compute == COMPUTE_STREAM) + stream_process(); + else + data_process(); out_delete: data__for_each_file(i, d) { @@ -1228,6 +1303,8 @@ static const struct option options[] = { "only consider symbols in these pids"), OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", "only consider symbols in these tids"), + OPT_BOOLEAN(0, "stream", &pdiff.stream, + "Enable hot streams comparison."), OPT_END() }; @@ -1887,6 +1964,9 @@ int cmd_diff(int argc, const char **argv) if (cycles_hist && (compute != COMPUTE_CYCLES)) usage_with_options(diff_usage, options); + if (pdiff.stream) + compute = COMPUTE_STREAM; + symbol__annotation_init(); if (symbol__init(NULL) < 0) @@ -1898,13 +1978,26 @@ int cmd_diff(int argc, const char **argv) if (check_file_brstack() < 0) return -1; - if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack) + if ((compute == COMPUTE_CYCLES || compute == COMPUTE_STREAM) + && !pdiff.has_br_stack) { return -1; + } - if (ui_init() < 0) - return -1; + if (compute == COMPUTE_STREAM) { + symbol_conf.show_branchflag_count = true; + symbol_conf.disable_add2line_warn = true; + callchain_param.mode = CHAIN_FLAT; + callchain_param.key = CCKEY_SRCLINE; + callchain_param.branch_callstack = 1; + symbol_conf.use_callchain = true; + callchain_register_param(&callchain_param); + sort_order = "srcline,symbol,dso"; + } else { + if (ui_init() < 0) + return -1; - 
sort__mode = SORT_MODE__DIFF; + sort__mode = SORT_MODE__DIFF; + } if (setup_sorting(NULL) < 0) usage_with_options(diff_usage, options); |