diff options
author | Ingo Molnar <mingo@kernel.org> | 2014-03-19 08:05:47 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2014-03-19 08:05:47 +0100 |
commit | 538592ff0b008237ae88f5ce5fb1247127dc3ce5 (patch) | |
tree | e6d688f188c1ec0dde44af613f6ff69e8578051f | |
parent | 0afd2d51029961281572d02545c7bde1b3f4292c (diff) | |
parent | a51e87cb5a0fbebee15a3373d951dbf6f59a76c2 (diff) | |
download | linux-538592ff0b008237ae88f5ce5fb1247127dc3ce5.tar.bz2 |
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo:
User visible:
* Fixup header alignment in 'perf sched latency' output (Ramkumar Ramachandra)
* Fix off-by-one error in 'perf timechart record' argv handling (Ramkumar Ramachandra)
* Print the evsel name in the annotate stdio output, prep to fix support
outputting annotation for multiple events, not just for the first one
(Arnaldo Carvalho de Melo)
Internals:
* Use tid in mmap/mmap2 events to find maps (Don Zickus)
* Record the reason for filtering an address_location (Namhyung Kim)
* Apply all filters to an addr_location (Namhyung Kim)
* Merge al->filtered with hist_entry->filtered in report/hists (Namhyung Kim)
* Fix memory leak when synthesizing thread records (Namhyung Kim)
* Use ui__has_annotation() in 'report' (Namhyung Kim)
Cleanups:
* Remove unused thread__find_map function (Jiri Olsa)
* Remove unused simple_strtoul() function (Ramkumar Ramachandra)
Documentation:
* Update function names in debug messages (Ramkumar Ramachandra)
* Update some code references in design.txt (Ramkumar Ramachandra)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | tools/perf/builtin-report.c | 4 | ||||
-rw-r--r-- | tools/perf/builtin-sched.c | 10 | ||||
-rw-r--r-- | tools/perf/builtin-timechart.c | 4 | ||||
-rw-r--r-- | tools/perf/design.txt | 12 | ||||
-rw-r--r-- | tools/perf/tests/hists_link.c | 1 | ||||
-rw-r--r-- | tools/perf/util/annotate.c | 14 | ||||
-rw-r--r-- | tools/perf/util/event.c | 34 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 4 | ||||
-rw-r--r-- | tools/perf/util/hist.c | 9 | ||||
-rw-r--r-- | tools/perf/util/hist.h | 9 | ||||
-rw-r--r-- | tools/perf/util/include/linux/kernel.h | 6 | ||||
-rw-r--r-- | tools/perf/util/machine.c | 6 | ||||
-rw-r--r-- | tools/perf/util/symbol.h | 2 | ||||
-rw-r--r-- | tools/perf/util/thread.h | 6 |
14 files changed, 58 insertions, 63 deletions
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c47bf586fcba..c8f21137dfd8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -231,7 +231,7 @@ static int process_sample_event(struct perf_tool *tool, return -1; } - if (al.filtered || (rep->hide_unresolved && al.sym == NULL)) + if (rep->hide_unresolved && al.sym == NULL) return 0; if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) @@ -928,7 +928,7 @@ repeat: * so don't allocate extra space that won't be used in the stdio * implementation. */ - if (use_browser == 1 && sort__has_sym) { + if (ui__has_annotation()) { symbol_conf.priv_size = sizeof(struct annotation); machines__set_symbol_filter(&session->machines, symbol__annotate_init); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 6a76a07b6789..9ac0a495c954 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1124,7 +1124,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ avg = work_list->total_lat / work_list->nb_atoms; - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n", + printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n", (double)work_list->total_runtime / 1e6, work_list->nb_atoms, (double)avg / 1e6, (double)work_list->max_lat / 1e6, @@ -1527,9 +1527,9 @@ static int perf_sched__lat(struct perf_sched *sched) perf_sched__sort_lat(sched); - printf("\n ---------------------------------------------------------------------------------------------------------------\n"); - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); - printf(" ---------------------------------------------------------------------------------------------------------------\n"); + printf("\n -----------------------------------------------------------------------------------------------------------------\n"); + printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); + printf(" -----------------------------------------------------------------------------------------------------------------\n"); next = rb_first(&sched->sorted_atom_root); @@ -1541,7 +1541,7 @@ static int perf_sched__lat(struct perf_sched *sched) next = rb_next(next); } - printf(" -----------------------------------------------------------------------------------------\n"); + printf(" -----------------------------------------------------------------------------------------------------------------\n"); printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", (double)sched->all_runtime / 1e6, sched->all_count); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 25526d6eae59..74db2568b867 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -494,7 +494,7 @@ static const char *cat_backtrace(union perf_event *event, continue; } - tal.filtered = false; + tal.filtered = 0; thread__find_addr_location(al.thread, machine, cpumode, MAP__FUNCTION, ip, &tal); @@ -1238,7 +1238,7 @@ static int timechart__record(struct timechart *tchart, int argc, const char **ar for (i = 0; i < old_power_args_nr; i++) *p++ = strdup(old_power_args[i]); - for (j = 1; j < (unsigned int)argc; j++) + for (j = 0; j < (unsigned int)argc; j++) *p++ = argv[j]; return cmd_record(rec_argc, rec_argv, NULL); diff --git a/tools/perf/design.txt b/tools/perf/design.txt index 63a0e6f04a01..a28dca2582aa 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -18,7 +18,7 @@ underlying hardware counters. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. -The special file descriptor is opened via the perf_event_open() +The special file descriptor is opened via the sys_perf_event_open() system call: int sys_perf_event_open(struct perf_event_attr *hw_event_uptr, @@ -82,7 +82,7 @@ machine-specific. If 'raw_type' is 0, then the 'type' field says what kind of counter this is, with the following encoding: -enum perf_event_types { +enum perf_type_id { PERF_TYPE_HARDWARE = 0, PERF_TYPE_SOFTWARE = 1, PERF_TYPE_TRACEPOINT = 2, @@ -95,7 +95,7 @@ specified by 'event_id': * Generalized performance counter event types, used by the hw_event.event_id * parameter of the sys_perf_event_open() syscall: */ -enum hw_event_ids { +enum perf_hw_id { /* * Common hardware events, generalized by the kernel: */ @@ -129,7 +129,7 @@ software events, selected by 'event_id': * physical and sw events of the kernel (and allow the profiling of them as * well): */ -enum sw_event_ids { +enum perf_sw_ids { PERF_COUNT_SW_CPU_CLOCK = 0, PERF_COUNT_SW_TASK_CLOCK = 1, PERF_COUNT_SW_PAGE_FAULTS = 2, @@ -230,7 +230,7 @@ these events are recorded in the ring-buffer (see below). The 'comm' bit allows tracking of process comm data on process creation. This too is recorded in the ring-buffer (see below). -The 'pid' parameter to the perf_event_open() system call allows the +The 'pid' parameter to the sys_perf_event_open() system call allows the counter to be specific to a task: pid == 0: if the pid parameter is zero, the counter is attached to the @@ -260,7 +260,7 @@ The 'flags' parameter is currently unused and must be zero. The 'group_fd' parameter allows counter "groups" to be set up. A counter group has one counter which is the group "leader". The leader -is created first, with group_fd = -1 in the perf_event_open call +is created first, with group_fd = -1 in the sys_perf_event_open call that creates it. The rest of the group members are created subsequently, with group_fd giving the fd of the group leader. (A single counter on its own is created with group_fd = -1 and is diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 2b6519e0e36f..7ccbc7b6ae77 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -101,6 +101,7 @@ static struct machine *setup_fake_machine(struct machines *machines) .mmap = { .header = { .misc = PERF_RECORD_MISC_USER, }, .pid = fake_mmap_info[i].pid, + .tid = fake_mmap_info[i].pid, .start = fake_mmap_info[i].start, .len = 0x1000ULL, .pgoff = 0ULL, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3aa555ff9d89..809b4c50beae 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1236,6 +1236,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, struct dso *dso = map->dso; char *filename; const char *d_filename; + const char *evsel_name = perf_evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); struct disasm_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); @@ -1243,7 +1244,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int more = 0; u64 len; int width = 8; - int namelen; + int namelen, evsel_name_len, graph_dotted_len; filename = strdup(dso->long_name); if (!filename) @@ -1256,14 +1257,17 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, len = symbol__size(sym); namelen = strlen(d_filename); + evsel_name_len = strlen(evsel_name); if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - printf(" %-*.*s| Source code & Disassembly of %s\n", - width, width, "Percent", d_filename); - printf("-%-*.*s-------------------------------------\n", - width+namelen, width+namelen, graph_dotted_line); + printf(" %-*.*s| Source code & Disassembly of %s for %s\n", + width, width, "Percent", d_filename, evsel_name); + + graph_dotted_len = width + namelen + evsel_name_len; + printf("-%-*.*s-----------------------------------------\n", + graph_dotted_len, graph_dotted_len, graph_dotted_line); if (verbose) symbol__annotate_hits(sym, evsel); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 3e580be0f6fb..9d12aa6dd485 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,6 +1,7 @@ #include <linux/types.h> #include "event.h" #include "debug.h" +#include "hist.h" #include "machine.h" #include "sort.h" #include "string.h" @@ -445,6 +446,9 @@ int perf_event__synthesize_threads(struct perf_tool *tool, union perf_event *comm_event, *mmap_event, *fork_event; int err = -1; + if (machine__is_default_guest(machine)) + return 0; + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); if (comm_event == NULL) goto out; @@ -457,9 +461,6 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (fork_event == NULL) goto out_free_mmap; - if (machine__is_default_guest(machine)) - return 0; - snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); proc = opendir(proc_path); @@ -705,7 +706,7 @@ void thread__find_addr_map(struct thread *thread, al->thread = thread; al->addr = addr; al->cpumode = cpumode; - al->filtered = false; + al->filtered = 0; if (machine == NULL) { al->map = NULL; @@ -731,11 +732,11 @@ void thread__find_addr_map(struct thread *thread, if ((cpumode == PERF_RECORD_MISC_GUEST_USER || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) && !perf_guest) - al->filtered = true; + al->filtered |= (1 << HIST_FILTER__GUEST); if ((cpumode == PERF_RECORD_MISC_USER || cpumode == PERF_RECORD_MISC_KERNEL) && !perf_host) - al->filtered = true; + al->filtered |= (1 << HIST_FILTER__HOST); return; } @@ -792,9 +793,6 @@ int perf_event__preprocess_sample(const union perf_event *event, if (thread == NULL) return -1; - if (thread__is_filtered(thread)) - goto out_filtered; - dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); /* * Have we already created the kernel maps for this machine? @@ -812,6 +810,10 @@ int perf_event__preprocess_sample(const union perf_event *event, dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : "<not found>"); + + if (thread__is_filtered(thread)) + al->filtered |= (1 << HIST_FILTER__THREAD); + al->sym = NULL; al->cpu = sample->cpu; @@ -823,8 +825,9 @@ int perf_event__preprocess_sample(const union perf_event *event, dso->short_name) || (dso->short_name != dso->long_name && strlist__has_entry(symbol_conf.dso_list, - dso->long_name))))) - goto out_filtered; + dso->long_name))))) { + al->filtered |= (1 << HIST_FILTER__DSO); + } al->sym = map__find_symbol(al->map, al->addr, machine->symbol_filter); @@ -832,12 +835,9 @@ int perf_event__preprocess_sample(const union perf_event *event, if (symbol_conf.sym_list && (!al->sym || !strlist__has_entry(symbol_conf.sym_list, - al->sym->name))) - goto out_filtered; - - return 0; + al->sym->name))) { + al->filtered |= (1 << HIST_FILTER__SYMBOL); + } -out_filtered: - al->filtered = true; return 0; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 26b67b11c65b..5c28d82b76c4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1023,7 +1023,7 @@ retry_sample_id: group_fd = get_group_fd(evsel, cpu, thread); retry_open: - pr_debug2("perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n", + pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n", pid, cpus->map[cpu], group_fd, flags); FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, @@ -1032,7 +1032,7 @@ retry_open: group_fd, flags); if (FD(evsel, cpu, thread) < 0) { err = -errno; - pr_debug2("perf_event_open failed, error %d\n", + pr_debug2("sys_perf_event_open failed, error %d\n", err); goto try_fallback; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0466efa71140..f38590d7561b 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -13,13 +13,6 @@ static bool hists__filter_entry_by_thread(struct hists *hists, static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he); -enum hist_filter { - HIST_FILTER__DSO, - HIST_FILTER__THREAD, - HIST_FILTER__PARENT, - HIST_FILTER__SYMBOL, -}; - struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_REL, .min_percent = 0.5, @@ -429,7 +422,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .weight = weight, }, .parent = sym_parent, - .filtered = symbol__parent_filter(sym_parent), + .filtered = symbol__parent_filter(sym_parent) | al->filtered, .hists = hists, .branch_info = bi, .mem_info = mi, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0c76bf972736..1f1f513dfe7f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -14,6 +14,15 @@ struct hist_entry; struct addr_location; struct symbol; +enum hist_filter { + HIST_FILTER__DSO, + HIST_FILTER__THREAD, + HIST_FILTER__PARENT, + HIST_FILTER__SYMBOL, + HIST_FILTER__GUEST, + HIST_FILTER__HOST, +}; + /* * The kernel collects the number of events it couldn't send in a stretch and * when possible sends this number in a PERF_RECORD_LOST event. The number of diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index d8c927c868ee..9844c31b7c2b 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -94,12 +94,6 @@ static inline int scnprintf(char * buf, size_t size, const char * fmt, ...) return (i >= ssize) ? (ssize - 1) : i; } -static inline unsigned long -simple_strtoul(const char *nptr, char **endptr, int base) -{ - return strtoul(nptr, endptr, base); -} - int eprintf(int level, const char *fmt, ...) __attribute__((format(printf, 2, 3))); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 5cecd98c1bc0..a53cd0b8c151 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1027,7 +1027,7 @@ int machine__process_mmap2_event(struct machine *machine, } thread = machine__findnew_thread(machine, event->mmap2.pid, - event->mmap2.pid); + event->mmap2.tid); if (thread == NULL) goto out_problem; @@ -1075,7 +1075,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event } thread = machine__findnew_thread(machine, event->mmap.pid, - event->mmap.pid); + event->mmap.tid); if (thread == NULL) goto out_problem; @@ -1312,7 +1312,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, continue; } - al.filtered = false; + al.filtered = 0; thread__find_addr_location(thread, machine, cpumode, MAP__FUNCTION, ip, &al); if (al.sym != NULL) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2553ae04b788..501e4e722e8e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -186,7 +186,7 @@ struct addr_location { struct symbol *sym; u64 addr; char level; - bool filtered; + u8 filtered; u8 cpumode; s32 cpu; }; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 9a070743270c..9b29f085aede 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -44,12 +44,6 @@ void thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); size_t thread__fprintf(struct thread *thread, FILE *fp); -static inline struct map *thread__find_map(struct thread *thread, - enum map_type type, u64 addr) -{ - return thread ? map_groups__find(&thread->mg, type, addr) : NULL; -} - void thread__find_addr_map(struct thread *thread, struct machine *machine, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); |