diff options
author | Ingo Molnar <mingo@kernel.org> | 2013-08-12 10:14:47 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-08-12 10:14:47 +0200 |
commit | 0a3d23a2568ed5e73bd4fb532dc672fa9f03b1f1 (patch) | |
tree | e1b8974fc17cf810209d2b76d4d76ce1ab46d0b0 | |
parent | 93786a5f6aeb9c032c1c240246c5aabcf457b38f (diff) | |
parent | cecb977e24da1465cdb0ff2d10d22e5891dc3e6c (diff) | |
download | linux-0a3d23a2568ed5e73bd4fb532dc672fa9f03b1f1.tar.bz2 |
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
* Do annotation using /proc/kcore and /proc/kallsyms, removing the
need for a vmlinux file for kernel assembly annotation. This also improves
this use case because vmlinux has just the initial kernel image, not
what is actually in use after the various code patching done by things like
alternatives, etc. From Adrian Hunter.
* Add various improvements and fixes to the "vmlinux matches kallsyms"
'perf test' entry, related to the /proc/kcore annotation feature.
* Add --initial-delay option to 'perf stat' to skip measuring for
the startup phase, from Andi Kleen.
* Add perf kvm stat live mode that combines aspects of 'perf kvm stat' record
and report, from David Ahern.
* Add option to analyze a specific VM in perf kvm stat report, from David Ahern.
* Do not require /lib/modules/* on a guest, fix from Jason Wessel.
* Group leader sampling, which allows just one event in a group to sample while
the other events just have their values read, from Jiri Olsa.
* Add support for a new modifier "D", which requests that the event, or group
of events, be pinned to the PMU, from Michael Ellerman.
* Fix segmentation fault on the gtk browser, from Namhyung Kim.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
50 files changed, 2523 insertions, 192 deletions
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index efef1d37a371..62c25a25291c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -321,6 +321,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR('$', 7, u64 *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/kernel/events/core.c b/kernel/events/core.c index 916cf1f593b4..e82e70025d42 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3568,6 +3568,15 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_EVENT_IOC_PERIOD: return perf_event_period(event, (u64 __user *)arg); + case PERF_EVENT_IOC_ID: + { + u64 id = primary_event_id(event); + + if (copy_to_user((void __user *)arg, &id, sizeof(id))) + return -EFAULT; + return 0; + } + case PERF_EVENT_IOC_SET_OUTPUT: { int ret; @@ -4379,7 +4388,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, list_for_each_entry(sub, &leader->sibling_list, group_entry) { n = 0; - if (sub != event) + if ((sub != event) && + (sub->state == PERF_EVENT_STATE_ACTIVE)) sub->pmu->read(sub); values[n++] = perf_event_count(sub); diff --git a/tools/lib/lk/Makefile b/tools/lib/lk/Makefile index 280dd8205430..3dba0a4aebbf 100644 --- a/tools/lib/lk/Makefile +++ b/tools/lib/lk/Makefile @@ -3,21 +3,6 @@ include ../../scripts/Makefile.include CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar -# Makefiles suck: This macro sets a default value of $(2) for the -# variable named by $(1), unless the variable has been set by -# environment or command line. This is necessary for CC and AR -# because make sets default values, so the simpler ?= approach -# won't work as expected. 
-define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) - # guard against environment variables LIB_H= LIB_OBJS= diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 826f3d6d1d28..6fce6a622206 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -29,6 +29,8 @@ counted. The following modifiers exist: G - guest counting (in KVM guests) H - host counting (not in KVM guests) p - precise level + S - read sample value (PERF_SAMPLE_READ) + D - pin the event to the PMU The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 2fe87fb558f0..73c9759005a3 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical cores. To enable this mod use --per-core in addition to -a. (system-wide). The output includes the core number and the number of online logical processors on that physical processor. +-D msecs:: +--initial-delay msecs:: +After starting the program, wait msecs before measuring. This is useful to +filter out the startup phase of the program, which is often very different. 
+ EXAMPLES -------- diff --git a/tools/perf/Makefile b/tools/perf/Makefile index bfd12d02a304..e0d3d9f96771 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -392,6 +392,7 @@ LIB_OBJS += $(OUTPUT)tests/sw-clock.o ifeq ($(ARCH),x86) LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o endif +LIB_OBJS += $(OUTPUT)tests/code-reading.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-bench.o diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index f1117441cdcd..9570c2b0f83c 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -10,11 +10,11 @@ u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc) { - u64 time, quot, rem; + u64 t, quot, rem; - time = ns - tc->time_zero; - quot = time / tc->time_mult; - rem = time % tc->time_mult; + t = ns - tc->time_zero; + quot = t / tc->time_mult; + rem = t % tc->time_mult; return (quot << tc->time_shift) + (rem << tc->time_shift) / tc->time_mult; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 1d8de2e4a407..f012a98c726c 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -206,7 +206,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, } thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - event->ip.ip, &al); + event->ip.ip, &al, NULL); if (al.map != NULL) { if (!al.map->dso->hit) { diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 24b78aecc928..2ceec817a2fb 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -2,22 +2,26 @@ #include "perf.h" #include "util/evsel.h" +#include "util/evlist.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" #include "util/thread.h" #include "util/header.h" #include "util/session.h" - +#include "util/intlist.h" #include "util/parse-options.h" #include "util/trace-event.h" #include "util/debug.h" #include <lk/debugfs.h> #include "util/tool.h" #include "util/stat.h" 
+#include "util/top.h" #include <sys/prctl.h> +#include <sys/timerfd.h> +#include <termios.h> #include <semaphore.h> #include <pthread.h> #include <math.h> @@ -82,6 +86,8 @@ struct exit_reasons_table { struct perf_kvm_stat { struct perf_tool tool; + struct perf_record_opts opts; + struct perf_evlist *evlist; struct perf_session *session; const char *file_name; @@ -96,10 +102,19 @@ struct perf_kvm_stat { struct kvm_events_ops *events_ops; key_cmp_fun compare; struct list_head kvm_events_cache[EVENTS_CACHE_SIZE]; + u64 total_time; u64 total_count; + u64 lost_events; + + const char *pid_str; + struct intlist *pid_list; struct rb_root result; + + int timerfd; + unsigned int display_time; + bool live; }; @@ -320,6 +335,28 @@ static void init_kvm_event_record(struct perf_kvm_stat *kvm) INIT_LIST_HEAD(&kvm->kvm_events_cache[i]); } +static void clear_events_cache_stats(struct list_head *kvm_events_cache) +{ + struct list_head *head; + struct kvm_event *event; + unsigned int i; + int j; + + for (i = 0; i < EVENTS_CACHE_SIZE; i++) { + head = &kvm_events_cache[i]; + list_for_each_entry(event, head, hash_entry) { + /* reset stats for event */ + event->total.time = 0; + init_stats(&event->total.stats); + + for (j = 0; j < event->max_vcpu; ++j) { + event->vcpu[j].time = 0; + init_stats(&event->vcpu[j].stats); + } + } + } +} + static int kvm_events_hash_fn(u64 key) { return key & (EVENTS_CACHE_SIZE - 1); @@ -472,7 +509,11 @@ static bool handle_end_event(struct perf_kvm_stat *kvm, vcpu_record->last_event = NULL; vcpu_record->start_time = 0; - BUG_ON(timestamp < time_begin); + /* seems to happen once in a while during live mode */ + if (timestamp < time_begin) { + pr_debug("End time before begin time; skipping event.\n"); + return true; + } time_diff = timestamp - time_begin; return update_kvm_event(event, vcpu, time_diff); @@ -550,6 +591,8 @@ static int compare_kvm_event_ ## func(struct kvm_event *one, \ GET_EVENT_KEY(time, time); COMPARE_EVENT_KEY(count, stats.n); 
COMPARE_EVENT_KEY(mean, stats.mean); +GET_EVENT_KEY(max, stats.max); +GET_EVENT_KEY(min, stats.min); #define DEF_SORT_NAME_KEY(name, compare_key) \ { #name, compare_kvm_event_ ## compare_key } @@ -639,43 +682,81 @@ static struct kvm_event *pop_from_result(struct rb_root *result) return container_of(node, struct kvm_event, rb); } -static void print_vcpu_info(int vcpu) +static void print_vcpu_info(struct perf_kvm_stat *kvm) { + int vcpu = kvm->trace_vcpu; + pr_info("Analyze events for "); + if (kvm->live) { + if (kvm->opts.target.system_wide) + pr_info("all VMs, "); + else if (kvm->opts.target.pid) + pr_info("pid(s) %s, ", kvm->opts.target.pid); + else + pr_info("dazed and confused on what is monitored, "); + } + if (vcpu == -1) pr_info("all VCPUs:\n\n"); else pr_info("VCPU %d:\n\n", vcpu); } +static void show_timeofday(void) +{ + char date[64]; + struct timeval tv; + struct tm ltime; + + gettimeofday(&tv, NULL); + if (localtime_r(&tv.tv_sec, <ime)) { + strftime(date, sizeof(date), "%H:%M:%S", <ime); + pr_info("%s.%06ld", date, tv.tv_usec); + } else + pr_info("00:00:00.000000"); + + return; +} + static void print_result(struct perf_kvm_stat *kvm) { char decode[20]; struct kvm_event *event; int vcpu = kvm->trace_vcpu; + if (kvm->live) { + puts(CONSOLE_CLEAR); + show_timeofday(); + } + pr_info("\n\n"); - print_vcpu_info(vcpu); + print_vcpu_info(kvm); pr_info("%20s ", kvm->events_ops->name); pr_info("%10s ", "Samples"); pr_info("%9s ", "Samples%"); pr_info("%9s ", "Time%"); + pr_info("%10s ", "Min Time"); + pr_info("%10s ", "Max Time"); pr_info("%16s ", "Avg time"); pr_info("\n\n"); while ((event = pop_from_result(&kvm->result))) { - u64 ecount, etime; + u64 ecount, etime, max, min; ecount = get_event_count(event, vcpu); etime = get_event_time(event, vcpu); + max = get_event_max(event, vcpu); + min = get_event_min(event, vcpu); kvm->events_ops->decode_key(kvm, &event->key, decode); pr_info("%20s ", decode); pr_info("%10llu ", (unsigned long long)ecount); 
pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100); pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100); + pr_info("%8" PRIu64 "us ", min / 1000); + pr_info("%8" PRIu64 "us ", max / 1000); pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3, kvm_event_rel_stddev(vcpu, event)); pr_info("\n"); @@ -683,6 +764,29 @@ static void print_result(struct perf_kvm_stat *kvm) pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n", kvm->total_count, kvm->total_time / 1e3); + + if (kvm->lost_events) + pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events); +} + +static int process_lost_event(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool); + + kvm->lost_events++; + return 0; +} + +static bool skip_sample(struct perf_kvm_stat *kvm, + struct perf_sample *sample) +{ + if (kvm->pid_list && intlist__find(kvm->pid_list, sample->pid) == NULL) + return true; + + return false; } static int process_sample_event(struct perf_tool *tool, @@ -691,10 +795,14 @@ static int process_sample_event(struct perf_tool *tool, struct perf_evsel *evsel, struct machine *machine) { - struct thread *thread = machine__findnew_thread(machine, sample->tid); + struct thread *thread; struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool); + if (skip_sample(kvm, sample)) + return 0; + + thread = machine__findnew_thread(machine, sample->tid); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", event->header.type); @@ -707,10 +815,20 @@ static int process_sample_event(struct perf_tool *tool, return 0; } -static int get_cpu_isa(struct perf_session *session) +static int cpu_isa_config(struct perf_kvm_stat *kvm) { - char *cpuid = session->header.env.cpuid; - int isa; + char buf[64], *cpuid; + int err, isa; + + if (kvm->live) { + err = 
get_cpuid(buf, sizeof(buf)); + if (err != 0) { + pr_err("Failed to look up CPU type (Intel or AMD)\n"); + return err; + } + cpuid = buf; + } else + cpuid = kvm->session->header.env.cpuid; if (strstr(cpuid, "Intel")) isa = 1; @@ -718,10 +836,361 @@ static int get_cpu_isa(struct perf_session *session) isa = 0; else { pr_err("CPU %s is not supported.\n", cpuid); - isa = -ENOTSUP; + return -ENOTSUP; + } + + if (isa == 1) { + kvm->exit_reasons = vmx_exit_reasons; + kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons); + kvm->exit_reasons_isa = "VMX"; + } + + return 0; +} + +static bool verify_vcpu(int vcpu) +{ + if (vcpu != -1 && vcpu < 0) { + pr_err("Invalid vcpu:%d.\n", vcpu); + return false; + } + + return true; +} + +/* keeping the max events to a modest level to keep + * the processing of samples per mmap smooth. + */ +#define PERF_KVM__MAX_EVENTS_PER_MMAP 25 + +static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, + u64 *mmap_time) +{ + union perf_event *event; + struct perf_sample sample; + s64 n = 0; + int err; + + *mmap_time = ULLONG_MAX; + while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { + err = perf_evlist__parse_sample(kvm->evlist, event, &sample); + if (err) { + pr_err("Failed to parse sample\n"); + return -1; + } + + err = perf_session_queue_event(kvm->session, event, &sample, 0); + if (err) { + pr_err("Failed to enqueue sample: %d\n", err); + return -1; + } + + /* save time stamp of our first sample for this mmap */ + if (n == 0) + *mmap_time = sample.time; + + /* limit events per mmap handled all at once */ + n++; + if (n == PERF_KVM__MAX_EVENTS_PER_MMAP) + break; + } + + return n; +} + +static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm) +{ + int i, err, throttled = 0; + s64 n, ntotal = 0; + u64 flush_time = ULLONG_MAX, mmap_time; + + for (i = 0; i < kvm->evlist->nr_mmaps; i++) { + n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time); + if (n < 0) + return -1; + + /* flush time is going to be the minimum of all the 
individual + * mmap times. Essentially, we flush all the samples queued up + * from the last pass under our minimal start time -- that leaves + * a very small race for samples to come in with a lower timestamp. + * The ioctl to return the perf_clock timestamp should close the + * race entirely. + */ + if (mmap_time < flush_time) + flush_time = mmap_time; + + ntotal += n; + if (n == PERF_KVM__MAX_EVENTS_PER_MMAP) + throttled = 1; + } + + /* flush queue after each round in which we processed events */ + if (ntotal) { + kvm->session->ordered_samples.next_flush = flush_time; + err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session); + if (err) { + if (kvm->lost_events) + pr_info("\nLost events: %" PRIu64 "\n\n", + kvm->lost_events); + return err; + } + } + + return throttled; +} + +static volatile int done; + +static void sig_handler(int sig __maybe_unused) +{ + done = 1; +} + +static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm) +{ + struct itimerspec new_value; + int rc = -1; + + kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK); + if (kvm->timerfd < 0) { + pr_err("timerfd_create failed\n"); + goto out; + } + + new_value.it_value.tv_sec = kvm->display_time; + new_value.it_value.tv_nsec = 0; + new_value.it_interval.tv_sec = kvm->display_time; + new_value.it_interval.tv_nsec = 0; + + if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) { + pr_err("timerfd_settime failed: %d\n", errno); + close(kvm->timerfd); + goto out; + } + + rc = 0; +out: + return rc; +} + +static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm) +{ + uint64_t c; + int rc; + + rc = read(kvm->timerfd, &c, sizeof(uint64_t)); + if (rc < 0) { + if (errno == EAGAIN) + return 0; + + pr_err("Failed to read timer fd: %d\n", errno); + return -1; + } + + if (rc != sizeof(uint64_t)) { + pr_err("Error reading timer fd - invalid size returned\n"); + return -1; + } + + if (c != 1) + pr_debug("Missed timer beats: %" PRIu64 "\n", c-1); + + /* update display */ + 
sort_result(kvm); + print_result(kvm); + + /* reset counts */ + clear_events_cache_stats(kvm->kvm_events_cache); + kvm->total_count = 0; + kvm->total_time = 0; + kvm->lost_events = 0; + + return 0; +} + +static int fd_set_nonblock(int fd) +{ + long arg = 0; + + arg = fcntl(fd, F_GETFL); + if (arg < 0) { + pr_err("Failed to get current flags for fd %d\n", fd); + return -1; + } + + if (fcntl(fd, F_SETFL, arg | O_NONBLOCK) < 0) { + pr_err("Failed to set non-block option on fd %d\n", fd); + return -1; + } + + return 0; +} + +static +int perf_kvm__handle_stdin(struct termios *tc_now, struct termios *tc_save) +{ + int c; + + tcsetattr(0, TCSANOW, tc_now); + c = getc(stdin); + tcsetattr(0, TCSAFLUSH, tc_save); + + if (c == 'q') + return 1; + + return 0; +} + +static int kvm_events_live_report(struct perf_kvm_stat *kvm) +{ + struct pollfd *pollfds = NULL; + int nr_fds, nr_stdin, ret, err = -EINVAL; + struct termios tc, save; + + /* live flag must be set first */ + kvm->live = true; + + ret = cpu_isa_config(kvm); + if (ret < 0) + return ret; + + if (!verify_vcpu(kvm->trace_vcpu) || + !select_key(kvm) || + !register_kvm_events_ops(kvm)) { + goto out; + } + + init_kvm_event_record(kvm); + + tcgetattr(0, &save); + tc = save; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; + + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + + /* copy pollfds -- need to add timerfd and stdin */ + nr_fds = kvm->evlist->nr_fds; + pollfds = zalloc(sizeof(struct pollfd) * (nr_fds + 2)); + if (!pollfds) { + err = -ENOMEM; + goto out; + } + memcpy(pollfds, kvm->evlist->pollfd, + sizeof(struct pollfd) * kvm->evlist->nr_fds); + + /* add timer fd */ + if (perf_kvm__timerfd_create(kvm) < 0) { + err = -1; + goto out; } - return isa; + pollfds[nr_fds].fd = kvm->timerfd; + pollfds[nr_fds].events = POLLIN; + nr_fds++; + + pollfds[nr_fds].fd = fileno(stdin); + pollfds[nr_fds].events = POLLIN; + nr_stdin = nr_fds; + nr_fds++; + if (fd_set_nonblock(fileno(stdin)) != 0) 
+ goto out; + + /* everything is good - enable the events and process */ + perf_evlist__enable(kvm->evlist); + + while (!done) { + int rc; + + rc = perf_kvm__mmap_read(kvm); + if (rc < 0) + break; + + err = perf_kvm__handle_timerfd(kvm); + if (err) + goto out; + + if (pollfds[nr_stdin].revents & POLLIN) + done = perf_kvm__handle_stdin(&tc, &save); + + if (!rc && !done) + err = poll(pollfds, nr_fds, 100); + } + + perf_evlist__disable(kvm->evlist); + + if (err == 0) { + sort_result(kvm); + print_result(kvm); + } + +out: + if (kvm->timerfd >= 0) + close(kvm->timerfd); + + if (pollfds) + free(pollfds); + + return err; +} + +static int kvm_live_open_events(struct perf_kvm_stat *kvm) +{ + int err, rc = -1; + struct perf_evsel *pos; + struct perf_evlist *evlist = kvm->evlist; + + perf_evlist__config(evlist, &kvm->opts); + + /* + * Note: exclude_{guest,host} do not apply here. + * This command processes KVM tracepoints from host only + */ + list_for_each_entry(pos, &evlist->entries, node) { + struct perf_event_attr *attr = &pos->attr; + + /* make sure these *are* set */ + attr->sample_type |= PERF_SAMPLE_TID; + attr->sample_type |= PERF_SAMPLE_TIME; + attr->sample_type |= PERF_SAMPLE_CPU; + attr->sample_type |= PERF_SAMPLE_RAW; + /* make sure these are *not*; want as small a sample as possible */ + attr->sample_type &= ~PERF_SAMPLE_PERIOD; + attr->sample_type &= ~PERF_SAMPLE_IP; + attr->sample_type &= ~PERF_SAMPLE_CALLCHAIN; + attr->sample_type &= ~PERF_SAMPLE_ADDR; + attr->sample_type &= ~PERF_SAMPLE_READ; + attr->mmap = 0; + attr->comm = 0; + attr->task = 0; + + attr->sample_period = 1; + + attr->watermark = 0; + attr->wakeup_events = 1000; + + /* will enable all once we are ready */ + attr->disabled = 1; + } + + err = perf_evlist__open(evlist); + if (err < 0) { + printf("Couldn't create the events: %s\n", strerror(errno)); + goto out; + } + + if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) { + ui__error("Failed to mmap the events: %s\n", 
strerror(errno)); + perf_evlist__close(evlist); + goto out; + } + + rc = 0; + +out: + return rc; } static int read_events(struct perf_kvm_stat *kvm) @@ -749,28 +1218,24 @@ static int read_events(struct perf_kvm_stat *kvm) * Do not use 'isa' recorded in kvm_exit tracepoint since it is not * traced in the old kernel. */ - ret = get_cpu_isa(kvm->session); - + ret = cpu_isa_config(kvm); if (ret < 0) return ret; - if (ret == 1) { - kvm->exit_reasons = vmx_exit_reasons; - kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons); - kvm->exit_reasons_isa = "VMX"; - } - return perf_session__process_events(kvm->session, &kvm->tool); } -static bool verify_vcpu(int vcpu) +static int parse_target_str(struct perf_kvm_stat *kvm) { - if (vcpu != -1 && vcpu < 0) { - pr_err("Invalid vcpu:%d.\n", vcpu); - return false; + if (kvm->pid_str) { + kvm->pid_list = intlist__new(kvm->pid_str); + if (kvm->pid_list == NULL) { + pr_err("Error parsing process id string\n"); + return -EINVAL; + } } - return true; + return 0; } static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm) @@ -778,6 +1243,9 @@ static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm) int ret = -EINVAL; int vcpu = kvm->trace_vcpu; + if (parse_target_str(kvm) != 0) + goto exit; + if (!verify_vcpu(vcpu)) goto exit; @@ -801,16 +1269,11 @@ exit: return ret; } -static const char * const record_args[] = { - "record", - "-R", - "-f", - "-m", "1024", - "-c", "1", - "-e", "kvm:kvm_entry", - "-e", "kvm:kvm_exit", - "-e", "kvm:kvm_mmio", - "-e", "kvm:kvm_pio", +static const char * const kvm_events_tp[] = { + "kvm:kvm_entry", + "kvm:kvm_exit", + "kvm:kvm_mmio", + "kvm:kvm_pio", }; #define STRDUP_FAIL_EXIT(s) \ @@ -826,8 +1289,16 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) { unsigned int rec_argc, i, j; const char **rec_argv; + const char * const record_args[] = { + "record", + "-R", + "-f", + "-m", "1024", + "-c", "1", + }; - rec_argc = ARRAY_SIZE(record_args) + argc + 2; + rec_argc = 
ARRAY_SIZE(record_args) + argc + 2 + + 2 * ARRAY_SIZE(kvm_events_tp); rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) @@ -836,6 +1307,11 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]); + for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) { + rec_argv[i++] = "-e"; + rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]); + } + rec_argv[i++] = STRDUP_FAIL_EXIT("-o"); rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name); @@ -856,6 +1332,8 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv) OPT_STRING('k', "key", &kvm->sort_key, "sort-key", "key for sorting: sample(sort by samples number)" " time (sort by avg time)"), + OPT_STRING('p', "pid", &kvm->pid_str, "pid", + "analyze events only for given process id(s)"), OPT_END() }; @@ -878,6 +1356,186 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv) return kvm_events_report_vcpu(kvm); } +static struct perf_evlist *kvm_live_event_list(void) +{ + struct perf_evlist *evlist; + char *tp, *name, *sys; + unsigned int j; + int err = -1; + + evlist = perf_evlist__new(); + if (evlist == NULL) + return NULL; + + for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) { + + tp = strdup(kvm_events_tp[j]); + if (tp == NULL) + goto out; + + /* split tracepoint into subsystem and name */ + sys = tp; + name = strchr(tp, ':'); + if (name == NULL) { + pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n", + kvm_events_tp[j]); + free(tp); + goto out; + } + *name = '\0'; + name++; + + if (perf_evlist__add_newtp(evlist, sys, name, NULL)) { + pr_err("Failed to add %s tracepoint to the list\n", kvm_events_tp[j]); + free(tp); + goto out; + } + + free(tp); + } + + err = 0; + +out: + if (err) { + perf_evlist__delete(evlist); + evlist = NULL; + } + + return evlist; +} + +static int kvm_events_live(struct perf_kvm_stat *kvm, + int argc, const char **argv) +{ + 
char errbuf[BUFSIZ]; + int err; + + const struct option live_options[] = { + OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid", + "record events on existing process id"), + OPT_UINTEGER('m', "mmap-pages", &kvm->opts.mmap_pages, + "number of mmap data pages"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), + OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide, + "system-wide collection from all CPUs"), + OPT_UINTEGER('d', "display", &kvm->display_time, + "time in seconds between display updates"), + OPT_STRING(0, "event", &kvm->report_event, "report event", + "event for reporting: vmexit, mmio, ioport"), + OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu, + "vcpu id to report"), + OPT_STRING('k', "key", &kvm->sort_key, "sort-key", + "key for sorting: sample(sort by samples number)" + " time (sort by avg time)"), + OPT_END() + }; + const char * const live_usage[] = { + "perf kvm stat live [<options>]", + NULL + }; + + + /* event handling */ + kvm->tool.sample = process_sample_event; + kvm->tool.comm = perf_event__process_comm; + kvm->tool.exit = perf_event__process_exit; + kvm->tool.fork = perf_event__process_fork; + kvm->tool.lost = process_lost_event; + kvm->tool.ordered_samples = true; + perf_tool__fill_defaults(&kvm->tool); + + /* set defaults */ + kvm->display_time = 1; + kvm->opts.user_interval = 1; + kvm->opts.mmap_pages = 512; + kvm->opts.target.uses_mmap = false; + kvm->opts.target.uid_str = NULL; + kvm->opts.target.uid = UINT_MAX; + + symbol__init(); + disable_buildid_cache(); + + use_browser = 0; + setup_browser(false); + + if (argc) { + argc = parse_options(argc, argv, live_options, + live_usage, 0); + if (argc) + usage_with_options(live_usage, live_options); + } + + /* + * target related setups + */ + err = perf_target__validate(&kvm->opts.target); + if (err) { + perf_target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ); + ui__warning("%s", errbuf); + } + + if (perf_target__none(&kvm->opts.target)) + 
kvm->opts.target.system_wide = true; + + + /* + * generate the event list + */ + kvm->evlist = kvm_live_event_list(); + if (kvm->evlist == NULL) { + err = -1; + goto out; + } + + symbol_conf.nr_events = kvm->evlist->nr_entries; + + if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0) + usage_with_options(live_usage, live_options); + + /* + * perf session + */ + kvm->session = perf_session__new(NULL, O_WRONLY, false, false, &kvm->tool); + if (kvm->session == NULL) { + err = -ENOMEM; + goto out; + } + kvm->session->evlist = kvm->evlist; + perf_session__set_id_hdr_size(kvm->session); + + + if (perf_target__has_task(&kvm->opts.target)) + perf_event__synthesize_thread_map(&kvm->tool, + kvm->evlist->threads, + perf_event__process, + &kvm->session->machines.host); + else + perf_event__synthesize_threads(&kvm->tool, perf_event__process, + &kvm->session->machines.host); + + + err = kvm_live_open_events(kvm); + if (err) + goto out; + + err = kvm_events_live_report(kvm); + +out: + exit_browser(0); + + if (kvm->session) + perf_session__delete(kvm->session); + kvm->session = NULL; + if (kvm->evlist) { + perf_evlist__delete_maps(kvm->evlist); + perf_evlist__delete(kvm->evlist); + } + + return err; +} + static void print_kvm_stat_usage(void) { printf("Usage: perf kvm stat <command>\n\n"); @@ -885,6 +1543,7 @@ static void print_kvm_stat_usage(void) printf("# Available commands:\n"); printf("\trecord: record kvm events\n"); printf("\treport: report statistical data of kvm events\n"); + printf("\tlive: live reporting of statistical data of kvm events\n"); printf("\nOtherwise, it is the alias of 'perf stat':\n"); } @@ -914,6 +1573,9 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv) if (!strncmp(argv[1], "rep", 3)) return kvm_events_report(&kvm, argc - 1 , argv + 1); + if (!strncmp(argv[1], "live", 4)) + return kvm_events_live(&kvm, argc - 1 , argv + 1); + perf_stat: return cmd_stat(argc, argv, NULL); } diff --git a/tools/perf/builtin-script.c 
b/tools/perf/builtin-script.c index 1cad37014673..cd616ff5d221 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -341,10 +341,10 @@ static void print_sample_addr(union perf_event *event, return; thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - sample->addr, &al); + sample->addr, &al, NULL); if (!al.map) thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE, - sample->addr, &al); + sample->addr, &al, NULL); al.cpu = sample->cpu; al.sym = NULL; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 352fbd7ff4a1..f686d5ff594e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -100,6 +100,7 @@ static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; static unsigned int interval = 0; +static unsigned int initial_delay = 0; static bool forever = false; static struct timespec ref_time; static struct cpu_map *aggr_map; @@ -254,7 +255,8 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) if (!perf_target__has_task(&target) && perf_evsel__is_group_leader(evsel)) { attr->disabled = 1; - attr->enable_on_exec = 1; + if (!initial_delay) + attr->enable_on_exec = 1; } return perf_evsel__open_per_thread(evsel, evsel_list->threads); @@ -414,6 +416,22 @@ static void print_interval(void) list_for_each_entry(counter, &evsel_list->entries, node) print_counter_aggr(counter, prefix); } + + fflush(output); +} + +static void handle_initial_delay(void) +{ + struct perf_evsel *counter; + + if (initial_delay) { + const int ncpus = cpu_map__nr(evsel_list->cpus), + nthreads = thread_map__nr(evsel_list->threads); + + usleep(initial_delay * 1000); + list_for_each_entry(counter, &evsel_list->entries, node) + perf_evsel__enable(counter, ncpus, nthreads); + } } static int __run_perf_stat(int argc, const char **argv) @@ -486,6 +504,7 @@ static int __run_perf_stat(int argc, const char **argv) if (forks) { perf_evlist__start_workload(evsel_list); + 
handle_initial_delay(); if (interval) { while (!waitpid(child_pid, &status, WNOHANG)) { @@ -497,6 +516,7 @@ static int __run_perf_stat(int argc, const char **argv) if (WIFSIGNALED(status)) psignal(WTERMSIG(status), argv[0]); } else { + handle_initial_delay(); while (!done) { nanosleep(&ts, NULL); if (interval) @@ -1419,6 +1439,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), + OPT_UINTEGER('D', "delay", &initial_delay, + "ms to wait before starting measurement after program start"), OPT_END() }; const char * const stat_usage[] = { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index bbf463572777..440c3b371401 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -103,7 +103,8 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) /* * We can't annotate with just /proc/kallsyms */ - if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { + if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && + !dso__is_kcore(map->dso)) { pr_err("Can't annotate %s: No vmlinux file was found in the " "path\n", sym->name); sleep(1); @@ -238,8 +239,6 @@ out_unlock: pthread_mutex_unlock(&notes->lock); } -static const char CONSOLE_CLEAR[] = "[H[2J"; - static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, struct addr_location *al, struct perf_sample *sample) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0e4b67f6bbd1..da7ae01c8394 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -18,6 +18,7 @@ static struct syscall_fmt { } syscall_fmts[] = { { .name = "access", .errmsg = true, }, { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, + { .name = "connect", .errmsg = true, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", }, { .name = "fstatat", 
.errmsg = true, .alias = "newfstatat", }, { .name = "futex", .errmsg = true, }, diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index b11cca584238..2225162ee1fc 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py @@ -21,7 +21,7 @@ def main(): evsel = perf.evsel(task = 1, comm = 1, mmap = 0, wakeup_events = 1, watermark = 1, sample_id_all = 1, - sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) + sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU) evsel.open(cpus = cpus, threads = threads); evlist = perf.evlist(cpus, threads) evlist.add(evsel) diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling new file mode 100644 index 000000000000..658f5d60c873 --- /dev/null +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -0,0 +1,36 @@ +[config] +command = record +args = -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 + +[event-1:base-record] +fd=1 +group_fd=-1 +sample_type=343 +read_format=12 +inherit=0 + +[event-2:base-record] +fd=2 +group_fd=1 + +# cache-misses +type=0 +config=3 + +# default | PERF_SAMPLE_READ +sample_type=343 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP +read_format=12 + +mmap=0 +comm=0 +enable_on_exec=0 +disabled=0 + +# inherit is disabled for group sampling +inherit=0 + +# sampling disabled +sample_freq=0 +sample_period=0 diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index b7b4049fabbb..f5af19244a05 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -100,6 +100,10 @@ static struct test { }, #endif { + .desc = "Test object code reading", + .func = test__code_reading, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c new file mode 100644 index 000000000000..8e0943b966d7 --- /dev/null +++ b/tools/perf/tests/code-reading.c @@ -0,0 +1,573 @@ +#include <sys/types.h> 
+#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <inttypes.h> +#include <ctype.h> +#include <string.h> + +#include "parse-events.h" +#include "evlist.h" +#include "evsel.h" +#include "thread_map.h" +#include "cpumap.h" +#include "machine.h" +#include "event.h" +#include "thread.h" + +#include "tests.h" + +#define BUFSZ 1024 +#define READLEN 128 + +struct state { + u64 done[1024]; + size_t done_cnt; +}; + +static unsigned int hex(char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + return c - 'A' + 10; +} + +static void read_objdump_line(const char *line, size_t line_len, void **buf, + size_t *len) +{ + const char *p; + size_t i; + + /* Skip to a colon */ + p = strchr(line, ':'); + if (!p) + return; + i = p + 1 - line; + + /* Read bytes */ + while (*len) { + char c1, c2; + + /* Skip spaces */ + for (; i < line_len; i++) { + if (!isspace(line[i])) + break; + } + /* Get 2 hex digits */ + if (i >= line_len || !isxdigit(line[i])) + break; + c1 = line[i++]; + if (i >= line_len || !isxdigit(line[i])) + break; + c2 = line[i++]; + /* Followed by a space */ + if (i < line_len && line[i] && !isspace(line[i])) + break; + /* Store byte */ + *(unsigned char *)*buf = (hex(c1) << 4) | hex(c2); + *buf += 1; + *len -= 1; + } +} + +static int read_objdump_output(FILE *f, void **buf, size_t *len) +{ + char *line = NULL; + size_t line_len; + ssize_t ret; + int err = 0; + + while (1) { + ret = getline(&line, &line_len, f); + if (feof(f)) + break; + if (ret < 0) { + pr_debug("getline failed\n"); + err = -1; + break; + } + read_objdump_line(line, ret, buf, len); + } + + free(line); + + return err; +} + +static int read_via_objdump(const char *filename, u64 addr, void *buf, + size_t len) +{ + char cmd[PATH_MAX * 2]; + const char *fmt; + FILE *f; + int ret; + + fmt = "%s -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s"; + ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len, + 
filename); + if (ret <= 0 || (size_t)ret >= sizeof(cmd)) + return -1; + + pr_debug("Objdump command is: %s\n", cmd); + + /* Ignore objdump errors */ + strcat(cmd, " 2>/dev/null"); + + f = popen(cmd, "r"); + if (!f) { + pr_debug("popen failed\n"); + return -1; + } + + ret = read_objdump_output(f, &buf, &len); + if (len) { + pr_debug("objdump read too few bytes\n"); + if (!ret) + ret = len; + } + + pclose(f); + + return ret; +} + +static int read_object_code(u64 addr, size_t len, u8 cpumode, + struct thread *thread, struct machine *machine, + struct state *state) +{ + struct addr_location al; + unsigned char buf1[BUFSZ]; + unsigned char buf2[BUFSZ]; + size_t ret_len; + u64 objdump_addr; + int ret; + + pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); + + thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, addr, + &al, NULL); + if (!al.map || !al.map->dso) { + pr_debug("thread__find_addr_map failed\n"); + return -1; + } + + pr_debug("File is: %s\n", al.map->dso->long_name); + + if (al.map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && + !dso__is_kcore(al.map->dso)) { + pr_debug("Unexpected kernel address - skipping\n"); + return 0; + } + + pr_debug("On file address is: %#"PRIx64"\n", al.addr); + + if (len > BUFSZ) + len = BUFSZ; + + /* Do not go off the map */ + if (addr + len > al.map->end) + len = al.map->end - addr; + + /* Read the object code using perf */ + ret_len = dso__data_read_offset(al.map->dso, machine, al.addr, buf1, + len); + if (ret_len != len) { + pr_debug("dso__data_read_offset failed\n"); + return -1; + } + + /* + * Converting addresses for use by objdump requires more information. + * map__load() does that. See map__rip_2objdump() for details. 
+ */ + if (map__load(al.map, NULL)) + return -1; + + /* objdump struggles with kcore - try each map only once */ + if (dso__is_kcore(al.map->dso)) { + size_t d; + + for (d = 0; d < state->done_cnt; d++) { + if (state->done[d] == al.map->start) { + pr_debug("kcore map tested already"); + pr_debug(" - skipping\n"); + return 0; + } + } + if (state->done_cnt >= ARRAY_SIZE(state->done)) { + pr_debug("Too many kcore maps - skipping\n"); + return 0; + } + state->done[state->done_cnt++] = al.map->start; + } + + /* Read the object code using objdump */ + objdump_addr = map__rip_2objdump(al.map, al.addr); + ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len); + if (ret > 0) { + /* + * The kernel maps are inaccurate - assume objdump is right in + * that case. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + len -= ret; + if (len) { + pr_debug("Reducing len to %zu\n", len); + } else if (dso__is_kcore(al.map->dso)) { + /* + * objdump cannot handle very large segments + * that may be found in kcore. 
+ */ + pr_debug("objdump failed for kcore"); + pr_debug(" - skipping\n"); + return 0; + } else { + return -1; + } + } + } + if (ret < 0) { + pr_debug("read_via_objdump failed\n"); + return -1; + } + + /* The results should be identical */ + if (memcmp(buf1, buf2, len)) { + pr_debug("Bytes read differ from those read by objdump\n"); + return -1; + } + pr_debug("Bytes read match those read by objdump\n"); + + return 0; +} + +static int process_sample_event(struct machine *machine, + struct perf_evlist *evlist, + union perf_event *event, struct state *state) +{ + struct perf_sample sample; + struct thread *thread; + u8 cpumode; + + if (perf_evlist__parse_sample(evlist, event, &sample)) { + pr_debug("perf_evlist__parse_sample failed\n"); + return -1; + } + + thread = machine__findnew_thread(machine, sample.pid); + if (!thread) { + pr_debug("machine__findnew_thread failed\n"); + return -1; + } + + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + return read_object_code(sample.ip, READLEN, cpumode, thread, machine, + state); +} + +static int process_event(struct machine *machine, struct perf_evlist *evlist, + union perf_event *event, struct state *state) +{ + if (event->header.type == PERF_RECORD_SAMPLE) + return process_sample_event(machine, evlist, event, state); + + if (event->header.type < PERF_RECORD_MAX) + return machine__process_event(machine, event); + + return 0; +} + +static int process_events(struct machine *machine, struct perf_evlist *evlist, + struct state *state) +{ + union perf_event *event; + int i, ret; + + for (i = 0; i < evlist->nr_mmaps; i++) { + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { + ret = process_event(machine, evlist, event, state); + if (ret < 0) + return ret; + } + } + return 0; +} + +static int comp(const void *a, const void *b) +{ + return *(int *)a - *(int *)b; +} + +static void do_sort_something(void) +{ + size_t sz = 40960; + int buf[sz], i; + + for (i = 0; i < (int)sz; i++) + buf[i] = sz - i - 1; + 
+ qsort(buf, sz, sizeof(int), comp); + + for (i = 0; i < (int)sz; i++) { + if (buf[i] != i) { + pr_debug("qsort failed\n"); + break; + } + } +} + +static void sort_something(void) +{ + int i; + + for (i = 0; i < 10; i++) + do_sort_something(); +} + +static void syscall_something(void) +{ + int pipefd[2]; + int i; + + for (i = 0; i < 1000; i++) { + if (pipe(pipefd) < 0) { + pr_debug("pipe failed\n"); + break; + } + close(pipefd[1]); + close(pipefd[0]); + } +} + +static void fs_something(void) +{ + const char *test_file_name = "temp-perf-code-reading-test-file--"; + FILE *f; + int i; + + for (i = 0; i < 1000; i++) { + f = fopen(test_file_name, "w+"); + if (f) { + fclose(f); + unlink(test_file_name); + } + } +} + +static void do_something(void) +{ + fs_something(); + + sort_something(); + + syscall_something(); +} + +enum { + TEST_CODE_READING_OK, + TEST_CODE_READING_NO_VMLINUX, + TEST_CODE_READING_NO_KCORE, + TEST_CODE_READING_NO_ACCESS, + TEST_CODE_READING_NO_KERNEL_OBJ, +}; + +static int do_test_code_reading(bool try_kcore) +{ + struct machines machines; + struct machine *machine; + struct thread *thread; + struct perf_record_opts opts = { + .mmap_pages = UINT_MAX, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .freq = 4000, + .target = { + .uses_mmap = true, + }, + }; + struct state state = { + .done_cnt = 0, + }; + struct thread_map *threads = NULL; + struct cpu_map *cpus = NULL; + struct perf_evlist *evlist = NULL; + struct perf_evsel *evsel = NULL; + int err = -1, ret; + pid_t pid; + struct map *map; + bool have_vmlinux, have_kcore, excl_kernel = false; + + pid = getpid(); + + machines__init(&machines); + machine = &machines.host; + + ret = machine__create_kernel_maps(machine); + if (ret < 0) { + pr_debug("machine__create_kernel_maps failed\n"); + goto out_err; + } + + /* Force the use of kallsyms instead of vmlinux to try kcore */ + if (try_kcore) + symbol_conf.kallsyms_name = "/proc/kallsyms"; + + /* Load kernel map */ + map = 
machine->vmlinux_maps[MAP__FUNCTION]; + ret = map__load(map, NULL); + if (ret < 0) { + pr_debug("map__load failed\n"); + goto out_err; + } + have_vmlinux = dso__is_vmlinux(map->dso); + have_kcore = dso__is_kcore(map->dso); + + /* 2nd time through we just try kcore */ + if (try_kcore && !have_kcore) + return TEST_CODE_READING_NO_KCORE; + + /* No point getting kernel events if there is no kernel object */ + if (!have_vmlinux && !have_kcore) + excl_kernel = true; + + threads = thread_map__new_by_tid(pid); + if (!threads) { + pr_debug("thread_map__new_by_tid failed\n"); + goto out_err; + } + + ret = perf_event__synthesize_thread_map(NULL, threads, + perf_event__process, machine); + if (ret < 0) { + pr_debug("perf_event__synthesize_thread_map failed\n"); + goto out_err; + } + + thread = machine__findnew_thread(machine, pid); + if (!thread) { + pr_debug("machine__findnew_thread failed\n"); + goto out_err; + } + + cpus = cpu_map__new(NULL); + if (!cpus) { + pr_debug("cpu_map__new failed\n"); + goto out_err; + } + + while (1) { + const char *str; + + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("perf_evlist__new failed\n"); + goto out_err; + } + + perf_evlist__set_maps(evlist, cpus, threads); + + if (excl_kernel) + str = "cycles:u"; + else + str = "cycles"; + pr_debug("Parsing event '%s'\n", str); + ret = parse_events(evlist, str); + if (ret < 0) { + pr_debug("parse_events failed\n"); + goto out_err; + } + + perf_evlist__config(evlist, &opts); + + evsel = perf_evlist__first(evlist); + + evsel->attr.comm = 1; + evsel->attr.disabled = 1; + evsel->attr.enable_on_exec = 0; + + ret = perf_evlist__open(evlist); + if (ret < 0) { + if (!excl_kernel) { + excl_kernel = true; + perf_evlist__delete(evlist); + evlist = NULL; + continue; + } + pr_debug("perf_evlist__open failed\n"); + goto out_err; + } + break; + } + + ret = perf_evlist__mmap(evlist, UINT_MAX, false); + if (ret < 0) { + pr_debug("perf_evlist__mmap failed\n"); + goto out_err; + } + + 
perf_evlist__enable(evlist); + + do_something(); + + perf_evlist__disable(evlist); + + ret = process_events(machine, evlist, &state); + if (ret < 0) + goto out_err; + + if (!have_vmlinux && !have_kcore && !try_kcore) + err = TEST_CODE_READING_NO_KERNEL_OBJ; + else if (!have_vmlinux && !try_kcore) + err = TEST_CODE_READING_NO_VMLINUX; + else if (excl_kernel) + err = TEST_CODE_READING_NO_ACCESS; + else + err = TEST_CODE_READING_OK; +out_err: + if (evlist) { + perf_evlist__munmap(evlist); + perf_evlist__close(evlist); + perf_evlist__delete(evlist); + } + if (cpus) + cpu_map__delete(cpus); + if (threads) + thread_map__delete(threads); + machines__destroy_kernel_maps(&machines); + machine__delete_threads(machine); + machines__exit(&machines); + + return err; +} + +int test__code_reading(void) +{ + int ret; + + ret = do_test_code_reading(false); + if (!ret) + ret = do_test_code_reading(true); + + switch (ret) { + case TEST_CODE_READING_OK: + return 0; + case TEST_CODE_READING_NO_VMLINUX: + fprintf(stderr, " (no vmlinux)"); + return 0; + case TEST_CODE_READING_NO_KCORE: + fprintf(stderr, " (no kcore)"); + return 0; + case TEST_CODE_READING_NO_ACCESS: + fprintf(stderr, " (no access)"); + return 0; + case TEST_CODE_READING_NO_KERNEL_OBJ: + fprintf(stderr, " (no kernel obj)"); + return 0; + default: + return -1; + }; +} diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 344c844ffc1e..48114d164e9f 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -452,6 +452,7 @@ static int test__checkevent_pmu_events(struct perf_evlist *evlist) evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned); return 0; } @@ -520,6 +521,7 @@ static int test__group1(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong 
nr_members", evsel->nr_members == 2); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cycles:upp */ evsel = perf_evsel__next(evsel); @@ -535,6 +537,7 @@ static int test__group1(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; } @@ -560,6 +563,7 @@ static int test__group2(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cache-references + :u modifier */ evsel = perf_evsel__next(evsel); @@ -574,6 +578,7 @@ static int test__group2(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cycles:k */ evsel = perf_evsel__next(evsel); @@ -587,6 +592,7 @@ static int test__group2(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; } @@ -615,6 +621,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) !strcmp(leader->group_name, "group1")); TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); 
/* group1 cycles:kppp */ evsel = perf_evsel__next(evsel); @@ -631,6 +638,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* group2 cycles + G modifier */ evsel = leader = perf_evsel__next(evsel); @@ -648,6 +656,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) !strcmp(leader->group_name, "group2")); TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* group2 1:3 + G modifier */ evsel = perf_evsel__next(evsel); @@ -661,6 +670,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions:u */ evsel = perf_evsel__next(evsel); @@ -674,6 +684,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; } @@ -701,6 +712,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions:kp + p */ evsel = 
perf_evsel__next(evsel); @@ -716,6 +728,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; } @@ -742,6 +755,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions + G */ evsel = perf_evsel__next(evsel); @@ -756,6 +770,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cycles:G */ evsel = leader = perf_evsel__next(evsel); @@ -772,6 +787,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions:G */ evsel = perf_evsel__next(evsel); @@ -963,6 +979,142 @@ static int test__group_gh4(struct perf_evlist *evlist) return 0; } +static int test__leader_sample1(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); + + /* cycles - sampling group leader */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == 
evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); + + /* cache-misses - not sampling */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); + + /* branch-misses - not sampling */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", 
!evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); + + return 0; +} + +static int test__leader_sample2(struct perf_evlist *evlist __maybe_unused) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + + /* instructions - sampling group leader */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); + + /* branch-misses - not sampling */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", 
!evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); + + return 0; +} + +static int test__checkevent_pinned_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong pinned", evsel->attr.pinned); + + return test__checkevent_symbolic_name(evlist); +} + +static int test__pinned_group(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); + + /* cycles - group leader */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong pinned", evsel->attr.pinned); + + /* cache-misses - can not be pinned, but will go on with the leader */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config); + TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned); + + /* branch-misses - ditto */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config); + TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned); + + return 0; +} + static int count_tracepoints(void) { char events_path[PATH_MAX]; @@ -1179,6 +1331,22 @@ static struct evlist_test test__events[] = { .name = "{cycles:G,cache-misses:H}:uG", .check = 
test__group_gh4, }, + [38] = { + .name = "{cycles,cache-misses,branch-misses}:S", + .check = test__leader_sample1, + }, + [39] = { + .name = "{instructions,branch-misses}:Su", + .check = test__leader_sample2, + }, + [40] = { + .name = "instructions:uDp", + .check = test__checkevent_pinned_modifier, + }, + [41] = { + .name = "{cycles,cache-misses,branch-misses}:D", + .check = test__pinned_group, + }, }; static struct evlist_test test__events_pmu[] = { diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index d22202aa16e9..c748f532b20f 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -36,5 +36,6 @@ int test__bp_signal_overflow(void); int test__task_exit(void); int test__sw_clock_freq(void); int test__perf_time_to_tsc(void); +int test__code_reading(void); #endif /* TESTS_H */ diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index add15392c622..2bd13edcbc17 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -16,6 +16,8 @@ static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused, return 0; } +#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x)) + int test__vmlinux_matches_kallsyms(void) { int err = -1; @@ -25,6 +27,7 @@ int test__vmlinux_matches_kallsyms(void) struct machine kallsyms, vmlinux; enum map_type type = MAP__FUNCTION; struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", }; + u64 mem_start, mem_end; /* * Step 1: @@ -73,7 +76,7 @@ int test__vmlinux_matches_kallsyms(void) goto out; } - ref_reloc_sym.addr = sym->start; + ref_reloc_sym.addr = UM(sym->start); /* * Step 5: @@ -123,10 +126,14 @@ int test__vmlinux_matches_kallsyms(void) if (sym->start == sym->end) continue; - first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL); + mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start); + mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end); + + first_pair = machine__find_kernel_symbol(&kallsyms, 
type, + mem_start, NULL, NULL); pair = first_pair; - if (pair && pair->start == sym->start) { + if (pair && UM(pair->start) == mem_start) { next_pair: if (strcmp(sym->name, pair->name) == 0) { /* @@ -138,10 +145,11 @@ next_pair: * off the real size. More than that and we * _really_ have a problem. */ - s64 skew = sym->end - pair->end; + s64 skew = mem_end - UM(pair->end); if (llabs(skew) >= page_size) pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n", - sym->start, sym->name, sym->end, pair->end); + mem_start, sym->name, mem_end, + UM(pair->end)); /* * Do not count this as a failure, because we @@ -159,7 +167,7 @@ detour: if (nnd) { struct symbol *next = rb_entry(nnd, struct symbol, rb_node); - if (next->start == sym->start) { + if (UM(next->start) == mem_start) { pair = next; goto next_pair; } @@ -172,10 +180,11 @@ detour: } pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", - sym->start, sym->name, pair->name); + mem_start, sym->name, pair->name); } } else - pr_debug("%#" PRIx64 ": %s not on kallsyms\n", sym->start, sym->name); + pr_debug("%#" PRIx64 ": %s not on kallsyms\n", + mem_start, sym->name); err = -1; } @@ -208,16 +217,19 @@ detour: for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node), *pair; - pair = map_groups__find(&kallsyms.kmaps, type, pos->start); + mem_start = vmlinux_map->unmap_ip(vmlinux_map, pos->start); + mem_end = vmlinux_map->unmap_ip(vmlinux_map, pos->end); + + pair = map_groups__find(&kallsyms.kmaps, type, mem_start); if (pair == NULL || pair->priv) continue; - if (pair->start == pos->start) { + if (pair->start == mem_start) { pair->priv = 1; pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", pos->start, pos->end, pos->pgoff, pos->dso->name); - if (pos->pgoff != pair->pgoff || pos->end != pair->end) - pr_info(": \n*%" PRIx64 "-%" PRIx64 " %" PRIx64 "", + if (mem_end != pair->end) + pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" 
PRIx64, pair->start, pair->end, pair->pgoff); pr_info(" %s\n", pair->dso->name); pair->priv = 1; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index cc64d3f7fc36..08545ae46992 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -428,6 +428,14 @@ static void annotate_browser__init_asm_mode(struct annotate_browser *browser) browser->b.nr_entries = browser->nr_asm_entries; } +#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) + +static int sym_title(struct symbol *sym, struct map *map, char *title, + size_t sz) +{ + return snprintf(title, sz, "%s %s", sym->name, map->dso->long_name); +} + static bool annotate_browser__callq(struct annotate_browser *browser, struct perf_evsel *evsel, struct hist_browser_timer *hbt) @@ -438,6 +446,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, struct annotation *notes; struct symbol *target; u64 ip; + char title[SYM_TITLE_MAX_SIZE]; if (!ins__is_call(dl->ins)) return false; @@ -461,7 +470,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser, pthread_mutex_unlock(&notes->lock); symbol__tui_annotate(target, ms->map, evsel, hbt); - ui_browser__show_title(&browser->b, sym->name); + sym_title(sym, ms->map, title, sizeof(title)); + ui_browser__show_title(&browser->b, title); return true; } @@ -495,7 +505,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser) dl = annotate_browser__find_offset(browser, dl->ops.target.offset, &idx); if (dl == NULL) { - ui_helpline__puts("Invallid jump offset"); + ui_helpline__puts("Invalid jump offset"); return true; } @@ -653,8 +663,10 @@ static int annotate_browser__run(struct annotate_browser *browser, const char *help = "Press 'h' for help on key bindings"; int delay_secs = hbt ? 
hbt->refresh : 0; int key; + char title[SYM_TITLE_MAX_SIZE]; - if (ui_browser__show(&browser->b, sym->name, help) < 0) + sym_title(sym, ms->map, title, sizeof(title)); + if (ui_browser__show(&browser->b, title, help) < 0) return -1; annotate_browser__calc_percent(browser, evsel); @@ -720,7 +732,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "s Toggle source code view\n" "/ Search string\n" "r Run available scripts\n" - "? Search previous string\n"); + "? Search string backwards\n"); continue; case 'r': { diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index cb2ed1980147..2ca66cc1160f 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -109,8 +109,6 @@ __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) void perf_gtk__init_hpp(void) { - perf_hpp__column_enable(PERF_HPP__OVERHEAD); - perf_hpp__init(); perf_hpp__format[PERF_HPP__OVERHEAD].color = diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index d102716c43a1..bfc5a27597d6 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -110,10 +110,10 @@ static int jump__parse(struct ins_operands *ops) { const char *s = strchr(ops->raw, '+'); - ops->target.addr = strtoll(ops->raw, NULL, 16); + ops->target.addr = strtoull(ops->raw, NULL, 16); if (s++ != NULL) - ops->target.offset = strtoll(s, NULL, 16); + ops->target.offset = strtoull(s, NULL, 16); else ops->target.offset = UINT64_MAX; @@ -821,11 +821,55 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, if (dl == NULL) return -1; + if (dl->ops.target.offset == UINT64_MAX) + dl->ops.target.offset = dl->ops.target.addr - + map__rip_2objdump(map, sym->start); + + /* + * kcore has no symbols, so add the call target name if it is on the + * same map. 
+ */ + if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) { + struct symbol *s; + u64 ip = dl->ops.target.addr; + + if (ip >= map->start && ip <= map->end) { + ip = map->map_ip(map, ip); + s = map__find_symbol(map, ip, NULL); + if (s && s->start == ip) + dl->ops.target.name = strdup(s->name); + } + } + disasm__add(&notes->src->source, dl); return 0; } +static void delete_last_nop(struct symbol *sym) +{ + struct annotation *notes = symbol__annotation(sym); + struct list_head *list = &notes->src->source; + struct disasm_line *dl; + + while (!list_empty(list)) { + dl = list_entry(list->prev, struct disasm_line, node); + + if (dl->ins && dl->ins->ops) { + if (dl->ins->ops != &nop_ops) + return; + } else { + if (!strstr(dl->line, " nop ") && + !strstr(dl->line, " nopl ") && + !strstr(dl->line, " nopw ")) + return; + } + + list_del(&dl->node); + disasm_line__free(dl); + } +} + int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) { struct dso *dso = map->dso; @@ -864,7 +908,8 @@ fallback: free_filename = false; } - if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { + if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && + !dso__is_kcore(dso)) { char bf[BUILD_ID_SIZE * 2 + 16] = " with build id "; char *build_id_msg = NULL; @@ -898,7 +943,7 @@ fallback: snprintf(command, sizeof(command), "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -d %s %s -C %s|grep -v %s|expand", + " -d %s %s -C %s 2>/dev/null|grep -v %s|expand", objdump_path ? objdump_path : "objdump", disassembler_style ? "-M " : "", disassembler_style ? disassembler_style : "", @@ -918,6 +963,13 @@ fallback: if (symbol__parse_objdump_line(sym, map, file, privsize) < 0) break; + /* + * kallsyms does not have symbol sizes so there may a nop at the end. + * Remove it. 
+ */ + if (dso__is_kcore(dso)) + delete_last_nop(sym); + pclose(file); out_free_filename: if (free_filename) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 5295625c0c00..3a0f5089379c 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -33,7 +33,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, } thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - event->ip.ip, &al); + event->ip.ip, &al, NULL); if (al.map != NULL) al.map->dso->hit = 1; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index c4374f07603c..e3c1ff8512c8 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -78,6 +78,8 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, symbol_conf.symfs, build_id_hex, build_id_hex + 2); break; + case DSO_BINARY_TYPE__VMLINUX: + case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: snprintf(file, size, "%s%s", symbol_conf.symfs, dso->long_name); @@ -93,11 +95,14 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, dso->long_name); break; + case DSO_BINARY_TYPE__KCORE: + case DSO_BINARY_TYPE__GUEST_KCORE: + snprintf(file, size, "%s", dso->long_name); + break; + default: case DSO_BINARY_TYPE__KALLSYMS: - case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__GUEST_KALLSYMS: - case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__JAVA_JIT: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; @@ -419,6 +424,7 @@ struct dso *dso__new(const char *name) dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; dso->data_type = DSO_BINARY_TYPE__NOT_FOUND; dso->loaded = 0; + dso->rel = 0; dso->sorted_by_name = 0; dso->has_build_id = 0; dso->kernel = DSO_TYPE_USER; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index d51aaf272c68..b793053335d6 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -3,6 +3,7 @@ #include <linux/types.h> #include <linux/rbtree.h> +#include <stdbool.h> #include "types.h" 
#include "map.h" @@ -20,6 +21,8 @@ enum dso_binary_type { DSO_BINARY_TYPE__SYSTEM_PATH_DSO, DSO_BINARY_TYPE__GUEST_KMODULE, DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + DSO_BINARY_TYPE__KCORE, + DSO_BINARY_TYPE__GUEST_KCORE, DSO_BINARY_TYPE__NOT_FOUND, }; @@ -84,6 +87,7 @@ struct dso { u8 lname_alloc:1; u8 sorted_by_name; u8 loaded; + u8 rel; u8 build_id[BUILD_ID_SIZE]; const char *short_name; char *long_name; @@ -146,4 +150,17 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp); size_t dso__fprintf_symbols_by_name(struct dso *dso, enum map_type type, FILE *fp); size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp); + +static inline bool dso__is_vmlinux(struct dso *dso) +{ + return dso->data_type == DSO_BINARY_TYPE__VMLINUX || + dso->data_type == DSO_BINARY_TYPE__GUEST_VMLINUX; +} + +static inline bool dso__is_kcore(struct dso *dso) +{ + return dso->data_type == DSO_BINARY_TYPE__KCORE || + dso->data_type == DSO_BINARY_TYPE__GUEST_KCORE; +} + #endif /* __PERF_DSO */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 95412705d0d2..cc7c0c9c9ea6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -592,9 +592,10 @@ int perf_event__process(struct perf_tool *tool __maybe_unused, void thread__find_addr_map(struct thread *self, struct machine *machine, u8 cpumode, enum map_type type, u64 addr, - struct addr_location *al) + struct addr_location *al, symbol_filter_t filter) { struct map_groups *mg = &self->mg; + bool load_map = false; al->thread = self; al->addr = addr; @@ -609,11 +610,13 @@ void thread__find_addr_map(struct thread *self, if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; mg = &machine->kmaps; + load_map = true; } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { al->level = 'g'; mg = &machine->kmaps; + load_map = true; } else { /* * 'u' means guest os user space. 
@@ -654,8 +657,15 @@ try_again: mg = &machine->kmaps; goto try_again; } - } else + } else { + /* + * Kernel maps might be changed when loading symbols so loading + * must be done prior to using kernel maps. + */ + if (load_map) + map__load(al->map, filter); al->addr = al->map->map_ip(al->map, al->addr); + } } void thread__find_addr_location(struct thread *thread, struct machine *machine, @@ -663,7 +673,7 @@ void thread__find_addr_location(struct thread *thread, struct machine *machine, struct addr_location *al, symbol_filter_t filter) { - thread__find_addr_map(thread, machine, cpumode, type, addr, al); + thread__find_addr_map(thread, machine, cpumode, type, addr, al, filter); if (al->map != NULL) al->sym = map__find_symbol(al->map, al->addr, filter); else @@ -699,7 +709,7 @@ int perf_event__preprocess_sample(const union perf_event *event, machine__create_kernel_maps(machine); thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - event->ip.ip, al); + event->ip.ip, al, filter); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? 
"[hypervisor]" : "<not found>"); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1ebb8fb0178c..6119a649d861 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -80,6 +80,23 @@ struct stack_dump { char *data; }; +struct sample_read_value { + u64 value; + u64 id; +}; + +struct sample_read { + u64 time_enabled; + u64 time_running; + union { + struct { + u64 nr; + struct sample_read_value *values; + } group; + struct sample_read_value one; + }; +}; + struct perf_sample { u64 ip; u32 pid, tid; @@ -97,6 +114,7 @@ struct perf_sample { struct branch_stack *branch_stack; struct regs_dump user_regs; struct stack_dump user_stack; + struct sample_read read; }; #define PERF_MEM_DATA_SRC_NONE \ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 42ea4e947eb8..c7d111f74553 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -302,6 +302,24 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist, { u64 read_data[4] = { 0, }; int id_idx = 1; /* The first entry is the counter value */ + u64 id; + int ret; + + ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); + if (!ret) + goto add; + + if (errno != ENOTTY) + return -1; + + /* Legacy way to get event id.. All hail to old kernels! */ + + /* + * This way does not work with group format read, so bail + * out in that case. 
+ */ + if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) + return -1; if (!(evsel->attr.read_format & PERF_FORMAT_ID) || read(fd, &read_data, sizeof(read_data)) == -1) @@ -312,25 +330,39 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist, if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) ++id_idx; - perf_evlist__id_add(evlist, evsel, cpu, thread, read_data[id_idx]); + id = read_data[id_idx]; + + add: + perf_evlist__id_add(evlist, evsel, cpu, thread, id); return 0; } -struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) +struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) { struct hlist_head *head; struct perf_sample_id *sid; int hash; - if (evlist->nr_entries == 1) - return perf_evlist__first(evlist); - hash = hash_64(id, PERF_EVLIST__HLIST_BITS); head = &evlist->heads[hash]; hlist_for_each_entry(sid, head, node) if (sid->id == id) - return sid->evsel; + return sid; + + return NULL; +} + +struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) +{ + struct perf_sample_id *sid; + + if (evlist->nr_entries == 1) + return perf_evlist__first(evlist); + + sid = perf_evlist__id2sid(evlist, id); + if (sid) + return sid->evsel; if (!perf_evlist__sample_id_all(evlist)) return perf_evlist__first(evlist); @@ -662,6 +694,32 @@ u64 perf_evlist__sample_type(struct perf_evlist *evlist) return first->attr.sample_type; } +bool perf_evlist__valid_read_format(struct perf_evlist *evlist) +{ + struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; + u64 read_format = first->attr.read_format; + u64 sample_type = first->attr.sample_type; + + list_for_each_entry_continue(pos, &evlist->entries, node) { + if (read_format != pos->attr.read_format) + return false; + } + + /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. 
*/ + if ((sample_type & PERF_SAMPLE_READ) && + !(read_format & PERF_FORMAT_ID)) { + return false; + } + + return true; +} + +u64 perf_evlist__read_format(struct perf_evlist *evlist) +{ + struct perf_evsel *first = perf_evlist__first(evlist); + return first->attr.read_format; +} + u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) { struct perf_evsel *first = perf_evlist__first(evlist); @@ -779,13 +837,6 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); /* - * Do a dummy execvp to get the PLT entry resolved, - * so we avoid the resolver overhead on the real - * execvp call. - */ - execvp("", (char **)argv); - - /* * Tell the parent we're ready to go */ close(child_ready_pipe[1]); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 0583d36252be..327ababa67b6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -78,6 +78,8 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); +struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); + union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); int perf_evlist__open(struct perf_evlist *evlist); @@ -118,6 +120,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct perf_evlist *evlist); +u64 perf_evlist__read_format(struct perf_evlist *evlist); u64 perf_evlist__sample_type(struct perf_evlist *evlist); bool perf_evlist__sample_id_all(struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); @@ -127,6 +130,7 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); +bool perf_evlist__valid_read_format(struct 
perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8bed0c1a1399..960394ea1e3a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -490,6 +490,7 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) { + struct perf_evsel *leader = evsel->leader; struct perf_event_attr *attr = &evsel->attr; int track = !evsel->idx; /* only the first counter needs these */ @@ -499,6 +500,25 @@ void perf_evsel__config(struct perf_evsel *evsel, perf_evsel__set_sample_bit(evsel, IP); perf_evsel__set_sample_bit(evsel, TID); + if (evsel->sample_read) { + perf_evsel__set_sample_bit(evsel, READ); + + /* + * We need ID even in case of single event, because + * PERF_SAMPLE_READ process ID specific data. + */ + perf_evsel__set_sample_id(evsel); + + /* + * Apply group format only if we belong to group + * with more than one members. + */ + if (leader->nr_members > 1) { + attr->read_format |= PERF_FORMAT_GROUP; + attr->inherit = 0; + } + } + /* * We default some events to a 1 default interval. But keep * it a weak assumption overridable by the user. @@ -514,6 +534,15 @@ void perf_evsel__config(struct perf_evsel *evsel, } } + /* + * Disable sampling for all group members other + * than leader in case leader 'leads' the sampling. + */ + if ((leader != evsel) && leader->sample_read) { + attr->sample_freq = 0; + attr->sample_period = 0; + } + if (opts->no_samples) attr->sample_freq = 0; @@ -605,15 +634,15 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) return evsel->fd != NULL ? 
0 : -ENOMEM; } -int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, - const char *filter) +static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthreads, + int ioc, void *arg) { int cpu, thread; for (cpu = 0; cpu < ncpus; cpu++) { for (thread = 0; thread < nthreads; thread++) { int fd = FD(evsel, cpu, thread), - err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter); + err = ioctl(fd, ioc, arg); if (err) return err; @@ -623,6 +652,21 @@ int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, return 0; } +int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter) +{ + return perf_evsel__run_ioctl(evsel, ncpus, nthreads, + PERF_EVENT_IOC_SET_FILTER, + (void *)filter); +} + +int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + return perf_evsel__run_ioctl(evsel, ncpus, nthreads, + PERF_EVENT_IOC_ENABLE, + 0); +} + int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); @@ -1096,8 +1140,34 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, } if (type & PERF_SAMPLE_READ) { - fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n"); - return -1; + u64 read_format = evsel->attr.read_format; + + if (read_format & PERF_FORMAT_GROUP) + data->read.group.nr = *array; + else + data->read.one.value = *array; + + array++; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + data->read.time_enabled = *array; + array++; + } + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + data->read.time_running = *array; + array++; + } + + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + if (read_format & PERF_FORMAT_GROUP) { + data->read.group.values = (struct sample_read_value *) array; + array = (void *) array + data->read.group.nr * + sizeof(struct sample_read_value); + } else { + data->read.one.id = *array; 
+ array++; + } } if (type & PERF_SAMPLE_CALLCHAIN) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 3f156ccc1acb..532a5f925da0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -38,6 +38,9 @@ struct perf_sample_id { struct hlist_node node; u64 id; struct perf_evsel *evsel; + + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ + u64 period; }; /** struct perf_evsel - event selector @@ -76,6 +79,7 @@ struct perf_evsel { /* parse modifier helper */ int exclude_GH; int nr_members; + int sample_read; struct perf_evsel *leader; char *group_name; }; @@ -142,6 +146,7 @@ void perf_evsel__set_sample_id(struct perf_evsel *evsel); int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, const char *filter); +int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f9f9d6381b9a..6fcc358138ae 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -628,10 +628,8 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type, struct map *map = machine->vmlinux_maps[type]; int ret = dso__load_vmlinux_path(map->dso, map, filter); - if (ret > 0) { + if (ret > 0) dso__set_loaded(map->dso, type); - map__reloc_vmlinux(map); - } return ret; } @@ -808,7 +806,10 @@ static int machine__create_modules(struct machine *machine) free(line); fclose(file); - return machine__set_modules_path(machine); + if (machine__set_modules_path(machine) < 0) { + pr_debug("Problems setting modules path maps, continuing anyway...\n"); + } + return 0; out_delete_line: free(line); @@ -858,6 +859,18 @@ static void machine__set_kernel_mmap_len(struct machine *machine, } } +static bool machine__uses_kcore(struct machine *machine) +{ + struct dso *dso; + + list_for_each_entry(dso, &machine->kernel_dsos, node) { + if (dso__is_kcore(dso)) + 
return true; + } + + return false; +} + static int machine__process_kernel_mmap_event(struct machine *machine, union perf_event *event) { @@ -866,6 +879,10 @@ static int machine__process_kernel_mmap_event(struct machine *machine, enum dso_kernel_type kernel_type; bool is_kernel_mmap; + /* If we have maps from kcore then we do not need or want any others */ + if (machine__uses_kcore(machine)) + return 0; + machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix)); if (machine__is_host(machine)) kernel_type = DSO_TYPE_KERNEL; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 8bcdf9e54089..9e8304ca343e 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -182,12 +182,6 @@ int map__load(struct map *map, symbol_filter_t filter) #endif return -1; } - /* - * Only applies to the kernel, as its symtabs aren't relative like the - * module ones. - */ - if (map->dso->kernel) - map__reloc_vmlinux(map); return 0; } @@ -254,14 +248,18 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp) /* * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. - * map->dso->adjust_symbols==1 for ET_EXEC-like cases. + * map->dso->adjust_symbols==1 for ET_EXEC-like cases except ET_REL which is + * relative to section start. */ u64 map__rip_2objdump(struct map *map, u64 rip) { - u64 addr = map->dso->adjust_symbols ? 
- map->unmap_ip(map, rip) : /* RIP -> IP */ - rip; - return addr; + if (!map->dso->adjust_symbols) + return rip; + + if (map->dso->rel) + return rip - map->pgoff; + + return map->unmap_ip(map, rip); } void map_groups__init(struct map_groups *mg) @@ -513,35 +511,6 @@ int map_groups__clone(struct map_groups *mg, return 0; } -static u64 map__reloc_map_ip(struct map *map, u64 ip) -{ - return ip + (s64)map->pgoff; -} - -static u64 map__reloc_unmap_ip(struct map *map, u64 ip) -{ - return ip - (s64)map->pgoff; -} - -void map__reloc_vmlinux(struct map *map) -{ - struct kmap *kmap = map__kmap(map); - s64 reloc; - - if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr) - return; - - reloc = (kmap->ref_reloc_sym->unrelocated_addr - - kmap->ref_reloc_sym->addr); - - if (!reloc) - return; - - map->map_ip = map__reloc_map_ip; - map->unmap_ip = map__reloc_unmap_ip; - map->pgoff = reloc; -} - void maps__insert(struct rb_root *maps, struct map *map) { struct rb_node **p = &maps->rb_node; @@ -586,3 +555,21 @@ struct map *maps__find(struct rb_root *maps, u64 ip) return NULL; } + +struct map *maps__first(struct rb_root *maps) +{ + struct rb_node *first = rb_first(maps); + + if (first) + return rb_entry(first, struct map, rb_node); + return NULL; +} + +struct map *maps__next(struct map *map) +{ + struct rb_node *next = rb_next(&map->rb_node); + + if (next) + return rb_entry(next, struct map, rb_node); + return NULL; +} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index a887f2c9dfbb..2cc93cbf0e17 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -112,6 +112,8 @@ size_t __map_groups__fprintf_maps(struct map_groups *mg, void maps__insert(struct rb_root *maps, struct map *map); void maps__remove(struct rb_root *maps, struct map *map); struct map *maps__find(struct rb_root *maps, u64 addr); +struct map *maps__first(struct rb_root *maps); +struct map *maps__next(struct map *map); void map_groups__init(struct map_groups *mg); void 
map_groups__exit(struct map_groups *mg); int map_groups__clone(struct map_groups *mg, @@ -139,6 +141,17 @@ static inline struct map *map_groups__find(struct map_groups *mg, return maps__find(&mg->maps[type], addr); } +static inline struct map *map_groups__first(struct map_groups *mg, + enum map_type type) +{ + return maps__first(&mg->maps[type]); +} + +static inline struct map *map_groups__next(struct map *map) +{ + return maps__next(map); +} + struct symbol *map_groups__find_symbol(struct map_groups *mg, enum map_type type, u64 addr, struct map **mapp, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2c460ede0a69..9cba92386a82 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -687,6 +687,8 @@ struct event_modifier { int eG; int precise; int exclude_GH; + int sample_read; + int pinned; }; static int get_event_modifier(struct event_modifier *mod, char *str, @@ -698,6 +700,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int eH = evsel ? evsel->attr.exclude_host : 0; int eG = evsel ? evsel->attr.exclude_guest : 0; int precise = evsel ? evsel->attr.precise_ip : 0; + int sample_read = 0; + int pinned = evsel ? evsel->attr.pinned : 0; int exclude = eu | ek | eh; int exclude_GH = evsel ? evsel->exclude_GH : 0; @@ -730,6 +734,10 @@ static int get_event_modifier(struct event_modifier *mod, char *str, /* use of precise requires exclude_guest */ if (!exclude_GH) eG = 1; + } else if (*str == 'S') { + sample_read = 1; + } else if (*str == 'D') { + pinned = 1; } else break; @@ -756,6 +764,9 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->eG = eG; mod->precise = precise; mod->exclude_GH = exclude_GH; + mod->sample_read = sample_read; + mod->pinned = pinned; + return 0; } @@ -768,7 +779,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. 
*/ - if (strlen(str) > (sizeof("ukhGHppp") - 1)) + if (strlen(str) > (sizeof("ukhGHpppSD") - 1)) return -1; while (*p) { @@ -806,6 +817,10 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->attr.exclude_host = mod.eH; evsel->attr.exclude_guest = mod.eG; evsel->exclude_GH = mod.exclude_GH; + evsel->sample_read = mod.sample_read; + + if (perf_evsel__is_group_leader(evsel)) + evsel->attr.pinned = mod.pinned; } return 0; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index e9d1134c2c68..0790452658b3 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -82,7 +82,8 @@ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?]* -modifier_event [ukhpGH]+ +/* If you add a modifier you need to update check_modifier() */ +modifier_event [ukhpGHSD]+ modifier_bp [rwx]{1,3} %% diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 272c9cf57122..dedaeb22b7b9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -71,6 +71,11 @@ static int perf_session__open(struct perf_session *self, bool force) goto out_close; } + if (!perf_evlist__valid_read_format(self->evlist)) { + pr_err("non matching read_format"); + goto out_close; + } + self->size = input_stat.st_size; return 0; @@ -245,7 +250,7 @@ static int process_finished_round(struct perf_tool *tool, union perf_event *event, struct perf_session *session); -static void perf_tool__fill_defaults(struct perf_tool *tool) +void perf_tool__fill_defaults(struct perf_tool *tool) { if (tool->sample == NULL) tool->sample = process_event_sample_stub; @@ -490,7 +495,7 @@ static int perf_session_deliver_event(struct perf_session *session, u64 file_offset); static int flush_sample_queue(struct perf_session *s, - struct perf_tool *tool) + struct perf_tool *tool) { struct ordered_samples *os = &s->ordered_samples; struct list_head *head = &os->samples; @@ 
-638,7 +643,7 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s) #define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue)) -static int perf_session_queue_event(struct perf_session *s, union perf_event *event, +int perf_session_queue_event(struct perf_session *s, union perf_event *event, struct perf_sample *sample, u64 file_offset) { struct ordered_samples *os = &s->ordered_samples; @@ -749,6 +754,36 @@ static void perf_session__print_tstamp(struct perf_session *session, printf("%" PRIu64 " ", sample->time); } +static void sample_read__printf(struct perf_sample *sample, u64 read_format) +{ + printf("... sample_read:\n"); + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + printf("...... time enabled %016" PRIx64 "\n", + sample->read.time_enabled); + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + printf("...... time running %016" PRIx64 "\n", + sample->read.time_running); + + if (read_format & PERF_FORMAT_GROUP) { + u64 i; + + printf(".... group nr %" PRIu64 "\n", sample->read.group.nr); + + for (i = 0; i < sample->read.group.nr; i++) { + struct sample_read_value *value; + + value = &sample->read.group.values[i]; + printf("..... id %016" PRIx64 + ", value %016" PRIx64 "\n", + value->id, value->value); + } + } else + printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n", + sample->read.one.id, sample->read.one.value); +} + static void dump_event(struct perf_session *session, union perf_event *event, u64 file_offset, struct perf_sample *sample) { @@ -798,6 +833,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_DATA_SRC) printf(" . 
data_src: 0x%"PRIx64"\n", sample->data_src); + + if (sample_type & PERF_SAMPLE_READ) + sample_read__printf(sample, evsel->attr.read_format); } static struct machine * @@ -822,6 +860,75 @@ static struct machine * return &session->machines.host; } +static int deliver_sample_value(struct perf_session *session, + struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct sample_read_value *v, + struct machine *machine) +{ + struct perf_sample_id *sid; + + sid = perf_evlist__id2sid(session->evlist, v->id); + if (sid) { + sample->id = v->id; + sample->period = v->value - sid->period; + sid->period = v->value; + } + + if (!sid || sid->evsel == NULL) { + ++session->stats.nr_unknown_id; + return 0; + } + + return tool->sample(tool, event, sample, sid->evsel, machine); +} + +static int deliver_sample_group(struct perf_session *session, + struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + int ret = -EINVAL; + u64 i; + + for (i = 0; i < sample->read.group.nr; i++) { + ret = deliver_sample_value(session, tool, event, sample, + &sample->read.group.values[i], + machine); + if (ret) + break; + } + + return ret; +} + +static int +perf_session__deliver_sample(struct perf_session *session, + struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + /* We know evsel != NULL. */ + u64 sample_type = evsel->attr.sample_type; + u64 read_format = evsel->attr.read_format; + + /* Standard sample delievery. */ + if (!(sample_type & PERF_SAMPLE_READ)) + return tool->sample(tool, event, sample, evsel, machine); + + /* For PERF_SAMPLE_READ we have either single or group mode. 
*/ + if (read_format & PERF_FORMAT_GROUP) + return deliver_sample_group(session, tool, event, sample, + machine); + else + return deliver_sample_value(session, tool, event, sample, + &sample->read.one, machine); +} + static int perf_session_deliver_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -864,7 +971,8 @@ static int perf_session_deliver_event(struct perf_session *session, ++session->stats.nr_unprocessable_samples; return 0; } - return tool->sample(tool, event, sample, evsel, machine); + return perf_session__deliver_sample(session, tool, event, + sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); case PERF_RECORD_COMM: @@ -1411,8 +1519,13 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, printf("\t%16" PRIx64, node->ip); if (print_sym) { printf(" "); - symbol__fprintf_symname(node->sym, stdout); + if (print_symoffset) { + al.addr = node->ip; + symbol__fprintf_symname_offs(node->sym, &al, stdout); + } else + symbol__fprintf_symname(node->sym, stdout); } + if (print_dso) { printf(" ("); map__fprintf_dsoname(node->map, stdout); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index ad8d3d4ef14e..8bed17e64a96 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -56,6 +56,11 @@ int __perf_session__process_events(struct perf_session *self, int perf_session__process_events(struct perf_session *self, struct perf_tool *tool); +int perf_session_queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset); + +void perf_tool__fill_defaults(struct perf_tool *tool); + int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel *evsel, struct thread *thread, struct ip_callchain *chain, diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 7c59c28afcc5..6506b3dfb605 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -10,6 
+10,12 @@ void update_stats(struct stats *stats, u64 val) delta = val - stats->mean; stats->mean += delta / stats->n; stats->M2 += delta*(val - stats->mean); + + if (val > stats->max) + stats->max = val; + + if (val < stats->min) + stats->min = val; } double avg_stats(struct stats *stats) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 588367c3c767..ae8ccd7227cf 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -6,6 +6,7 @@ struct stats { double n, mean, M2; + u64 max, min; }; void update_stats(struct stats *stats, u64 val); @@ -13,4 +14,12 @@ double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); double rel_stddev_stats(double stddev, double avg); +static inline void init_stats(struct stats *stats) +{ + stats->n = 0.0; + stats->mean = 0.0; + stats->M2 = 0.0; + stats->min = (u64) -1; + stats->max = 0; +} #endif diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 4b12bf850325..a7b9ab557380 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -599,11 +599,13 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, if (dso->kernel == DSO_TYPE_USER) { GElf_Shdr shdr; ss->adjust_symbols = (ehdr.e_type == ET_EXEC || + ehdr.e_type == ET_REL || elf_section_by_name(elf, &ehdr, &shdr, ".gnu.prelink_undo", NULL) != NULL); } else { - ss->adjust_symbols = 0; + ss->adjust_symbols = ehdr.e_type == ET_EXEC || + ehdr.e_type == ET_REL; } ss->name = strdup(name); @@ -624,6 +626,37 @@ out_close: return err; } +/** + * ref_reloc_sym_not_found - has kernel relocation symbol been found. + * @kmap: kernel maps and relocation reference symbol + * + * This function returns %true if we are dealing with the kernel maps and the + * relocation reference symbol has not yet been found. Otherwise %false is + * returned. 
+ */ +static bool ref_reloc_sym_not_found(struct kmap *kmap) +{ + return kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && + !kmap->ref_reloc_sym->unrelocated_addr; +} + +/** + * ref_reloc - kernel relocation offset. + * @kmap: kernel maps and relocation reference symbol + * + * This function returns the offset of kernel addresses as determined by using + * the relocation reference symbol i.e. if the kernel has not been relocated + * then the return value is zero. + */ +static u64 ref_reloc(struct kmap *kmap) +{ + if (kmap && kmap->ref_reloc_sym && + kmap->ref_reloc_sym->unrelocated_addr) + return kmap->ref_reloc_sym->addr - + kmap->ref_reloc_sym->unrelocated_addr; + return 0; +} + int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, symbol_filter_t filter, int kmodule) @@ -642,8 +675,17 @@ int dso__load_sym(struct dso *dso, struct map *map, Elf_Scn *sec, *sec_strndx; Elf *elf; int nr = 0; + bool remap_kernel = false, adjust_kernel_syms = false; dso->symtab_type = syms_ss->type; + dso->rel = syms_ss->ehdr.e_type == ET_REL; + + /* + * Modules may already have symbols from kallsyms, but those symbols + * have the wrong values for the dso maps, so remove them. + */ + if (kmodule && syms_ss->symtab) + symbols__delete(&dso->symbols[map->type]); if (!syms_ss->symtab) { syms_ss->symtab = syms_ss->dynsym; @@ -681,7 +723,31 @@ int dso__load_sym(struct dso *dso, struct map *map, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - dso->adjust_symbols = runtime_ss->adjust_symbols; + + /* + * The kernel relocation symbol is needed in advance in order to adjust + * kernel maps correctly. 
+ */ + if (ref_reloc_sym_not_found(kmap)) { + elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { + const char *elf_name = elf_sym__name(&sym, symstrs); + + if (strcmp(elf_name, kmap->ref_reloc_sym->name)) + continue; + kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; + break; + } + } + + dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); + /* + * Initial kernel and module mappings do not map to the dso. For + * function mappings, flag the fixups. + */ + if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) { + remap_kernel = true; + adjust_kernel_syms = dso->adjust_symbols; + } elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { struct symbol *f; const char *elf_name = elf_sym__name(&sym, symstrs); @@ -690,10 +756,6 @@ int dso__load_sym(struct dso *dso, struct map *map, const char *section_name; bool used_opd = false; - if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && - strcmp(elf_name, kmap->ref_reloc_sym->name) == 0) - kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; - if (!is_label && !elf_sym__is_a(&sym, map->type)) continue; @@ -745,20 +807,55 @@ int dso__load_sym(struct dso *dso, struct map *map, (sym.st_value & 1)) --sym.st_value; - if (dso->kernel != DSO_TYPE_USER || kmodule) { + if (dso->kernel || kmodule) { char dso_name[PATH_MAX]; + /* Adjust symbol to map to file offset */ + if (adjust_kernel_syms) + sym.st_value -= shdr.sh_addr - shdr.sh_offset; + if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0) goto new_symbol; if (strcmp(section_name, ".text") == 0) { + /* + * The initial kernel mapping is based on + * kallsyms and identity maps. Overwrite it to + * map to the kernel dso. 
+ */ + if (remap_kernel && dso->kernel) { + remap_kernel = false; + map->start = shdr.sh_addr + + ref_reloc(kmap); + map->end = map->start + shdr.sh_size; + map->pgoff = shdr.sh_offset; + map->map_ip = map__map_ip; + map->unmap_ip = map__unmap_ip; + /* Ensure maps are correctly ordered */ + map_groups__remove(kmap->kmaps, map); + map_groups__insert(kmap->kmaps, map); + } + + /* + * The initial module mapping is based on + * /proc/modules mapped to offset zero. + * Overwrite it to map to the module dso. + */ + if (remap_kernel && kmodule) { + remap_kernel = false; + map->pgoff = shdr.sh_offset; + } + curr_map = map; curr_dso = dso; goto new_symbol; } + if (!kmap) + goto new_symbol; + snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name); @@ -781,8 +878,16 @@ int dso__load_sym(struct dso *dso, struct map *map, dso__delete(curr_dso); goto out_elf_end; } - curr_map->map_ip = identity__map_ip; - curr_map->unmap_ip = identity__map_ip; + if (adjust_kernel_syms) { + curr_map->start = shdr.sh_addr + + ref_reloc(kmap); + curr_map->end = curr_map->start + + shdr.sh_size; + curr_map->pgoff = shdr.sh_offset; + } else { + curr_map->map_ip = identity__map_ip; + curr_map->unmap_ip = identity__map_ip; + } curr_dso->symtab_type = dso->symtab_type; map_groups__insert(kmap->kmaps, curr_map); dsos__add(&dso->node, curr_dso); @@ -846,6 +951,57 @@ out_elf_end: return err; } +static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data) +{ + GElf_Phdr phdr; + size_t i, phdrnum; + int err; + u64 sz; + + if (elf_getphdrnum(elf, &phdrnum)) + return -1; + + for (i = 0; i < phdrnum; i++) { + if (gelf_getphdr(elf, i, &phdr) == NULL) + return -1; + if (phdr.p_type != PT_LOAD) + continue; + if (exe) { + if (!(phdr.p_flags & PF_X)) + continue; + } else { + if (!(phdr.p_flags & PF_R)) + continue; + } + sz = min(phdr.p_memsz, phdr.p_filesz); + if (!sz) + continue; + err = mapfn(phdr.p_vaddr, sz, phdr.p_offset, data); + if (err) + return err; + } + return 0; +} + +int 
file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, + bool *is_64_bit) +{ + int err; + Elf *elf; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return -1; + + if (is_64_bit) + *is_64_bit = (gelf_getclass(elf) == ELFCLASS64); + + err = elf_read_maps(elf, exe, mapfn, data); + + elf_end(elf); + return err; +} + void symbol__elf_init(void) { elf_version(EV_CURRENT); diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index a7390cde63bc..3a802c300fc5 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -301,6 +301,13 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, return 0; } +int file__read_maps(int fd __maybe_unused, bool exe __maybe_unused, + mapfn_t mapfn __maybe_unused, void *data __maybe_unused, + bool *is_64_bit __maybe_unused) +{ + return -1; +} + void symbol__elf_init(void) { } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 02718e728d59..77f3b95bb46d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -87,6 +87,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) { s64 a; s64 b; + size_t na, nb; /* Prefer a symbol with non zero length */ a = syma->end - syma->start; @@ -120,11 +121,21 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) else if (a > b) return SYMBOL_B; - /* If all else fails, choose the symbol with the longest name */ - if (strlen(syma->name) >= strlen(symb->name)) + /* Choose the symbol with the longest name */ + na = strlen(syma->name); + nb = strlen(symb->name); + if (na > nb) return SYMBOL_A; - else + else if (na < nb) + return SYMBOL_B; + + /* Avoid "SyS" kernel syscall aliases */ + if (na >= 3 && !strncmp(syma->name, "SyS", 3)) + return SYMBOL_B; + if (na >= 10 && !strncmp(syma->name, "compat_SyS", 10)) return SYMBOL_B; + + return SYMBOL_A; } void symbols__fixup_duplicate(struct rb_root *symbols) @@ -316,6 +327,16 @@ static struct 
symbol *symbols__find(struct rb_root *symbols, u64 ip) return NULL; } +static struct symbol *symbols__first(struct rb_root *symbols) +{ + struct rb_node *n = rb_first(symbols); + + if (n) + return rb_entry(n, struct symbol, rb_node); + + return NULL; +} + struct symbol_name_rb_node { struct rb_node rb_node; struct symbol sym; @@ -386,6 +407,11 @@ struct symbol *dso__find_symbol(struct dso *dso, return symbols__find(&dso->symbols[type], addr); } +struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) +{ + return symbols__first(&dso->symbols[type]); +} + struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, const char *name) { @@ -522,6 +548,53 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename, return kallsyms__parse(filename, &args, map__process_kallsym_symbol); } +static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, + symbol_filter_t filter) +{ + struct map_groups *kmaps = map__kmap(map)->kmaps; + struct map *curr_map; + struct symbol *pos; + int count = 0, moved = 0; + struct rb_root *root = &dso->symbols[map->type]; + struct rb_node *next = rb_first(root); + + while (next) { + char *module; + + pos = rb_entry(next, struct symbol, rb_node); + next = rb_next(&pos->rb_node); + + module = strchr(pos->name, '\t'); + if (module) + *module = '\0'; + + curr_map = map_groups__find(kmaps, map->type, pos->start); + + if (!curr_map || (filter && filter(curr_map, pos))) { + rb_erase(&pos->rb_node, root); + symbol__delete(pos); + } else { + pos->start -= curr_map->start - curr_map->pgoff; + if (pos->end) + pos->end -= curr_map->start - curr_map->pgoff; + if (curr_map != map) { + rb_erase(&pos->rb_node, root); + symbols__insert( + &curr_map->dso->symbols[curr_map->type], + pos); + ++moved; + } else { + ++count; + } + } + } + + /* Symbols have been adjusted */ + dso->adjust_symbols = 1; + + return count + moved; +} + /* * Split the symbols into maps, making sure there are no overlaps, i.e. 
the * kernel range is broken in several maps, named [kernel].N, as we don't have @@ -663,6 +736,161 @@ bool symbol__restricted_filename(const char *filename, return restricted; } +struct kcore_mapfn_data { + struct dso *dso; + enum map_type type; + struct list_head maps; +}; + +static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) +{ + struct kcore_mapfn_data *md = data; + struct map *map; + + map = map__new2(start, md->dso, md->type); + if (map == NULL) + return -ENOMEM; + + map->end = map->start + len; + map->pgoff = pgoff; + + list_add(&map->node, &md->maps); + + return 0; +} + +/* + * If kallsyms is referenced by name then we look for kcore in the same + * directory. + */ +static bool kcore_filename_from_kallsyms_filename(char *kcore_filename, + const char *kallsyms_filename) +{ + char *name; + + strcpy(kcore_filename, kallsyms_filename); + name = strrchr(kcore_filename, '/'); + if (!name) + return false; + + if (!strcmp(name, "/kallsyms")) { + strcpy(name, "/kcore"); + return true; + } + + return false; +} + +static int dso__load_kcore(struct dso *dso, struct map *map, + const char *kallsyms_filename) +{ + struct map_groups *kmaps = map__kmap(map)->kmaps; + struct machine *machine = kmaps->machine; + struct kcore_mapfn_data md; + struct map *old_map, *new_map, *replacement_map = NULL; + bool is_64_bit; + int err, fd; + char kcore_filename[PATH_MAX]; + struct symbol *sym; + + /* This function requires that the map is the kernel map */ + if (map != machine->vmlinux_maps[map->type]) + return -EINVAL; + + if (!kcore_filename_from_kallsyms_filename(kcore_filename, + kallsyms_filename)) + return -EINVAL; + + md.dso = dso; + md.type = map->type; + INIT_LIST_HEAD(&md.maps); + + fd = open(kcore_filename, O_RDONLY); + if (fd < 0) + return -EINVAL; + + /* Read new maps into temporary lists */ + err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md, + &is_64_bit); + if (err) + goto out_err; + + if (list_empty(&md.maps)) { + err = -EINVAL; + goto 
out_err; + } + + /* Remove old maps */ + old_map = map_groups__first(kmaps, map->type); + while (old_map) { + struct map *next = map_groups__next(old_map); + + if (old_map != map) + map_groups__remove(kmaps, old_map); + old_map = next; + } + + /* Find the kernel map using the first symbol */ + sym = dso__first_symbol(dso, map->type); + list_for_each_entry(new_map, &md.maps, node) { + if (sym && sym->start >= new_map->start && + sym->start < new_map->end) { + replacement_map = new_map; + break; + } + } + + if (!replacement_map) + replacement_map = list_entry(md.maps.next, struct map, node); + + /* Add new maps */ + while (!list_empty(&md.maps)) { + new_map = list_entry(md.maps.next, struct map, node); + list_del(&new_map->node); + if (new_map == replacement_map) { + map->start = new_map->start; + map->end = new_map->end; + map->pgoff = new_map->pgoff; + map->map_ip = new_map->map_ip; + map->unmap_ip = new_map->unmap_ip; + map__delete(new_map); + /* Ensure maps are correctly ordered */ + map_groups__remove(kmaps, map); + map_groups__insert(kmaps, map); + } else { + map_groups__insert(kmaps, new_map); + } + } + + /* + * Set the data type and long name so that kcore can be read via + * dso__data_read_addr(). 
+ */ + if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + dso->data_type = DSO_BINARY_TYPE__GUEST_KCORE; + else + dso->data_type = DSO_BINARY_TYPE__KCORE; + dso__set_long_name(dso, strdup(kcore_filename)); + + close(fd); + + if (map->type == MAP__FUNCTION) + pr_debug("Using %s for kernel object code\n", kcore_filename); + else + pr_debug("Using %s for kernel data\n", kcore_filename); + + return 0; + +out_err: + while (!list_empty(&md.maps)) { + map = list_entry(md.maps.next, struct map, node); + list_del(&map->node); + map__delete(map); + } + close(fd); + return -EINVAL; +} + int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, symbol_filter_t filter) { @@ -680,7 +908,10 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, else dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; - return dso__split_kallsyms(dso, map, filter); + if (!dso__load_kcore(dso, map, filename)) + return dso__split_kallsyms_for_kcore(dso, map, filter); + else + return dso__split_kallsyms(dso, map, filter); } static int dso__load_perf_map(struct dso *dso, struct map *map, @@ -843,10 +1074,15 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (!runtime_ss && syms_ss) runtime_ss = syms_ss; - if (syms_ss) - ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, 0); - else + if (syms_ss) { + int km; + + km = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE; + ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, km); + } else { ret = -1; + } if (ret > 0) { int nr_plt; @@ -906,6 +1142,10 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, symsrc__destroy(&ss); if (err > 0) { + if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + dso->data_type = DSO_BINARY_TYPE__GUEST_VMLINUX; + else + dso->data_type = DSO_BINARY_TYPE__VMLINUX; dso__set_long_name(dso, (char *)vmlinux); dso__set_loaded(dso, map->type); pr_debug("Using %s for symbols\n", symfs_vmlinux); @@ -978,7 +1218,7 @@ 
static int dso__load_kernel_sym(struct dso *dso, struct map *map, dso__set_long_name(dso, strdup(symbol_conf.vmlinux_name)); dso->lname_alloc = 1; - goto out_fixup; + return err; } return err; } @@ -986,7 +1226,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, if (vmlinux_path != NULL) { err = dso__load_vmlinux_path(dso, map, filter); if (err > 0) - goto out_fixup; + return err; } /* do not try local files if a symfs was given */ @@ -1045,9 +1285,8 @@ do_kallsyms: pr_debug("Using %s for symbols\n", kallsyms_filename); free(kallsyms_allocated_filename); - if (err > 0) { + if (err > 0 && !dso__is_kcore(dso)) { dso__set_long_name(dso, strdup("[kernel.kallsyms]")); -out_fixup: map__fixup_start(map); map__fixup_end(map); } @@ -1078,7 +1317,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, if (symbol_conf.default_guest_vmlinux_name != NULL) { err = dso__load_vmlinux(dso, map, symbol_conf.default_guest_vmlinux_name, filter); - goto out_try_fixup; + return err; } kallsyms_filename = symbol_conf.default_guest_kallsyms; @@ -1092,13 +1331,9 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, err = dso__load_kallsyms(dso, kallsyms_filename, map, filter); if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); - -out_try_fixup: - if (err > 0) { - if (kallsyms_filename != NULL) { - machine__mmap_name(machine, path, sizeof(path)); - dso__set_long_name(dso, strdup(path)); - } + if (err > 0 && !dso__is_kcore(dso)) { + machine__mmap_name(machine, path, sizeof(path)); + dso__set_long_name(dso, strdup(path)); map__fixup_start(map); map__fixup_end(map); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5f720dc076da..fd5b70ea2981 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -215,6 +215,7 @@ struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, const char 
*name); +struct symbol *dso__first_symbol(struct dso *dso, enum map_type type); int filename__read_build_id(const char *filename, void *bf, size_t size); int sysfs__read_build_id(const char *filename, void *bf, size_t size); @@ -247,4 +248,8 @@ void symbols__fixup_duplicate(struct rb_root *symbols); void symbols__fixup_end(struct rb_root *symbols); void __map_groups__fixup_end(struct map_groups *mg, enum map_type type); +typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); +int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, + bool *is_64_bit); + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 0fe1f9c05865..f98d1d983547 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -41,7 +41,7 @@ static inline struct map *thread__find_map(struct thread *self, void thread__find_addr_map(struct thread *thread, struct machine *machine, u8 cpumode, enum map_type type, u64 addr, - struct addr_location *al); + struct addr_location *al, symbol_filter_t filter); void thread__find_addr_location(struct thread *thread, struct machine *machine, u8 cpumode, enum map_type type, u64 addr, diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index df46be93d902..b554ffc462b6 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -39,6 +39,8 @@ struct perf_top { float min_percent; }; +#define CONSOLE_CLEAR "\033[H\033[2J" + size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); void perf_top__reset_sample_counters(struct perf_top *top); #endif /* __PERF_TOP_H */ diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c index 958723ba3d2e..5bbd4947c27d 100644 --- a/tools/perf/util/unwind.c +++ b/tools/perf/util/unwind.c @@ -272,7 +272,7 @@ static struct map *find_map(unw_word_t ip, struct unwind_info *ui) struct addr_location al; thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); + MAP__FUNCTION, ip, &al, NULL); 
return al.map; } @@ -349,7 +349,7 @@ static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, ssize_t size; thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, - MAP__FUNCTION, addr, &al); + MAP__FUNCTION, addr, &al, NULL); if (!al.map) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); return -1; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9a0658405760..6d17b18e915d 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -328,3 +328,36 @@ void put_tracing_file(char *file) { free(file); } + +int parse_nsec_time(const char *str, u64 *ptime) +{ + u64 time_sec, time_nsec; + char *end; + + time_sec = strtoul(str, &end, 10); + if (*end != '.' && *end != '\0') + return -1; + + if (*end == '.') { + int i; + char nsec_buf[10]; + + if (strlen(++end) > 9) + return -1; + + strncpy(nsec_buf, end, 9); + nsec_buf[9] = '\0'; + + /* make it nsec precision */ + for (i = strlen(nsec_buf); i < 9; i++) + nsec_buf[i] = '0'; + + time_nsec = strtoul(nsec_buf, &end, 10); + if (*end != '\0') + return -1; + } else + time_nsec = 0; + + *ptime = time_sec * NSEC_PER_SEC + time_nsec; + return 0; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index cc1574edcd9a..a53535949043 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -208,6 +208,8 @@ static inline int has_extension(const char *filename, const char *ext) #define NSEC_PER_MSEC 1000000L #endif +int parse_nsec_time(const char *str, u64 *ptime); + extern unsigned char sane_ctype[256]; #define GIT_SPACE 0x01 #define GIT_DIGIT 0x02 |