summaryrefslogtreecommitdiffstats
path: root/kernel/events
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-04-27 17:02:24 +0200
committerIngo Molnar <mingo@kernel.org>2016-04-27 17:02:24 +0200
commita8944c5bf86dc6c153a71f2a386738c0d3f5ff9c (patch)
treea251b1d510831dc071eadbbbe3e38a85fe643365 /kernel/events
parent67d61296ffcc850bffdd4466430cb91e5328f39a (diff)
parent4cb93446c587d56e2a54f4f83113daba2c0b6dee (diff)
downloadlinux-a8944c5bf86dc6c153a71f2a386738c0d3f5ff9c.tar.bz2
Merge tag 'perf-core-for-mingo-20160427' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - perf trace --pf maj/min/all works with --call-graph: (Arnaldo Carvalho de Melo) Tracing write syscalls and major page faults with callchains while starting firefox, limiting the stack to 5 frames: # perf trace -e write --pf maj --max-stack 5 firefox 589.549 ( 0.014 ms): firefox/15377 write(fd: 4, buf: 0x7fff80acc898, count: 151) = 151 [0xfaed] (/usr/lib64/libpthread-2.22.so) fire_glxtest_process+0x5c (/usr/lib64/firefox/libxul.so) InstallGdkErrorHandler+0x41 (/usr/lib64/firefox/libxul.so) XREMain::XRE_mainInit+0x12c (/usr/lib64/firefox/libxul.so) XREMain::XRE_main+0x1e4 (/usr/lib64/firefox/libxul.so) 760.704 ( 0.000 ms): firefox/15332 majfault [gtk_tree_view_accessible_get_type+0x0] => /usr/lib64/libgtk-3.so.0.1800.9@0xa0850 (x.) gtk_tree_view_accessible_get_type+0x0 (/usr/lib64/libgtk-3.so.0.1800.9) gtk_tree_view_class_intern_init+0x1a54 (/usr/lib64/libgtk-3.so.0.1800.9) g_type_class_ref+0x6dd (/usr/lib64/libgobject-2.0.so.0.4600.2) [0x115378] (/usr/lib64/libgnutls.so.30.6.3) This automagically selects "--call-graph dwarf", use "--call-graph fp" on systems where -fno-omit-frame-pointer was used to build the components of interest, to incur less overhead, or tune "--call-graph dwarf" appropriately, see 'perf record --help'. - Allow /proc/sys/kernel/perf_event_max_stack, which defaults to the old hard coded value of PERF_MAX_STACK_DEPTH (127), useful for huge callstacks for things like Groovy, Ruby, etc, and also to reduce overhead by limiting it to a smaller value, upcoming work will allow this to be done per-event (Arnaldo Carvalho de Melo) - Make 'perf trace --min-stack' be honoured by --pf and --event (Arnaldo Carvalho de Melo) - Make 'perf evlist -v' decode perf_event_attr->branch_sample_type (Arnaldo Carvalho de Melo) # perf record --call lbr usleep 1 # perf evlist -v cycles:ppp: ... sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK, ...
branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES # - Clear dummy entry accumulated period, fixing such 'perf top/report' output as: (Kan Liang) 4769.98% 0.01% 0.00% 0.01% tchain_edit [kernel] [k] update_fast_timekeeper - System calls with pid_t arguments get them augmented with the COMM event more thoroughly: # trace -e perf_event_open perf stat -e cycles -p 15608 6.876 ( 0.014 ms): perf_event_open(attr_uptr: 0x2ae20d8, pid: 15608 (hexchat), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3 6.882 ( 0.005 ms): perf_event_open(attr_uptr: 0x2ae20d8, pid: 15639 (gmain), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4 6.889 ( 0.005 ms): perf_event_open(attr_uptr: 0x2ae20d8, pid: 15640 (gdbus), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5 ^^^^^^^^^^^^^^^^^^ ^C - Fix offline module name mismatch issue in 'perf probe' (Ravi Bangoria) - Fix module probe issue if no dwarf support in 'perf probe' (Ravi Bangoria) Assorted fixes: - Fix off-by-one in write_buildid() (Andrey Ryabinin) - Fix segfault when printing callchains in 'perf script' (Chris Phlipot) - Replace assignment with comparison on assert check in 'perf test' entry (Colin Ian King) - Fix off-by-one comparison in intel-pt code (Colin Ian King) - Close target file on error path in 'perf probe' (Masami Hiramatsu) - Set default kprobe group name if not given in 'perf probe' (Masami Hiramatsu) - Avoid partial perf_event_header reads (Wang Nan) Infrastructure changes: - Update x86's syscall_64.tbl copy, adding preadv2 & pwritev2 (Arnaldo Carvalho de Melo) - Make the x86 clean quiet wrt syscall table removal (Jiri Olsa) Cleanups: - Simplify wrapper for LOCK_PI in 'perf bench futex' (Davidlohr Bueso) - Remove duplicate const qualifier (Eric Engestrom) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
-rw-r--r--kernel/events/callchain.c35
1 files changed, 33 insertions, 2 deletions
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 343c22f5e867..b9325e7dcba1 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -18,6 +18,14 @@ struct callchain_cpus_entries {
struct perf_callchain_entry *cpu_entries[0];
};
+int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
+
+static inline size_t perf_callchain_entry__sizeof(void)
+{
+ return (sizeof(struct perf_callchain_entry) +
+ sizeof(__u64) * sysctl_perf_event_max_stack);
+}
+
static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
@@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void)
if (!entries)
return -ENOMEM;
- size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+ size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;
for_each_possible_cpu(cpu) {
entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
@@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
cpu = smp_processor_id();
- return &entries->cpu_entries[cpu][*rctx];
+ return (((void *)entries->cpu_entries[cpu]) +
+ (*rctx * perf_callchain_entry__sizeof()));
}
static void
@@ -215,3 +224,25 @@ exit_put:
return entry;
}
+
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int new_value = sysctl_perf_event_max_stack, ret;
+ struct ctl_table new_table = *table;
+
+ new_table.data = &new_value;
+ ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
+ if (ret || !write)
+ return ret;
+
+ mutex_lock(&callchain_mutex);
+ if (atomic_read(&nr_callchain_events))
+ ret = -EBUSY;
+ else
+ sysctl_perf_event_max_stack = new_value;
+
+ mutex_unlock(&callchain_mutex);
+
+ return ret;
+}