From b2ad368bebc0f772613668e893fa176396e9094c Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:10:39 -0700 Subject: tracing: Fix initialization failure path in tracing_set_tracer() If tracer->init() fails, current code will leave current_tracer pointing to an unusable tracer, which at best makes 'current_tracer' report inaccurate value. Fix the issue by pointing current_tracer to nop tracer, and only update current_tracer with the new one after all the initialization succeeds. Signed-off-by: Anton Vorontsov Acked-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 49249c28690d..44ee11e31b82 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3172,10 +3172,10 @@ static int tracing_set_tracer(const char *buf) } destroy_trace_option_files(topts); - current_trace = t; + current_trace = &nop_trace; - topts = create_trace_option_files(current_trace); - if (current_trace->use_max_tr) { + topts = create_trace_option_files(t); + if (t->use_max_tr) { int cpu; /* we need to make per cpu buffer sizes equivalent */ for_each_tracing_cpu(cpu) { @@ -3195,6 +3195,7 @@ static int tracing_set_tracer(const char *buf) goto out; } + current_trace = t; trace_branch_enable(tr); out: mutex_unlock(&trace_types_lock); -- cgit v1.2.3 From 21f679404a0c28bd5b1b3aff2a7218bbff4cb43d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:10:42 -0700 Subject: tracing/function: Introduce persistent trace option This patch introduces 'func_ptrace' option, now available in /sys/kernel/debug/tracing/options when function tracer is selected. The patch also adds some tiny code that calls back to pstore to record the trace. The callback is no-op when PSTORE=n. Signed-off-by: Anton Vorontsov Acked-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_functions.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c7b0c6a7db09..13770abd7a12 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "trace.h" @@ -74,6 +75,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) preempt_enable_notrace(); } +/* Our two options */ +enum { + TRACE_FUNC_OPT_STACK = 0x1, + TRACE_FUNC_OPT_PSTORE = 0x2, +}; + +static struct tracer_flags func_flags; + static void function_trace_call(unsigned long ip, unsigned long parent_ip) { @@ -97,6 +106,12 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { + /* + * So far tracing doesn't support multiple buffers, so + * we make an explicit call for now. + */ + if (unlikely(func_flags.val & TRACE_FUNC_OPT_PSTORE)) + pstore_ftrace_call(ip, parent_ip); pc = preempt_count(); trace_function(tr, ip, parent_ip, flags, pc); } @@ -158,14 +173,12 @@ static struct ftrace_ops trace_stack_ops __read_mostly = .flags = FTRACE_OPS_FL_GLOBAL, }; -/* Our two options */ -enum { - TRACE_FUNC_OPT_STACK = 0x1, -}; - static struct tracer_opt func_opts[] = { #ifdef CONFIG_STACKTRACE { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, +#endif +#ifdef CONFIG_PSTORE_FTRACE + { TRACER_OPT(func_pstore, TRACE_FUNC_OPT_PSTORE) }, #endif { } /* Always set a last empty entry */ }; @@ -217,6 +230,8 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) register_ftrace_function(&trace_ops); } + return 0; + } else if (bit == TRACE_FUNC_OPT_PSTORE) { return 0; } -- cgit v1.2.3 From f555f1231a69846d57099760f9c361982600ffa2 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 9 Jul 2012 17:10:46 -0700 Subject: tracing/function: Convert func_set_flag() to a switch statement Since the function accepts just one bit, we can use the switch construction instead of if/else if/... Just a cosmetic change, there should be no functional changes. Suggested-by: Steven Rostedt Signed-off-by: Anton Vorontsov Acked-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_functions.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 13770abd7a12..a426f410c060 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -217,10 +217,11 @@ static void tracing_stop_function_trace(void) static int func_set_flag(u32 old_flags, u32 bit, int set) { - if (bit == TRACE_FUNC_OPT_STACK) { + switch (bit) { + case TRACE_FUNC_OPT_STACK: /* do nothing if already set */ if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK)) - return 0; + break; if (set) { unregister_ftrace_function(&trace_ops); @@ -230,12 +231,14 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) register_ftrace_function(&trace_ops); } - return 0; - } else if (bit == TRACE_FUNC_OPT_PSTORE) { - return 0; + break; + case TRACE_FUNC_OPT_PSTORE: + break; + default: + return -EINVAL; } - return -EINVAL; + return 0; } static struct tracer function_trace __read_mostly = -- cgit v1.2.3 From e6dab5ffab59e910ec0e3355f4a6f29f7a7be474 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Wed, 11 Jul 2012 18:14:58 +0400 Subject: perf/trace: Add ability to set a target task for events A few events are interesting not only for a current task. For example, sched_stat_* events are interesting for a task which wakes up. For this reason, it will be good if such events will be delivered to a target task too. Now a target task can be set by using __perf_task(). The original idea and a draft patch belongs to Peter Zijlstra. I need these events for profiling sleep times. sched_switch is used for getting callchains and sched_stat_* is used for getting time periods. These events are combined in user space, then it can be analyzed by perf tools. Inspired-by: Peter Zijlstra Cc: Steven Rostedt Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Arun Sharma Signed-off-by: Andrew Vagin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1342016098-213063-1-git-send-email-avagin@openvz.org Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 5 +++-- include/linux/perf_event.h | 3 ++- include/trace/events/sched.h | 4 ++++ include/trace/ftrace.h | 6 +++++- kernel/events/callchain.c | 9 ++++++++- kernel/events/core.c | 30 ++++++++++++++++++++++++++++-- kernel/events/internal.h | 3 ++- kernel/trace/trace_event_perf.c | 2 +- kernel/trace/trace_kprobe.c | 6 ++++-- kernel/trace/trace_syscalls.c | 4 ++-- kernel/trace/trace_uprobe.c | 2 +- 11 files changed, 60 insertions(+), 14 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index af961d6f7ab1..642928cf57b4 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -306,9 +306,10 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type, static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, - u64 count, struct pt_regs *regs, void *head) + u64 count, struct pt_regs *regs, void *head, + struct task_struct *task) { - perf_tp_event(addr, count, raw_data, size, regs, head, rctx); + perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task); } #endif diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 76c5c8b724a7..7602ccb3f40e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1272,7 +1272,8 @@ static inline bool perf_paranoid_kernel(void) extern void perf_event_init(void); extern void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, struct pt_regs *regs, - struct hlist_head *head, int rctx); + struct hlist_head *head, int rctx, + struct task_struct *task); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index ea7a2035456d..5a8671e8a67f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -73,6 +73,9 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, __entry->prio = p->prio; __entry->success = success; __entry->target_cpu = task_cpu(p); + ) + TP_perf_assign( + __perf_task(p); ), TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", @@ -325,6 +328,7 @@ DECLARE_EVENT_CLASS(sched_stat_template, ) TP_perf_assign( __perf_count(delay); + __perf_task(tsk); ), TP_printk("comm=%s pid=%d delay=%Lu [ns]", diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c6bc2faaf261..a763888a36f9 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -712,6 +712,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call #undef __perf_count #define __perf_count(c) __count = (c) +#undef __perf_task +#define __perf_task(t) __task = (t) + #undef TP_perf_assign #define TP_perf_assign(args...) args @@ -725,6 +728,7 @@ perf_trace_##call(void *__data, proto) \ struct ftrace_raw_##call *entry; \ struct pt_regs __regs; \ u64 __addr = 0, __count = 1; \ + struct task_struct *__task = NULL; \ struct hlist_head *head; \ int __entry_size; \ int __data_size; \ @@ -752,7 +756,7 @@ perf_trace_##call(void *__data, proto) \ \ head = this_cpu_ptr(event_call->perf_events); \ perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ - __count, &__regs, head); \ + __count, &__regs, head, __task); \ } /* diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 6581a040f399..98d4597f43d6 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -153,7 +153,8 @@ put_callchain_entry(int rctx) put_recursion_context(__get_cpu_var(callchain_recursion), rctx); } -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +struct perf_callchain_entry * +perf_callchain(struct perf_event *event, struct pt_regs *regs) { int rctx; struct perf_callchain_entry *entry; @@ -178,6 +179,12 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) } if (regs) { + /* + * Disallow cross-task user callchains. + */ + if (event->ctx->task && event->ctx->task != current) + goto exit_put; + perf_callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_user(entry, regs); } diff --git a/kernel/events/core.c b/kernel/events/core.c index f1cf0edeb39a..b7935fcec7d9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4039,7 +4039,7 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_CALLCHAIN) { int size = 1; - data->callchain = perf_callchain(regs); + data->callchain = perf_callchain(event, regs); if (data->callchain) size += data->callchain->nr; @@ -5209,7 +5209,8 @@ static int perf_tp_event_match(struct perf_event *event, } void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, - struct pt_regs *regs, struct hlist_head *head, int rctx) + struct pt_regs *regs, struct hlist_head *head, int rctx, + struct task_struct *task) { struct perf_sample_data data; struct perf_event *event; @@ -5228,6 +5229,31 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, perf_swevent_event(event, count, &data, regs); } + /* + * If we got specified a target task, also iterate its context and + * deliver this event there too. + */ + if (task && task != current) { + struct perf_event_context *ctx; + struct trace_entry *entry = record; + + rcu_read_lock(); + ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]); + if (!ctx) + goto unlock; + + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (event->attr.type != PERF_TYPE_TRACEPOINT) + continue; + if (event->attr.config != entry->type) + continue; + if (perf_tp_event_match(event, &data, regs)) + perf_swevent_event(event, count, &data, regs); + } +unlock: + rcu_read_unlock(); + } + perf_swevent_put_recursion_context(rctx); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index b0b107f90afc..a096c19f2c2a 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -101,7 +101,8 @@ __output_copy(struct perf_output_handle *handle, } /* Callchain handling */ -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +extern struct perf_callchain_entry * +perf_callchain(struct perf_event *event, struct pt_regs *regs); extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index fee3752ae8f6..8a6d2ee2086c 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -281,7 +281,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) head = this_cpu_ptr(event_function.perf_events); perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, - 1, ®s, head); + 1, ®s, head, NULL); #undef ENTRY_SIZE } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b31d3d5699fe..1a2117043bb1 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1002,7 +1002,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); head = this_cpu_ptr(call->perf_events); - perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); + perf_trace_buf_submit(entry, size, rctx, + entry->ip, 1, regs, head, NULL); } /* Kretprobe profile handler */ @@ -1033,7 +1034,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); head = this_cpu_ptr(call->perf_events); - perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); + perf_trace_buf_submit(entry, size, rctx, + entry->ret_ip, 1, regs, head, NULL); } #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 96fc73369099..60e4d7875672 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -532,7 +532,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) (unsigned long *)&rec->args); head = this_cpu_ptr(sys_data->enter_event->perf_events); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } int perf_sysenter_enable(struct ftrace_event_call *call) @@ -608,7 +608,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->ret = syscall_get_return_value(current, regs); head = this_cpu_ptr(sys_data->exit_event->perf_events); - perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); + perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } int perf_sysexit_enable(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 2b36ac68549e..03003cd7dd96 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -670,7 +670,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); head = this_cpu_ptr(call->perf_events); - perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL); out: preempt_enable(); -- cgit v1.2.3