From 80a9b64e2c156b6523e7a01f2ba6e5d86e722814 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Tue, 17 Mar 2015 10:40:38 -0400
Subject: ring-buffer: Replace this_cpu_*() with __this_cpu_*()

It has come to my attention that this_cpu_read/write are horrible on
architectures other than x86. Worse yet, they actually disable
preemption or interrupts! This caused some unexpected tracing results
on ARM.

 101.356868: preempt_count_add <-ring_buffer_lock_reserve
 101.356870: preempt_count_sub <-ring_buffer_lock_reserve

The ring_buffer_lock_reserve has recursion protection that requires
accessing a per-cpu variable. But since preempt_disable() is traced, it
too got traced while accessing the variable that is supposed to prevent
recursion, as seen in the trace above.

The generic versions of this_cpu_read() and this_cpu_write() are:

 #define this_cpu_generic_read(pcp)				\
 ({	typeof(pcp) ret__;					\
	preempt_disable();					\
	ret__ = *this_cpu_ptr(&(pcp));				\
	preempt_enable();					\
	ret__;							\
 })

 #define this_cpu_generic_to_op(pcp, val, op)			\
 do {								\
	unsigned long flags;					\
	raw_local_irq_save(flags);				\
	*__this_cpu_ptr(&(pcp)) op val;				\
	raw_local_irq_restore(flags);				\
 } while (0)

This is unacceptable for code that already knows it is running with
preemption or interrupts disabled.

Paul McKenney stated that the __this_cpu_*() versions produce much
better code on other architectures than this_cpu_*() does, if we know
that the call is done in a preempt-disabled location.

I also changed trace_recursive_unlock() to use a local variable instead
of reading the per-cpu variable twice.

Link: http://lkml.kernel.org/r/20150317114411.GE3589@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/20150317104038.312e73d1@gandalf.local.home
Cc: stable@vger.kernel.org
Acked-by: Christoph Lameter
Reported-by: Uwe Kleine-Koenig
Tested-by: Uwe Kleine-Koenig
Signed-off-by: Steven Rostedt
---
 kernel/trace/ring_buffer.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5040d44fe5a3..922048a0f7ea 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2679,7 +2679,7 @@ static DEFINE_PER_CPU(unsigned int, current_context);

 static __always_inline int trace_recursive_lock(void)
 {
-	unsigned int val = this_cpu_read(current_context);
+	unsigned int val = __this_cpu_read(current_context);
 	int bit;

 	if (in_interrupt()) {
@@ -2696,18 +2696,17 @@ static __always_inline int trace_recursive_lock(void)
 		return 1;

 	val |= (1 << bit);
-	this_cpu_write(current_context, val);
+	__this_cpu_write(current_context, val);

 	return 0;
 }

 static __always_inline void trace_recursive_unlock(void)
 {
-	unsigned int val = this_cpu_read(current_context);
+	unsigned int val = __this_cpu_read(current_context);

-	val--;
-	val &= this_cpu_read(current_context);
-	this_cpu_write(current_context, val);
+	val &= val & (val - 1);
+	__this_cpu_write(current_context, val);
 }

 #else
--
cgit v1.2.3

From bbedb179944c29e5e449603163eec9951116fe39 Mon Sep 17 00:00:00 2001
From: Scott Wood
Date: Wed, 11 Mar 2015 22:13:57 -0500
Subject: tracing: %pF is only for function pointers

Use %pS for actual addresses, otherwise you'll get bad output on arches
like ppc64 where %pF expects a function descriptor.
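For illustration only (this snippet is not part of the patch, and the
helper name is made up): %pS is the right specifier when the stored
value is a plain text address, such as a saved instruction pointer,
whereas %pF is only correct for actual function pointers, which on
ppc64 are function descriptors rather than code addresses:

 /* Hypothetical sketch of printing a saved caller address. */
 #include <linux/kernel.h>
 #include <linux/printk.h>

 static void report_caller(unsigned long ip)
 {
	/* 'ip' is a raw text address, so %pS resolves it correctly. */
	pr_info("caller = %pS\n", (void *)ip);

	/*
	 * %pF would instead treat the value as a function descriptor on
	 * ppc64/ia64 and dereference it before the symbol lookup, so it
	 * is only valid for real function pointers, e.g.:
	 *
	 *	pr_info("handler = %pF\n", some_function_pointer);
	 */
 }

 static void example_caller(void)
 {
	report_caller(_RET_IP_);	/* _RET_IP_ is a text address */
 }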
Link: http://lkml.kernel.org/r/1426130037-17956-22-git-send-email-scottwood@freescale.com Signed-off-by: Scott Wood Signed-off-by: Steven Rostedt --- include/trace/events/btrfs.h | 4 ++-- include/trace/events/ext3.h | 2 +- include/trace/events/ext4.h | 6 +++--- include/trace/events/module.h | 4 ++-- include/trace/events/random.h | 10 +++++----- kernel/trace/trace_entries.h | 6 +++--- tools/lib/traceevent/event-parse.c | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 1faecea101f3..572e6503394a 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -962,7 +962,7 @@ TRACE_EVENT(alloc_extent_state, __entry->ip = IP ), - TP_printk("state=%p; mask = %s; caller = %pF", __entry->state, + TP_printk("state=%p; mask = %s; caller = %pS", __entry->state, show_gfp_flags(__entry->mask), (void *)__entry->ip) ); @@ -982,7 +982,7 @@ TRACE_EVENT(free_extent_state, __entry->ip = IP ), - TP_printk(" state=%p; caller = %pF", __entry->state, + TP_printk(" state=%p; caller = %pS", __entry->state, (void *)__entry->ip) ); diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h index 6797b9de90ed..7f20707849bb 100644 --- a/include/trace/events/ext3.h +++ b/include/trace/events/ext3.h @@ -144,7 +144,7 @@ TRACE_EVENT(ext3_mark_inode_dirty, __entry->ip = IP; ), - TP_printk("dev %d,%d ino %lu caller %pF", + TP_printk("dev %d,%d ino %lu caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (void *)__entry->ip) ); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6e5abd6d38a2..47fca36ee426 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -240,7 +240,7 @@ TRACE_EVENT(ext4_mark_inode_dirty, __entry->ip = IP; ), - TP_printk("dev %d,%d ino %lu caller %pF", + TP_printk("dev %d,%d ino %lu caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (void *)__entry->ip) ); @@ -1762,7 +1762,7 @@ TRACE_EVENT(ext4_journal_start, __entry->rsv_blocks = rsv_blocks; ), - TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pF", + TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, __entry->rsv_blocks, (void *)__entry->ip) ); @@ -1784,7 +1784,7 @@ TRACE_EVENT(ext4_journal_start_reserved, __entry->blocks = blocks; ), - TP_printk("dev %d,%d blocks, %d caller %pF", + TP_printk("dev %d,%d blocks, %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blocks, (void *)__entry->ip) ); diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 81c4c183d348..28c45997e451 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -84,7 +84,7 @@ DECLARE_EVENT_CLASS(module_refcnt, __assign_str(name, mod->name); ), - TP_printk("%s call_site=%pf refcnt=%d", + TP_printk("%s call_site=%ps refcnt=%d", __get_str(name), (void *)__entry->ip, __entry->refcnt) ); @@ -121,7 +121,7 @@ TRACE_EVENT(module_request, __assign_str(name, name); ), - TP_printk("%s wait=%d call_site=%pf", + TP_printk("%s wait=%d call_site=%ps", __get_str(name), (int)__entry->wait, (void *)__entry->ip) ); diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 805af6db41cc..4684de344c5d 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -22,7 +22,7 @@ TRACE_EVENT(add_device_randomness, __entry->IP = IP; ), - TP_printk("bytes %d caller %pF", + 
TP_printk("bytes %d caller %pS",
	  __entry->bytes, (void *)__entry->IP)
 );
@@ -43,7 +43,7 @@ DECLARE_EVENT_CLASS(random__mix_pool_bytes,
 		__entry->IP = IP;
 	),
-	TP_printk("%s pool: bytes %d caller %pF",
+	TP_printk("%s pool: bytes %d caller %pS",
 		  __entry->pool_name, __entry->bytes, (void *)__entry->IP)
 );
@@ -82,7 +82,7 @@ TRACE_EVENT(credit_entropy_bits,
 	),
 	TP_printk("%s pool: bits %d entropy_count %d entropy_total %d "
-		  "caller %pF", __entry->pool_name, __entry->bits,
+		  "caller %pS", __entry->pool_name, __entry->bits,
 		  __entry->entropy_count, __entry->entropy_total,
 		  (void *)__entry->IP)
 );
@@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(random__get_random_bytes,
 		__entry->IP = IP;
 	),
-	TP_printk("nbytes %d caller %pF", __entry->nbytes, (void *)__entry->IP)
+	TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP)
 );
 DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
@@ -242,7 +242,7 @@ DECLARE_EVENT_CLASS(random__extract_entropy,
 		__entry->IP = IP;
 	),
-	TP_printk("%s pool: nbytes %d entropy_count %d caller %pF",
+	TP_printk("%s pool: nbytes %d entropy_count %d caller %pS",
 		  __entry->pool_name, __entry->nbytes, __entry->entropy_count,
 		  (void *)__entry->IP)
 );
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e2d027ac66a2..ee7b94a4810a 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -223,7 +223,7 @@ FTRACE_ENTRY(bprint, bprint_entry,
 		__dynamic_array( u32, buf )
 	),
-	F_printk("%pf: %s",
+	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->fmt),
 	FILTER_OTHER
@@ -238,7 +238,7 @@ FTRACE_ENTRY(print, print_entry,
 		__dynamic_array( char, buf )
 	),
-	F_printk("%pf: %s",
+	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->buf),
 	FILTER_OTHER
@@ -253,7 +253,7 @@ FTRACE_ENTRY(bputs, bputs_entry,
 		__field( const char *, str )
 	),
-	F_printk("%pf: %s",
+	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->str),
 	FILTER_OTHER
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index afe20ed9fac8..2c0bd8f2aad0 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3976,7 +3976,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 	if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
 		goto out_free;
-	/* skip the first "%pf: " */
+	/* skip the first "%ps: " */
 	for (ptr = fmt + 5, bptr = data + field->offset;
 	     bptr < data + size && *ptr; ptr++) {
 		int ls = 0;
--
cgit v1.2.3

From 754cb0071a5c9576ccfa6523969ef6a2f6a71676 Mon Sep 17 00:00:00 2001
From: He Kuang
Date: Tue, 3 Mar 2015 15:21:33 +0800
Subject: tracing: remove ftrace:function TRACE_EVENT_FL_USE_CALL_FILTER flag

The TRACE_EVENT_FL_USE_CALL_FILTER flag in the ftrace:function event
can be removed. This flag was first introduced in commit f306cc82a93d
("tracing: Update event filters for multibuffer").

Now, the only place that uses this flag is ftrace:function, but the
filter of ftrace:function takes a different code path from
events/syscalls and events/tracepoints. It uses ftrace_filter_write()
and perf's ftrace_profile_set_filter() to set the filter, so the
functionality of the file 'tracing/events/ftrace/function/filter' is
bypassed in init_pred(); in that case, neither call->filter nor
file->filter is used.

So we can safely remove the TRACE_EVENT_FL_USE_CALL_FILTER flag from
ftrace:function events.
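For reference only — the snippet below is not part of this patch, and
the ops/function names are made up — kernel code that wants to filter
the function tracer goes through the ftrace filter interface
(ftrace_set_filter(), or the set_ftrace_filter file from userspace),
not through the generic event filter that TRACE_EVENT_FL_USE_CALL_FILTER
selects:

 #include <linux/ftrace.h>
 #include <linux/init.h>
 #include <linux/string.h>

 static void my_trace_func(unsigned long ip, unsigned long parent_ip,
			   struct ftrace_ops *ops, struct pt_regs *regs)
 {
	/* Called for every function that passes the ftrace filter. */
 }

 static struct ftrace_ops my_ops = {
	.func = my_trace_func,
 };

 static char target[] = "schedule";

 static int __init my_filter_init(void)
 {
	/* Filtering is set up here, via ftrace, not via event filters. */
	ftrace_set_filter(&my_ops, target, strlen(target), 0);
	return register_ftrace_function(&my_ops);
 }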
Link: http://lkml.kernel.org/r/1425367294-27852-1-git-send-email-hekuang@huawei.com Signed-off-by: He Kuang Signed-off-by: Steven Rostedt --- kernel/trace/trace_export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 12e2b99be862..174a6a71146c 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -177,7 +177,7 @@ struct ftrace_event_call __used event_##call = { \ }, \ .event.type = etype, \ .print_fmt = print, \ - .flags = TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \ + .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \ }; \ struct ftrace_event_call __used \ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; -- cgit v1.2.3 From d9a16d3ab8770357015c85a07387f1d2676a4773 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 12 Mar 2015 16:58:34 +1100 Subject: trace: Don't use __weak in header files The commit that added a check for this to checkpatch says: "Using weak declarations can have unintended link defects. The __weak on the declaration causes non-weak definitions to become weak." In this case, when a PowerPC kernel is built with CONFIG_KPROBE_EVENT but not CONFIG_UPROBE_EVENT, it generates the following warning: WARNING: 1 bad relocations c0000000014f2190 R_PPC64_ADDR64 uprobes_fetch_type_table This is fixed by passing the fetch_table arrays to traceprobe_parse_probe_arg() which also means that they can never be NULL. Link: http://lkml.kernel.org/r/20150312165834.4482cb48@canb.auug.org.au Acked-by: Masami Hiramatsu Signed-off-by: Stephen Rothwell Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 5 +++-- kernel/trace/trace_probe.c | 19 +++++++------------ kernel/trace/trace_probe.h | 10 ++-------- kernel/trace/trace_uprobe.c | 5 +++-- 4 files changed, 15 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d73f565b4e06..f34c3ad1b5f4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -250,7 +250,7 @@ DEFINE_FETCH_symbol(string_size) #define fetch_file_offset_string_size NULL /* Fetch type information table */ -const struct fetch_type kprobes_fetch_type_table[] = { +static const struct fetch_type kprobes_fetch_type_table[] = { /* Special types */ [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, "__data_loc char[]"), @@ -760,7 +760,8 @@ static int create_trace_kprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg, - is_return, true); + is_return, true, + kprobes_fetch_type_table); if (ret) { pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index b983b2fd2ca1..1769a81da8a7 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -356,17 +356,14 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *t, - struct fetch_param *f, bool is_return, bool is_kprobe) + struct fetch_param *f, bool is_return, bool is_kprobe, + const struct fetch_type *ftbl) { - const struct fetch_type *ftbl; unsigned long param; long offset; char *tmp; int ret = 0; - ftbl = is_kprobe ? 
kprobes_fetch_type_table : uprobes_fetch_type_table; - BUG_ON(ftbl == NULL); - switch (arg[0]) { case '$': ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe); @@ -447,7 +444,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, dprm->fetch_size = get_fetch_size_function(t, dprm->fetch, ftbl); ret = parse_probe_arg(arg, t2, &dprm->orig, is_return, - is_kprobe); + is_kprobe, ftbl); if (ret) kfree(dprm); else { @@ -505,15 +502,12 @@ static int __parse_bitfield_probe_arg(const char *bf, /* String length checking wrapper */ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe) + struct probe_arg *parg, bool is_return, bool is_kprobe, + const struct fetch_type *ftbl) { - const struct fetch_type *ftbl; const char *t; int ret; - ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table; - BUG_ON(ftbl == NULL); - if (strlen(arg) > MAX_ARGSTR_LEN) { pr_info("Argument is too long.: %s\n", arg); return -ENOSPC; @@ -535,7 +529,8 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, } parg->offset = *size; *size += parg->type->size; - ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe); + ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, + is_kprobe, ftbl); if (ret >= 0 && t != NULL) ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 4f815fbce16d..e30f6cce4af6 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -229,13 +229,6 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \ #define FETCH_TYPE_STRING 0 #define FETCH_TYPE_STRSIZE 1 -/* - * Fetch type information table. - * It's declared as a weak symbol due to conditional compilation. - */ -extern __weak const struct fetch_type kprobes_fetch_type_table[]; -extern __weak const struct fetch_type uprobes_fetch_type_table[]; - #ifdef CONFIG_KPROBE_EVENT struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); @@ -333,7 +326,8 @@ find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) } extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, - struct probe_arg *parg, bool is_return, bool is_kprobe); + struct probe_arg *parg, bool is_return, bool is_kprobe, + const struct fetch_type *ftbl); extern int traceprobe_conflict_field_name(const char *name, struct probe_arg *args, int narg); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7dc1c8abecd6..74865465e0b7 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -196,7 +196,7 @@ DEFINE_FETCH_file_offset(string) DEFINE_FETCH_file_offset(string_size) /* Fetch type information table */ -const struct fetch_type uprobes_fetch_type_table[] = { +static const struct fetch_type uprobes_fetch_type_table[] = { /* Special types */ [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, "__data_loc char[]"), @@ -535,7 +535,8 @@ static int create_trace_uprobe(int argc, char **argv) /* Parse fetch argument */ ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg, - is_return, false); + is_return, false, + uprobes_fetch_type_table); if (ret) { pr_info("Parse error at argument[%d]. 
(%d)\n", i, ret); goto error; -- cgit v1.2.3 From d631c8cceb1d1d06f372878935949d421585186b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 27 Mar 2015 17:39:49 -0400 Subject: ring-buffer: Remove duplicate use of '&' in recursive code A clean up of the recursive protection code changed val = this_cpu_read(current_context); val--; val &= this_cpu_read(current_context); to val = this_cpu_read(current_context); val &= val & (val - 1); Which has a duplicate use of '&' as the above is the same as val = val & (val - 1); Actually, it would be best to remove that line altogether and just add it to where it is used. And Christoph even mentioned that it can be further compacted to just a single line: __this_cpu_and(current_context, __this_cpu_read(current_context) - 1); Link: http://lkml.kernel.org/alpine.DEB.2.11.1503271423580.23114@gentwo.org Suggested-by: Christoph Lameter Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 922048a0f7ea..0315d43176d8 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2703,10 +2703,7 @@ static __always_inline int trace_recursive_lock(void) static __always_inline void trace_recursive_unlock(void) { - unsigned int val = __this_cpu_read(current_context); - - val &= val & (val - 1); - __this_cpu_write(current_context, val); + __this_cpu_and(current_context, __this_cpu_read(current_context) - 1); } #else -- cgit v1.2.3 From 00ccbf2f5b7580cd7dcdaeda84828d14f0cba3c9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 19 Feb 2015 15:56:14 +0100 Subject: ftrace/x86: Let dynamic trampolines call ops->func even for dynamic fops Dynamically allocated trampolines call ftrace_ops_get_func to get the function which they should call. For dynamic fops (FTRACE_OPS_FL_DYNAMIC flag is set) ftrace_ops_list_func is always returned. This is reasonable for static trampolines but goes against the main advantage of dynamic ones, that is avoidance of going through the list of all registered callbacks for functions that are only being traced by a single callback. We can fix it by returning ops->func (or recursion safe version) from ftrace_ops_get_func whenever it is possible for dynamic trampolines. Note that dynamic trampolines are not allowed for dynamic fops if CONFIG_PREEMPT=y. Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1501291023000.25445@pobox.suse.cz Link: http://lkml.kernel.org/r/1424357773-13536-1-git-send-email-mbenes@suse.cz Reported-by: Miroslav Benes Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4f228024055b..d01d238d8ef4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -249,6 +249,19 @@ static void update_function_graph_func(void); static inline void update_function_graph_func(void) { } #endif + +static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) +{ + /* + * If this is a dynamic ops or we force list func, + * then it needs to call the list anyway. 
+	 */
+	if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC)
+		return ftrace_ops_list_func;
+
+	return ftrace_ops_get_func(ops);
+}
+
 static void update_ftrace_function(void)
 {
 	ftrace_func_t func;
@@ -270,7 +283,7 @@ static void update_ftrace_function(void)
 	 * then have the mcount trampoline call the function directly.
 	 */
 	} else if (ftrace_ops_list->next == &ftrace_list_end) {
-		func = ftrace_ops_get_func(ftrace_ops_list);
+		func = ftrace_ops_get_list_func(ftrace_ops_list);
 	} else {
 		/* Just use the default ftrace_ops */
@@ -5208,13 +5221,6 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip,
 	 */
 ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
 {
-	/*
-	 * If this is a dynamic ops or we force list func,
-	 * then it needs to call the list anyway.
-	 */
-	if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC)
-		return ftrace_ops_list_func;
-
 	/*
 	 * If the func handles its own recursion, call it directly.
 	 * Otherwise call the recursion protected function that
--
cgit v1.2.3

From 0c564a538aa934ad15b2145aaf8b64f3feb0be63 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Tue, 24 Mar 2015 17:58:09 -0400
Subject: tracing: Add TRACE_DEFINE_ENUM() macro to map enums to their values

Several tracepoints use the helper functions __print_symbolic() or
__print_flags() and pass in enums that map the binary data stored to
the strings to print. This works well for reading the ASCII trace
files, but when the data is read via userspace tools such as perf and
trace-cmd, the conversion of the binary value to a human string format
is lost if an enum is used, as userspace has no way of knowing what the
enum's value is.

For example, the tracepoint trace_tlb_flush() has:

 __print_symbolic(REC->reason,
	{ TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" },
	{ TLB_REMOTE_SHOOTDOWN, "remote shootdown" },
	{ TLB_LOCAL_SHOOTDOWN, "local shootdown" },
	{ TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" })

This maps the enum values to the strings they represent. But perf and
trace-cmd do not know what value TLB_LOCAL_MM_SHOOTDOWN is, and would
not be able to map it.

With TRACE_DEFINE_ENUM(), developers can place these in the event
header files and ftrace will convert the enums to their values. By
adding:

 TRACE_DEFINE_ENUM(TLB_FLUSH_ON_TASK_SWITCH);
 TRACE_DEFINE_ENUM(TLB_REMOTE_SHOOTDOWN);
 TRACE_DEFINE_ENUM(TLB_LOCAL_SHOOTDOWN);
 TRACE_DEFINE_ENUM(TLB_LOCAL_MM_SHOOTDOWN);

the format file now reports the resolved numeric values:

 $ cat /sys/kernel/debug/tracing/events/tlb/tlb_flush/format
 [...]
 __print_symbolic(REC->reason,
	{ 0, "flush on task switch" },
	{ 1, "remote shootdown" },
	{ 2, "local shootdown" },
	{ 3, "local mm shootdown" })

The above is what userspace expects to see, and tools do not need to be
modified to parse them.
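As a usage sketch (not taken from this patch — the "sample" subsystem,
the sample_event tracepoint and the SAMPLE_* constants are hypothetical,
and the enum itself is assumed to be defined in a regular kernel
header), an event header would typically combine TRACE_DEFINE_ENUM()
with __print_symbolic() like this:

 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM sample

 #if !defined(_TRACE_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_SAMPLE_H

 #include <linux/tracepoint.h>

 /* SAMPLE_FAST and SAMPLE_SLOW come from an ordinary kernel header. */
 TRACE_DEFINE_ENUM(SAMPLE_FAST);
 TRACE_DEFINE_ENUM(SAMPLE_SLOW);

 TRACE_EVENT(sample_event,

	TP_PROTO(int reason),

	TP_ARGS(reason),

	TP_STRUCT__entry(
		__field(int, reason)
	),

	TP_fast_assign(
		__entry->reason = reason;
	),

	TP_printk("reason=%s",
		  __print_symbolic(__entry->reason,
				   { SAMPLE_FAST, "fast path" },
				   { SAMPLE_SLOW, "slow path" }))
 );

 #endif /* _TRACE_SAMPLE_H */

 /* This part must be outside the multi-read protection. */
 #include <trace/define_trace.h>

With this in place, the event's format file (and therefore perf and
trace-cmd) sees the numeric values of SAMPLE_FAST and SAMPLE_SLOW
instead of unresolved enum names.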
Link: http://lkml.kernel.org/r/20150403013802.220157513@goodmis.org Cc: Guilherme Cox Cc: Tony Luck Cc: Xie XiuQi Acked-by: Namhyung Kim Reviewed-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 5 +- include/linux/ftrace_event.h | 2 +- include/linux/tracepoint.h | 8 +++ include/trace/ftrace.h | 22 ++++++- kernel/trace/trace.c | 26 ++++++++- kernel/trace/trace.h | 2 + kernel/trace/trace_events.c | 119 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 178 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ac78910d7416..f8e8b34dc427 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -124,7 +124,10 @@ #define FTRACE_EVENTS() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_ftrace_events) = .; \ *(_ftrace_events) \ - VMLINUX_SYMBOL(__stop_ftrace_events) = .; + VMLINUX_SYMBOL(__stop_ftrace_events) = .; \ + VMLINUX_SYMBOL(__start_ftrace_enum_maps) = .; \ + *(_ftrace_enum_map) \ + VMLINUX_SYMBOL(__stop_ftrace_enum_maps) = .; #else #define FTRACE_EVENTS() #endif diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 62b8fac7ded5..112cf49d9576 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -285,7 +285,7 @@ struct ftrace_event_call { struct tracepoint *tp; }; struct trace_event event; - const char *print_fmt; + char *print_fmt; struct event_filter *filter; void *mod; void *data; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c72851328ca9..a5f7f3ecafa3 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -36,6 +36,12 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +struct trace_enum_map { + const char *system; + const char *enum_string; + unsigned long enum_value; +}; + extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int @@ -87,6 +93,8 @@ extern void syscall_unregfunc(void); #define PARAMS(args...) args +#define TRACE_DEFINE_ENUM(x) + #endif /* _LINUX_TRACEPOINT_H */ /* diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 2f9b95b6d3fb..37d4b10b111d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -33,6 +33,19 @@ TRACE_MAKE_SYSTEM_STR(); +#undef TRACE_DEFINE_ENUM +#define TRACE_DEFINE_ENUM(a) \ + static struct trace_enum_map __used __initdata \ + __##TRACE_SYSTEM##_##a = \ + { \ + .system = TRACE_SYSTEM_STRING, \ + .enum_string = #a, \ + .enum_value = a \ + }; \ + static struct trace_enum_map __used \ + __attribute__((section("_ftrace_enum_map"))) \ + *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a + /* * DECLARE_EVENT_CLASS can be used to add a generic function * handlers for events. That is, if all events have the same @@ -136,6 +149,9 @@ TRACE_MAKE_SYSTEM_STR(); * The size of an array is also encoded, in the higher 16 bits of . */ +#undef TRACE_DEFINE_ENUM +#define TRACE_DEFINE_ENUM(a) + #undef __field #define __field(type, item) @@ -553,7 +569,7 @@ static inline notrace int ftrace_get_offsets_##call( \ * .trace = ftrace_raw_output_, <-- stage 2 * }; * - * static const char print_fmt_[] = ; + * static char print_fmt_[] = ; * * static struct ftrace_event_class __used event_class_