From 6cafbe159416822f6d3dfd711bf4c39050c650ba Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 20 Jun 2017 10:44:58 -0400 Subject: ftrace: Add a ftrace_free_mem() function for modules to use In order to be able to trace module init functions, the module code needs to tell ftrace what is being freed when the init sections are freed. Use the code that the main init calls to tell ftrace to free the main init sections. This requires passing in a start and end address to free. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2e028854bac7..47fc404ad233 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -151,8 +151,10 @@ struct ftrace_ops_hash { }; void ftrace_free_init_mem(void); +void ftrace_free_mem(void *start, void *end); #else static inline void ftrace_free_init_mem(void) { } +static inline void ftrace_free_mem(void *start, void *end) { } #endif /* -- cgit v1.2.3 From 3e234289f86b12985ef8909cd34525fcb66c4efb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 3 Mar 2017 18:00:22 -0500 Subject: ftrace: Allow module init functions to be traced Allow for module init sections to be traced as well as core kernel init sections. Now that filtering modules functions can be stored, for when they are loaded, it makes sense to be able to trace them. Cc: Jessica Yu Cc: Rusty Russell Signed-off-by: Steven Rostedt (VMware) --- include/linux/init.h | 4 +--- kernel/module.c | 2 ++ kernel/trace/ftrace.c | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index 94769d687cf0..a779c1816437 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -39,7 +39,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __inittrace __latent_entropy +#define __init __section(.init.text) __cold __latent_entropy #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) @@ -68,10 +68,8 @@ #ifdef MODULE #define __exitused -#define __inittrace notrace #else #define __exitused __used -#define __inittrace #endif #define __exit __section(.exit.text) __exitused __cold notrace diff --git a/kernel/module.c b/kernel/module.c index de66ec825992..58bca427ac3f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3473,6 +3473,8 @@ static noinline int do_init_module(struct module *mod) if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) async_synchronize_full(); + ftrace_free_mem(mod->init_layout.base, mod->init_layout.base + + mod->init_layout.size); mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 84cb5928665a..d7297e866e4a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5752,7 +5752,8 @@ void ftrace_release_mod(struct module *mod) last_pg = &ftrace_pages_start; for (pg = ftrace_pages_start; pg; pg = *last_pg) { rec = &pg->records[0]; - if (within_module_core(rec->ip, mod)) { + if (within_module_core(rec->ip, mod) || + within_module_init(rec->ip, mod)) { /* * As core pages are first, the first * page should never be a module page. @@ -5821,7 +5822,8 @@ void ftrace_module_enable(struct module *mod) * not part of this module, then skip this pg, * which the "break" will do. */ - if (!within_module_core(rec->ip, mod)) + if (!within_module_core(rec->ip, mod) && + !within_module_init(rec->ip, mod)) break; cnt = 0; -- cgit v1.2.3 From aba4b5c22cbac296f4081a0476d0c55828f135b4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 1 Sep 2017 08:35:38 -0400 Subject: ftrace: Save module init functions kallsyms symbols for tracing If function tracing is active when the module init functions are freed, then store them to be referenced by kallsyms. As module init functions can now be traced on module load, they were useless: ># echo ':mod:snd_seq' > set_ftrace_filter ># echo function > current_tracer ># modprobe snd_seq ># cat trace # tracer: function # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | modprobe-2786 [000] .... 3189.037874: 0xffffffffa0860000 <-do_one_initcall modprobe-2786 [000] .... 3189.037876: 0xffffffffa086004d <-0xffffffffa086000f modprobe-2786 [000] .... 3189.037876: 0xffffffffa086010d <-0xffffffffa0860018 modprobe-2786 [000] .... 3189.037877: 0xffffffffa086011a <-0xffffffffa0860021 modprobe-2786 [000] .... 3189.037877: 0xffffffffa0860080 <-0xffffffffa086002a modprobe-2786 [000] .... 3189.039523: 0xffffffffa0860400 <-0xffffffffa0860033 modprobe-2786 [000] .... 3189.039523: 0xffffffffa086038a <-0xffffffffa086041c modprobe-2786 [000] .... 3189.039591: 0xffffffffa086038a <-0xffffffffa0860436 modprobe-2786 [000] .... 3189.039657: 0xffffffffa086038a <-0xffffffffa0860450 modprobe-2786 [000] .... 3189.039719: 0xffffffffa0860127 <-0xffffffffa086003c modprobe-2786 [000] .... 3189.039742: snd_seq_create_kernel_client <-0xffffffffa08601f6 When the output is shown, the kallsyms for the module init functions have already been freed, and the output of the trace can not convert them to their function names. Now this looks like this: # tracer: function # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | modprobe-2463 [002] .... 174.243237: alsa_seq_init <-do_one_initcall modprobe-2463 [002] .... 174.243239: client_init_data <-alsa_seq_init modprobe-2463 [002] .... 174.243240: snd_sequencer_memory_init <-alsa_seq_init modprobe-2463 [002] .... 174.243240: snd_seq_queues_init <-alsa_seq_init modprobe-2463 [002] .... 174.243240: snd_sequencer_device_init <-alsa_seq_init modprobe-2463 [002] .... 174.244860: snd_seq_info_init <-alsa_seq_init modprobe-2463 [002] .... 174.244861: create_info_entry <-snd_seq_info_init modprobe-2463 [002] .... 174.244936: create_info_entry <-snd_seq_info_init modprobe-2463 [002] .... 174.245003: create_info_entry <-snd_seq_info_init modprobe-2463 [002] .... 174.245072: snd_seq_system_client_init <-alsa_seq_init modprobe-2463 [002] .... 174.245094: snd_seq_create_kernel_client <-snd_seq_system_client_init Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 20 ++++++- kernel/kallsyms.c | 5 ++ kernel/module.c | 2 +- kernel/trace/ftrace.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 168 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 47fc404ad233..202b40784c4e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -51,6 +51,21 @@ static inline void early_trace_init(void) { } struct module; struct ftrace_hash; +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \ + defined(CONFIG_DYNAMIC_FTRACE) +const char * +ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym); +#else +static inline const char * +ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + return NULL; +} +#endif + + #ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; @@ -151,10 +166,10 @@ struct ftrace_ops_hash { }; void ftrace_free_init_mem(void); -void ftrace_free_mem(void *start, void *end); +void ftrace_free_mem(struct module *mod, void *start, void *end); #else static inline void ftrace_free_init_mem(void) { } -static inline void ftrace_free_mem(void *start, void *end) { } +static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif /* @@ -272,6 +287,7 @@ static inline int ftrace_nr_registered_ops(void) static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_free_init_mem(void) { } +static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 127e7cfafa55..976ecb9275d9 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -337,6 +338,10 @@ const char *kallsyms_lookup(unsigned long addr, if (!ret) ret = bpf_address_lookup(addr, symbolsize, offset, modname, namebuf); + + if (!ret) + ret = ftrace_mod_address_lookup(addr, symbolsize, + offset, modname, namebuf); return ret; } diff --git a/kernel/module.c b/kernel/module.c index 58bca427ac3f..279a469dc375 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3473,7 +3473,7 @@ static noinline int do_init_module(struct module *mod) if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) async_synchronize_full(); - ftrace_free_mem(mod->init_layout.base, mod->init_layout.base + + ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + mod->init_layout.size); mutex_lock(&module_mutex); /* Drop initial reference. */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d7297e866e4a..86dbbfb353db 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5675,6 +5675,21 @@ static int ftrace_process_locs(struct module *mod, return ret; } +struct ftrace_mod_func { + struct list_head list; + char *name; + unsigned long ip; + unsigned int size; +}; + +struct ftrace_mod_map { + struct list_head list; + struct module *mod; + unsigned long start_addr; + unsigned long end_addr; + struct list_head funcs; +}; + #ifdef CONFIG_MODULES #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) @@ -5868,9 +5883,123 @@ void ftrace_module_init(struct module *mod) ftrace_process_locs(mod, mod->ftrace_callsites, mod->ftrace_callsites + mod->num_ftrace_callsites); } + +static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, + struct dyn_ftrace *rec) +{ + struct ftrace_mod_func *mod_func; + unsigned long symsize; + unsigned long offset; + char str[KSYM_SYMBOL_LEN]; + char *modname; + const char *ret; + + ret = kallsyms_lookup(rec->ip, &symsize, &offset, &modname, str); + if (!ret) + return; + + mod_func = kmalloc(sizeof(*mod_func), GFP_KERNEL); + if (!mod_func) + return; + + mod_func->name = kstrdup(str, GFP_KERNEL); + if (!mod_func->name) { + kfree(mod_func); + return; + } + + mod_func->ip = rec->ip - offset; + mod_func->size = symsize; + + list_add_rcu(&mod_func->list, &mod_map->funcs); +} + +static LIST_HEAD(ftrace_mod_maps); + +static struct ftrace_mod_map * +allocate_ftrace_mod_map(struct module *mod, + unsigned long start, unsigned long end) +{ + struct ftrace_mod_map *mod_map; + + mod_map = kmalloc(sizeof(*mod_map), GFP_KERNEL); + if (!mod_map) + return NULL; + + mod_map->mod = mod; + mod_map->start_addr = start; + mod_map->end_addr = end; + + INIT_LIST_HEAD_RCU(&mod_map->funcs); + + list_add_rcu(&mod_map->list, &ftrace_mod_maps); + + return mod_map; +} + +static const char * +ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, + unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + struct ftrace_mod_func *found_func = NULL; + struct ftrace_mod_func *mod_func; + + list_for_each_entry_rcu(mod_func, &mod_map->funcs, list) { + if (addr >= mod_func->ip && + addr < mod_func->ip + mod_func->size) { + found_func = mod_func; + break; + } + } + + if (found_func) { + if (size) + *size = found_func->size; + if (off) + *off = addr - found_func->ip; + if (sym) + strlcpy(sym, found_func->name, KSYM_NAME_LEN); + + return found_func->name; + } + + return NULL; +} + +const char * +ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + struct ftrace_mod_map *mod_map; + const char *ret = NULL; + + preempt_disable(); + list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { + ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); + if (ret) { + if (modname) + *modname = mod_map->mod->name; + break; + } + } + preempt_enable(); + + return ret; +} + +#else +static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, + struct dyn_ftrace *rec) { } +static inline struct ftrace_mod_map * +allocate_ftrace_mod_map(struct module *mod, + unsigned long start, unsigned long end) +{ + return NULL; +} #endif /* CONFIG_MODULES */ -void ftrace_free_mem(void *start_ptr, void *end_ptr) +void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) { unsigned long start = (unsigned long)(start_ptr); unsigned long end = (unsigned long)(end_ptr); @@ -5878,6 +6007,7 @@ void ftrace_free_mem(void *start_ptr, void *end_ptr) struct ftrace_page *pg; struct dyn_ftrace *rec; struct dyn_ftrace key; + struct ftrace_mod_map *mod_map = NULL; int order; key.ip = start; @@ -5885,6 +6015,14 @@ void ftrace_free_mem(void *start_ptr, void *end_ptr) mutex_lock(&ftrace_lock); + /* + * If we are freeing module init memory, then check if + * any tracer is active. If so, we need to save a mapping of + * the module functions being freed with the address. + */ + if (mod && ftrace_ops_list != &ftrace_list_end) + mod_map = allocate_ftrace_mod_map(mod, start, end); + for (pg = ftrace_pages_start; pg; last_pg = &pg->next, pg = *last_pg) { if (end < pg->records[0].ip || start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) @@ -5895,6 +6033,10 @@ void ftrace_free_mem(void *start_ptr, void *end_ptr) ftrace_cmp_recs); if (!rec) continue; + + if (mod_map) + save_ftrace_mod_rec(mod_map, rec); + pg->index--; ftrace_update_tot_cnt--; if (!pg->index) { @@ -5920,7 +6062,7 @@ void __init ftrace_free_init_mem(void) void *start = (void *)(&__init_begin); void *end = (void *)(&__init_end); - ftrace_free_mem(start, end); + ftrace_free_mem(NULL, start, end); } void __init ftrace_init(void) -- cgit v1.2.3 From 6171a0310a06a7a0cb83713fa7068bdd4192de19 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 Sep 2017 08:40:41 -0400 Subject: ftrace/kallsyms: Have /proc/kallsyms show saved mod init functions If a module is loaded while tracing is enabled, then there's a possibility that the module init functions were traced. These functions have their name and address stored by ftrace such that it can translate the function address that is written into the buffer into a human readable function name. As userspace tools may be doing the same, they need a way to map function names to their address as well. This is done through reading /proc/kallsyms. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 9 +++++++++ kernel/kallsyms.c | 38 ++++++++++++++++++++++++++++++++------ kernel/trace/ftrace.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 202b40784c4e..346f8294e40a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,6 +56,9 @@ struct ftrace_hash; const char * ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym); +int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported); #else static inline const char * ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, @@ -63,6 +66,12 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, { return NULL; } +static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported) +{ + return -1; +} #endif diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 976ecb9275d9..1966fe1c2b57 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -479,6 +479,7 @@ EXPORT_SYMBOL(__print_symbol); struct kallsym_iter { loff_t pos; loff_t pos_mod_end; + loff_t pos_ftrace_mod_end; unsigned long value; unsigned int nameoff; /* If iterating in core kernel symbols. */ char type; @@ -501,11 +502,25 @@ static int get_ksymbol_mod(struct kallsym_iter *iter) return 1; } +static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter) +{ + int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_mod_end, + &iter->value, &iter->type, + iter->name, iter->module_name, + &iter->exported); + if (ret < 0) { + iter->pos_ftrace_mod_end = iter->pos; + return 0; + } + + return 1; +} + static int get_ksymbol_bpf(struct kallsym_iter *iter) { iter->module_name[0] = '\0'; iter->exported = 0; - return bpf_get_kallsym(iter->pos - iter->pos_mod_end, + return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end, &iter->value, &iter->type, iter->name) < 0 ? 0 : 1; } @@ -530,20 +545,31 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) iter->name[0] = '\0'; iter->nameoff = get_symbol_offset(new_pos); iter->pos = new_pos; - if (new_pos == 0) + if (new_pos == 0) { iter->pos_mod_end = 0; + iter->pos_ftrace_mod_end = 0; + } } static int update_iter_mod(struct kallsym_iter *iter, loff_t pos) { iter->pos = pos; - if (iter->pos_mod_end > 0 && - iter->pos_mod_end < iter->pos) + if (iter->pos_ftrace_mod_end > 0 && + iter->pos_ftrace_mod_end < iter->pos) return get_ksymbol_bpf(iter); - if (!get_ksymbol_mod(iter)) - return get_ksymbol_bpf(iter); + if (iter->pos_mod_end > 0 && + iter->pos_mod_end < iter->pos) { + if (!get_ksymbol_ftrace_mod(iter)) + return get_ksymbol_bpf(iter); + return 1; + } + + if (!get_ksymbol_mod(iter)) { + if (!get_ksymbol_ftrace_mod(iter)) + return get_ksymbol_bpf(iter); + } return 1; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a5824408bed9..9e99bd55732e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5689,6 +5689,7 @@ struct ftrace_mod_map { unsigned long start_addr; unsigned long end_addr; struct list_head funcs; + unsigned int num_funcs; }; #ifdef CONFIG_MODULES @@ -5940,6 +5941,8 @@ static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, mod_func->ip = rec->ip - offset; mod_func->size = symsize; + mod_map->num_funcs++; + list_add_rcu(&mod_func->list, &mod_map->funcs); } @@ -5956,6 +5959,7 @@ allocate_ftrace_mod_map(struct module *mod, mod_map->mod = mod; mod_map->start_addr = start; mod_map->end_addr = end; + mod_map->num_funcs = 0; INIT_LIST_HEAD_RCU(&mod_map->funcs); @@ -6016,6 +6020,42 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, return ret; } +int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported) +{ + struct ftrace_mod_map *mod_map; + struct ftrace_mod_func *mod_func; + + preempt_disable(); + list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { + + if (symnum >= mod_map->num_funcs) { + symnum -= mod_map->num_funcs; + continue; + } + + list_for_each_entry_rcu(mod_func, &mod_map->funcs, list) { + if (symnum > 1) { + symnum--; + continue; + } + + *value = mod_func->ip; + *type = 'T'; + strlcpy(name, mod_func->name, KSYM_NAME_LEN); + strlcpy(module_name, mod_map->mod->name, MODULE_NAME_LEN); + *exported = 1; + preempt_enable(); + return 0; + } + WARN_ON(1); + break; + } + preempt_enable(); + return -ERANGE; +} + #else static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, struct dyn_ftrace *rec) { } -- cgit v1.2.3 From d59158162e032917a428704160a2063a02405ec6 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 10 Oct 2017 15:51:37 -0700 Subject: tracing: Add support for preempt and irq enable/disable events Preempt and irq trace events can be used for tracing the start and end of an atomic section which can be used by a trace viewer like systrace to graphically view the start and end of an atomic section and correlate them with latencies and scheduling issues. This also serves as a prelude to using synthetic events or probes to rewrite the preempt and irqsoff tracers, along with numerous benefits of using trace events features for these events. Link: http://lkml.kernel.org/r/20171006005432.14244-3-joelaf@google.com Link: http://lkml.kernel.org/r/20171010225137.17370-1-joelaf@google.com Cc: Peter Zilstra Cc: kernel-team@android.com Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 3 +- include/trace/events/preemptirq.h | 70 +++++++++++++++++++++++++++++++++++++++ kernel/trace/Kconfig | 11 ++++++ kernel/trace/Makefile | 1 + kernel/trace/trace_irqsoff.c | 35 +++++++++++++++++++- 5 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 include/trace/events/preemptirq.h (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 346f8294e40a..1f8545caa691 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -769,7 +769,8 @@ static inline unsigned long get_lock_parent_ip(void) static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif -#ifdef CONFIG_PREEMPT_TRACER +#if defined(CONFIG_PREEMPT_TRACER) || \ + (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS)) extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h new file mode 100644 index 000000000000..f5024c560d8f --- /dev/null +++ b/include/trace/events/preemptirq.h @@ -0,0 +1,70 @@ +#ifdef CONFIG_PREEMPTIRQ_EVENTS + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM preemptirq + +#if !defined(_TRACE_PREEMPTIRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PREEMPTIRQ_H + +#include +#include +#include +#include + +DECLARE_EVENT_CLASS(preemptirq_template, + + TP_PROTO(unsigned long ip, unsigned long parent_ip), + + TP_ARGS(ip, parent_ip), + + TP_STRUCT__entry( + __field(u32, caller_offs) + __field(u32, parent_offs) + ), + + TP_fast_assign( + __entry->caller_offs = (u32)(ip - (unsigned long)_stext); + __entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext); + ), + + TP_printk("caller=%pF parent=%pF", + (void *)((unsigned long)(_stext) + __entry->caller_offs), + (void *)((unsigned long)(_stext) + __entry->parent_offs)) +); + +#ifndef CONFIG_PROVE_LOCKING +DEFINE_EVENT(preemptirq_template, irq_disable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); + +DEFINE_EVENT(preemptirq_template, irq_enable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); +#endif + +#ifdef CONFIG_DEBUG_PREEMPT +DEFINE_EVENT(preemptirq_template, preempt_disable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); + +DEFINE_EVENT(preemptirq_template, preempt_enable, + TP_PROTO(unsigned long ip, unsigned long parent_ip), + TP_ARGS(ip, parent_ip)); +#endif + +#endif /* _TRACE_PREEMPTIRQ_H */ + +#include + +#else /* !CONFIG_PREEMPTIRQ_EVENTS */ + +#define trace_irq_enable(...) +#define trace_irq_disable(...) +#define trace_preempt_enable(...) +#define trace_preempt_disable(...) +#define trace_irq_enable_rcuidle(...) +#define trace_irq_disable_rcuidle(...) +#define trace_preempt_enable_rcuidle(...) +#define trace_preempt_disable_rcuidle(...) + +#endif diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 434c840e2d82..b8395a020821 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -160,6 +160,17 @@ config FUNCTION_GRAPH_TRACER address on the current task structure into a stack of calls. +config PREEMPTIRQ_EVENTS + bool "Enable trace events for preempt and irq disable/enable" + select TRACE_IRQFLAGS + depends on DEBUG_PREEMPT || !PROVE_LOCKING + default n + help + Enable tracing of disable and enable events for preemption and irqs. + For tracing preempt disable/enable events, DEBUG_PREEMPT must be + enabled. For tracing irq disable/enable events, PROVE_LOCKING must + be disabled. + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 90f2701d92a7..9f62eee61f14 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o +obj-$(CONFIG_PREEMPTIRQ_EVENTS) += trace_irqsoff.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 0e3033c00474..03ecb4465ee4 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -16,6 +16,9 @@ #include "trace.h" +#define CREATE_TRACE_POINTS +#include + #if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER) static struct trace_array *irqsoff_trace __read_mostly; static int tracer_enabled __read_mostly; @@ -777,26 +780,53 @@ static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { } #endif #if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING) +/* Per-cpu variable to prevent redundant calls when IRQs already off */ +static DEFINE_PER_CPU(int, tracing_irq_cpu); + void trace_hardirqs_on(void) { + if (!this_cpu_read(tracing_irq_cpu)) + return; + + trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_on(); + + this_cpu_write(tracing_irq_cpu, 0); } EXPORT_SYMBOL(trace_hardirqs_on); void trace_hardirqs_off(void) { + if (this_cpu_read(tracing_irq_cpu)) + return; + + this_cpu_write(tracing_irq_cpu, 1); + + trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_off(); } EXPORT_SYMBOL(trace_hardirqs_off); __visible void trace_hardirqs_on_caller(unsigned long caller_addr) { + if (!this_cpu_read(tracing_irq_cpu)) + return; + + trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr); tracer_hardirqs_on_caller(caller_addr); + + this_cpu_write(tracing_irq_cpu, 0); } EXPORT_SYMBOL(trace_hardirqs_on_caller); __visible void trace_hardirqs_off_caller(unsigned long caller_addr) { + if (this_cpu_read(tracing_irq_cpu)) + return; + + this_cpu_write(tracing_irq_cpu, 1); + + trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); tracer_hardirqs_off_caller(caller_addr); } EXPORT_SYMBOL(trace_hardirqs_off_caller); @@ -818,14 +848,17 @@ inline void print_irqtrace_events(struct task_struct *curr) } #endif -#ifdef CONFIG_PREEMPT_TRACER +#if defined(CONFIG_PREEMPT_TRACER) || \ + (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS)) void trace_preempt_on(unsigned long a0, unsigned long a1) { + trace_preempt_enable_rcuidle(a0, a1); tracer_preempt_on(a0, a1); } void trace_preempt_off(unsigned long a0, unsigned long a1) { + trace_preempt_disable_rcuidle(a0, a1); tracer_preempt_off(a0, a1); } #endif -- cgit v1.2.3 From e83543b495598233c3741b23bed7c82161c93e24 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 10 Oct 2017 19:12:56 -0400 Subject: tracing/xen: Hide events that are not used when X86_PAE is not defined TRACE_EVENTS() take up memory. If they are defined but not used, then they simply waste space. If their use case is behind a define, then the trace events should be as well. The trace events xen_mmu_set_pte_atomic, xen_mmu_pte_clear, and xen_mmu_pmd_clear are not used when CONFIG_X86_PAE is not defined. Link: http://lkml.kernel.org/r/20171010191256.3d6d72cb@gandalf.local.home Reviewed-by: Boris Ostrovsky Reviewed-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/xen.h | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 1b4fed72f573..6118d82334ec 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -147,7 +147,6 @@ DECLARE_EVENT_CLASS(xen_mmu__set_pte, TP_ARGS(ptep, pteval)) DEFINE_XEN_MMU_SET_PTE(xen_mmu_set_pte); -DEFINE_XEN_MMU_SET_PTE(xen_mmu_set_pte_atomic); TRACE_EVENT(xen_mmu_set_pte_at, TP_PROTO(struct mm_struct *mm, unsigned long addr, @@ -169,21 +168,6 @@ TRACE_EVENT(xen_mmu_set_pte_at, (int)sizeof(pteval_t) * 2, (unsigned long long)__entry->pteval) ); -TRACE_EVENT(xen_mmu_pte_clear, - TP_PROTO(struct mm_struct *mm, unsigned long addr, pte_t *ptep), - TP_ARGS(mm, addr, ptep), - TP_STRUCT__entry( - __field(struct mm_struct *, mm) - __field(unsigned long, addr) - __field(pte_t *, ptep) - ), - TP_fast_assign(__entry->mm = mm; - __entry->addr = addr; - __entry->ptep = ptep), - TP_printk("mm %p addr %lx ptep %p", - __entry->mm, __entry->addr, __entry->ptep) - ); - TRACE_DEFINE_SIZEOF(pmdval_t); TRACE_EVENT(xen_mmu_set_pmd, @@ -201,6 +185,24 @@ TRACE_EVENT(xen_mmu_set_pmd, (int)sizeof(pmdval_t) * 2, (unsigned long long)__entry->pmdval) ); +#ifdef CONFIG_X86_PAE +DEFINE_XEN_MMU_SET_PTE(xen_mmu_set_pte_atomic); + +TRACE_EVENT(xen_mmu_pte_clear, + TP_PROTO(struct mm_struct *mm, unsigned long addr, pte_t *ptep), + TP_ARGS(mm, addr, ptep), + TP_STRUCT__entry( + __field(struct mm_struct *, mm) + __field(unsigned long, addr) + __field(pte_t *, ptep) + ), + TP_fast_assign(__entry->mm = mm; + __entry->addr = addr; + __entry->ptep = ptep), + TP_printk("mm %p addr %lx ptep %p", + __entry->mm, __entry->addr, __entry->ptep) + ); + TRACE_EVENT(xen_mmu_pmd_clear, TP_PROTO(pmd_t *pmdp), TP_ARGS(pmdp), @@ -210,6 +212,7 @@ TRACE_EVENT(xen_mmu_pmd_clear, TP_fast_assign(__entry->pmdp = pmdp), TP_printk("pmdp %p", __entry->pmdp) ); +#endif #if CONFIG_PGTABLE_LEVELS >= 4 -- cgit v1.2.3 From f40a37cb4916f17806b8a89d8eb76f6943c69189 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 12 Oct 2017 18:46:32 -0400 Subject: tracing, memcg, vmscan: Hide trace events when not in use When trace events are defined but not used they still create data structures and functions for their use, even though nothing may be using them. The trace events mm_vmscan_memcg_reclaim_begin, mm_vmscan_memcg_softlimit_reclaim_begin, mm_vmscan_memcg_reclaim_end, and mm_vmscan_memcg_softlimit_reclaim_end are not used if CONFIG_MEMCG is not defined. Do not create these trace events unless CONFIG_MEMCG is defined. Link: http://lkml.kernel.org/r/20171012184632.2bd247cd@gandalf.local.home Acked-by: Michal Hocko Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/vmscan.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 27e8a5c77579..cde5fea0a179 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -133,6 +133,7 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_b TP_ARGS(order, may_writepage, gfp_flags, classzone_idx) ); +#ifdef CONFIG_MEMCG DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin, TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx), @@ -146,6 +147,7 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_ TP_ARGS(order, may_writepage, gfp_flags, classzone_idx) ); +#endif /* CONFIG_MEMCG */ DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template, @@ -171,6 +173,7 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end TP_ARGS(nr_reclaimed) ); +#ifdef CONFIG_MEMCG DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), @@ -184,6 +187,7 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_re TP_ARGS(nr_reclaimed) ); +#endif /* CONFIG_MEMCG */ TRACE_EVENT(mm_shrink_slab_start, TP_PROTO(struct shrinker *shr, struct shrink_control *sc, -- cgit v1.2.3 From 136412474b77c14a26e018a7f2c1e5fcee700d3d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 13 Oct 2017 10:06:25 -0400 Subject: tracing, dma-buf: Remove unused trace event dma_fence_annotate_wait_on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e941759c74 ("fence: dma-buf cross-device synchronization") added trace event fence_annotate_wait_on, but never used it. It was renamed to dma_fence_annotate_wait_on by commit f54d186700 ("dma-buf: Rename struct fence to dma_fence") but still not used. As defined trace events have data structures and functions created for them, it is a waste of memory if they are not used. Remove the unused trace event. Link: http://lkml.kernel.org/r/20171013100625.6c820059@gandalf.local.home Reviewed-by: Christian König Acked-by: Sumit Semwal Signed-off-by: Steven Rostedt (VMware) --- drivers/dma-buf/dma-fence.c | 1 - include/trace/events/dma_fence.h | 40 ---------------------------------------- 2 files changed, 41 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 9a302799040e..5d101c4053e0 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -27,7 +27,6 @@ #define CREATE_TRACE_POINTS #include -EXPORT_TRACEPOINT_SYMBOL(dma_fence_annotate_wait_on); EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal); diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h index 1157cb4c3c6f..50fbdefe6c81 100644 --- a/include/trace/events/dma_fence.h +++ b/include/trace/events/dma_fence.h @@ -8,46 +8,6 @@ struct dma_fence; -TRACE_EVENT(dma_fence_annotate_wait_on, - - /* fence: the fence waiting on f1, f1: the fence to be waited on. */ - TP_PROTO(struct dma_fence *fence, struct dma_fence *f1), - - TP_ARGS(fence, f1), - - TP_STRUCT__entry( - __string(driver, fence->ops->get_driver_name(fence)) - __string(timeline, fence->ops->get_timeline_name(fence)) - __field(unsigned int, context) - __field(unsigned int, seqno) - - __string(waiting_driver, f1->ops->get_driver_name(f1)) - __string(waiting_timeline, f1->ops->get_timeline_name(f1)) - __field(unsigned int, waiting_context) - __field(unsigned int, waiting_seqno) - ), - - TP_fast_assign( - __assign_str(driver, fence->ops->get_driver_name(fence)) - __assign_str(timeline, fence->ops->get_timeline_name(fence)) - __entry->context = fence->context; - __entry->seqno = fence->seqno; - - __assign_str(waiting_driver, f1->ops->get_driver_name(f1)) - __assign_str(waiting_timeline, f1->ops->get_timeline_name(f1)) - __entry->waiting_context = f1->context; - __entry->waiting_seqno = f1->seqno; - - ), - - TP_printk("driver=%s timeline=%s context=%u seqno=%u " \ - "waits on driver=%s timeline=%s context=%u seqno=%u", - __get_str(driver), __get_str(timeline), __entry->context, - __entry->seqno, - __get_str(waiting_driver), __get_str(waiting_timeline), - __entry->waiting_context, __entry->waiting_seqno) -); - DECLARE_EVENT_CLASS(dma_fence, TP_PROTO(struct dma_fence *fence), -- cgit v1.2.3 From 8fd0fbbe8888f295eb34172a7e47bf7d3a0a4687 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Oct 2017 09:45:29 +0200 Subject: perf/ftrace: Revert ("perf/ftrace: Fix double traces of perf on ftrace:function") Revert commit: 75e8387685f6 ("perf/ftrace: Fix double traces of perf on ftrace:function") The reason I instantly stumbled on that patch is that it only addresses the ftrace situation and doesn't mention the other _5_ places that use this interface. It doesn't explain why those don't have the problem and if not, why their solution doesn't work for ftrace. It doesn't, but this is just putting more duct tape on. Link: http://lkml.kernel.org/r/20171011080224.200565770@infradead.org Cc: Zhou Chengming Cc: Jiri Olsa Cc: Ingo Molnar Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/perf_event.h | 2 +- include/linux/trace_events.h | 4 ++-- kernel/events/core.c | 13 ++++--------- kernel/trace/trace_event_perf.c | 4 +--- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_syscalls.c | 4 ++-- kernel/trace/trace_uprobe.c | 2 +- 7 files changed, 13 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24ded6a..569d1b54e201 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1184,7 +1184,7 @@ extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task, struct perf_event *event); + struct task_struct *task); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2e0f22298fe9..a6349b76fd39 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -507,9 +507,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, - struct task_struct *task, struct perf_event *event) + struct task_struct *task) { - perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); } #endif diff --git a/kernel/events/core.c b/kernel/events/core.c index 6bc21e202ae4..b8db80c5513b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7954,15 +7954,16 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, } } perf_tp_event(call->event.type, count, raw_data, size, regs, head, - rctx, task, NULL); + rctx, task); } EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit); void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task, struct perf_event *event) + struct task_struct *task) { struct perf_sample_data data; + struct perf_event *event; struct perf_raw_record raw = { .frag = { @@ -7976,15 +7977,9 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, perf_trace_buf_update(record, event_type); - /* Use the given event instead of the hlist */ - if (event) { + hlist_for_each_entry_rcu(event, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) perf_swevent_event(event, count, &data, regs); - } else { - hlist_for_each_entry_rcu(event, head, hlist_entry) { - if (perf_tp_event_match(event, &data, regs)) - perf_swevent_event(event, count, &data, regs); - } } /* diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 13ba2d3f6a91..562fa69df5d3 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -306,7 +306,6 @@ static void perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *pt_regs) { - struct perf_event *event; struct ftrace_entry *entry; struct hlist_head *head; struct pt_regs regs; @@ -330,9 +329,8 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - event = container_of(ops, struct perf_event, ftrace_ops); perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, - 1, ®s, head, NULL, event); + 1, ®s, head, NULL); #undef ENTRY_SIZE } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index af6134f2e597..996902a526d4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1200,7 +1200,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL, NULL); + head, NULL); } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1236,7 +1236,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL, NULL); + head, NULL); } NOKPROBE_SYMBOL(kretprobe_perf_func); #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 696afe72d3b1..934b0da72679 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -622,7 +622,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) perf_trace_buf_submit(rec, size, rctx, sys_data->enter_event->event.type, 1, regs, - head, NULL, NULL); + head, NULL); } static int perf_sysenter_enable(struct trace_event_call *call) @@ -716,7 +716,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) } perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type, - 1, regs, head, NULL, NULL); + 1, regs, head, NULL); } static int perf_sysexit_enable(struct trace_event_call *call) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index b34965e62fb9..402120ba4594 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1156,7 +1156,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, } perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL, NULL); + head, NULL); out: preempt_enable(); } -- cgit v1.2.3 From 466c81c45b650deca213fda3d0ec4761667379a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Oct 2017 17:15:47 +0200 Subject: perf/ftrace: Fix function trace events The function-trace <-> perf interface is a tad messed up. Where all the other trace <-> perf interfaces use a single trace hook registration and use per-cpu RCU based hlist to iterate the events, function-trace actually needs multiple hook registrations in order to minimize function entry patching when filters are present. The end result is that we iterate events both on the trace hook and on the hlist, which results in reporting events multiple times. Since function-trace cannot use the regular scheme, fix it the other way around, use singleton hlists. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 5 +++ kernel/trace/trace_event_perf.c | 80 +++++++++++++++++++++++++---------------- 2 files changed, 54 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index a6349b76fd39..ca4e67e466a7 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -173,6 +173,11 @@ enum trace_reg { TRACE_REG_PERF_UNREGISTER, TRACE_REG_PERF_OPEN, TRACE_REG_PERF_CLOSE, + /* + * These (ADD/DEL) use a 'boolean' return value, where 1 (true) means a + * custom action was taken and the default action is not to be + * performed. + */ TRACE_REG_PERF_ADD, TRACE_REG_PERF_DEL, #endif diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 562fa69df5d3..e73f9ab15939 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -240,27 +240,41 @@ void perf_trace_destroy(struct perf_event *p_event) int perf_trace_add(struct perf_event *p_event, int flags) { struct trace_event_call *tp_event = p_event->tp_event; - struct hlist_head __percpu *pcpu_list; - struct hlist_head *list; - - pcpu_list = tp_event->perf_events; - if (WARN_ON_ONCE(!pcpu_list)) - return -EINVAL; if (!(flags & PERF_EF_START)) p_event->hw.state = PERF_HES_STOPPED; - list = this_cpu_ptr(pcpu_list); - hlist_add_head_rcu(&p_event->hlist_entry, list); + /* + * If TRACE_REG_PERF_ADD returns false; no custom action was performed + * and we need to take the default action of enqueueing our event on + * the right per-cpu hlist. + */ + if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) { + struct hlist_head __percpu *pcpu_list; + struct hlist_head *list; + + pcpu_list = tp_event->perf_events; + if (WARN_ON_ONCE(!pcpu_list)) + return -EINVAL; + + list = this_cpu_ptr(pcpu_list); + hlist_add_head_rcu(&p_event->hlist_entry, list); + } - return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event); + return 0; } void perf_trace_del(struct perf_event *p_event, int flags) { struct trace_event_call *tp_event = p_event->tp_event; - hlist_del_rcu(&p_event->hlist_entry); - tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); + + /* + * If TRACE_REG_PERF_DEL returns false; no custom action was performed + * and we need to take the default action of dequeueing our event from + * the right per-cpu hlist. + */ + if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event)) + hlist_del_rcu(&p_event->hlist_entry); } void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) @@ -307,14 +321,24 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *pt_regs) { struct ftrace_entry *entry; - struct hlist_head *head; + struct perf_event *event; + struct hlist_head head; struct pt_regs regs; int rctx; - head = this_cpu_ptr(event_function.perf_events); - if (hlist_empty(head)) + if ((unsigned long)ops->private != smp_processor_id()) return; + event = container_of(ops, struct perf_event, ftrace_ops); + + /* + * @event->hlist entry is NULL (per INIT_HLIST_NODE), and all + * the perf code does is hlist_for_each_entry_rcu(), so we can + * get away with simply setting the @head.first pointer in order + * to create a singular list. + */ + head.first = &event->hlist_entry; + #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ sizeof(u64)) - sizeof(u32)) @@ -330,7 +354,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, - 1, ®s, head, NULL); + 1, ®s, &head, NULL); #undef ENTRY_SIZE } @@ -339,8 +363,10 @@ static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; - ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU; - ops->func = perf_ftrace_function_call; + ops->flags |= FTRACE_OPS_FL_RCU; + ops->func = perf_ftrace_function_call; + ops->private = (void *)(unsigned long)nr_cpu_ids; + return register_ftrace_function(ops); } @@ -352,19 +378,11 @@ static int perf_ftrace_function_unregister(struct perf_event *event) return ret; } -static void perf_ftrace_function_enable(struct perf_event *event) -{ - ftrace_function_local_enable(&event->ftrace_ops); -} - -static void perf_ftrace_function_disable(struct perf_event *event) -{ - ftrace_function_local_disable(&event->ftrace_ops); -} - int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data) { + struct perf_event *event = data; + switch (type) { case TRACE_REG_REGISTER: case TRACE_REG_UNREGISTER: @@ -377,11 +395,11 @@ int perf_ftrace_event_register(struct trace_event_call *call, case TRACE_REG_PERF_CLOSE: return perf_ftrace_function_unregister(data); case TRACE_REG_PERF_ADD: - perf_ftrace_function_enable(data); - return 0; + event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id(); + return 1; case TRACE_REG_PERF_DEL: - perf_ftrace_function_disable(data); - return 0; + event->ftrace_ops.private = (void *)(unsigned long)nr_cpu_ids; + return 1; } return -EINVAL; -- cgit v1.2.3 From b3a88803ac5b4bda26017b485c8722a8487fefb7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Oct 2017 09:45:32 +0200 Subject: ftrace: Kill FTRACE_OPS_FL_PER_CPU The one and only user of FTRACE_OPS_FL_PER_CPU is gone, remove the lot. Link: http://lkml.kernel.org/r/20171011080224.372422809@infradead.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 83 +++++++++----------------------------------------- kernel/trace/ftrace.c | 55 ++++----------------------------- 2 files changed, 20 insertions(+), 118 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1f8545caa691..252e334e7b5f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -102,10 +102,6 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * ENABLED - set/unset when ftrace_ops is registered/unregistered * DYNAMIC - set when ftrace_ops is registered to denote dynamically * allocated ftrace_ops which need special care - * PER_CPU - set manualy by ftrace_ops user to denote the ftrace_ops - * could be controlled by following calls: - * ftrace_function_local_enable - * ftrace_function_local_disable * SAVE_REGS - The ftrace_ops wants regs saved at each function called * and passed to the callback. If this flag is set, but the * architecture does not support passing regs @@ -149,21 +145,20 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_DYNAMIC = 1 << 1, - FTRACE_OPS_FL_PER_CPU = 1 << 2, - FTRACE_OPS_FL_SAVE_REGS = 1 << 3, - FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 4, - FTRACE_OPS_FL_RECURSION_SAFE = 1 << 5, - FTRACE_OPS_FL_STUB = 1 << 6, - FTRACE_OPS_FL_INITIALIZED = 1 << 7, - FTRACE_OPS_FL_DELETED = 1 << 8, - FTRACE_OPS_FL_ADDING = 1 << 9, - FTRACE_OPS_FL_REMOVING = 1 << 10, - FTRACE_OPS_FL_MODIFYING = 1 << 11, - FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, - FTRACE_OPS_FL_IPMODIFY = 1 << 13, - FTRACE_OPS_FL_PID = 1 << 14, - FTRACE_OPS_FL_RCU = 1 << 15, - FTRACE_OPS_FL_TRACE_ARRAY = 1 << 16, + FTRACE_OPS_FL_SAVE_REGS = 1 << 2, + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 3, + FTRACE_OPS_FL_RECURSION_SAFE = 1 << 4, + FTRACE_OPS_FL_STUB = 1 << 5, + FTRACE_OPS_FL_INITIALIZED = 1 << 6, + FTRACE_OPS_FL_DELETED = 1 << 7, + FTRACE_OPS_FL_ADDING = 1 << 8, + FTRACE_OPS_FL_REMOVING = 1 << 9, + FTRACE_OPS_FL_MODIFYING = 1 << 10, + FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 11, + FTRACE_OPS_FL_IPMODIFY = 1 << 12, + FTRACE_OPS_FL_PID = 1 << 13, + FTRACE_OPS_FL_RCU = 1 << 14, + FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -198,7 +193,6 @@ struct ftrace_ops { unsigned long flags; void *private; ftrace_func_t saved_func; - int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_ops_hash local_hash; struct ftrace_ops_hash *func_hash; @@ -230,55 +224,6 @@ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); void clear_ftrace_function(void); -/** - * ftrace_function_local_enable - enable ftrace_ops on current cpu - * - * This function enables tracing on current cpu by decreasing - * the per cpu control variable. - * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption - * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. - */ -static inline void ftrace_function_local_enable(struct ftrace_ops *ops) -{ - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) - return; - - (*this_cpu_ptr(ops->disabled))--; -} - -/** - * ftrace_function_local_disable - disable ftrace_ops on current cpu - * - * This function disables tracing on current cpu by increasing - * the per cpu control variable. - * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption - * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. - */ -static inline void ftrace_function_local_disable(struct ftrace_ops *ops) -{ - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) - return; - - (*this_cpu_ptr(ops->disabled))++; -} - -/** - * ftrace_function_local_disabled - returns ftrace_ops disabled value - * on current cpu - * - * This function returns value of ftrace_ops::disabled on current cpu. - * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption - * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. - */ -static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) -{ - WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU)); - return *this_cpu_ptr(ops->disabled); -} - extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct pt_regs *regs); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e0a98225666b..2fd3edaec6de 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -203,30 +203,6 @@ void clear_ftrace_function(void) ftrace_trace_function = ftrace_stub; } -static void per_cpu_ops_disable_all(struct ftrace_ops *ops) -{ - int cpu; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(ops->disabled, cpu) = 1; -} - -static int per_cpu_ops_alloc(struct ftrace_ops *ops) -{ - int __percpu *disabled; - - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) - return -EINVAL; - - disabled = alloc_percpu(int); - if (!disabled) - return -ENOMEM; - - ops->disabled = disabled; - per_cpu_ops_disable_all(ops); - return 0; -} - static void ftrace_sync(struct work_struct *work) { /* @@ -262,8 +238,8 @@ static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) * If this is a dynamic, RCU, or per CPU ops, or we force list func, * then it needs to call the list anyway. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU | - FTRACE_OPS_FL_RCU) || FTRACE_FORCE_LIST_FUNC) + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) || + FTRACE_FORCE_LIST_FUNC) return ftrace_ops_list_func; return ftrace_ops_get_func(ops); @@ -422,11 +398,6 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; - if (ops->flags & FTRACE_OPS_FL_PER_CPU) { - if (per_cpu_ops_alloc(ops)) - return -ENOMEM; - } - add_ftrace_ops(&ftrace_ops_list, ops); /* Always save the function, and reset at unregistering */ @@ -2727,11 +2698,6 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) { } -static void per_cpu_ops_free(struct ftrace_ops *ops) -{ - free_percpu(ops->disabled); -} - static void ftrace_startup_enable(int command) { if (saved_ftrace_func != ftrace_trace_function) { @@ -2833,7 +2799,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * not currently active, we can just free them * without synchronizing all CPUs. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) goto free_ops; return 0; @@ -2880,7 +2846,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * The same goes for freeing the per_cpu data of the per_cpu * ops. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) { + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) { /* * We need to do a hard force of sched synchronization. * This is because we use preempt_disable() to do RCU, but @@ -2903,9 +2869,6 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) free_ops: arch_ftrace_trampoline_free(ops); - - if (ops->flags & FTRACE_OPS_FL_PER_CPU) - per_cpu_ops_free(ops); } return 0; @@ -6355,10 +6318,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * If any of the above fails then the op->func() is not executed. */ if ((!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) && - (!(op->flags & FTRACE_OPS_FL_PER_CPU) || - !ftrace_function_local_disabled(op)) && ftrace_ops_test(op, ip, regs)) { - if (FTRACE_WARN_ON(!op->func)) { pr_warn("op=%p %pS\n", op, op); goto out; @@ -6416,10 +6376,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); - if (!(op->flags & FTRACE_OPS_FL_PER_CPU) || - !ftrace_function_local_disabled(op)) { - op->func(ip, parent_ip, op, regs); - } + op->func(ip, parent_ip, op, regs); preempt_enable_notrace(); trace_clear_recursion(bit); @@ -6443,7 +6400,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) * or does per cpu logic, then we need to call the assist handler. */ if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || - ops->flags & (FTRACE_OPS_FL_RCU | FTRACE_OPS_FL_PER_CPU)) + ops->flags & FTRACE_OPS_FL_RCU) return ftrace_ops_assist_func; return ops->func; -- cgit v1.2.3 From b5ca66f9c0cf8c1cfeca12f001b86486c670279d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 13 Oct 2017 10:21:50 -0400 Subject: tracing, thermal: Hide devfreq trace events when not in use As trace events when defined create data structures and functions to process them, defining trace events when not using them is a waste of memory. The trace events thermal_power_devfreq_get_power and thermal_power_devfreq_limit are only used when CONFIG_DEVFREQ_THERMAL is set. Make those events only defined when that is set as well. Link: http://lkml.kernel.org/r/20171013102150.0050cb74@gandalf.local.home Acked-by: Javi Merino Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h index 6cde5b3514c2..1fdacdb94e77 100644 --- a/include/trace/events/thermal.h +++ b/include/trace/events/thermal.h @@ -148,6 +148,7 @@ TRACE_EVENT(thermal_power_cpu_limit, __entry->power) ); +#ifdef CONFIG_DEVFREQ_THERMAL TRACE_EVENT(thermal_power_devfreq_get_power, TP_PROTO(struct thermal_cooling_device *cdev, struct devfreq_dev_status *status, unsigned long freq, @@ -203,6 +204,7 @@ TRACE_EVENT(thermal_power_devfreq_limit, __get_str(type), __entry->freq, __entry->cdev_state, __entry->power) ); +#endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* _TRACE_THERMAL_H */ /* This part must be outside protection */ -- cgit v1.2.3 From a96a5037ed0f52e2d86739f4a1ef985bd036e575 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 13 Oct 2017 10:23:09 -0400 Subject: tracing, thermal: Hide cpu cooling trace events when not in use As trace events when defined create data structures and functions to process them, defining trace events when not using them is a waste of memory. The trace events thermal_power_cpu_get_power and thermal_power_cpu_limit are only used when CONFIG_CPU_THERMAL is set. Make those events only defined when that is set as well. Link: http://lkml.kernel.org/r/20171013102309.2c4ef81a@gandalf.local.home Cc: Eduardo Valentin Acked-by: Javi Merino Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h index 1fdacdb94e77..8af8f130950e 100644 --- a/include/trace/events/thermal.h +++ b/include/trace/events/thermal.h @@ -90,6 +90,7 @@ TRACE_EVENT(thermal_zone_trip, show_tzt_type(__entry->trip_type)) ); +#ifdef CONFIG_CPU_THERMAL TRACE_EVENT(thermal_power_cpu_get_power, TP_PROTO(const struct cpumask *cpus, unsigned long freq, u32 *load, size_t load_len, u32 dynamic_power, u32 static_power), @@ -147,6 +148,7 @@ TRACE_EVENT(thermal_power_cpu_limit, __get_bitmask(cpumask), __entry->freq, __entry->cdev_state, __entry->power) ); +#endif /* CONFIG_CPU_THERMAL */ #ifdef CONFIG_DEVFREQ_THERMAL TRACE_EVENT(thermal_power_devfreq_get_power, -- cgit v1.2.3