From 60d53e2c3b75e79c83970fe73db79123d9462c7c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:20 +0900 Subject: tracing/probe: Split trace_event related data from trace_probe Split the trace_event related data out of the trace_probe data structure and introduce the trace_probe_event data structure as its holder. A single trace_probe_event can hold multiple trace_probe instances. Link: http://lkml.kernel.org/r/156095683995.28024.7552150340561557873.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 157 ++++++++++++++++++++++++++++++----------- kernel/trace/trace_probe.c | 54 ++++++++++----- kernel/trace/trace_probe.h | 48 ++++++++++--- kernel/trace/trace_uprobe.c | 165 +++++++++++++++++++++++++++++++++----------- 4 files changed, 311 insertions(+), 113 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9d483ad9bb6c..eac6344a2e7c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -180,20 +180,33 @@ unsigned long trace_kprobe_address(struct trace_kprobe *tk) return addr; } +static nokprobe_inline struct trace_kprobe * +trace_kprobe_primary_from_call(struct trace_event_call *call) +{ + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return NULL; + + return container_of(tp, struct trace_kprobe, tp); +} + bool trace_kprobe_on_func_entry(struct trace_event_call *call) { - struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + struct trace_kprobe *tk = trace_kprobe_primary_from_call(call); - return kprobe_on_func_entry(tk->rp.kp.addr, + return tk ? kprobe_on_func_entry(tk->rp.kp.addr, tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name, - tk->rp.kp.addr ? 0 : tk->rp.kp.offset); + tk->rp.kp.addr ? 
0 : tk->rp.kp.offset) : false; } bool trace_kprobe_error_injectable(struct trace_event_call *call) { - struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + struct trace_kprobe *tk = trace_kprobe_primary_from_call(call); - return within_error_injection_list(trace_kprobe_address(tk)); + return tk ? within_error_injection_list(trace_kprobe_address(tk)) : + false; } static int register_kprobe_event(struct trace_kprobe *tk); @@ -291,32 +304,75 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk) return ret; } +static void __disable_trace_kprobe(struct trace_probe *tp) +{ + struct trace_probe *pos; + struct trace_kprobe *tk; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tk = container_of(pos, struct trace_kprobe, tp); + if (!trace_kprobe_is_registered(tk)) + continue; + if (trace_kprobe_is_return(tk)) + disable_kretprobe(&tk->rp); + else + disable_kprobe(&tk->rp.kp); + } +} + /* * Enable trace_probe * if the file is NULL, enable "perf" handler, or enable "trace" handler. 
*/ -static int -enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) +static int enable_trace_kprobe(struct trace_event_call *call, + struct trace_event_file *file) { - bool enabled = trace_probe_is_enabled(&tk->tp); + struct trace_probe *pos, *tp; + struct trace_kprobe *tk; + bool enabled; int ret = 0; + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + enabled = trace_probe_is_enabled(tp); + + /* This also changes "enabled" state */ if (file) { - ret = trace_probe_add_file(&tk->tp, file); + ret = trace_probe_add_file(tp, file); if (ret) return ret; } else - trace_probe_set_flag(&tk->tp, TP_FLAG_PROFILE); + trace_probe_set_flag(tp, TP_FLAG_PROFILE); if (enabled) return 0; - ret = __enable_trace_kprobe(tk); - if (ret) { + enabled = false; + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tk = container_of(pos, struct trace_kprobe, tp); + if (trace_kprobe_has_gone(tk)) + continue; + ret = __enable_trace_kprobe(tk); + if (ret) { + if (enabled) { + __disable_trace_kprobe(tp); + enabled = false; + } + break; + } + enabled = true; + } + + if (!enabled) { + /* No probe is enabled. Roll back */ if (file) - trace_probe_remove_file(&tk->tp, file); + trace_probe_remove_file(tp, file); else - trace_probe_clear_flag(&tk->tp, TP_FLAG_PROFILE); + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); + if (!ret) + /* Since all probes are gone, this is not available */ + ret = -EADDRNOTAVAIL; } return ret; @@ -326,11 +382,14 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) * Disable trace_probe * if the file is NULL, disable "perf" handler, or disable "trace" handler. 
*/ -static int -disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) +static int disable_trace_kprobe(struct trace_event_call *call, + struct trace_event_file *file) { - struct trace_probe *tp = &tk->tp; - int ret = 0; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; if (file) { if (!trace_probe_get_file_link(tp, file)) @@ -341,12 +400,8 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) } else trace_probe_clear_flag(tp, TP_FLAG_PROFILE); - if (!trace_probe_is_enabled(tp) && trace_kprobe_is_registered(tk)) { - if (trace_kprobe_is_return(tk)) - disable_kretprobe(&tk->rp); - else - disable_kprobe(&tk->rp.kp); - } + if (!trace_probe_is_enabled(tp)) + __disable_trace_kprobe(tp); out: if (file) @@ -358,7 +413,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) */ trace_probe_remove_file(tp, file); - return ret; + return 0; } #if defined(CONFIG_KPROBES_ON_FTRACE) && \ @@ -1089,7 +1144,10 @@ print_kprobe_event(struct trace_iterator *iter, int flags, struct trace_probe *tp; field = (struct kprobe_trace_entry_head *)iter->ent; - tp = container_of(event, struct trace_probe, call.event); + tp = trace_probe_primary_from_call( + container_of(event, struct trace_event_call, event)); + if (WARN_ON_ONCE(!tp)) + goto out; trace_seq_printf(s, "%s: (", trace_probe_name(tp)); @@ -1116,7 +1174,10 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, struct trace_probe *tp; field = (struct kretprobe_trace_entry_head *)iter->ent; - tp = container_of(event, struct trace_probe, call.event); + tp = trace_probe_primary_from_call( + container_of(event, struct trace_event_call, event)); + if (WARN_ON_ONCE(!tp)) + goto out; trace_seq_printf(s, "%s: (", trace_probe_name(tp)); @@ -1145,23 +1206,31 @@ static int kprobe_event_define_fields(struct trace_event_call *event_call) { int ret; struct kprobe_trace_entry_head field; - struct 
trace_kprobe *tk = (struct trace_kprobe *)event_call->data; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(event_call); + if (WARN_ON_ONCE(!tp)) + return -ENOENT; DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); - return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); + return traceprobe_define_arg_fields(event_call, sizeof(field), tp); } static int kretprobe_event_define_fields(struct trace_event_call *event_call) { int ret; struct kretprobe_trace_entry_head field; - struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data; + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(event_call); + if (WARN_ON_ONCE(!tp)) + return -ENOENT; DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); - return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp); + return traceprobe_define_arg_fields(event_call, sizeof(field), tp); } #ifdef CONFIG_PERF_EVENTS @@ -1289,20 +1358,19 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, static int kprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct trace_kprobe *tk = (struct trace_kprobe *)event->data; struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return enable_trace_kprobe(tk, file); + return enable_trace_kprobe(event, file); case TRACE_REG_UNREGISTER: - return disable_trace_kprobe(tk, file); + return disable_trace_kprobe(event, file); #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return enable_trace_kprobe(tk, NULL); + return enable_trace_kprobe(event, NULL); case TRACE_REG_PERF_UNREGISTER: - return disable_trace_kprobe(tk, NULL); + return disable_trace_kprobe(event, NULL); case TRACE_REG_PERF_OPEN: case TRACE_REG_PERF_CLOSE: case TRACE_REG_PERF_ADD: @@ -1369,7 +1437,6 @@ static inline void init_trace_event_call(struct trace_kprobe *tk) call->flags = TRACE_EVENT_FL_KPROBE; call->class->reg = 
kprobe_register; - call->data = tk; } static int register_kprobe_event(struct trace_kprobe *tk) @@ -1432,7 +1499,9 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call) { struct trace_kprobe *tk; - tk = container_of(event_call, struct trace_kprobe, tp.call); + tk = trace_kprobe_primary_from_call(event_call); + if (unlikely(!tk)) + return; if (trace_probe_is_enabled(&tk->tp)) { WARN_ON(1); @@ -1577,7 +1646,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - enable_trace_kprobe(tk, file); + enable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } } @@ -1598,7 +1668,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - enable_trace_kprobe(tk, file); + enable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } } @@ -1631,7 +1702,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - disable_trace_kprobe(tk, file); + disable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM); @@ -1649,7 +1721,8 @@ static __init int kprobe_trace_self_tests_init(void) pr_warn("error on getting probe file.\n"); warn++; } else - disable_trace_kprobe(tk, file); + disable_trace_kprobe( + trace_probe_event_call(&tk->tp), file); } ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index fb6bfbc5bf86..28733bd6b607 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -889,41 +889,59 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, void trace_probe_cleanup(struct trace_probe *tp) { - struct trace_event_call *call = trace_probe_event_call(tp); int i; for (i = 0; i < tp->nr_args; i++) traceprobe_free_probe_arg(&tp->args[i]); - if (call->class) - kfree(call->class->system); - 
kfree(call->name); - kfree(call->print_fmt); + if (tp->event) { + struct trace_event_call *call = trace_probe_event_call(tp); + + kfree(tp->event->class.system); + kfree(call->name); + kfree(call->print_fmt); + kfree(tp->event); + tp->event = NULL; + } } int trace_probe_init(struct trace_probe *tp, const char *event, const char *group) { - struct trace_event_call *call = trace_probe_event_call(tp); + struct trace_event_call *call; + int ret = 0; if (!event || !group) return -EINVAL; - call->class = &tp->class; - call->name = kstrdup(event, GFP_KERNEL); - if (!call->name) + tp->event = kzalloc(sizeof(struct trace_probe_event), GFP_KERNEL); + if (!tp->event) return -ENOMEM; - tp->class.system = kstrdup(group, GFP_KERNEL); - if (!tp->class.system) { - kfree(call->name); - call->name = NULL; - return -ENOMEM; + call = trace_probe_event_call(tp); + call->class = &tp->event->class; + call->name = kstrdup(event, GFP_KERNEL); + if (!call->name) { + ret = -ENOMEM; + goto error; + } + + tp->event->class.system = kstrdup(group, GFP_KERNEL); + if (!tp->event->class.system) { + ret = -ENOMEM; + goto error; } - INIT_LIST_HEAD(&tp->files); - INIT_LIST_HEAD(&tp->class.fields); + INIT_LIST_HEAD(&tp->event->files); + INIT_LIST_HEAD(&tp->event->class.fields); + INIT_LIST_HEAD(&tp->event->probes); + INIT_LIST_HEAD(&tp->list); + list_add(&tp->event->probes, &tp->list); return 0; + +error: + trace_probe_cleanup(tp); + return ret; } int trace_probe_register_event_call(struct trace_probe *tp) @@ -952,7 +970,7 @@ int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file) link->file = file; INIT_LIST_HEAD(&link->list); - list_add_tail_rcu(&link->list, &tp->files); + list_add_tail_rcu(&link->list, &tp->event->files); trace_probe_set_flag(tp, TP_FLAG_TRACE); return 0; } @@ -983,7 +1001,7 @@ int trace_probe_remove_file(struct trace_probe *tp, synchronize_rcu(); kfree(link); - if (list_empty(&tp->files)) + if (list_empty(&tp->event->files)) trace_probe_clear_flag(tp, 
TP_FLAG_TRACE); return 0; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index d1714820efe1..0b84abb884c2 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -222,11 +222,18 @@ struct probe_arg { const struct fetch_type *type; /* Type of this argument */ }; -struct trace_probe { +/* Event call and class holder */ +struct trace_probe_event { unsigned int flags; /* For TP_FLAG_* */ struct trace_event_class class; struct trace_event_call call; struct list_head files; + struct list_head probes; +}; + +struct trace_probe { + struct list_head list; + struct trace_probe_event *event; ssize_t size; /* trace entry size */ unsigned int nr_args; struct probe_arg args[]; @@ -240,19 +247,19 @@ struct event_file_link { static inline bool trace_probe_test_flag(struct trace_probe *tp, unsigned int flag) { - return !!(tp->flags & flag); + return !!(tp->event->flags & flag); } static inline void trace_probe_set_flag(struct trace_probe *tp, unsigned int flag) { - tp->flags |= flag; + tp->event->flags |= flag; } static inline void trace_probe_clear_flag(struct trace_probe *tp, unsigned int flag) { - tp->flags &= ~flag; + tp->event->flags &= ~flag; } static inline bool trace_probe_is_enabled(struct trace_probe *tp) @@ -262,29 +269,48 @@ static inline bool trace_probe_is_enabled(struct trace_probe *tp) static inline const char *trace_probe_name(struct trace_probe *tp) { - return trace_event_name(&tp->call); + return trace_event_name(&tp->event->call); } static inline const char *trace_probe_group_name(struct trace_probe *tp) { - return tp->call.class->system; + return tp->event->call.class->system; } static inline struct trace_event_call * trace_probe_event_call(struct trace_probe *tp) { - return &tp->call; + return &tp->event->call; +} + +static inline struct trace_probe_event * +trace_probe_event_from_call(struct trace_event_call *event_call) +{ + return container_of(event_call, struct trace_probe_event, call); +} + +static inline struct 
trace_probe * +trace_probe_primary_from_call(struct trace_event_call *call) +{ + struct trace_probe_event *tpe = trace_probe_event_from_call(call); + + return list_first_entry(&tpe->probes, struct trace_probe, list); +} + +static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp) +{ + return &tp->event->probes; } static inline int trace_probe_unregister_event_call(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ - return trace_remove_event_call(&tp->call); + return trace_remove_event_call(&tp->event->call); } static inline bool trace_probe_has_single_file(struct trace_probe *tp) { - return !!list_is_singular(&tp->files); + return !!list_is_singular(&tp->event->files); } int trace_probe_init(struct trace_probe *tp, const char *event, @@ -298,9 +324,9 @@ struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, struct trace_event_file *file); #define trace_probe_for_each_link(pos, tp) \ - list_for_each_entry(pos, &(tp)->files, list) + list_for_each_entry(pos, &(tp)->event->files, list) #define trace_probe_for_each_link_rcu(pos, tp) \ - list_for_each_entry_rcu(pos, &(tp)->files, list) + list_for_each_entry_rcu(pos, &(tp)->event->files, list) /* Check the name is good for event/group/fields */ static inline bool is_good_name(const char *name) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 1ceedb9146b1..ac799abb7da9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -293,6 +293,18 @@ static bool trace_uprobe_match(const char *system, const char *event, (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0); } +static nokprobe_inline struct trace_uprobe * +trace_uprobe_primary_from_call(struct trace_event_call *call) +{ + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return NULL; + + return container_of(tp, struct trace_uprobe, tp); +} + /* * Allocate new trace_uprobe and 
initialize it (including uprobes). */ @@ -897,7 +909,10 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e u8 *data; entry = (struct uprobe_trace_entry_head *)iter->ent; - tu = container_of(event, struct trace_uprobe, tp.call.event); + tu = trace_uprobe_primary_from_call( + container_of(event, struct trace_event_call, event)); + if (unlikely(!tu)) + goto out; if (is_ret_probe(tu)) { trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", @@ -924,27 +939,71 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self, enum uprobe_filter_ctx ctx, struct mm_struct *mm); -static int -probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, - filter_func_t filter) +static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter) { - bool enabled = trace_probe_is_enabled(&tu->tp); int ret; + tu->consumer.filter = filter; + tu->inode = d_real_inode(tu->path.dentry); + + if (tu->ref_ctr_offset) + ret = uprobe_register_refctr(tu->inode, tu->offset, + tu->ref_ctr_offset, &tu->consumer); + else + ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + + if (ret) + tu->inode = NULL; + + return ret; +} + +static void __probe_event_disable(struct trace_probe *tp) +{ + struct trace_probe *pos; + struct trace_uprobe *tu; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); + if (!tu->inode) + continue; + + WARN_ON(!uprobe_filter_is_empty(&tu->filter)); + + uprobe_unregister(tu->inode, tu->offset, &tu->consumer); + tu->inode = NULL; + } +} + +static int probe_event_enable(struct trace_event_call *call, + struct trace_event_file *file, filter_func_t filter) +{ + struct trace_probe *pos, *tp; + struct trace_uprobe *tu; + bool enabled; + int ret; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + enabled = trace_probe_is_enabled(tp); + + /* This may also change "enabled" state */ if (file) { - if 
(trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) + if (trace_probe_test_flag(tp, TP_FLAG_PROFILE)) return -EINTR; - ret = trace_probe_add_file(&tu->tp, file); + ret = trace_probe_add_file(tp, file); if (ret < 0) return ret; } else { - if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) + if (trace_probe_test_flag(tp, TP_FLAG_TRACE)) return -EINTR; - trace_probe_set_flag(&tu->tp, TP_FLAG_PROFILE); + trace_probe_set_flag(tp, TP_FLAG_PROFILE); } + tu = container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(&tu->filter)); if (enabled) @@ -954,18 +1013,15 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, if (ret) goto err_flags; - tu->consumer.filter = filter; - tu->inode = d_real_inode(tu->path.dentry); - if (tu->ref_ctr_offset) { - ret = uprobe_register_refctr(tu->inode, tu->offset, - tu->ref_ctr_offset, &tu->consumer); - } else { - ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); + ret = trace_uprobe_enable(tu, filter); + if (ret) { + __probe_event_disable(tp); + goto err_buffer; + } } - if (ret) - goto err_buffer; - return 0; err_buffer: @@ -973,33 +1029,35 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, err_flags: if (file) - trace_probe_remove_file(&tu->tp, file); + trace_probe_remove_file(tp, file); else - trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE); + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); return ret; } -static void -probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) +static void probe_event_disable(struct trace_event_call *call, + struct trace_event_file *file) { - if (!trace_probe_is_enabled(&tu->tp)) + struct trace_probe *tp; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return; + + if (!trace_probe_is_enabled(tp)) return; if (file) { - if (trace_probe_remove_file(&tu->tp, file) < 0) + if 
(trace_probe_remove_file(tp, file) < 0) return; - if (trace_probe_is_enabled(&tu->tp)) + if (trace_probe_is_enabled(tp)) return; } else - trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE); - - WARN_ON(!uprobe_filter_is_empty(&tu->filter)); - - uprobe_unregister(tu->inode, tu->offset, &tu->consumer); - tu->inode = NULL; + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); + __probe_event_disable(tp); uprobe_buffer_disable(); } @@ -1007,7 +1065,11 @@ static int uprobe_event_define_fields(struct trace_event_call *event_call) { int ret, size; struct uprobe_trace_entry_head field; - struct trace_uprobe *tu = event_call->data; + struct trace_uprobe *tu; + + tu = trace_uprobe_primary_from_call(event_call); + if (unlikely(!tu)) + return -ENODEV; if (is_ret_probe(tu)) { DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0); @@ -1100,6 +1162,27 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) return err; } +static int uprobe_perf_multi_call(struct trace_event_call *call, + struct perf_event *event, + int (*op)(struct trace_uprobe *tu, struct perf_event *event)) +{ + struct trace_probe *pos, *tp; + struct trace_uprobe *tu; + int ret = 0; + + tp = trace_probe_primary_from_call(call); + if (WARN_ON_ONCE(!tp)) + return -ENODEV; + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); + ret = op(tu, event); + if (ret) + break; + } + + return ret; +} static bool uprobe_perf_filter(struct uprobe_consumer *uc, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { @@ -1213,30 +1296,29 @@ static int trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct trace_uprobe *tu = event->data; struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return probe_event_enable(tu, file, NULL); + return probe_event_enable(event, file, NULL); case TRACE_REG_UNREGISTER: - probe_event_disable(tu, file); + probe_event_disable(event, file); return 
0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return probe_event_enable(tu, NULL, uprobe_perf_filter); + return probe_event_enable(event, NULL, uprobe_perf_filter); case TRACE_REG_PERF_UNREGISTER: - probe_event_disable(tu, NULL); + probe_event_disable(event, NULL); return 0; case TRACE_REG_PERF_OPEN: - return uprobe_perf_open(tu, data); + return uprobe_perf_multi_call(event, data, uprobe_perf_open); case TRACE_REG_PERF_CLOSE: - return uprobe_perf_close(tu, data); + return uprobe_perf_multi_call(event, data, uprobe_perf_close); #endif default: @@ -1330,7 +1412,6 @@ static inline void init_trace_event_call(struct trace_uprobe *tu) call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; call->class->reg = trace_uprobe_register; - call->data = tu; } static int register_uprobe_event(struct trace_uprobe *tu) @@ -1399,7 +1480,7 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call) { struct trace_uprobe *tu; - tu = container_of(event_call, struct trace_uprobe, tp.call); + tu = trace_uprobe_primary_from_call(event_call); free_trace_uprobe(tu); } -- cgit v1.2.3 From cb8e7a8d55e052fdcfd1a567305a9a180fb61c57 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:29 +0900 Subject: tracing/dynevent: Delete all matched events When the user gives an event name to delete, delete all matching events instead of only the first one. This means that if several events have the same name but different group (subsystem) names, all of them are removed when the user passes only the event name, e.g. 
# cat kprobe_events p:group1/testevent _do_fork p:group2/testevent fork_idle # echo -:testevent >> kprobe_events # cat kprobe_events # Link: http://lkml.kernel.org/r/156095684958.28024.16597826267117453638.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_dynevent.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index fa100ed3b4de..1cc55c50c491 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -61,10 +61,12 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) for_each_dyn_event_safe(pos, n) { if (type && type != pos->ops) continue; - if (pos->ops->match(system, event, pos)) { - ret = pos->ops->free(pos); + if (!pos->ops->match(system, event, pos)) + continue; + + ret = pos->ops->free(pos); + if (ret) break; - } } mutex_unlock(&event_mutex); -- cgit v1.2.3 From 30199137c899d7e416a2adc58bf09bec217ce9ca Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:39 +0900 Subject: tracing/dynevent: Pass extra arguments to match operation Pass extra arguments to match operation for checking exact match. If the event doesn't support exact match, it will be ignored. 
Link: http://lkml.kernel.org/r/156095685930.28024.10405547027475590975.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_dynevent.c | 4 +++- kernel/trace/trace_dynevent.h | 7 ++++--- kernel/trace/trace_events_hist.c | 4 ++-- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_uprobe.c | 4 ++-- 5 files changed, 13 insertions(+), 10 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 1cc55c50c491..a41fed46c285 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -47,6 +47,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) return -EINVAL; event++; } + argc--; argv++; p = strchr(event, '/'); if (p) { @@ -61,7 +62,8 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) for_each_dyn_event_safe(pos, n) { if (type && type != pos->ops) continue; - if (!pos->ops->match(system, event, pos)) + if (!pos->ops->match(system, event, + argc, (const char **)argv, pos)) continue; ret = pos->ops->free(pos); diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index 8c334064e4d6..46898138d2df 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -31,8 +31,9 @@ struct dyn_event; * @is_busy: Check whether given event is busy so that it can not be deleted. * Return true if it is busy, otherwides false. * @free: Delete the given event. Return 0 if success, otherwides error. - * @match: Check whether given event and system name match this event. - * Return true if it matches, otherwides false. + * @match: Check whether given event and system name match this event. The argc + * and argv is used for exact match. Return true if it matches, otherwides + * false. * * Except for @create, these methods are called under holding event_mutex. 
*/ @@ -43,7 +44,7 @@ struct dyn_event_operations { bool (*is_busy)(struct dyn_event *ev); int (*free)(struct dyn_event *ev); bool (*match)(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); }; /* Register new dyn_event type -- must be called at first */ diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index ca6b0dff60c5..65e7d071ed28 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -374,7 +374,7 @@ static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); static bool synth_event_match(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations synth_event_ops = { .create = synth_event_create, @@ -422,7 +422,7 @@ static bool synth_event_is_busy(struct dyn_event *ev) } static bool synth_event_match(const char *system, const char *event, - struct dyn_event *ev) + int argc, const char **argv, struct dyn_event *ev) { struct synth_event *sev = to_synth_event(ev); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index eac6344a2e7c..e8f72431b866 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -39,7 +39,7 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_kprobe_release(struct dyn_event *ev); static bool trace_kprobe_is_busy(struct dyn_event *ev); static bool trace_kprobe_match(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations trace_kprobe_ops = { .create = trace_kprobe_create, @@ -138,7 +138,7 @@ static bool trace_kprobe_is_busy(struct dyn_event *ev) } static bool trace_kprobe_match(const char *system, const char *event, - struct 
dyn_event *ev) + int argc, const char **argv, struct dyn_event *ev) { struct trace_kprobe *tk = to_trace_kprobe(ev); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index ac799abb7da9..2862e6829e48 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -44,7 +44,7 @@ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_uprobe_release(struct dyn_event *ev); static bool trace_uprobe_is_busy(struct dyn_event *ev); static bool trace_uprobe_match(const char *system, const char *event, - struct dyn_event *ev); + int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations trace_uprobe_ops = { .create = trace_uprobe_create, @@ -285,7 +285,7 @@ static bool trace_uprobe_is_busy(struct dyn_event *ev) } static bool trace_uprobe_match(const char *system, const char *event, - struct dyn_event *ev) + int argc, const char **argv, struct dyn_event *ev) { struct trace_uprobe *tu = to_trace_uprobe(ev); -- cgit v1.2.3 From ca89bc071d5e4e981dcc52e0ca90f4500d332e42 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:49 +0900 Subject: tracing/kprobe: Add multi-probe per event support Add support for multiple probes per event to kprobe events. A user can define several different probes on one trace event if those probes have the same "event signature", e.g. # echo p:testevent _do_fork > kprobe_events # echo p:testevent fork_idle >> kprobe_events # cat kprobe_events p:kprobes/testevent _do_fork p:kprobes/testevent fork_idle The event signature is defined by the kprobe type (retprobe or not), the number of args, the argument names, and the argument types. Note that this only supports appending probes to an event. A delete-event operation deletes all probes on the event. 
Link: http://lkml.kernel.org/r/156095686913.28024.9357292202316540742.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 +-- kernel/trace/trace_kprobe.c | 52 ++++++++++++++++++++++++++++++++++----- kernel/trace/trace_probe.c | 59 ++++++++++++++++++++++++++++++++++++++------- kernel/trace/trace_probe.h | 14 ++++++++++- 4 files changed, 111 insertions(+), 18 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 563e80f9006a..a8505d84b76e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4815,11 +4815,11 @@ static const char readme_msg[] = #endif #endif /* CONFIG_STACK_TRACER */ #ifdef CONFIG_DYNAMIC_EVENTS - " dynamic_events\t\t- Add/remove/show the generic dynamic events\n" + " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #ifdef CONFIG_KPROBE_EVENTS - " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n" + " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index e8f72431b866..f43098bf62dd 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -492,6 +492,10 @@ static void __unregister_trace_kprobe(struct trace_kprobe *tk) /* Unregister a trace_probe and probe_event */ static int unregister_trace_kprobe(struct trace_kprobe *tk) { + /* If other probes are on the event, just unregister kprobe */ + if (trace_probe_has_sibling(&tk->tp)) + goto unreg; + /* Enabled event can not be unregistered */ if (trace_probe_is_enabled(&tk->tp)) return -EBUSY; @@ -500,12 +504,38 @@ static int unregister_trace_kprobe(struct trace_kprobe *tk) if (unregister_kprobe_event(tk)) return -EBUSY; +unreg: 
__unregister_trace_kprobe(tk); dyn_event_remove(&tk->devent); + trace_probe_unlink(&tk->tp); return 0; } +static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to) +{ + int ret; + + /* Append to existing event */ + ret = trace_probe_append(&tk->tp, &to->tp); + if (ret) + return ret; + + /* Register k*probe */ + ret = __register_trace_kprobe(tk); + if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) { + pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); + ret = 0; + } + + if (ret) + trace_probe_unlink(&tk->tp); + else + dyn_event_add(&tk->devent); + + return ret; +} + /* Register a trace_probe and probe_event */ static int register_trace_kprobe(struct trace_kprobe *tk) { @@ -514,14 +544,24 @@ static int register_trace_kprobe(struct trace_kprobe *tk) mutex_lock(&event_mutex); - /* Delete old (same name) event if exist */ old_tk = find_trace_kprobe(trace_probe_name(&tk->tp), trace_probe_group_name(&tk->tp)); if (old_tk) { - ret = unregister_trace_kprobe(old_tk); - if (ret < 0) - goto end; - free_trace_kprobe(old_tk); + if (trace_kprobe_is_return(tk) != trace_kprobe_is_return(old_tk)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, DIFF_PROBE_TYPE); + ret = -EEXIST; + } else { + ret = trace_probe_compare_arg_type(&tk->tp, &old_tk->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + ret = -EEXIST; + } else + ret = append_trace_kprobe(tk, old_tk); + } + goto end; } /* Register new event */ @@ -755,7 +795,7 @@ static int trace_kprobe_create(int argc, const char *argv[]) trace_probe_log_err(0, BAD_INSN_BNDRY); else if (ret == -ENOENT) trace_probe_log_err(0, BAD_PROBE_ADDR); - else if (ret != -ENOMEM) + else if (ret != -ENOMEM && ret != -EEXIST) trace_probe_log_err(0, FAIL_REG_PROBE); goto error; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 28733bd6b607..651a1449acde 100644 --- 
a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -886,6 +886,35 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, return 0; } +static void trace_probe_event_free(struct trace_probe_event *tpe) +{ + kfree(tpe->class.system); + kfree(tpe->call.name); + kfree(tpe->call.print_fmt); + kfree(tpe); +} + +int trace_probe_append(struct trace_probe *tp, struct trace_probe *to) +{ + if (trace_probe_has_sibling(tp)) + return -EBUSY; + + list_del_init(&tp->list); + trace_probe_event_free(tp->event); + + tp->event = to->event; + list_add_tail(&tp->list, trace_probe_probe_list(to)); + + return 0; +} + +void trace_probe_unlink(struct trace_probe *tp) +{ + list_del_init(&tp->list); + if (list_empty(trace_probe_probe_list(tp))) + trace_probe_event_free(tp->event); + tp->event = NULL; +} void trace_probe_cleanup(struct trace_probe *tp) { @@ -894,15 +923,8 @@ void trace_probe_cleanup(struct trace_probe *tp) for (i = 0; i < tp->nr_args; i++) traceprobe_free_probe_arg(&tp->args[i]); - if (tp->event) { - struct trace_event_call *call = trace_probe_event_call(tp); - - kfree(tp->event->class.system); - kfree(call->name); - kfree(call->print_fmt); - kfree(tp->event); - tp->event = NULL; - } + if (tp->event) + trace_probe_unlink(tp); } int trace_probe_init(struct trace_probe *tp, const char *event, @@ -1006,3 +1028,22 @@ int trace_probe_remove_file(struct trace_probe *tp, return 0; } + +/* + * Return the smallest index of different type argument (start from 1). + * If all argument types and name are same, return 0. 
+ */ +int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b) +{ + int i; + + for (i = 0; i < a->nr_args; i++) { + if ((b->nr_args <= i) || + ((a->args[i].type != b->args[i].type) || + (a->args[i].count != b->args[i].count) || + strcmp(a->args[i].name, b->args[i].name))) + return i + 1; + } + + return 0; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 0b84abb884c2..39926e8a344b 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -302,6 +302,13 @@ static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp) return &tp->event->probes; } +static inline bool trace_probe_has_sibling(struct trace_probe *tp) +{ + struct list_head *list = trace_probe_probe_list(tp); + + return !list_empty(list) && !list_is_singular(list); +} + static inline int trace_probe_unregister_event_call(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ @@ -316,12 +323,15 @@ static inline bool trace_probe_has_single_file(struct trace_probe *tp) int trace_probe_init(struct trace_probe *tp, const char *event, const char *group); void trace_probe_cleanup(struct trace_probe *tp); +int trace_probe_append(struct trace_probe *tp, struct trace_probe *to); +void trace_probe_unlink(struct trace_probe *tp); int trace_probe_register_event_call(struct trace_probe *tp); int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file); int trace_probe_remove_file(struct trace_probe *tp, struct trace_event_file *file); struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, struct trace_event_file *file); +int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b); #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) @@ -419,7 +429,9 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(ARG_TOO_LONG, "Argument expression is too long"), \ C(NO_ARG_BODY, "No 
argument expression"), \ C(BAD_INSN_BNDRY, "Probe point is not an instruction boundary"),\ - C(FAIL_REG_PROBE, "Failed to register probe event"), + C(FAIL_REG_PROBE, "Failed to register probe event"),\ + C(DIFF_PROBE_TYPE, "Probe type is different from existing probe"),\ + C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"), #undef C #define C(a, b) TP_ERR_##a -- cgit v1.2.3 From 41af3cf587f476f9a879b08219324c8b456e6a4c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:07:58 +0900 Subject: tracing/uprobe: Add multi-probe per uprobe event support Allow users to define several probes on one uprobe event. Note that this only supports the append method, so deleting an event will delete all probes on the event. Link: http://lkml.kernel.org/r/156095687876.28024.13840331032234992863.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- kernel/trace/trace_uprobe.c | 60 +++++++++++++++++++++++++++++++-------------- 2 files changed, 43 insertions(+), 19 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a8505d84b76e..c7797a81a37e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4823,7 +4823,7 @@ static const char readme_msg[] = "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #ifdef CONFIG_UPROBE_EVENTS - " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n" + " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 2862e6829e48..d84e09abb8de 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -364,15 +364,32 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) { int ret; + if 
(trace_probe_has_sibling(&tu->tp)) + goto unreg; + ret = unregister_uprobe_event(tu); if (ret) return ret; +unreg: dyn_event_remove(&tu->devent); + trace_probe_unlink(&tu->tp); free_trace_uprobe(tu); return 0; } +static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to) +{ + int ret; + + /* Append to existing event */ + ret = trace_probe_append(&tu->tp, &to->tp); + if (!ret) + dyn_event_add(&tu->devent); + + return ret; +} + /* * Uprobe with multiple reference counter is not allowed. i.e. * If inode and offset matches, reference counter offset *must* @@ -382,25 +399,21 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu) * as the new one does not conflict with any other existing * ones. */ -static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new) +static int validate_ref_ctr_offset(struct trace_uprobe *new) { struct dyn_event *pos; - struct trace_uprobe *tmp, *old = NULL; + struct trace_uprobe *tmp; struct inode *new_inode = d_real_inode(new->path.dentry); - old = find_probe_event(trace_probe_name(&new->tp), - trace_probe_group_name(&new->tp)); - for_each_trace_uprobe(tmp, pos) { - if ((old ? 
old != tmp : true) && - new_inode == d_real_inode(tmp->path.dentry) && + if (new_inode == d_real_inode(tmp->path.dentry) && new->offset == tmp->offset && new->ref_ctr_offset != tmp->ref_ctr_offset) { pr_warn("Reference counter offset mismatch."); - return ERR_PTR(-EINVAL); + return -EINVAL; } } - return old; + return 0; } /* Register a trace_uprobe and probe_event */ @@ -411,18 +424,29 @@ static int register_trace_uprobe(struct trace_uprobe *tu) mutex_lock(&event_mutex); - /* register as an event */ - old_tu = find_old_trace_uprobe(tu); - if (IS_ERR(old_tu)) { - ret = PTR_ERR(old_tu); + ret = validate_ref_ctr_offset(tu); + if (ret) goto end; - } + /* register as an event */ + old_tu = find_probe_event(trace_probe_name(&tu->tp), + trace_probe_group_name(&tu->tp)); if (old_tu) { - /* delete old event */ - ret = unregister_trace_uprobe(old_tu); - if (ret) - goto end; + if (is_ret_probe(tu) != is_ret_probe(old_tu)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, DIFF_PROBE_TYPE); + ret = -EEXIST; + } else { + ret = trace_probe_compare_arg_type(&tu->tp, &old_tu->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + ret = -EEXIST; + } else + ret = append_trace_uprobe(tu, old_tu); + } + goto end; } ret = register_uprobe_event(tu); -- cgit v1.2.3 From eb5bf81330a722d0079d28eed13d3a9355d938bf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:08 +0900 Subject: tracing/kprobe: Add per-probe delete from event Allow user to delete a probe from event. This is done by head match. For example, if we have 2 probes on an event $ cat kprobe_events p:kprobes/testprobe _do_fork r1=%ax r2=%dx p:kprobes/testprobe idle_fork r1=%ax r2=%cx Then you can remove one of them by passing the head of definition which identify the probe. 
$ echo "-:kprobes/testprobe idle_fork" >> kprobe_events Link: http://lkml.kernel.org/r/156095688848.28024.15798690082378432435.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 25 ++++++++++++++++++++++++- kernel/trace/trace_probe.c | 18 ++++++++++++++++++ kernel/trace/trace_probe.h | 2 ++ 3 files changed, 44 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f43098bf62dd..18c4175b6585 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -137,13 +137,36 @@ static bool trace_kprobe_is_busy(struct dyn_event *ev) return trace_probe_is_enabled(&tk->tp); } +static bool trace_kprobe_match_command_head(struct trace_kprobe *tk, + int argc, const char **argv) +{ + char buf[MAX_ARGSTR_LEN + 1]; + + if (!argc) + return true; + + if (!tk->symbol) + snprintf(buf, sizeof(buf), "0x%p", tk->rp.kp.addr); + else if (tk->rp.kp.offset) + snprintf(buf, sizeof(buf), "%s+%u", + trace_kprobe_symbol(tk), tk->rp.kp.offset); + else + snprintf(buf, sizeof(buf), "%s", trace_kprobe_symbol(tk)); + if (strcmp(buf, argv[0])) + return false; + argc--; argv++; + + return trace_probe_match_command_args(&tk->tp, argc, argv); +} + static bool trace_kprobe_match(const char *system, const char *event, int argc, const char **argv, struct dyn_event *ev) { struct trace_kprobe *tk = to_trace_kprobe(ev); return strcmp(trace_probe_name(&tk->tp), event) == 0 && - (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0); + (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0) && + trace_kprobe_match_command_head(tk, argc, argv); } static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 651a1449acde..f8c3c65c035d 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1047,3 +1047,21 @@ int 
trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b) return 0; } + +bool trace_probe_match_command_args(struct trace_probe *tp, + int argc, const char **argv) +{ + char buf[MAX_ARGSTR_LEN + 1]; + int i; + + if (tp->nr_args < argc) + return false; + + for (i = 0; i < argc; i++) { + snprintf(buf, sizeof(buf), "%s=%s", + tp->args[i].name, tp->args[i].comm); + if (strcmp(buf, argv[i])) + return false; + } + return true; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 39926e8a344b..2dcc4e317787 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -332,6 +332,8 @@ int trace_probe_remove_file(struct trace_probe *tp, struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, struct trace_event_file *file); int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b); +bool trace_probe_match_command_args(struct trace_probe *tp, + int argc, const char **argv); #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) -- cgit v1.2.3 From ab10d69eb714961d1eca4129e4f8cda5e0618f66 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:18 +0900 Subject: tracing/uprobe: Add per-probe delete from event Add per-probe delete method from one event passing the head of definition. In other words, the events which match the head N parameters are deleted. 
Link: http://lkml.kernel.org/r/156095689811.28024.221706761151739433.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_uprobe.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d84e09abb8de..84925b5b6db5 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -284,13 +284,42 @@ static bool trace_uprobe_is_busy(struct dyn_event *ev) return trace_probe_is_enabled(&tu->tp); } +static bool trace_uprobe_match_command_head(struct trace_uprobe *tu, + int argc, const char **argv) +{ + char buf[MAX_ARGSTR_LEN + 1]; + int len; + + if (!argc) + return true; + + len = strlen(tu->filename); + if (strncmp(tu->filename, argv[0], len) || argv[0][len] != ':') + return false; + + if (tu->ref_ctr_offset == 0) + snprintf(buf, sizeof(buf), "0x%0*lx", + (int)(sizeof(void *) * 2), tu->offset); + else + snprintf(buf, sizeof(buf), "0x%0*lx(0x%lx)", + (int)(sizeof(void *) * 2), tu->offset, + tu->ref_ctr_offset); + if (strcmp(buf, &argv[0][len + 1])) + return false; + + argc--; argv++; + + return trace_probe_match_command_args(&tu->tp, argc, argv); +} + static bool trace_uprobe_match(const char *system, const char *event, int argc, const char **argv, struct dyn_event *ev) { struct trace_uprobe *tu = to_trace_uprobe(ev); return strcmp(trace_probe_name(&tu->tp), event) == 0 && - (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0); + (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0) && + trace_uprobe_match_command_head(tu, argc, argv); } static nokprobe_inline struct trace_uprobe * -- cgit v1.2.3 From 6218bf9f4d2942e88d97b60abc8c2ca0532e41a8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:27 +0900 Subject: tracing/probe: Add immediate parameter support Add immediate value parameter (\1234) support to probe events. 
This allows you to specify an immediate (or dummy) parameter instead of fetching from memory or register. This feature looks odd, but imagine when you put a probe on a code to trace some data. If the code is compiled into 2 instructions and 1 instruction has a value but other has nothing since it is optimized out. In that case, you can not fold those into one event, even if ftrace supported multiple probes on one event. With this feature, you can set a dummy value like foo=\deadbeef instead of something like foo=%di. Link: http://lkml.kernel.org/r/156095690733.28024.13258186548822649469.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/kprobetrace.rst | 1 + Documentation/trace/uprobetracer.rst | 1 + kernel/trace/trace.c | 2 +- kernel/trace/trace_probe.c | 18 ++++++++++++++++++ kernel/trace/trace_probe.h | 1 + 5 files changed, 22 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index fbb314bfa112..55993055902c 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -52,6 +52,7 @@ Synopsis of kprobe_events $retval : Fetch return value.(\*2) $comm : Fetch current task comm. +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4) + \IMM : Store an immediate value to the argument. NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types diff --git a/Documentation/trace/uprobetracer.rst b/Documentation/trace/uprobetracer.rst index 6e75a6c5a2c8..98cde99939d7 100644 --- a/Documentation/trace/uprobetracer.rst +++ b/Documentation/trace/uprobetracer.rst @@ -45,6 +45,7 @@ Synopsis of uprobe_tracer $retval : Fetch return value.(\*1) $comm : Fetch current task comm. 
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*2)(\*3) + \IMM : Store an immediate value to the argument. NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c7797a81a37e..fb4003c10151 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4848,7 +4848,7 @@ static const char readme_msg[] = #else "\t $stack, $stack, $retval, $comm,\n" #endif - "\t +|-[u]()\n" + "\t +|-[u](), \\imm-value\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" "\t b@/, ustring,\n" "\t \\[\\]\n" diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index f8c3c65c035d..fb90baec3cd8 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -316,6 +316,17 @@ inval_var: return -EINVAL; } +static int str_to_immediate(char *str, unsigned long *imm) +{ + if (isdigit(str[0])) + return kstrtoul(str, 0, imm); + else if (str[0] == '-') + return kstrtol(str, 0, (long *)imm); + else if (str[0] == '+') + return kstrtol(str + 1, 0, (long *)imm); + return -EINVAL; +} + /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *type, @@ -444,6 +455,13 @@ parse_probe_arg(char *arg, const struct fetch_type *type, code->offset = offset; } break; + case '\\': /* Immediate value */ + ret = str_to_immediate(arg + 1, &code->immediate); + if (ret) + trace_probe_log_err(offs + 1, BAD_IMM); + else + code->op = FETCH_OP_IMM; + break; } if (!ret && code->op == FETCH_OP_NOP) { /* Parsed, but do not find fetch method */ diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2dcc4e317787..cc113b82a4ce 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -408,6 +408,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_VAR, "Invalid 
$-valiable specified"), \ C(BAD_REG_NAME, "Invalid register name"), \ C(BAD_MEM_ADDR, "Invalid memory address"), \ + C(BAD_IMM, "Invalid immediate value"), \ C(FILE_ON_KPROBE, "File offset is not available with kprobe"), \ C(BAD_FILE_OFFS, "Invalid file offset value"), \ C(SYM_ON_UPROBE, "Symbol is not available with uprobe"), \ -- cgit v1.2.3 From a42e3c4de9642d5de524a0a48a7ce96872662dca Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jun 2019 00:08:37 +0900 Subject: tracing/probe: Add immediate string parameter support Add immediate string parameter (\"string") support to probe events. This allows you to specify an immediate (or dummy) parameter instead of fetching a string from memory. This feature looks odd, but imagine that you put a probe on a code to trace some string data. If the code is compiled into 2 instructions and 1 instruction has a string on memory but other has no string since it is optimized out. In that case, you can not fold those into one event, even if ftrace supported multiple probes on one event. With this feature, you can set a dummy string like foo=\"(optimized)":string instead of something like foo=+0(+0(%bp)):string. 
Link: http://lkml.kernel.org/r/156095691687.28024.13372712423865047991.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- kernel/trace/trace_kprobe.c | 3 +++ kernel/trace/trace_probe.c | 56 +++++++++++++++++++++++++++++++++------------ kernel/trace/trace_probe.h | 2 ++ kernel/trace/trace_uprobe.c | 3 +++ 5 files changed, 51 insertions(+), 15 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fb4003c10151..3916b72de715 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4848,7 +4848,7 @@ static const char readme_msg[] = #else "\t $stack, $stack, $retval, $comm,\n" #endif - "\t +|-[u](), \\imm-value\n" + "\t +|-[u](), \\imm-value, \\\"imm-string\"\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" "\t b@/, ustring,\n" "\t \\[\\]\n" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 18c4175b6585..7579c53bb053 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1083,6 +1083,9 @@ retry: case FETCH_OP_COMM: val = (unsigned long)current->comm; break; + case FETCH_OP_DATA: + val = (unsigned long)code->data; + break; #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API case FETCH_OP_ARG: val = regs_get_kernel_argument(regs, code->param); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index fb90baec3cd8..1e67fef06e53 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -327,6 +327,18 @@ static int str_to_immediate(char *str, unsigned long *imm) return -EINVAL; } +static int __parse_imm_string(char *str, char **pbuf, int offs) +{ + size_t len = strlen(str); + + if (str[len - 1] != '"') { + trace_probe_log_err(offs + len, IMMSTR_NO_CLOSE); + return -EINVAL; + } + *pbuf = kstrndup(str, len - 1, GFP_KERNEL); + return 0; +} + /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *type, @@ -441,7 +453,8 
@@ parse_probe_arg(char *arg, const struct fetch_type *type, ret = parse_probe_arg(arg, t2, &code, end, flags, offs); if (ret) break; - if (code->op == FETCH_OP_COMM) { + if (code->op == FETCH_OP_COMM || + code->op == FETCH_OP_DATA) { trace_probe_log_err(offs, COMM_CANT_DEREF); return -EINVAL; } @@ -456,11 +469,19 @@ parse_probe_arg(char *arg, const struct fetch_type *type, } break; case '\\': /* Immediate value */ - ret = str_to_immediate(arg + 1, &code->immediate); - if (ret) - trace_probe_log_err(offs + 1, BAD_IMM); - else - code->op = FETCH_OP_IMM; + if (arg[1] == '"') { /* Immediate string */ + ret = __parse_imm_string(arg + 2, &tmp, offs + 2); + if (ret) + break; + code->op = FETCH_OP_DATA; + code->data = tmp; + } else { + ret = str_to_immediate(arg + 1, &code->immediate); + if (ret) + trace_probe_log_err(offs + 1, BAD_IMM); + else + code->op = FETCH_OP_IMM; + } break; } if (!ret && code->op == FETCH_OP_NOP) { @@ -560,8 +581,11 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } } - /* Since $comm can not be dereferred, we can find $comm by strcmp */ - if (strcmp(arg, "$comm") == 0) { + /* + * Since $comm and immediate string can not be dereferred, + * we can find those by strcmp. + */ + if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) { /* The type of $comm must be "string", and not an array. */ if (parg->count || (t && strcmp(t, "string"))) return -EINVAL; @@ -598,7 +622,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, if (!strcmp(parg->type->name, "string") || !strcmp(parg->type->name, "ustring")) { if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF && - code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) { + code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM && + code->op != FETCH_OP_DATA) { trace_probe_log_err(offset + (t ? 
(t - arg) : 0), BAD_STRING); ret = -EINVAL; @@ -607,9 +632,10 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) || parg->count) { /* - * IMM and COMM is pointing actual address, those must - * be kept, and if parg->count != 0, this is an array - * of string pointers instead of string address itself. + * IMM, DATA and COMM is pointing actual address, those + * must be kept, and if parg->count != 0, this is an + * array of string pointers instead of string address + * itself. */ code++; if (code->op != FETCH_OP_NOP) { @@ -683,7 +709,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, fail: if (ret) { for (code = tmp; code < tmp + FETCH_INSN_MAX; code++) - if (code->op == FETCH_NOP_SYMBOL) + if (code->op == FETCH_NOP_SYMBOL || + code->op == FETCH_OP_DATA) kfree(code->data); } kfree(tmp); @@ -754,7 +781,8 @@ void traceprobe_free_probe_arg(struct probe_arg *arg) struct fetch_insn *code = arg->code; while (code && code->op != FETCH_OP_END) { - if (code->op == FETCH_NOP_SYMBOL) + if (code->op == FETCH_NOP_SYMBOL || + code->op == FETCH_OP_DATA) kfree(code->data); code++; } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index cc113b82a4ce..f805cc4cbe7c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -89,6 +89,7 @@ enum fetch_op { FETCH_OP_COMM, /* Current comm */ FETCH_OP_ARG, /* Function argument : .param */ FETCH_OP_FOFFS, /* File offset: .immediate */ + FETCH_OP_DATA, /* Allocated data: .data */ // Stage 2 (dereference) op FETCH_OP_DEREF, /* Dereference: .offset */ FETCH_OP_UDEREF, /* User-space Dereference: .offset */ @@ -409,6 +410,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_REG_NAME, "Invalid register name"), \ C(BAD_MEM_ADDR, "Invalid memory address"), \ C(BAD_IMM, "Invalid immediate value"), \ + C(IMMSTR_NO_CLOSE, "String is not closed with '\"'"), \ C(FILE_ON_KPROBE, 
"File offset is not available with kprobe"), \ C(BAD_FILE_OFFS, "Invalid file offset value"), \ C(SYM_ON_UPROBE, "Symbol is not available with uprobe"), \ diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 84925b5b6db5..cbf4da4bf367 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -248,6 +248,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, case FETCH_OP_COMM: val = FETCH_TOKEN_COMM; break; + case FETCH_OP_DATA: + val = (unsigned long)code->data; + break; case FETCH_OP_FOFFS: val = translate_user_vaddr(code->immediate); break; -- cgit v1.2.3 From f7edb451fa51e44e62177347ea7850aa0e901ea5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 7 Aug 2019 11:28:59 -0400 Subject: tracing/arm64: Have max stack tracer handle the case of return address after data Most archs (well at least x86) store the function call return address on the stack before storing the local variables for the function. The max stack tracer depends on this in its algorithm to display the stack size of each function it finds in the back trace. Some archs (arm64), may store the return address (from its link register) just before calling a nested function. There's no reason to save the link register on leaf functions, as it wont be updated. This breaks the algorithm of the max stack tracer. Add a new define ARCH_FTRACE_SHIFT_STACK_TRACER that an architecture may set if it stores the return address (link register) after it stores the function's local variables, and have the stack trace shift the values of the mapped stack size to the appropriate functions. 
Link: 20190802094103.163576-1-jiping.ma2@windriver.com Reported-by: Jiping Ma Acked-by: Will Deacon Signed-off-by: Steven Rostedt (VMware) --- arch/arm64/include/asm/ftrace.h | 13 +++++++++++++ kernel/trace/trace_stack.c | 14 ++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'kernel/trace') diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 5ab5200b2bdc..d48667b04c41 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -14,6 +14,19 @@ #define MCOUNT_ADDR ((unsigned long)_mcount) #define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE +/* + * Currently, gcc tends to save the link register after the local variables + * on the stack. This causes the max stack tracer to report the function + * frame sizes for the wrong functions. By defining + * ARCH_FTRACE_SHIFT_STACK_TRACER, it will tell the stack tracer to expect + * to find the return address on the stack after the local variables have + * been set up. + * + * Note, this may change in the future, and we will need to deal with that + * if it were to happen. + */ +#define ARCH_FTRACE_SHIFT_STACK_TRACER 1 + #ifndef __ASSEMBLY__ #include diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 5d16f73898db..642a850af81a 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -158,6 +158,20 @@ static void check_stack(unsigned long ip, unsigned long *stack) i++; } +#ifdef ARCH_FTRACE_SHIFT_STACK_TRACER + /* + * Some archs will store the link register before calling + * nested functions. This means the saved return address + * comes after the local storage, and we need to shift + * for that. 
+ */ + if (x > 1) { + memmove(&stack_trace_index[0], &stack_trace_index[1], + sizeof(stack_trace_index[0]) * (x - 1)); + x--; + } +#endif + stack_trace_nr_entries = x; if (task_stack_end_corrupted(current)) { -- cgit v1.2.3 From 58fe7a87db51ea00596187765dabfc2c4ea2b436 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 7 Aug 2019 12:27:30 -0400 Subject: tracing: Document the stack trace algorithm in the comments As the max stack tracer algorithm is not that easy to understand from the code, add comments that explain the algorithm and mentions how ARCH_FTRACE_SHIFT_STACK_TRACER affects it. Link: http://lkml.kernel.org/r/20190806123455.487ac02b@gandalf.local.home Suggested-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_stack.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 642a850af81a..ec9a34a97129 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -53,6 +53,104 @@ static void print_max_stack(void) } } +/* + * The stack tracer looks for a maximum stack at each call from a function. It + * registers a callback from ftrace, and in that callback it examines the stack + * size. It determines the stack size from the variable passed in, which is the + * address of a local variable in the stack_trace_call() callback function. + * The stack size is calculated by the address of the local variable to the top + * of the current stack. If that size is smaller than the currently saved max + * stack size, nothing more is done. + * + * If the size of the stack is greater than the maximum recorded size, then the + * following algorithm takes place. 
+ * + * For architectures (like x86) that store the function's return address before + * saving the function's local variables, the stack will look something like + * this: + * + * [ top of stack ] + * 0: sys call entry frame + * 10: return addr to entry code + * 11: start of sys_foo frame + * 20: return addr to sys_foo + * 21: start of kernel_func_bar frame + * 30: return addr to kernel_func_bar + * 31: [ do trace stack here ] + * + * The save_stack_trace() is called returning all the functions it finds in the + * current stack. Which would be (from the bottom of the stack to the top): + * + * return addr to kernel_func_bar + * return addr to sys_foo + * return addr to entry code + * + * Now to figure out how much each of these functions' local variable size is, + * a search of the stack is made to find these values. When a match is made, it + * is added to the stack_dump_trace[] array. The offset into the stack is saved + * in the stack_trace_index[] array. The above example would show: + * + * stack_dump_trace[] | stack_trace_index[] + * ------------------ + ------------------- + * return addr to kernel_func_bar | 30 + * return addr to sys_foo | 20 + * return addr to entry | 10 + * + * The print_max_stack() function above, uses these values to print the size of + * each function's portion of the stack. + * + * for (i = 0; i < nr_entries; i++) { + * size = i == nr_entries - 1 ? 
stack_trace_index[i] : + * stack_trace_index[i] - stack_trace_index[i+1] + * print "%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i]); + * } + * + * The above shows + * + * depth size location + * ----- ---- -------- + * 0 30 10 kernel_func_bar + * 1 20 10 sys_foo + * 2 10 10 entry code + * + * Now for architectures that might save the return address after the function's + * local variables (saving the link register before calling nested functions), + * this will cause the stack to look a little different: + * + * [ top of stack ] + * 0: sys call entry frame + * 10: start of sys_foo_frame + * 19: return addr to entry code << lr saved before calling kernel_func_bar + * 20: start of kernel_func_bar frame + * 29: return addr to sys_foo_frame << lr saved before calling next function + * 30: [ do trace stack here ] + * + * Although the functions returned by save_stack_trace() may be the same, the + * placement in the stack will be different. Using the same algorithm as above + * would yield: + * + * stack_dump_trace[] | stack_trace_index[] + * ------------------ + ------------------- + * return addr to kernel_func_bar | 30 + * return addr to sys_foo | 29 + * return addr to entry | 19 + * + * Where the mapping is off by one: + * + * kernel_func_bar stack frame size is 29 - 19 not 30 - 29! + * + * To fix this, if the architecture sets ARCH_RET_ADDR_AFTER_LOCAL_VARS the + * values in stack_trace_index[] are shifted by one and the number of + * stack trace entries is decremented by one. + * + * stack_dump_trace[] | stack_trace_index[] + * ------------------ + ------------------- + * return addr to kernel_func_bar | 29 + * return addr to sys_foo | 19 + * + * Although the entry function is not displayed, the first function (sys_foo) + * will still include the stack size of it.
+ */ static void check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; -- cgit v1.2.3 From a47b53e95accfd2814efe39dfca06dbd45cd857a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 13 Aug 2019 12:14:35 -0400 Subject: tracing: Rename tracing_reset() to tracing_reset_cpu() The name tracing_reset() was a misnomer, as it really only reset a single CPU buffer. Rename it to tracing_reset_cpu() and also make it static and remove the prototype from trace.h, as it is only used in a single function. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 6 +++--- kernel/trace/trace.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3916b72de715..e917aa783675 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1854,7 +1854,7 @@ int __init register_tracer(struct tracer *type) return ret; } -void tracing_reset(struct trace_buffer *buf, int cpu) +static void tracing_reset_cpu(struct trace_buffer *buf, int cpu) { struct ring_buffer *buffer = buf->buffer; @@ -4251,7 +4251,7 @@ static int tracing_open(struct inode *inode, struct file *file) if (cpu == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(trace_buf); else - tracing_reset(trace_buf, cpu); + tracing_reset_cpu(trace_buf, cpu); } if (file->f_mode & FMODE_READ) { @@ -6742,7 +6742,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, if (iter->cpu_file == RING_BUFFER_ALL_CPUS) tracing_reset_online_cpus(&tr->max_buffer); else - tracing_reset(&tr->max_buffer, iter->cpu_file); + tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); } break; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 005f08629b8b..26b0a08f3c7d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -677,7 +677,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, 
struct trace_array *tr); int tracing_is_enabled(void); -void tracing_reset(struct trace_buffer *buf, int cpu); void tracing_reset_online_cpus(struct trace_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); -- cgit v1.2.3 From ac68154626ab7fe4ce5f424937c34f42a3e20c5b Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Fri, 12 Jul 2019 09:53:08 +0800 Subject: tracing: Add "gfp_t" support in synthetic_events Add "gfp_t" support in synthetic_events, then the "gfp_t" type parameter in some functions can be traced. Prints the gfp flags as hex in addition to the human-readable flag string. Example output: whoopsie-630 [000] ...1 78.969452: testevent: bar=b20 (GFP_ATOMIC|__GFP_ZERO) rcuc/0-11 [000] ...1 81.097555: testevent: bar=a20 (GFP_ATOMIC) rcuc/0-11 [000] ...1 81.583123: testevent: bar=a20 (GFP_ATOMIC) Link: http://lkml.kernel.org/r/20190712015308.9908-1-zhengjun.xing@linux.intel.com Signed-off-by: Zhengjun Xing [ Added printing of flag names ] Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 65e7d071ed28..3a6e42aa08e6 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -13,6 +13,10 @@ #include #include +/* for gfp flag names */ +#include +#include + #include "tracing_map.h" #include "trace.h" #include "trace_dynevent.h" @@ -752,6 +756,8 @@ static int synth_field_size(char *type) size = sizeof(unsigned long); else if (strcmp(type, "pid_t") == 0) size = sizeof(pid_t); + else if (strcmp(type, "gfp_t") == 0) + size = sizeof(gfp_t); else if (synth_field_is_string(type)) size = synth_field_string_size(type); @@ -792,6 +798,8 @@ static const char *synth_field_fmt(char *type) fmt = "%lu"; else if (strcmp(type, "pid_t") == 0) fmt = "%d"; + else if (strcmp(type, "gfp_t") == 0) 
+ fmt = "%x"; else if (synth_field_is_string(type)) fmt = "%s"; @@ -834,9 +842,20 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, i == se->n_fields - 1 ? "" : " "); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { + struct trace_print_flags __flags[] = { + __def_gfpflag_names, {-1, NULL} }; + trace_seq_printf(s, print_fmt, se->fields[i]->name, entry->fields[n_u64], i == se->n_fields - 1 ? "" : " "); + + if (strcmp(se->fields[i]->type, "gfp_t") == 0) { + trace_seq_puts(s, " ("); + trace_print_flags_seq(s, "|", + entry->fields[n_u64], + __flags); + trace_seq_putc(s, ')'); + } n_u64++; } } -- cgit v1.2.3 From 08468754c16e731d31538a8b1b0b433be2410a89 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 10 Sep 2019 22:33:36 +0800 Subject: ftrace: Simplify ftrace hash lookup code in clear_func_from_hash() Function ftrace_lookup_ip() will check empty hash table. So we don't need extra check outside. Link: http://lkml.kernel.org/r/20190910143336.13472-1-changbin.du@gmail.com Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f9821a3374e9..c4cc048eb594 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6036,11 +6036,7 @@ clear_func_from_hash(struct ftrace_init_func *func, struct ftrace_hash *hash) { struct ftrace_func_entry *entry; - if (ftrace_hash_empty(hash)) - return; - - entry = __ftrace_lookup_ip(hash, func->ip); - + entry = ftrace_lookup_ip(hash, func->ip); /* * Do not allow this rec to match again. 
* Yeah, it may waste some memory, but will be removed -- cgit v1.2.3 From 119cdbdb95a66203c0bca09474427c297186f7a3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Aug 2019 18:15:43 +0300 Subject: tracing: Be more clever when dumping hex in __print_hex() Hex dump as many as 16 bytes at once in trace_print_hex_seq() instead of byte-by-byte approach. Link: http://lkml.kernel.org/r/20190806151543.86061-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_output.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cab4a5398f1d..d54ce252b05a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -219,10 +219,10 @@ trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len, { int i; const char *ret = trace_seq_buffer_ptr(p); + const char *fmt = concatenate ? "%*phN" : "%*ph"; - for (i = 0; i < buf_len; i++) - trace_seq_printf(p, "%s%2.2x", concatenate || i == 0 ? 
"" : " ", - buf[i]); + for (i = 0; i < buf_len; i += 16) + trace_seq_printf(p, fmt, min(buf_len - i, 16), &buf[i]); trace_seq_putc(p, 0); return ret; -- cgit v1.2.3 From 17f8607a1658a8e70415eef67909f990d13017b5 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 1 Sep 2019 17:02:01 -0500 Subject: tracing: Make sure variable reference alias has correct var_ref_idx Original changelog from Steve Rostedt (except last sentence which explains the problem, and the Fixes: tag): I performed a three way histogram with the following commands: echo 'irq_lat u64 lat pid_t pid' > synthetic_events echo 'wake_lat u64 lat u64 irqlat pid_t pid' >> synthetic_events echo 'hist:keys=common_pid:irqts=common_timestamp.usecs if function == 0xffffffff81200580' > events/timer/hrtimer_start/trigger echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$irqts:onmatch(timer.hrtimer_start).irq_lat($lat,pid) if common_flags & 1' > events/sched/sched_waking/trigger echo 'hist:keys=pid:wakets=common_timestamp.usecs,irqlat=lat' > events/synthetic/irq_lat/trigger echo 'hist:keys=next_pid:lat=common_timestamp.usecs-$wakets,irqlat=$irqlat:onmatch(synthetic.irq_lat).wake_lat($lat,$irqlat,next_pid)' > events/sched/sched_switch/trigger echo 1 > events/synthetic/wake_lat/enable Basically I wanted to see: hrtimer_start (calling function tick_sched_timer) Note: # grep tick_sched_timer /proc/kallsyms ffffffff81200580 t tick_sched_timer And save the time of that, and then record sched_waking if it is called in interrupt context and with the same pid as the hrtimer_start, it will record the latency between that and the waking event. I then look at when the task that is woken is scheduled in, and record the latency between the wakeup and the task running. At the end, the wake_lat synthetic event will show the wakeup to scheduled latency, as well as the irq latency in from hritmer_start to the wakeup. The problem is that I found this: -0 [007] d... 
190.485261: wake_lat: lat=27 irqlat=190485230 pid=698 -0 [005] d... 190.485283: wake_lat: lat=40 irqlat=190485239 pid=10 -0 [002] d... 190.488327: wake_lat: lat=56 irqlat=190488266 pid=335 -0 [005] d... 190.489330: wake_lat: lat=64 irqlat=190489262 pid=10 -0 [003] d... 190.490312: wake_lat: lat=43 irqlat=190490265 pid=77 -0 [005] d... 190.493322: wake_lat: lat=54 irqlat=190493262 pid=10 -0 [005] d... 190.497305: wake_lat: lat=35 irqlat=190497267 pid=10 -0 [005] d... 190.501319: wake_lat: lat=50 irqlat=190501264 pid=10 The irqlat seemed quite large! Investigating this further, if I had enabled the irq_lat synthetic event, I noticed this: -0 [002] d.s. 249.429308: irq_lat: lat=164968 pid=335 -0 [002] d... 249.429369: wake_lat: lat=55 irqlat=249429308 pid=335 Notice that the timestamp of the irq_lat "249.429308" is awfully similar to the reported irqlat variable. In fact, all instances were like this. It appeared that: irqlat=$irqlat Wasn't assigning the old $irqlat to the new irqlat variable, but instead was assigning the $irqts to it. The issue is that assigning the old $irqlat to the new irqlat variable creates a variable reference alias, but the alias creation code forgets to make sure the alias uses the same var_ref_idx to access the reference. 
Link: http://lkml.kernel.org/r/1567375321.5282.12.camel@kernel.org Cc: Linux Trace Devel Cc: linux-rt-users Cc: stable@vger.kernel.org Fixes: 7e8b88a30b085 ("tracing: Add hist trigger support for variable reference aliases") Reported-by: Steven Rostedt (VMware) Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 3a6e42aa08e6..9468bd8d44a2 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2804,6 +2804,8 @@ static struct hist_field *create_alias(struct hist_trigger_data *hist_data, return NULL; } + alias->var_ref_idx = var_ref->var_ref_idx; + return alias; } -- cgit v1.2.3 From d59fae6fea39efe65bb3d3310aaa2a54b5f55c0d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 17 Sep 2019 14:11:37 +0900 Subject: tracing/kprobe: Fix NULL pointer access in trace_probe_unlink() Fix NULL pointer access in trace_probe_unlink() by initializing trace_probe.list correctly in trace_probe_init(). In the error case of trace_probe_init(), it can call trace_probe_unlink() before initializing trace_probe.list member. This causes NULL pointer dereference at list_del_init() in trace_probe_unlink().
Syzbot reported : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 8633 Comm: syz-executor797 Not tainted 5.3.0-rc8-next-20190915 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__list_del_entry_valid+0x85/0xf5 lib/list_debug.c:51 Code: 0f 84 e1 00 00 00 48 b8 22 01 00 00 00 00 ad de 49 39 c4 0f 84 e2 00 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 e2 48 c1 ea 03 <80> 3c 02 00 75 53 49 8b 14 24 4c 39 f2 0f 85 99 00 00 00 49 8d 7d RSP: 0018:ffff888090a7f9d8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff88809b6f90c0 RCX: ffffffff817c0ca9 RDX: 0000000000000000 RSI: ffffffff817c0a73 RDI: ffff88809b6f90c8 RBP: ffff888090a7f9f0 R08: ffff88809a04e600 R09: ffffed1015d26aed R10: ffffed1015d26aec R11: ffff8880ae935763 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88809b6f90c0 R15: ffff88809b6f90d0 FS: 0000555556f99880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000006cc090 CR3: 00000000962b2000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __list_del_entry include/linux/list.h:131 [inline] list_del_init include/linux/list.h:190 [inline] trace_probe_unlink+0x1f/0x200 kernel/trace/trace_probe.c:959 trace_probe_cleanup+0xd3/0x110 kernel/trace/trace_probe.c:973 trace_probe_init+0x3f2/0x510 kernel/trace/trace_probe.c:1011 alloc_trace_uprobe+0x5e/0x250 kernel/trace/trace_uprobe.c:353 create_local_trace_uprobe+0x109/0x4a0 kernel/trace/trace_uprobe.c:1508 perf_uprobe_init+0x131/0x210 kernel/trace/trace_event_perf.c:314 perf_uprobe_event_init+0x106/0x1a0 kernel/events/core.c:8898 perf_try_init_event+0x135/0x590 kernel/events/core.c:10184 perf_init_event kernel/events/core.c:10228 [inline] 
perf_event_alloc.part.0+0x1b89/0x33d0 kernel/events/core.c:10505 perf_event_alloc kernel/events/core.c:10887 [inline] __do_sys_perf_event_open+0xa2d/0x2d00 kernel/events/core.c:10989 __se_sys_perf_event_open kernel/events/core.c:10871 [inline] __x64_sys_perf_event_open+0xbe/0x150 kernel/events/core.c:10871 do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Link: http://lkml.kernel.org/r/156869709721.22406.5153754822203046939.stgit@devnote2 Reported-by: syzbot+2f807f4d3a2a4e87f18f@syzkaller.appspotmail.com Fixes: ca89bc071d5e ("tracing/kprobe: Add multi-probe per event support") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 1e67fef06e53..baf58a3612c0 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -986,6 +986,12 @@ int trace_probe_init(struct trace_probe *tp, const char *event, if (!tp->event) return -ENOMEM; + INIT_LIST_HEAD(&tp->event->files); + INIT_LIST_HEAD(&tp->event->class.fields); + INIT_LIST_HEAD(&tp->event->probes); + INIT_LIST_HEAD(&tp->list); + list_add(&tp->event->probes, &tp->list); + call = trace_probe_event_call(tp); call->class = &tp->event->class; call->name = kstrdup(event, GFP_KERNEL); @@ -999,11 +1005,6 @@ int trace_probe_init(struct trace_probe *tp, const char *event, ret = -ENOMEM; goto error; } - INIT_LIST_HEAD(&tp->event->files); - INIT_LIST_HEAD(&tp->event->class.fields); - INIT_LIST_HEAD(&tp->event->probes); - INIT_LIST_HEAD(&tp->list); - list_add(&tp->event->probes, &tp->list); return 0; -- cgit v1.2.3 From 44d00dc7ceab1732ebd5f5aae601b24dacdf10c5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 18 Sep 2019 17:55:37 +0900 Subject: tracing/probe: Fix to allow user to enable events on unloaded modules Fix to allow user to enable probe events 
on unloaded modules. This operation was allowed before commit 60d53e2c3b75 ("tracing/probe: Split trace_event related data from trace_probe"), because if users need to probe module init functions, they have to enable those probe events before loading the module. Link: http://lkml.kernel.org/r/156879693733.31056.9331322616994665167.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 60d53e2c3b75 ("tracing/probe: Split trace_event related data from trace_probe") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7579c53bb053..0ba3239c0270 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -371,31 +371,24 @@ static int enable_trace_kprobe(struct trace_event_call *call, if (enabled) return 0; - enabled = false; list_for_each_entry(pos, trace_probe_probe_list(tp), list) { tk = container_of(pos, struct trace_kprobe, tp); if (trace_kprobe_has_gone(tk)) continue; ret = __enable_trace_kprobe(tk); - if (ret) { - if (enabled) { - __disable_trace_kprobe(tp); - enabled = false; - } + if (ret) break; - } enabled = true; } - if (!enabled) { - /* No probe is enabled. Roll back */ + if (ret) { + /* Failed to enable one of them. Roll back all */ + if (enabled) + __disable_trace_kprobe(tp); if (file) trace_probe_remove_file(tp, file); else trace_probe_clear_flag(tp, TP_FLAG_PROFILE); - if (!ret) - /* Since all probes are gone, this is not available */ - ret = -EADDRNOTAVAIL; } return ret; -- cgit v1.2.3 From fe60b0ce8e7335269722ec080173a9411a9d58a5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 18 Sep 2019 17:55:46 +0900 Subject: tracing/probe: Reject exactly same probe event Reject exactly same probe events as existing probes. Multiprobe allows user to define multiple probes on same event.
If a user appends a probe which has exactly the same definition (same probe address and same arguments) to an existing event, the event will record the same probe information twice. That can confuse users, so reject it. Link: http://lkml.kernel.org/r/156879694602.31056.5533024778165036763.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 52 ++++++++++++++++++++++++++++++++++++++------- kernel/trace/trace_probe.h | 3 ++- kernel/trace/trace_uprobe.c | 52 ++++++++++++++++++++++++++++++++++++++------- 3 files changed, 90 insertions(+), 17 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 0ba3239c0270..a6697e28ddda 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -528,10 +528,53 @@ unreg: return 0; } +static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig, + struct trace_kprobe *comp) +{ + struct trace_probe_event *tpe = orig->tp.event; + struct trace_probe *pos; + int i; + + list_for_each_entry(pos, &tpe->probes, list) { + orig = container_of(pos, struct trace_kprobe, tp); + if (strcmp(trace_kprobe_symbol(orig), + trace_kprobe_symbol(comp)) || + trace_kprobe_offset(orig) != trace_kprobe_offset(comp)) + continue; + + /* + * trace_probe_compare_arg_type() ensured that nr_args and + * each argument name and type are same. Let's compare comm.
+ */ + for (i = 0; i < orig->tp.nr_args; i++) { + if (strcmp(orig->tp.args[i].comm, + comp->tp.args[i].comm)) + continue; + } + + return true; + } + + return false; +} + static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to) { int ret; + ret = trace_probe_compare_arg_type(&tk->tp, &to->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + return -EEXIST; + } + if (trace_kprobe_has_same_kprobe(to, tk)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, SAME_PROBE); + return -EEXIST; + } + /* Append to existing event */ ret = trace_probe_append(&tk->tp, &to->tp); if (ret) @@ -568,14 +611,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk) trace_probe_log_err(0, DIFF_PROBE_TYPE); ret = -EEXIST; } else { - ret = trace_probe_compare_arg_type(&tk->tp, &old_tk->tp); - if (ret) { - /* Note that argument starts index = 2 */ - trace_probe_log_set_index(ret + 1); - trace_probe_log_err(0, DIFF_ARG_TYPE); - ret = -EEXIST; - } else - ret = append_trace_kprobe(tk, old_tk); + ret = append_trace_kprobe(tk, old_tk); } goto end; } diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index f805cc4cbe7c..4ee703728aec 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -436,7 +436,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_INSN_BNDRY, "Probe point is not an instruction boundary"),\ C(FAIL_REG_PROBE, "Failed to register probe event"),\ C(DIFF_PROBE_TYPE, "Probe type is different from existing probe"),\ - C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"), + C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"),\ + C(SAME_PROBE, "There is already the exact same probe event"), #undef C #define C(a, b) TP_ERR_##a diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index cbf4da4bf367..34dd6d0016a3 100644 --- 
a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -410,10 +410,53 @@ unreg: return 0; } +static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, + struct trace_uprobe *comp) +{ + struct trace_probe_event *tpe = orig->tp.event; + struct trace_probe *pos; + struct inode *comp_inode = d_real_inode(comp->path.dentry); + int i; + + list_for_each_entry(pos, &tpe->probes, list) { + orig = container_of(pos, struct trace_uprobe, tp); + if (comp_inode != d_real_inode(orig->path.dentry) || + comp->offset != orig->offset) + continue; + + /* + * trace_probe_compare_arg_type() ensured that nr_args and + * each argument name and type are same. Let's compare comm. + */ + for (i = 0; i < orig->tp.nr_args; i++) { + if (strcmp(orig->tp.args[i].comm, + comp->tp.args[i].comm)) + continue; + } + + return true; + } + + return false; +} + static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to) { int ret; + ret = trace_probe_compare_arg_type(&tu->tp, &to->tp); + if (ret) { + /* Note that argument starts index = 2 */ + trace_probe_log_set_index(ret + 1); + trace_probe_log_err(0, DIFF_ARG_TYPE); + return -EEXIST; + } + if (trace_uprobe_has_same_uprobe(to, tu)) { + trace_probe_log_set_index(0); + trace_probe_log_err(0, SAME_PROBE); + return -EEXIST; + } + /* Append to existing event */ ret = trace_probe_append(&tu->tp, &to->tp); if (!ret) @@ -469,14 +512,7 @@ static int register_trace_uprobe(struct trace_uprobe *tu) trace_probe_log_err(0, DIFF_PROBE_TYPE); ret = -EEXIST; } else { - ret = trace_probe_compare_arg_type(&tu->tp, &old_tu->tp); - if (ret) { - /* Note that argument starts index = 2 */ - trace_probe_log_set_index(ret + 1); - trace_probe_log_err(0, DIFF_ARG_TYPE); - ret = -EEXIST; - } else - ret = append_trace_uprobe(tu, old_tu); + ret = append_trace_uprobe(tu, old_tu); } goto end; } -- cgit v1.2.3