From 953ae45a0c25e09428d4a03d7654f97ab8a36647 Mon Sep 17 00:00:00 2001 From: Divya Indi Date: Wed, 14 Aug 2019 10:55:25 -0700 Subject: tracing: Adding NULL checks for trace_array descriptor pointer As part of commit f45d1225adb0 ("tracing: Kernel access to Ftrace instances") we exported certain functions. Here, we are adding some additional NULL checks to ensure safe usage by users of these APIs. Link: http://lkml.kernel.org/r/1565805327-579-4-git-send-email-divya.indi@oracle.com Signed-off-by: Divya Indi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index fba87d10f0c1..2a3ac2365445 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -793,6 +793,8 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) char *event = NULL, *sub = NULL, *match; int ret; + if (!tr) + return -ENOENT; /* * The buf format can be : * *: means any event by that name. -- cgit v1.2.3 From 28879787147358e8ffcae397f11748de3dd26577 Mon Sep 17 00:00:00 2001 From: Divya Indi Date: Wed, 20 Nov 2019 11:08:38 -0800 Subject: tracing: Adding new functions for kernel access to Ftrace instances Adding 2 new functions - 1) struct trace_array *trace_array_get_by_name(const char *name); Return pointer to a trace array with given name. If it does not exist, create and return pointer to the new trace array. 2) int trace_array_set_clr_event(struct trace_array *tr, const char *system ,const char *event, bool enable); Enable/Disable events to this trace array. Additionally, - To handle reference counters, export trace_array_put() - Due to introduction of the above 2 new functions, we no longer need to export - ftrace_set_clr_event & trace_array_create APIs. Link: http://lkml.kernel.org/r/1574276919-11119-2-git-send-email-divya.indi@oracle.com Signed-off-by: Divya Indi Reviewed-by: Aruna Ramakrishna Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace.h | 3 +- include/linux/trace_events.h | 3 +- kernel/trace/trace.c | 96 +++++++++++++++++++++++++++++++++++--------- kernel/trace/trace.h | 1 - kernel/trace/trace_events.c | 27 ++++++++++++- 5 files changed, 106 insertions(+), 24 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/include/linux/trace.h b/include/linux/trace.h index 24fcf07812ae..7fd86d3c691f 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -29,7 +29,8 @@ struct trace_array; void trace_printk_init_buffers(void); int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...); -struct trace_array *trace_array_create(const char *name); +void trace_array_put(struct trace_array *tr); +struct trace_array *trace_array_get_by_name(const char *name); int trace_array_destroy(struct trace_array *tr); #endif /* CONFIG_TRACING */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 60a41b7069dd..4c6e15605766 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -555,7 +555,8 @@ extern int trace_event_get_offsets(struct trace_event_call *call); int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); - +int trace_array_set_clr_event(struct trace_array *tr, const char *system, + const char *event, bool enable); /* * The double __builtin_constant_p is because gcc will give us an error * if we try to allocate the static variable to fmt if it is not a diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 42659ce6ac0c..02a23a6e5e00 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -301,12 +301,24 @@ static void __trace_array_put(struct trace_array *this_tr) this_tr->ref--; } +/** + * trace_array_put - Decrement the reference counter for this trace array. + * + * NOTE: Use this when we no longer need the trace array returned by + * trace_array_get_by_name(). This ensures the trace array can be later + * destroyed. + * + */ void trace_array_put(struct trace_array *this_tr) { + if (!this_tr) + return; + mutex_lock(&trace_types_lock); __trace_array_put(this_tr); mutex_unlock(&trace_types_lock); } +EXPORT_SYMBOL_GPL(trace_array_put); int tracing_check_open_get_tr(struct trace_array *tr) { @@ -8437,24 +8449,15 @@ static void update_tracer_options(struct trace_array *tr) mutex_unlock(&trace_types_lock); } -struct trace_array *trace_array_create(const char *name) +static struct trace_array *trace_array_create(const char *name) { struct trace_array *tr; int ret; - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); - - ret = -EEXIST; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) - goto out_unlock; - } - ret = -ENOMEM; tr = kzalloc(sizeof(*tr), GFP_KERNEL); if (!tr) - goto out_unlock; + return ERR_PTR(ret); tr->name = kstrdup(name, GFP_KERNEL); if (!tr->name) @@ -8499,8 +8502,8 @@ struct trace_array *trace_array_create(const char *name) list_add(&tr->list, &ftrace_trace_arrays); - mutex_unlock(&trace_types_lock); - mutex_unlock(&event_mutex); + tr->ref++; + return tr; @@ -8510,24 +8513,77 @@ struct trace_array *trace_array_create(const char *name) kfree(tr->name); kfree(tr); - out_unlock: - mutex_unlock(&trace_types_lock); - mutex_unlock(&event_mutex); - return ERR_PTR(ret); } -EXPORT_SYMBOL_GPL(trace_array_create); static int instance_mkdir(const char *name) { - return PTR_ERR_OR_ZERO(trace_array_create(name)); + struct trace_array *tr; + int ret; + + mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + ret = -EEXIST; + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr->name && strcmp(tr->name, name) == 0) + goto out_unlock; + } + + tr = trace_array_create(name); + + ret = PTR_ERR_OR_ZERO(tr); + +out_unlock: + mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); + return ret; +} + +/** + * trace_array_get_by_name - Create/Lookup a trace array, given its name. + * @name: The name of the trace array to be looked up/created. + * + * Returns pointer to trace array with given name. + * NULL, if it cannot be created. + * + * NOTE: This function increments the reference counter associated with the + * trace array returned. This makes sure it cannot be freed while in use. + * Use trace_array_put() once the trace array is no longer needed. + * + */ +struct trace_array *trace_array_get_by_name(const char *name) +{ + struct trace_array *tr; + + mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr->name && strcmp(tr->name, name) == 0) + goto out_unlock; + } + + tr = trace_array_create(name); + + if (IS_ERR(tr)) + tr = NULL; +out_unlock: + if (tr) + tr->ref++; + + mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); + return tr; } +EXPORT_SYMBOL_GPL(trace_array_get_by_name); static int __remove_instance(struct trace_array *tr) { int i; - if (tr->ref || (tr->current_trace && tr->current_trace->ref)) + /* Reference counter for a newly created trace array = 1. */ + if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref)) return -EBUSY; list_del(&tr->list); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2df8aed6a8f0..ca7fccafbcbb 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -345,7 +345,6 @@ extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); -extern void trace_array_put(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2a3ac2365445..6b3a69e9aa6a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -827,7 +827,6 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) return ret; } -EXPORT_SYMBOL_GPL(ftrace_set_clr_event); /** * trace_set_clr_event - enable or disable an event @@ -852,6 +851,32 @@ int trace_set_clr_event(const char *system, const char *event, int set) } EXPORT_SYMBOL_GPL(trace_set_clr_event); +/** + * trace_array_set_clr_event - enable or disable an event for a trace array. + * @tr: concerned trace array. + * @system: system name to match (NULL for any system) + * @event: event name to match (NULL for all events, within system) + * @enable: true to enable, false to disable + * + * This is a way for other parts of the kernel to enable or disable + * event recording. + * + * Returns 0 on success, -EINVAL if the parameters do not match any + * registered events. + */ +int trace_array_set_clr_event(struct trace_array *tr, const char *system, + const char *event, bool enable) +{ + int set; + + if (!tr) + return -ENOENT; + + set = (enable == true) ? 1 : 0; + return __ftrace_set_clr_event(tr, NULL, system, event, set); +} +EXPORT_SYMBOL_GPL(trace_array_set_clr_event); + /* 128 should be much more than enough */ #define EVENT_BUF_SIZE 127 -- cgit v1.2.3 From 6c3edaf9fd6a3be7fb5bc6931897c24cd3848f84 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 29 Nov 2019 20:52:18 -0800 Subject: tracing: Introduce trace event injection We have been trying to use rasdaemon to monitor hardware errors like correctable memory errors. rasdaemon uses trace events to monitor various hardware errors. In order to test it, we have to inject some hardware errors, unfortunately not all of them provide error injections. MCE does provide a way to inject MCE errors, but errors like PCI error and devlink error don't, it is not easy to add error injection to each of them. Instead, it is relatively easier to just allow users to inject trace events in a generic way so that all trace events can be injected. This patch introduces trace event injection, where a new 'inject' is added to each tracepoint directory. Users could write into this file with key=value pairs to specify the value of each fields of the trace event, all unspecified fields are set to zero values by default. For example, for the net/net_dev_queue tracepoint, we can inject: INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject echo "" > $INJECT echo "name='test'" > $INJECT echo "name='test' len=1024" > $INJECT cat /sys/kernel/debug/tracing/trace ... <...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0 <...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0 <...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024 Triggers could be triggered as usual too: echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger echo "len=1025" > $INJECT cat /sys/kernel/debug/tracing/trace ... bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0 bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0 bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024 bash-614 [001] .N.1 284.236349: => event_inject_write => vfs_write => ksys_write => do_syscall_64 => entry_SYSCALL_64_after_hwframe The only thing that can't be injected is string pointers as they require constant string pointers, this can't be done at run time. Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com Cc: Ingo Molnar Signed-off-by: Cong Wang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 9 + kernel/trace/Makefile | 1 + kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 6 + kernel/trace/trace_events_inject.c | 331 +++++++++++++++++++++++++++++++++++++ 5 files changed, 348 insertions(+) create mode 100644 kernel/trace/trace_events_inject.c (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index f67620499faa..29a9c5058b62 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -672,6 +672,15 @@ config HIST_TRIGGERS See Documentation/trace/histogram.rst. If in doubt, say N. +config TRACE_EVENT_INJECT + bool "Trace event injection" + depends on TRACING + help + Allow user-space to inject a specific trace event into the ring + buffer. This is mainly used for testing purpose. + + If unsure, say N. + config MMIOTRACE_TEST tristate "Test module for mmiotrace" depends on MMIOTRACE && m diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c2b2148bb1d2..0e63db62225f 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o +obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ca7fccafbcbb..63bf60f79398 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1601,6 +1601,7 @@ extern struct list_head ftrace_events; extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; +extern const struct file_operations event_inject_fops; #ifdef CONFIG_HIST_TRIGGERS extern int register_trigger_hist_cmd(void); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6b3a69e9aa6a..c6de3cebc127 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2044,6 +2044,12 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) trace_create_file("format", 0444, file->dir, call, &ftrace_event_format_fops); +#ifdef CONFIG_TRACE_EVENT_INJECT + if (call->event.type && call->class->reg) + trace_create_file("inject", 0200, file->dir, file, + &event_inject_fops); +#endif + return 0; } diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c new file mode 100644 index 000000000000..d43710718ee5 --- /dev/null +++ b/kernel/trace/trace_events_inject.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * trace_events_inject - trace event injection + * + * Copyright (C) 2019 Cong Wang + */ + +#include +#include +#include +#include +#include + +#include "trace.h" + +static int +trace_inject_entry(struct trace_event_file *file, void *rec, int len) +{ + struct trace_event_buffer fbuffer; + struct ring_buffer *buffer; + int written = 0; + void *entry; + + rcu_read_lock_sched(); + buffer = file->tr->trace_buffer.buffer; + entry = trace_event_buffer_reserve(&fbuffer, file, len); + if (entry) { + memcpy(entry, rec, len); + written = len; + trace_event_buffer_commit(&fbuffer); + } + rcu_read_unlock_sched(); + + return written; +} + +static int +parse_field(char *str, struct trace_event_call *call, + struct ftrace_event_field **pf, u64 *pv) +{ + struct ftrace_event_field *field; + char *field_name; + int s, i = 0; + int len; + u64 val; + + if (!str[i]) + return 0; + /* First find the field to associate to */ + while (isspace(str[i])) + i++; + s = i; + while (isalnum(str[i]) || str[i] == '_') + i++; + len = i - s; + if (!len) + return -EINVAL; + + field_name = kmemdup_nul(str + s, len, GFP_KERNEL); + if (!field_name) + return -ENOMEM; + field = trace_find_event_field(call, field_name); + kfree(field_name); + if (!field) + return -ENOENT; + + *pf = field; + while (isspace(str[i])) + i++; + if (str[i] != '=') + return -EINVAL; + i++; + while (isspace(str[i])) + i++; + s = i; + if (isdigit(str[i]) || str[i] == '-') { + char *num, c; + int ret; + + /* Make sure the field is not a string */ + if (is_string_field(field)) + return -EINVAL; + + if (str[i] == '-') + i++; + + /* We allow 0xDEADBEEF */ + while (isalnum(str[i])) + i++; + num = str + s; + c = str[i]; + if (c != '\0' && !isspace(c)) + return -EINVAL; + str[i] = '\0'; + /* Make sure it is a value */ + if (field->is_signed) + ret = kstrtoll(num, 0, &val); + else + ret = kstrtoull(num, 0, &val); + str[i] = c; + if (ret) + return ret; + + *pv = val; + return i; + } else if (str[i] == '\'' || str[i] == '"') { + char q = str[i]; + + /* Make sure the field is OK for strings */ + if (!is_string_field(field)) + return -EINVAL; + + for (i++; str[i]; i++) { + if (str[i] == '\\' && str[i + 1]) { + i++; + continue; + } + if (str[i] == q) + break; + } + if (!str[i]) + return -EINVAL; + + /* Skip quotes */ + s++; + len = i - s; + if (len >= MAX_FILTER_STR_VAL) + return -EINVAL; + + *pv = (unsigned long)(str + s); + str[i] = 0; + /* go past the last quote */ + i++; + return i; + } + + return -EINVAL; +} + +static int trace_get_entry_size(struct trace_event_call *call) +{ + struct ftrace_event_field *field; + struct list_head *head; + int size = 0; + + head = trace_get_fields(call); + list_for_each_entry(field, head, link) { + if (field->size + field->offset > size) + size = field->size + field->offset; + } + + return size; +} + +static void *trace_alloc_entry(struct trace_event_call *call, int *size) +{ + int entry_size = trace_get_entry_size(call); + struct ftrace_event_field *field; + struct list_head *head; + void *entry = NULL; + + /* We need an extra '\0' at the end. */ + entry = kzalloc(entry_size + 1, GFP_KERNEL); + if (!entry) + return NULL; + + head = trace_get_fields(call); + list_for_each_entry(field, head, link) { + if (!is_string_field(field)) + continue; + if (field->filter_type == FILTER_STATIC_STRING) + continue; + if (field->filter_type == FILTER_DYN_STRING) { + u32 *str_item; + int str_loc = entry_size & 0xffff; + + str_item = (u32 *)(entry + field->offset); + *str_item = str_loc; /* string length is 0. */ + } else { + char **paddr; + + paddr = (char **)(entry + field->offset); + *paddr = ""; + } + } + + *size = entry_size + 1; + return entry; +} + +#define INJECT_STRING "STATIC STRING CAN NOT BE INJECTED" + +/* Caller is responsible to free the *pentry. */ +static int parse_entry(char *str, struct trace_event_call *call, void **pentry) +{ + struct ftrace_event_field *field; + unsigned long irq_flags; + void *entry = NULL; + int entry_size; + u64 val; + int len; + + entry = trace_alloc_entry(call, &entry_size); + *pentry = entry; + if (!entry) + return -ENOMEM; + + local_save_flags(irq_flags); + tracing_generic_entry_update(entry, call->event.type, irq_flags, + preempt_count()); + + while ((len = parse_field(str, call, &field, &val)) > 0) { + if (is_function_field(field)) + return -EINVAL; + + if (is_string_field(field)) { + char *addr = (char *)(unsigned long) val; + + if (field->filter_type == FILTER_STATIC_STRING) { + strlcpy(entry + field->offset, addr, field->size); + } else if (field->filter_type == FILTER_DYN_STRING) { + int str_len = strlen(addr) + 1; + int str_loc = entry_size & 0xffff; + u32 *str_item; + + entry_size += str_len; + *pentry = krealloc(entry, entry_size, GFP_KERNEL); + if (!*pentry) { + kfree(entry); + return -ENOMEM; + } + entry = *pentry; + + strlcpy(entry + (entry_size - str_len), addr, str_len); + str_item = (u32 *)(entry + field->offset); + *str_item = (str_len << 16) | str_loc; + } else { + char **paddr; + + paddr = (char **)(entry + field->offset); + *paddr = INJECT_STRING; + } + } else { + switch (field->size) { + case 1: { + u8 tmp = (u8) val; + + memcpy(entry + field->offset, &tmp, 1); + break; + } + case 2: { + u16 tmp = (u16) val; + + memcpy(entry + field->offset, &tmp, 2); + break; + } + case 4: { + u32 tmp = (u32) val; + + memcpy(entry + field->offset, &tmp, 4); + break; + } + case 8: + memcpy(entry + field->offset, &val, 8); + break; + default: + return -EINVAL; + } + } + + str += len; + } + + if (len < 0) + return len; + + return entry_size; +} + +static ssize_t +event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct trace_event_call *call; + struct trace_event_file *file; + int err = -ENODEV, size; + void *entry = NULL; + char *buf; + + if (cnt >= PAGE_SIZE) + return -EINVAL; + + buf = memdup_user_nul(ubuf, cnt); + if (IS_ERR(buf)) + return PTR_ERR(buf); + strim(buf); + + mutex_lock(&event_mutex); + file = event_file_data(filp); + if (file) { + call = file->event_call; + size = parse_entry(buf, call, &entry); + if (size < 0) + err = size; + else + err = trace_inject_entry(file, entry, size); + } + mutex_unlock(&event_mutex); + + kfree(entry); + kfree(buf); + + if (err < 0) + return err; + + *ppos += err; + return cnt; +} + +static ssize_t +event_inject_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos) +{ + return -EPERM; +} + +const struct file_operations event_inject_fops = { + .open = tracing_open_generic, + .read = event_inject_read, + .write = event_inject_write, +}; -- cgit v1.2.3