From ac38fb8582d86ba887b5d07c0912dec135bf6931 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 24 Oct 2013 08:59:30 -0500 Subject: tracing: Add documentation for trace event triggers Provide a basic overview of trace event triggers and document the available trigger commands, along with a few simple examples. Link: http://lkml.kernel.org/r/2595dd9196d7b553049611f2a3f849ca75d650a2.1382622043.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt --- Documentation/trace/events.txt | 207 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) (limited to 'Documentation') diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 37732a220d33..c94435df2037 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -287,3 +287,210 @@ their old filters): prev_pid == 0 # cat sched_wakeup/filter common_pid == 0 + +6. Event triggers +================= + +Trace events can be made to conditionally invoke trigger 'commands' +which can take various forms and are described in detail below; +examples would be enabling or disabling other trace events or invoking +a stack trace whenever the trace event is hit. Whenever a trace event +with attached triggers is invoked, the set of trigger commands +associated with that event is invoked. Any given trigger can +additionally have an event filter of the same form as described in +section 5 (Event filtering) associated with it - the command will only +be invoked if the event being invoked passes the associated filter. +If no filter is associated with the trigger, it always passes. + +Triggers are added to and removed from a particular event by writing +trigger expressions to the 'trigger' file for the given event. + +A given event can have any number of triggers associated with it, +subject to any restrictions that individual commands may have in that +regard. + +Event triggers are implemented on top of "soft" mode, which means that +whenever a trace event has one or more triggers associated with it, +the event is activated even if it isn't actually enabled, but is +disabled in a "soft" mode. That is, the tracepoint will be called, +but just will not be traced, unless of course it's actually enabled. +This scheme allows triggers to be invoked even for events that aren't +enabled, and also allows the current event filter implementation to be +used for conditionally invoking triggers. + +The syntax for event triggers is roughly based on the syntax for +set_ftrace_filter 'ftrace filter commands' (see the 'Filter commands' +section of Documentation/trace/ftrace.txt), but there are major +differences and the implementation isn't currently tied to it in any +way, so beware about making generalizations between the two. + +6.1 Expression syntax +--------------------- + +Triggers are added by echoing the command to the 'trigger' file: + + # echo 'command[:count] [if filter]' > trigger + +Triggers are removed by echoing the same command but starting with '!' +to the 'trigger' file: + + # echo '!command[:count] [if filter]' > trigger + +The [if filter] part isn't used in matching commands when removing, so +leaving that off in a '!' command will accomplish the same thing as +having it in. + +The filter syntax is the same as that described in the 'Event +filtering' section above. + +For ease of use, writing to the trigger file using '>' currently just +adds or removes a single trigger and there's no explicit '>>' support +('>' actually behaves like '>>') or truncation support to remove all +triggers (you have to use '!' for each one added.) + +6.2 Supported trigger commands +------------------------------ + +The following commands are supported: + +- enable_event/disable_event + + These commands can enable or disable another trace event whenever + the triggering event is hit. When these commands are registered, + the other trace event is activated, but disabled in a "soft" mode. + That is, the tracepoint will be called, but just will not be traced. + The event tracepoint stays in this mode as long as there's a trigger + in effect that can trigger it. + + For example, the following trigger causes kmalloc events to be + traced when a read system call is entered, and the :1 at the end + specifies that this enablement happens only once: + + # echo 'enable_event:kmem:kmalloc:1' > \ + /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger + + The following trigger causes kmalloc events to stop being traced + when a read system call exits. This disablement happens on every + read system call exit: + + # echo 'disable_event:kmem:kmalloc' > \ + /sys/kernel/debug/tracing/events/syscalls/sys_exit_read/trigger + + The format is: + + enable_event::[:count] + disable_event::[:count] + + To remove the above commands: + + # echo '!enable_event:kmem:kmalloc:1' > \ + /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger + + # echo '!disable_event:kmem:kmalloc' > \ + /sys/kernel/debug/tracing/events/syscalls/sys_exit_read/trigger + + Note that there can be any number of enable/disable_event triggers + per triggering event, but there can only be one trigger per + triggered event. e.g. sys_enter_read can have triggers enabling both + kmem:kmalloc and sched:sched_switch, but can't have two kmem:kmalloc + versions such as kmem:kmalloc and kmem:kmalloc:1 or 'kmem:kmalloc if + bytes_req == 256' and 'kmem:kmalloc if bytes_alloc == 256' (they + could be combined into a single filter on kmem:kmalloc though). + +- stacktrace + + This command dumps a stacktrace in the trace buffer whenever the + triggering event occurs. + + For example, the following trigger dumps a stacktrace every time the + kmalloc tracepoint is hit: + + # echo 'stacktrace' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + The following trigger dumps a stacktrace the first 5 times a kmalloc + request happens with a size >= 64K + + # echo 'stacktrace:5 if bytes_req >= 65536' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + The format is: + + stacktrace[:count] + + To remove the above commands: + + # echo '!stacktrace' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + # echo '!stacktrace:5 if bytes_req >= 65536' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + The latter can also be removed more simply by the following (without + the filter): + + # echo '!stacktrace:5' > \ + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger + + Note that there can be only one stacktrace trigger per triggering + event. + +- snapshot + + This command causes a snapshot to be triggered whenever the + triggering event occurs. + + The following command creates a snapshot every time a block request + queue is unplugged with a depth > 1. If you were tracing a set of + events or functions at the time, the snapshot trace buffer would + capture those events when the trigger event occured: + + # echo 'snapshot if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + To only snapshot once: + + # echo 'snapshot:1 if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + To remove the above commands: + + # echo '!snapshot if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + # echo '!snapshot:1 if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + Note that there can be only one snapshot trigger per triggering + event. + +- traceon/traceoff + + These commands turn tracing on and off when the specified events are + hit. The parameter determines how many times the tracing system is + turned on and off. If unspecified, there is no limit. + + The following command turns tracing off the first time a block + request queue is unplugged with a depth > 1. If you were tracing a + set of events or functions at the time, you could then examine the + trace buffer to see the sequence of events that led up to the + trigger event: + + # echo 'traceoff:1 if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + To always disable tracing when nr_rq > 1 : + + # echo 'traceoff if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + To remove the above commands: + + # echo '!traceoff:1 if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + # echo '!traceoff if nr_rq > 1' > \ + /sys/kernel/debug/tracing/events/block/block_unplug/trigger + + Note that there can be only one traceon or traceoff trigger per + triggering event. -- cgit v1.2.3 From 306cfe2025adcba10fb883ad0c540f5541d1b086 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 3 Jul 2013 16:44:46 +0900 Subject: tracing/uprobes: Fix documentation of uprobe registration syntax The uprobe syntax requires an offset after a file path not a symbol. Reviewed-by: Masami Hiramatsu Acked-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: zhangwei(Jovi) Cc: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim --- Documentation/trace/uprobetracer.txt | 10 +++++----- kernel/trace/trace_uprobe.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index d9c3e682312c..8f1a8b8956fc 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -19,15 +19,15 @@ user to calculate the offset of the probepoint in the object. Synopsis of uprobe_tracer ------------------------- - p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a uprobe - r[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a return uprobe (uretprobe) - -:[GRP/]EVENT : Clear uprobe or uretprobe event + p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a uprobe + r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe) + -:[GRP/]EVENT : Clear uprobe or uretprobe event GRP : Group name. If omitted, "uprobes" is the default value. EVENT : Event name. If omitted, the event name is generated based - on SYMBOL+offs. + on PATH+OFFSET. PATH : Path to an executable or a library. - SYMBOL[+offs] : Symbol+offset where the probe is inserted. + OFFSET : Offset where the probe is inserted. FETCHARGS : Arguments. Each probe can have up to 128 args. %REG : Fetch register REG diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index b6dcc42ef7f5..c77b92d61551 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -211,7 +211,7 @@ end: /* * Argument syntax: - * - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS] + * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] * * - Remove uprobe: -:[GRP/]EVENT */ -- cgit v1.2.3 From b079d374fd84637aba4b825a329e794990b7b486 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 3 Jul 2013 18:34:23 +0900 Subject: tracing/uprobes: Add support for full argument access methods Enable to fetch other types of argument for the uprobes. IOW, we can access stack, memory, deref, bitfield and retval from uprobes now. The format for the argument types are same as kprobes (but @SYMBOL type is not supported for uprobes), i.e: @ADDR : Fetch memory at ADDR $stackN : Fetch Nth entry of stack (N >= 0) $stack : Fetch stack address $retval : Fetch return value +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address Note that the retval only can be used with uretprobes. Original-patch-by: Hyeoncheol Lee Acked-by: Masami Hiramatsu Acked-by: Oleg Nesterov Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: zhangwei(Jovi) Cc: Arnaldo Carvalho de Melo Signed-off-by: Hyeoncheol Lee Signed-off-by: Namhyung Kim --- Documentation/trace/uprobetracer.txt | 25 +++++++++++++++++++++++++ kernel/trace/trace_probe.c | 34 ++++++++++++++++++++++------------ 2 files changed, 47 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index 8f1a8b8956fc..6e5cff263e2b 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -31,6 +31,31 @@ Synopsis of uprobe_tracer FETCHARGS : Arguments. Each probe can have up to 128 args. %REG : Fetch register REG + @ADDR : Fetch memory at ADDR (ADDR should be in userspace) + $stackN : Fetch Nth entry of stack (N >= 0) + $stack : Fetch stack address. + $retval : Fetch return value.(*) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) + NAME=FETCHARG : Set NAME as the argument name of FETCHARG. + FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types + (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield + are supported. + + (*) only for return probe. + (**) this is useful for fetching a field of data structures. + +Types +----- +Several types are supported for fetch-args. Uprobe tracer will access memory +by given type. Prefix 's' and 'u' means those types are signed and unsigned +respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). +String type is a special type, which fetches a "null-terminated" string from +user space. +Bitfield is another special type, which takes 3 parameters, bit-width, bit- +offset, and container-size (usually 32). The syntax is; + + b@/ + Event Profiling --------------- diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 8f7a2b6d389d..a130d612e705 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -255,12 +255,18 @@ fail: } /* Special function : only accept unsigned long */ -static __kprobes void fetch_stack_address(struct pt_regs *regs, - void *dummy, void *dest) +static __kprobes void fetch_kernel_stack_address(struct pt_regs *regs, + void *dummy, void *dest) { *(unsigned long *)dest = kernel_stack_pointer(regs); } +static __kprobes void fetch_user_stack_address(struct pt_regs *regs, + void *dummy, void *dest) +{ + *(unsigned long *)dest = user_stack_pointer(regs); +} + static fetch_func_t get_fetch_size_function(const struct fetch_type *type, fetch_func_t orig_fn, const struct fetch_type *ftbl) @@ -305,7 +311,8 @@ int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset) #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_param *f, bool is_return) + struct fetch_param *f, bool is_return, + bool is_kprobe) { int ret = 0; unsigned long param; @@ -317,13 +324,16 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, ret = -EINVAL; } else if (strncmp(arg, "stack", 5) == 0) { if (arg[5] == '\0') { - if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0) - f->fn = fetch_stack_address; + if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR)) + return -EINVAL; + + if (is_kprobe) + f->fn = fetch_kernel_stack_address; else - ret = -EINVAL; + f->fn = fetch_user_stack_address; } else if (isdigit(arg[5])) { ret = kstrtoul(arg + 5, 10, ¶m); - if (ret || param > PARAM_MAX_STACK) + if (ret || (is_kprobe && param > PARAM_MAX_STACK)) ret = -EINVAL; else { f->fn = t->fetch[FETCH_MTD_stack]; @@ -350,13 +360,9 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table; BUG_ON(ftbl == NULL); - /* Until uprobe_events supports only reg arguments */ - if (!is_kprobe && arg[0] != '%') - return -EINVAL; - switch (arg[0]) { case '$': - ret = parse_probe_vars(arg + 1, t, f, is_return); + ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe); break; case '%': /* named register */ @@ -377,6 +383,10 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, f->fn = t->fetch[FETCH_MTD_memory]; f->data = (void *)param; } else { + /* uprobes don't support symbols */ + if (!is_kprobe) + return -EINVAL; + ret = traceprobe_split_symbol_offset(arg + 1, &offset); if (ret) break; -- cgit v1.2.3 From b7e0bf341f6cfa92ae0a0e3d0c3496729595e1e9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 25 Nov 2013 13:42:47 +0900 Subject: tracing/uprobes: Add @+file_offset fetch method Enable to fetch data from a file offset. Currently it only supports fetching from same binary uprobe set. It'll translate the file offset to a proper virtual address in the process. The syntax is "@+OFFSET" as it does similar to normal memory fetching (@ADDR) which does no address translation. Suggested-by: Oleg Nesterov Acked-by: Masami Hiramatsu Acked-by: Oleg Nesterov Cc: Srikar Dronamraju Cc: zhangwei(Jovi) Cc: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim --- Documentation/trace/uprobetracer.txt | 1 + kernel/trace/trace_kprobe.c | 8 ++++++++ kernel/trace/trace_probe.c | 13 +++++++++++- kernel/trace/trace_probe.h | 2 ++ kernel/trace/trace_uprobe.c | 40 ++++++++++++++++++++++++++++++++++++ 5 files changed, 63 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index 6e5cff263e2b..f1cf9a34ad9d 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -32,6 +32,7 @@ Synopsis of uprobe_tracer FETCHARGS : Arguments. Each probe can have up to 128 args. %REG : Fetch register REG @ADDR : Fetch memory at ADDR (ADDR should be in userspace) + @+OFFSET : Fetch memory at OFFSET (OFFSET from same file as PATH) $stackN : Fetch Nth entry of stack (N >= 0) $stack : Fetch stack address. $retval : Fetch return value.(*) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f94a56915e69..ce0ed8afb77e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -239,6 +239,14 @@ DEFINE_BASIC_FETCH_FUNCS(symbol) DEFINE_FETCH_symbol(string) DEFINE_FETCH_symbol(string_size) +/* kprobes don't support file_offset fetch methods */ +#define fetch_file_offset_u8 NULL +#define fetch_file_offset_u16 NULL +#define fetch_file_offset_u32 NULL +#define fetch_file_offset_u64 NULL +#define fetch_file_offset_string NULL +#define fetch_file_offset_string_size NULL + /* Fetch type information table */ const struct fetch_type kprobes_fetch_type_table[] = { /* Special types */ diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index a130d612e705..8364a421b4df 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -374,7 +374,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, } break; - case '@': /* memory or symbol */ + case '@': /* memory, file-offset or symbol */ if (isdigit(arg[1])) { ret = kstrtoul(arg + 1, 0, ¶m); if (ret) @@ -382,6 +382,17 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t, f->fn = t->fetch[FETCH_MTD_memory]; f->data = (void *)param; + } else if (arg[1] == '+') { + /* kprobes don't support file offsets */ + if (is_kprobe) + return -EINVAL; + + ret = kstrtol(arg + 2, 0, &offset); + if (ret) + break; + + f->fn = t->fetch[FETCH_MTD_file_offset]; + f->data = (void *)offset; } else { /* uprobes don't support symbols */ if (!is_kprobe) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2d5b8f5f5310..e29d743fef5d 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -106,6 +106,7 @@ enum { FETCH_MTD_symbol, FETCH_MTD_deref, FETCH_MTD_bitfield, + FETCH_MTD_file_offset, FETCH_MTD_END, }; @@ -217,6 +218,7 @@ ASSIGN_FETCH_FUNC(memory, ftype), \ ASSIGN_FETCH_FUNC(symbol, ftype), \ ASSIGN_FETCH_FUNC(deref, ftype), \ ASSIGN_FETCH_FUNC(bitfield, ftype), \ +ASSIGN_FETCH_FUNC(file_offset, ftype), \ } \ } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 794e8bc171f3..1fdea6d3f851 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -70,6 +70,11 @@ static int unregister_uprobe_event(struct trace_uprobe *tu); static DEFINE_MUTEX(uprobe_lock); static LIST_HEAD(uprobe_list); +struct uprobe_dispatch_data { + struct trace_uprobe *tu; + unsigned long bp_addr; +}; + static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); static int uretprobe_dispatcher(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs); @@ -175,6 +180,29 @@ static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, #define fetch_symbol_string NULL #define fetch_symbol_string_size NULL +static unsigned long translate_user_vaddr(void *file_offset) +{ + unsigned long base_addr; + struct uprobe_dispatch_data *udd; + + udd = (void *) current->utask->vaddr; + + base_addr = udd->bp_addr - udd->tu->offset; + return base_addr + (unsigned long)file_offset; +} + +#define DEFINE_FETCH_file_offset(type) \ +static __kprobes void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,\ + void *offset, void *dest) \ +{ \ + void *vaddr = (void *)translate_user_vaddr(offset); \ + \ + FETCH_FUNC_NAME(memory, type)(regs, vaddr, dest); \ +} +DEFINE_BASIC_FETCH_FUNCS(file_offset) +DEFINE_FETCH_file_offset(string) +DEFINE_FETCH_file_offset(string_size) + /* Fetch type information table */ const struct fetch_type uprobes_fetch_type_table[] = { /* Special types */ @@ -1106,11 +1134,17 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) { struct trace_uprobe *tu; + struct uprobe_dispatch_data udd; int ret = 0; tu = container_of(con, struct trace_uprobe, consumer); tu->nhit++; + udd.tu = tu; + udd.bp_addr = instruction_pointer(regs); + + current->utask->vaddr = (unsigned long) &udd; + if (tu->tp.flags & TP_FLAG_TRACE) ret |= uprobe_trace_func(tu, regs); @@ -1125,9 +1159,15 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) { struct trace_uprobe *tu; + struct uprobe_dispatch_data udd; tu = container_of(con, struct trace_uprobe, consumer); + udd.tu = tu; + udd.bp_addr = func; + + current->utask->vaddr = (unsigned long) &udd; + if (tu->tp.flags & TP_FLAG_TRACE) uretprobe_trace_func(tu, func, regs); -- cgit v1.2.3