diff options
40 files changed, 3083 insertions, 628 deletions
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index 87cf5c010d5d..62c98e9bbdd9 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -1159,6 +1159,12 @@ Here are the available options: This simulates the original behavior of the trace file. When the file is closed, tracing will be enabled again. + hash-ptr + When set, "%p" in the event printk format displays the + hashed pointer value instead of real address. + This will be useful if you want to find out which hashed + value is corresponding to the real value in trace log. + record-cmd When any event or tracer is enabled, a hook is enabled in the sched_switch trace point to fill comm cache diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9a8ce28e4485..86e5028bfa20 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -485,7 +485,6 @@ struct dyn_ftrace { struct dyn_arch_ftrace arch; }; -int ftrace_force_update(void); int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, @@ -740,7 +739,6 @@ extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else /* CONFIG_DYNAMIC_FTRACE */ static inline int skip_trace(unsigned long ip) { return 0; } -static inline int ftrace_force_update(void) { return 0; } static inline void ftrace_disable_daemon(void) { } static inline void ftrace_enable_daemon(void) { } static inline void ftrace_module_init(struct module *mod) { } diff --git a/include/linux/trace.h b/include/linux/trace.h index 886a4ffd9d45..be1e130ed87c 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -34,8 +34,9 @@ int unregister_ftrace_export(struct trace_export *export); struct trace_array; void trace_printk_init_buffers(void); +__printf(3, 4) int trace_array_printk(struct trace_array *tr, unsigned long ip, - const char *fmt, ...); + const char *fmt, ...); int trace_array_init_printk(struct trace_array *tr); void trace_array_put(struct trace_array *tr); struct trace_array *trace_array_get_by_name(const char *name); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index d321fe5ad1a1..7077fec653bb 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -55,6 +55,8 @@ struct trace_event; int trace_raw_output_prep(struct trace_iterator *iter, struct trace_event *event); +extern __printf(2, 3) +void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...); /* * The trace entry - the most basic unit of tracing. This is what @@ -87,6 +89,8 @@ struct trace_iterator { unsigned long iter_flags; void *temp; /* temp holder */ unsigned int temp_size; + char *fmt; /* modified format holder */ + unsigned int fmt_size; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; @@ -148,17 +152,75 @@ enum print_line_t { enum print_line_t trace_handle_return(struct trace_seq *s); -void tracing_generic_entry_update(struct trace_entry *entry, - unsigned short type, - unsigned long flags, - int pc); +static inline void tracing_generic_entry_update(struct trace_entry *entry, + unsigned short type, + unsigned int trace_ctx) +{ + entry->preempt_count = trace_ctx & 0xff; + entry->pid = current->pid; + entry->type = type; + entry->flags = trace_ctx >> 16; +} + +unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); + +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_IRQS_NOSUPPORT = 0x02, + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, + TRACE_FLAG_PREEMPT_RESCHED = 0x20, + TRACE_FLAG_NMI = 0x40, +}; + +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) +{ + unsigned int irq_status = irqs_disabled_flags(irqflags) ? + TRACE_FLAG_IRQS_OFF : 0; + return tracing_gen_ctx_irq_test(irq_status); +} +static inline unsigned int tracing_gen_ctx(void) +{ + unsigned long irqflags; + + local_save_flags(irqflags); + return tracing_gen_ctx_flags(irqflags); +} +#else + +static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) +{ + return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); +} +static inline unsigned int tracing_gen_ctx(void) +{ + return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); +} +#endif + +static inline unsigned int tracing_gen_ctx_dec(void) +{ + unsigned int trace_ctx; + + trace_ctx = tracing_gen_ctx(); + /* + * Subtract one from the preeption counter if preemption is enabled, + * see trace_event_buffer_reserve()for details. + */ + if (IS_ENABLED(CONFIG_PREEMPTION)) + trace_ctx--; + return trace_ctx; +} + struct trace_event_file; struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_buffer, struct trace_event_file *trace_file, int type, unsigned long len, - unsigned long flags, int pc); + unsigned int trace_ctx); #define TRACE_RECORD_CMDLINE BIT(0) #define TRACE_RECORD_TGID BIT(1) @@ -232,8 +294,7 @@ struct trace_event_buffer { struct ring_buffer_event *event; struct trace_event_file *trace_file; void *entry; - unsigned long flags; - int pc; + unsigned int trace_ctx; struct pt_regs *regs; }; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 966ed8980327..9cfb099da58f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -152,25 +152,28 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #ifdef TRACEPOINTS_ENABLED #ifdef CONFIG_HAVE_STATIC_CALL -#define __DO_TRACE_CALL(name) static_call(tp_func_##name) +#define __DO_TRACE_CALL(name, args) \ + do { \ + struct tracepoint_func *it_func_ptr; \ + void *__data; \ + it_func_ptr = \ + rcu_dereference_raw((&__tracepoint_##name)->funcs); \ + if (it_func_ptr) { \ + __data = (it_func_ptr)->data; \ + static_call(tp_func_##name)(__data, args); \ + } \ + } while (0) #else -#define __DO_TRACE_CALL(name) __traceiter_##name +#define __DO_TRACE_CALL(name, args) __traceiter_##name(NULL, args) #endif /* CONFIG_HAVE_STATIC_CALL */ /* * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. - * - * Note, the proto and args passed in includes "__data" as the first parameter. - * The reason for this is to handle the "void" prototype. If a tracepoint - * has a "void" prototype, then it is invalid to declare a function - * as "(void *, void)". */ -#define __DO_TRACE(name, proto, args, cond, rcuidle) \ +#define __DO_TRACE(name, args, cond, rcuidle) \ do { \ - struct tracepoint_func *it_func_ptr; \ int __maybe_unused __idx = 0; \ - void *__data; \ \ if (!(cond)) \ return; \ @@ -190,12 +193,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) rcu_irq_enter_irqson(); \ } \ \ - it_func_ptr = \ - rcu_dereference_raw((&__tracepoint_##name)->funcs); \ - if (it_func_ptr) { \ - __data = (it_func_ptr)->data; \ - __DO_TRACE_CALL(name)(args); \ - } \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ \ if (rcuidle) { \ rcu_irq_exit_irqson(); \ @@ -206,17 +204,16 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } while (0) #ifndef MODULE -#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \ +#define __DECLARE_TRACE_RCU(name, proto, args, cond) \ static inline void trace_##name##_rcuidle(proto) \ { \ if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ - TP_PROTO(data_proto), \ - TP_ARGS(data_args), \ + TP_ARGS(args), \ TP_CONDITION(cond), 1); \ } #else -#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) +#define __DECLARE_TRACE_RCU(name, proto, args, cond) #endif /* @@ -231,7 +228,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * even when this tracepoint is off. This code has no purpose other than * poking RCU a bit. */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ extern int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ @@ -239,8 +236,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) { \ if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(name, \ - TP_PROTO(data_proto), \ - TP_ARGS(data_args), \ + TP_ARGS(args), \ TP_CONDITION(cond), 0); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ rcu_read_lock_sched_notrace(); \ @@ -249,7 +245,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ - PARAMS(cond), PARAMS(data_proto), PARAMS(data_args)) \ + PARAMS(cond)) \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ @@ -309,7 +305,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ if (it_func_ptr) { \ do { \ - it_func = (it_func_ptr)->func; \ + it_func = READ_ONCE((it_func_ptr)->func); \ __data = (it_func_ptr)->data; \ ((void(*)(void *, proto))(it_func))(__data, args); \ } while ((++it_func_ptr)->func); \ @@ -332,7 +328,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #else /* !TRACEPOINTS_ENABLED */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ static inline void trace_##name(proto) \ { } \ static inline void trace_##name##_rcuidle(proto) \ @@ -412,14 +408,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define DECLARE_TRACE(name, proto, args) \ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()), \ - PARAMS(void *__data, proto), \ - PARAMS(__data, args)) + PARAMS(void *__data, proto)) #define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ - PARAMS(void *__data, proto), \ - PARAMS(__data, args)) + PARAMS(void *__data, proto)) #define TRACE_EVENT_FLAGS(event, flag) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 7785961d82ba..8268bf747d6f 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -231,9 +231,11 @@ TRACE_MAKE_SYSTEM_STR(); * { * struct trace_seq *s = &iter->seq; * struct trace_event_raw_<call> *field; <-- defined in stage 1 - * struct trace_entry *entry; * struct trace_seq *p = &iter->tmp_seq; - * int ret; + * + * -------(for event)------- + * + * struct trace_entry *entry; * * entry = iter->ent; * @@ -245,14 +247,23 @@ TRACE_MAKE_SYSTEM_STR(); * field = (typeof(field))entry; * * trace_seq_init(p); - * ret = trace_seq_printf(s, "%s: ", <call>); - * if (ret) - * ret = trace_seq_printf(s, <TP_printk> "\n"); - * if (!ret) - * return TRACE_TYPE_PARTIAL_LINE; + * return trace_output_call(iter, <call>, <TP_printk> "\n"); * - * return TRACE_TYPE_HANDLED; - * } + * ------(or, for event class)------ + * + * int ret; + * + * field = (typeof(field))iter->ent; + * + * ret = trace_raw_output_prep(iter, trace_event); + * if (ret != TRACE_TYPE_HANDLED) + * return ret; + * + * trace_event_printf(iter, <TP_printk> "\n"); + * + * return trace_handle_return(s); + * ------- + * } * * This is the method used to print the raw event to the trace * output format. Note, this is not needed if the data is read @@ -364,7 +375,7 @@ trace_raw_output_##call(struct trace_iterator *iter, int flags, \ if (ret != TRACE_TYPE_HANDLED) \ return ret; \ \ - trace_seq_printf(s, print); \ + trace_event_printf(iter, print); \ \ return trace_handle_return(s); \ } \ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d5a3eb74a657..745f08fdd7a6 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -861,7 +861,6 @@ out: cpus_read_unlock(); } -#ifdef CONFIG_SYSCTL static void optimize_all_kprobes(void) { struct hlist_head *head; @@ -887,6 +886,7 @@ out: mutex_unlock(&kprobe_mutex); } +#ifdef CONFIG_SYSCTL static void unoptimize_all_kprobes(void) { struct hlist_head *head; @@ -1520,13 +1520,16 @@ valid: return ap; } -/* Return error if the kprobe is being re-registered */ -static inline int check_kprobe_rereg(struct kprobe *p) +/* + * Warn and return error if the kprobe is being re-registered since + * there must be a software bug. + */ +static inline int warn_kprobe_rereg(struct kprobe *p) { int ret = 0; mutex_lock(&kprobe_mutex); - if (__get_valid_kprobe(p)) + if (WARN_ON_ONCE(__get_valid_kprobe(p))) ret = -EINVAL; mutex_unlock(&kprobe_mutex); @@ -1614,7 +1617,7 @@ int register_kprobe(struct kprobe *p) return PTR_ERR(addr); p->addr = addr; - ret = check_kprobe_rereg(p); + ret = warn_kprobe_rereg(p); if (ret) return ret; @@ -1995,7 +1998,7 @@ int register_kretprobe(struct kretprobe *rp) return ret; /* If only rp->kp.addr is specified, check reregistering kprobes */ - if (rp->kp.addr && check_kprobe_rereg(&rp->kp)) + if (rp->kp.addr && warn_kprobe_rereg(&rp->kp)) return -EINVAL; if (kretprobe_blacklist_size) { @@ -2497,18 +2500,14 @@ static int __init init_kprobes(void) } } -#if defined(CONFIG_OPTPROBES) -#if defined(__ARCH_WANT_KPROBES_INSN_SLOT) - /* Init kprobe_optinsn_slots */ - kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; -#endif - /* By default, kprobes can be optimized */ - kprobes_allow_optimization = true; -#endif - /* By default, kprobes are armed */ kprobes_all_disarmed = false; +#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) + /* Init kprobe_optinsn_slots for allocation */ + kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; +#endif + err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); @@ -2523,6 +2522,21 @@ static int __init init_kprobes(void) } early_initcall(init_kprobes); +#if defined(CONFIG_OPTPROBES) +static int __init init_optprobes(void) +{ + /* + * Enable kprobe optimization - this kicks the optimizer which + * depends on synchronize_rcu_tasks() and ksoftirqd, that is + * not spawned in early initcall. So delay the optimization. + */ + optimize_all_kprobes(); + + return 0; +} +subsys_initcall(init_optprobes); +#endif + #ifdef CONFIG_DEBUG_FS static void report_probe(struct seq_file *pi, struct kprobe *p, const char *sym, int offset, char *modname, struct kprobe *pp) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index c1a62ae7e812..799dbcfe65ad 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -545,7 +545,7 @@ config KPROBE_EVENTS_ON_NOTRACE using kprobe events. If kprobes can use ftrace instead of breakpoint, ftrace related - functions are protected from kprobe-events to prevent an infinit + functions are protected from kprobe-events to prevent an infinite recursion or any unexpected execution path which leads to a kernel crash. @@ -886,6 +886,10 @@ config PREEMPTIRQ_DELAY_TEST irq-disabled critical sections for 500us: modprobe preemptirq_delay_test test_mode=irq delay=500 burst_size=3 + What's more, if you want to attach the test on the cpu which the latency + tracer is running on, specify cpu_affinity=cpu_num at the end of the + command. + If unsure, say N config SYNTH_EVENT_GEN_TEST diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 9e9ee4945043..c286c13bd31a 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -72,17 +72,17 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, struct blk_io_trace *t; struct ring_buffer_event *event = NULL; struct trace_buffer *buffer = NULL; - int pc = 0; + unsigned int trace_ctx = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; if (blk_tracer) { buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -107,7 +107,7 @@ record_it: memcpy((void *) t + sizeof(*t) + cgid_len, data, len); if (blk_tracer) - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); } } @@ -222,8 +222,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, struct blk_io_trace *t; unsigned long flags = 0; unsigned long *sequence; + unsigned int trace_ctx = 0; pid_t pid; - int cpu, pc = 0; + int cpu; bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; @@ -252,10 +253,10 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, tracing_record_cmdline(current); buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -301,7 +302,7 @@ record_it: memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); if (blk_tracer) { - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); return; } } diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 312d1a0ca3b6..8c4ffd076162 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -21,13 +21,16 @@ static ulong delay = 100; static char test_mode[12] = "irq"; static uint burst_size = 1; +static int cpu_affinity = -1; module_param_named(delay, delay, ulong, 0444); module_param_string(test_mode, test_mode, 12, 0444); module_param_named(burst_size, burst_size, uint, 0444); +module_param_named(cpu_affinity, cpu_affinity, int, 0444); MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)"); MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)"); MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)"); +MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on"); static struct completion done; @@ -36,7 +39,9 @@ static struct completion done; static void busy_wait(ulong time) { u64 start, end; + start = trace_clock_local(); + do { end = trace_clock_local(); if (kthread_should_stop()) @@ -47,6 +52,7 @@ static void busy_wait(ulong time) static __always_inline void irqoff_test(void) { unsigned long flags; + local_irq_save(flags); busy_wait(delay); local_irq_restore(flags); @@ -113,6 +119,14 @@ static int preemptirq_delay_run(void *data) { int i; int s = MIN(burst_size, NR_TEST_FUNCS); + struct cpumask cpu_mask; + + if (cpu_affinity > -1) { + cpumask_clear(&cpu_mask); + cpumask_set_cpu(cpu_affinity, &cpu_mask); + if (set_cpus_allowed_ptr(current, &cpu_mask)) + pr_err("cpu_affinity:%d, failed\n", cpu_affinity); + } for (i = 0; i < s; i++) (testfuncs[i])(i); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ec08f948dd80..b9dad3500041 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1112,8 +1112,7 @@ static struct list_head *rb_list_head(struct list_head *list) * its flags will be non zero. */ static inline int -rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page *page, struct list_head *list) +rb_is_head_page(struct buffer_page *page, struct list_head *list) { unsigned long val; @@ -1142,8 +1141,7 @@ static bool rb_is_reader_page(struct buffer_page *page) /* * rb_set_list_to_head - set a list_head to be pointing to head. */ -static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, - struct list_head *list) +static void rb_set_list_to_head(struct list_head *list) { unsigned long *ptr; @@ -1166,7 +1164,7 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) /* * Set the previous list pointer to have the HEAD flag. */ - rb_set_list_to_head(cpu_buffer, head->list.prev); + rb_set_list_to_head(head->list.prev); } static void rb_list_head_clear(struct list_head *list) @@ -1241,8 +1239,7 @@ static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, old_flag, RB_PAGE_NORMAL); } -static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page **bpage) +static inline void rb_inc_page(struct buffer_page **bpage) { struct list_head *p = rb_list_head((*bpage)->list.next); @@ -1274,11 +1271,11 @@ rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) */ for (i = 0; i < 3; i++) { do { - if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { + if (rb_is_head_page(page, page->list.prev)) { cpu_buffer->head_page = page; return page; } - rb_inc_page(cpu_buffer, &page); + rb_inc_page(&page); } while (page != head); } @@ -1824,7 +1821,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) cond_resched(); to_remove_page = tmp_iter_page; - rb_inc_page(cpu_buffer, &tmp_iter_page); + rb_inc_page(&tmp_iter_page); /* update the counters */ page_entries = rb_page_entries(to_remove_page); @@ -2062,10 +2059,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, put_online_cpus(); } else { - /* Make sure this CPU has been initialized */ - if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) - goto out; - cpu_buffer = buffer->buffers[cpu_id]; if (nr_pages == cpu_buffer->nr_pages) @@ -2271,7 +2264,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) if (iter->head_page == cpu_buffer->reader_page) iter->head_page = rb_set_head_page(cpu_buffer); else - rb_inc_page(cpu_buffer, &iter->head_page); + rb_inc_page(&iter->head_page); iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; @@ -2374,7 +2367,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * want the outer most commit to reset it. */ new_head = next_page; - rb_inc_page(cpu_buffer, &new_head); + rb_inc_page(&new_head); ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, RB_PAGE_NORMAL); @@ -2526,7 +2519,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, next_page = tail_page; - rb_inc_page(cpu_buffer, &next_page); + rb_inc_page(&next_page); /* * If for some reason, we had an interrupt storm that made @@ -2552,7 +2545,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * the buffer, unless the commit page is still on the * reader page. */ - if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { + if (rb_is_head_page(next_page, &tail_page->list)) { /* * If the commit is not on the reader page, then @@ -2583,7 +2576,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * have filled up the buffer with events * from interrupts and such, and wrapped. * - * Note, if the tail page is also the on the + * Note, if the tail page is also on the * reader_page, we let it move out. */ if (unlikely((cpu_buffer->commit_page != @@ -2879,7 +2872,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) return; local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); - rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); + rb_inc_page(&cpu_buffer->commit_page); /* add barrier to keep gcc from optimizing too much */ barrier(); } @@ -3638,14 +3631,14 @@ rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer, * Because the commit page may be on the reader page we * start with the next page and check the end loop there. */ - rb_inc_page(cpu_buffer, &bpage); + rb_inc_page(&bpage); start = bpage; do { if (bpage->page == (void *)addr) { local_dec(&bpage->entries); return; } - rb_inc_page(cpu_buffer, &bpage); + rb_inc_page(&bpage); } while (bpage != start); /* commit not part of this buffer?? */ @@ -4367,7 +4360,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->pages = reader->list.prev; /* The reader page will be pointing to the new head */ - rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); + rb_set_list_to_head(&cpu_buffer->reader_page->list); /* * We want to make sure we read the overruns after we set up our @@ -4406,7 +4399,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * Now make the new head point back to the reader page. */ rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; - rb_inc_page(cpu_buffer, &cpu_buffer->head_page); + rb_inc_page(&cpu_buffer->head_page); local_inc(&cpu_buffer->pages_read); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b5815a022ecc..e295c413580e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -176,7 +176,7 @@ static union trace_eval_map_item *trace_eval_maps; int tracing_set_tracer(struct trace_array *tr, const char *buf); static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc); + unsigned int trace_ctx); #define MAX_TRACER_SIZE 100 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; @@ -408,7 +408,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export); TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \ TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \ TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \ - TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS) + TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \ + TRACE_ITER_HASH_PTR) /* trace_options that are only supported by global_trace */ #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ @@ -454,6 +455,7 @@ static void __trace_array_put(struct trace_array *this_tr) /** * trace_array_put - Decrement the reference counter for this trace array. + * @this_tr : pointer to the trace array * * NOTE: Use this when we no longer need the trace array returned by * trace_array_get_by_name(). This ensures the trace array can be later @@ -530,6 +532,7 @@ trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) /** * trace_ignore_this_task - should a task be ignored for tracing * @filtered_pids: The list of pids to check + * @filtered_no_pids: The list of pids not to be traced * @task: The task that should be ignored if not filtered * * Checks if @task should be traced or not from @filtered_pids. @@ -780,7 +783,7 @@ u64 ftrace_now(int cpu) } /** - * tracing_is_enabled - Show if global_trace has been disabled + * tracing_is_enabled - Show if global_trace has been enabled * * Shows if the global trace has been enabled or not. It uses the * mirror flag "buffer_disabled" to be used in fast paths such as for @@ -905,23 +908,23 @@ static inline void trace_access_lock_init(void) #ifdef CONFIG_STACKTRACE static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); + unsigned int trace_ctx, + int skip, struct pt_regs *regs); static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs); + unsigned int trace_ctx, + int skip, struct pt_regs *regs); #else static inline void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { } static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned long trace_ctx, + int skip, struct pt_regs *regs) { } @@ -929,24 +932,24 @@ static inline void ftrace_trace_stack(struct trace_array *tr, static __always_inline void trace_event_setup(struct ring_buffer_event *event, - int type, unsigned long flags, int pc) + int type, unsigned int trace_ctx) { struct trace_entry *ent = ring_buffer_event_data(event); - tracing_generic_entry_update(ent, type, flags, pc); + tracing_generic_entry_update(ent, type, trace_ctx); } static __always_inline struct ring_buffer_event * __trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *event; event = ring_buffer_lock_reserve(buffer, len); if (event != NULL) - trace_event_setup(event, type, flags, pc); + trace_event_setup(event, type, trace_ctx); return event; } @@ -1007,25 +1010,22 @@ int __trace_puts(unsigned long ip, const char *str, int size) struct ring_buffer_event *event; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int alloc; - int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; - pc = preempt_count(); - if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; alloc = sizeof(*entry) + size + 2; /* possible \n added */ - local_save_flags(irq_flags); + trace_ctx = tracing_gen_ctx(); buffer = global_trace.array_buffer.buffer; ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, pc); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + trace_ctx); if (!event) { size = 0; goto out; @@ -1044,7 +1044,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); out: ring_buffer_nest_end(buffer); return size; @@ -1061,25 +1061,22 @@ int __trace_bputs(unsigned long ip, const char *str) struct ring_buffer_event *event; struct trace_buffer *buffer; struct bputs_entry *entry; - unsigned long irq_flags; + unsigned int trace_ctx; int size = sizeof(struct bputs_entry); int ret = 0; - int pc; if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; - pc = preempt_count(); - if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; - local_save_flags(irq_flags); + trace_ctx = tracing_gen_ctx(); buffer = global_trace.array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, pc); + trace_ctx); if (!event) goto out; @@ -1088,7 +1085,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); ret = 1; out: @@ -2584,36 +2581,34 @@ enum print_line_t trace_handle_return(struct trace_seq *s) } EXPORT_SYMBOL_GPL(trace_handle_return); -void -tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, - unsigned long flags, int pc) +unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) { - struct task_struct *tsk = current; + unsigned int trace_flags = irqs_status; + unsigned int pc; - entry->preempt_count = pc & 0xff; - entry->pid = (tsk) ? tsk->pid : 0; - entry->type = type; - entry->flags = -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | -#else - TRACE_FLAG_IRQS_NOSUPPORT | -#endif - ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | - ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | - (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); + pc = preempt_count(); + + if (pc & NMI_MASK) + trace_flags |= TRACE_FLAG_NMI; + if (pc & HARDIRQ_MASK) + trace_flags |= TRACE_FLAG_HARDIRQ; + if (in_serving_softirq()) + trace_flags |= TRACE_FLAG_SOFTIRQ; + + if (tif_need_resched()) + trace_flags |= TRACE_FLAG_NEED_RESCHED; + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; + return (trace_flags << 16) | (pc & 0xff); } -EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { - return __trace_buffer_lock_reserve(buffer, type, len, flags, pc); + return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); } DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -2733,7 +2728,7 @@ struct ring_buffer_event * trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, struct trace_event_file *trace_file, int type, unsigned long len, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct ring_buffer_event *entry; int val; @@ -2746,15 +2741,15 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, /* Try to use the per cpu buffer first */ val = this_cpu_inc_return(trace_buffered_event_cnt); if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) { - trace_event_setup(entry, type, flags, pc); + trace_event_setup(entry, type, trace_ctx); entry->array[0] = len; return entry; } this_cpu_dec(trace_buffered_event_cnt); } - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); /* * If tracing is off, but we have triggers enabled * we still need to look at the event data. Use the temp_buffer @@ -2763,8 +2758,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, */ if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { *current_rb = temp_buffer; - entry = __trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); + entry = __trace_buffer_lock_reserve(*current_rb, type, len, + trace_ctx); } return entry; } @@ -2850,7 +2845,7 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, fbuffer->event, fbuffer->entry, - fbuffer->flags, fbuffer->pc, fbuffer->regs); + fbuffer->trace_ctx, fbuffer->regs); } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); @@ -2866,7 +2861,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_commit); void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, + unsigned int trace_ctx, struct pt_regs *regs) { __buffer_unlock_commit(buffer, event); @@ -2877,8 +2872,8 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, * and mmiotrace, but that's ok if they lose a function or * two. They are not that meaningful. */ - ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); - ftrace_trace_userstack(tr, buffer, flags, pc); + ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); + ftrace_trace_userstack(tr, buffer, trace_ctx); } /* @@ -2892,9 +2887,8 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, } void -trace_function(struct trace_array *tr, - unsigned long ip, unsigned long parent_ip, unsigned long flags, - int pc) +trace_function(struct trace_array *tr, unsigned long ip, unsigned long + parent_ip, unsigned int trace_ctx) { struct trace_event_call *call = &event_function; struct trace_buffer *buffer = tr->array_buffer.buffer; @@ -2902,7 +2896,7 @@ trace_function(struct trace_array *tr, struct ftrace_entry *entry; event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), - flags, pc); + trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -2936,8 +2930,8 @@ static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); static void __ftrace_trace_stack(struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; @@ -2984,7 +2978,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, size = nr_entries * sizeof(unsigned long); event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, - sizeof(*entry) + size, flags, pc); + sizeof(*entry) + size, trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3005,22 +2999,22 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, static inline void ftrace_trace_stack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, - int skip, int pc, struct pt_regs *regs) + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(buffer, flags, skip, pc, regs); + __ftrace_trace_stack(buffer, trace_ctx, skip, regs); } -void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc) +void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, + int skip) { struct trace_buffer *buffer = tr->array_buffer.buffer; if (rcu_is_watching()) { - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); return; } @@ -3034,7 +3028,7 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, return; rcu_irq_enter_irqson(); - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); rcu_irq_exit_irqson(); } @@ -3044,19 +3038,15 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, */ void trace_dump_stack(int skip) { - unsigned long flags; - if (tracing_disabled || tracing_selftest_running) return; - local_save_flags(flags); - #ifndef CONFIG_UNWINDER_ORC /* Skip 1 to skip this function. */ skip++; #endif __ftrace_trace_stack(global_trace.array_buffer.buffer, - flags, skip, preempt_count(), NULL); + tracing_gen_ctx(), skip, NULL); } EXPORT_SYMBOL_GPL(trace_dump_stack); @@ -3065,7 +3055,7 @@ static DEFINE_PER_CPU(int, user_stack_count); static void ftrace_trace_userstack(struct trace_array *tr, - struct trace_buffer *buffer, unsigned long flags, int pc) + struct trace_buffer *buffer, unsigned int trace_ctx) { struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; @@ -3092,7 +3082,7 @@ ftrace_trace_userstack(struct trace_array *tr, __this_cpu_inc(user_stack_count); event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) goto out_drop_count; entry = ring_buffer_event_data(event); @@ -3112,7 +3102,7 @@ ftrace_trace_userstack(struct trace_array *tr, #else /* CONFIG_USER_STACKTRACE_SUPPORT */ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_buffer *buffer, - unsigned long flags, int pc) + unsigned int trace_ctx) { } #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ @@ -3242,9 +3232,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) struct trace_buffer *buffer; struct trace_array *tr = &global_trace; struct bprint_entry *entry; - unsigned long flags; + unsigned int trace_ctx; char *tbuffer; - int len = 0, size, pc; + int len = 0, size; if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -3252,7 +3242,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); tbuffer = get_trace_buf(); @@ -3266,12 +3256,11 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) goto out_put; - local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3281,7 +3270,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) memcpy(entry->buf, tbuffer, sizeof(u32) * len); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); } out: @@ -3304,9 +3293,9 @@ __trace_array_vprintk(struct trace_buffer *buffer, { struct trace_event_call *call = &event_print; struct ring_buffer_event *event; - int len = 0, size, pc; + int len = 0, size; struct print_entry *entry; - unsigned long flags; + unsigned int trace_ctx; char *tbuffer; if (tracing_disabled || tracing_selftest_running) @@ -3315,7 +3304,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); @@ -3327,11 +3316,10 @@ __trace_array_vprintk(struct trace_buffer *buffer, len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); - local_save_flags(flags); size = sizeof(*entry) + len + 1; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3340,7 +3328,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, memcpy(&entry->buf, tbuffer, len + 1); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL); + ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL); } out: @@ -3543,6 +3531,65 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, return next; } +#define STATIC_FMT_BUF_SIZE 128 +static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; + +static char *trace_iter_expand_format(struct trace_iterator *iter) +{ + char *tmp; + + if (iter->fmt == static_fmt_buf) + return NULL; + + tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, + GFP_KERNEL); + if (tmp) { + iter->fmt_size += STATIC_FMT_BUF_SIZE; + iter->fmt = tmp; + } + + return tmp; +} + +const char *trace_event_format(struct trace_iterator *iter, const char *fmt) +{ + const char *p, *new_fmt; + char *q; + + if (WARN_ON_ONCE(!fmt)) + return fmt; + + if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR) + return fmt; + + p = fmt; + new_fmt = q = iter->fmt; + while (*p) { + if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { + if (!trace_iter_expand_format(iter)) + return fmt; + + q += iter->fmt - new_fmt; + new_fmt = iter->fmt; + } + + *q++ = *p++; + + /* Replace %p with %px */ + if (p[-1] == '%') { + if (p[0] == '%') { + *q++ = *p++; + } else if (p[0] == 'p' && !isalnum(p[1])) { + *q++ = *p++; + *q++ = 'x'; + } + } + } + *q = '\0'; + + return new_fmt; +} + #define STATIC_TEMP_BUF_SIZE 128 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); @@ -4336,6 +4383,16 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->temp_size = 128; /* + * trace_event_printf() may need to modify given format + * string to replace %p with %px so that it shows real address + * instead of hash value. However, that is only for the event + * tracing, other tracer may not need. Defer the allocation + * until it is needed. + */ + iter->fmt = NULL; + iter->fmt_size = 0; + + /* * We make a copy of the current tracer to avoid concurrent * changes on it while we are reading. */ @@ -4486,6 +4543,7 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_destroy(&iter->mutex); free_cpumask_var(iter->started); + kfree(iter->fmt); kfree(iter->temp); kfree(iter->trace); kfree(iter->buffer_iter); @@ -6653,7 +6711,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, enum event_trigger_type tt = ETT_NONE; struct trace_buffer *buffer; struct print_entry *entry; - unsigned long irq_flags; ssize_t written; int size; int len; @@ -6673,7 +6730,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */ /* If less than "<faulted>", then make sure we can still add that */ @@ -6682,7 +6738,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - irq_flags, preempt_count()); + tracing_gen_ctx()); if (unlikely(!event)) /* Ring buffer disabled, return as if not open for write */ return -EBADF; @@ -6734,7 +6790,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, struct ring_buffer_event *event; struct trace_buffer *buffer; struct raw_data_entry *entry; - unsigned long irq_flags; ssize_t written; int size; int len; @@ -6756,14 +6811,13 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - local_save_flags(irq_flags); size = sizeof(*entry) + cnt; if (cnt < FAULT_SIZE_ID) size += FAULT_SIZE_ID - cnt; buffer = tr->array_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, - irq_flags, preempt_count()); + tracing_gen_ctx()); if (!event) /* Ring buffer disabled, return as if not open for write */ return -EBADF; @@ -9348,9 +9402,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp */ + /* Can not use kmalloc for iter.temp and iter.fmt */ iter.temp = static_temp_buf; iter.temp_size = STATIC_TEMP_BUF_SIZE; + iter.fmt = static_fmt_buf; + iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); @@ -9429,30 +9485,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) } EXPORT_SYMBOL_GPL(ftrace_dump); -int trace_run_command(const char *buf, int (*createfn)(int, char **)) -{ - char **argv; - int argc, ret; - - argc = 0; - ret = 0; - argv = argv_split(GFP_KERNEL, buf, &argc); - if (!argv) - return -ENOMEM; - - if (argc) - ret = createfn(argc, argv); - - argv_free(argv); - - return ret; -} - #define WRITE_BUFSIZE 4096 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, - int (*createfn)(int, char **)) + int (*createfn)(const char *)) { char *kbuf, *buf, *tmp; int ret = 0; @@ -9500,7 +9537,7 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, if (tmp) *tmp = '\0'; - ret = trace_run_command(buf, createfn); + ret = createfn(buf); if (ret) goto out; buf += size; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e448d2da0b99..dec13ff66077 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -136,25 +136,6 @@ struct kretprobe_trace_entry_head { unsigned long ret_ip; }; -/* - * trace_flag_type is an enumeration that holds different - * states when a trace occurs. These are: - * IRQS_OFF - interrupts were disabled - * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags - * NEED_RESCHED - reschedule is requested - * HARDIRQ - inside an interrupt handler - * SOFTIRQ - inside a softirq handler - */ -enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, - TRACE_FLAG_NEED_RESCHED = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_PREEMPT_RESCHED = 0x20, - TRACE_FLAG_NMI = 0x40, -}; - #define TRACE_BUF_SIZE 1024 struct trace_array; @@ -589,8 +570,7 @@ struct ring_buffer_event * trace_buffer_lock_reserve(struct trace_buffer *buffer, int type, unsigned long len, - unsigned long flags, - int pc); + unsigned int trace_ctx); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -601,6 +581,8 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); +const char *trace_event_format(struct trace_iterator *iter, const char *fmt); + int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); @@ -615,11 +597,11 @@ unsigned long trace_total_entries(struct trace_array *tr); void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc); + unsigned int trace_ctx); void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc); + unsigned int trace_ctx); void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); @@ -687,11 +669,10 @@ static inline void latency_fsnotify(struct trace_array *tr) { } #endif #ifdef CONFIG_STACKTRACE -void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, - int pc); +void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip); #else -static inline void __trace_stack(struct trace_array *tr, unsigned long flags, - int skip, int pc) +static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, + int skip) { } #endif /* CONFIG_STACKTRACE */ @@ -831,10 +812,10 @@ extern void graph_trace_open(struct trace_iterator *iter); extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, - unsigned long flags, int pc); + unsigned int trace_ctx); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, - unsigned long flags, int pc); + unsigned int trace_ctx); #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; @@ -1194,6 +1175,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(MARKERS, "markers"), \ C(EVENT_FORK, "event-fork"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ + C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ @@ -1297,15 +1279,15 @@ extern int call_filter_check_discard(struct trace_event_call *call, void *rec, void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, + unsigned int trcace_ctx, struct pt_regs *regs); static inline void trace_buffer_unlock_commit(struct trace_array *tr, struct trace_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc) + unsigned int trace_ctx) { - trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); + trace_buffer_unlock_commit_regs(tr, buffer, event, trace_ctx, NULL); } DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); @@ -1366,8 +1348,7 @@ __event_trigger_test_discard(struct trace_event_file *file, * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. + * @trace_ctx: The tracing context flags. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and @@ -1377,12 +1358,12 @@ static inline void event_trigger_unlock_commit(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc) + void *entry, unsigned int trace_ctx) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); + trace_buffer_unlock_commit(file->tr, buffer, event, trace_ctx); if (tt) event_triggers_post_call(file, tt); @@ -1394,8 +1375,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, * @buffer: The ring buffer that the event is being written to * @event: The event meta data in the ring buffer * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. + * @trace_ctx: The tracing context flags. * * This is a helper function to handle triggers that require data * from the event itself. It also tests the event against filters and @@ -1408,14 +1388,14 @@ static inline void event_trigger_unlock_commit_regs(struct trace_event_file *file, struct trace_buffer *buffer, struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc, + void *entry, unsigned int trace_ctx, struct pt_regs *regs) { enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) trace_buffer_unlock_commit_regs(file->tr, buffer, event, - irq_flags, pc, regs); + trace_ctx, regs); if (tt) event_triggers_post_call(file, tt); @@ -1830,10 +1810,9 @@ extern int tracing_set_cpumask(struct trace_array *tr, #define MAX_EVENT_NAME_LEN 64 -extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, - int (*createfn)(int, char**)); + int (*createfn)(const char *)); extern unsigned int err_pos(char *cmd, const char *str); extern void tracing_log_err(struct trace_array *tr, diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index eff099123aa2..e47fdb4c92fb 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -37,7 +37,7 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) struct ring_buffer_event *event; struct trace_branch *entry; unsigned long flags; - int pc; + unsigned int trace_ctx; const char *p; if (current->trace_recursion & TRACE_BRANCH_BIT) @@ -59,10 +59,10 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) if (atomic_read(&data->disabled)) goto out; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(flags); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) goto out; diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 4f967d5cd917..dc971a68dda4 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -31,23 +31,31 @@ int dyn_event_register(struct dyn_event_operations *ops) return 0; } -int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) +int dyn_event_release(const char *raw_command, struct dyn_event_operations *type) { struct dyn_event *pos, *n; char *system = NULL, *event, *p; - int ret = -ENOENT; + int argc, ret = -ENOENT; + char **argv; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) + return -ENOMEM; if (argv[0][0] == '-') { - if (argv[0][1] != ':') - return -EINVAL; + if (argv[0][1] != ':') { + ret = -EINVAL; + goto out; + } event = &argv[0][2]; } else { event = strchr(argv[0], ':'); - if (!event) - return -EINVAL; + if (!event) { + ret = -EINVAL; + goto out; + } event++; } - argc--; argv++; p = strchr(event, '/'); if (p) { @@ -63,7 +71,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) if (type && type != pos->ops) continue; if (!pos->ops->match(system, event, - argc, (const char **)argv, pos)) + argc - 1, (const char **)argv + 1, pos)) continue; ret = pos->ops->free(pos); @@ -71,21 +79,22 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) break; } mutex_unlock(&event_mutex); - +out: + argv_free(argv); return ret; } -static int create_dyn_event(int argc, char **argv) +static int create_dyn_event(const char *raw_command) { struct dyn_event_operations *ops; int ret = -ENODEV; - if (argv[0][0] == '-' || argv[0][0] == '!') - return dyn_event_release(argc, argv, NULL); + if (raw_command[0] == '-' || raw_command[0] == '!') + return dyn_event_release(raw_command, NULL); mutex_lock(&dyn_event_ops_mutex); list_for_each_entry(ops, &dyn_event_ops_list, list) { - ret = ops->create(argc, (const char **)argv); + ret = ops->create(raw_command); if (!ret || ret != -ECANCELED) break; } diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index d6f72dcb7269..7754936b57ee 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -39,7 +39,7 @@ struct dyn_event; */ struct dyn_event_operations { struct list_head list; - int (*create)(int argc, const char *argv[]); + int (*create)(const char *raw_command); int (*show)(struct seq_file *m, struct dyn_event *ev); bool (*is_busy)(struct dyn_event *ev); int (*free)(struct dyn_event *ev); @@ -97,7 +97,7 @@ void *dyn_event_seq_start(struct seq_file *m, loff_t *pos); void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos); void dyn_event_seq_stop(struct seq_file *m, void *v); int dyn_events_release_all(struct dyn_event_operations *type); -int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type); +int dyn_event_release(const char *raw_command, struct dyn_event_operations *type); /* * for_each_dyn_event - iterate over the dyn_event list diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a71181655958..288ad2c274fb 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -421,11 +421,8 @@ NOKPROBE_SYMBOL(perf_trace_buf_alloc); void perf_trace_buf_update(void *record, u16 type) { struct trace_entry *entry = record; - int pc = preempt_count(); - unsigned long flags; - local_save_flags(flags); - tracing_generic_entry_update(entry, type, flags, pc); + tracing_generic_entry_update(entry, type, tracing_gen_ctx()); } NOKPROBE_SYMBOL(perf_trace_buf_update); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d387b774ceeb..a3563afd412d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -258,22 +258,19 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, trace_event_ignore_this_pid(trace_file)) return NULL; - local_save_flags(fbuffer->flags); - fbuffer->pc = preempt_count(); /* * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables * preemption (adding one to the preempt_count). Since we are * interested in the preempt_count at the time the tracepoint was * hit, we need to subtract one to offset the increment. */ - if (IS_ENABLED(CONFIG_PREEMPTION)) - fbuffer->pc--; + fbuffer->trace_ctx = tracing_gen_ctx_dec(); fbuffer->trace_file = trace_file; fbuffer->event = trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, event_call->event.type, len, - fbuffer->flags, fbuffer->pc); + fbuffer->trace_ctx); if (!fbuffer->event) return NULL; @@ -2101,16 +2098,20 @@ event_subsystem_dir(struct trace_array *tr, const char *name, dir->subsystem = system; file->system = dir; - entry = tracefs_create_file("filter", 0644, dir->entry, dir, - &ftrace_subsystem_filter_fops); - if (!entry) { - kfree(system->filter); - system->filter = NULL; - pr_warn("Could not create tracefs '%s/filter' entry\n", name); - } + /* the ftrace system is special, do not create enable or filter files */ + if (strcmp(name, "ftrace") != 0) { - trace_create_file("enable", 0644, dir->entry, dir, - &ftrace_system_enable_fops); + entry = tracefs_create_file("filter", 0644, dir->entry, dir, + &ftrace_subsystem_filter_fops); + if (!entry) { + kfree(system->filter); + system->filter = NULL; + pr_warn("Could not create tracefs '%s/filter' entry\n", name); + } + + trace_create_file("enable", 0644, dir->entry, dir, + &ftrace_system_enable_fops); + } list_add(&dir->list, &tr->systems); @@ -3679,12 +3680,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, struct trace_buffer *buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; - unsigned long flags; + unsigned int trace_ctx; long disabled; int cpu; - int pc; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); @@ -3692,11 +3692,9 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, if (disabled != 1) goto out; - local_save_flags(flags); - event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, TRACE_FN, sizeof(*entry), - flags, pc); + trace_ctx); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -3704,7 +3702,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, entry->parent_ip = parent_ip; event_trigger_unlock_commit(&event_trace_file, buffer, event, - entry, flags, pc); + entry, trace_ctx); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); preempt_enable_notrace(); diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index 22bcf7c51d1e..c188045c5f97 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -192,7 +192,6 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size) static int parse_entry(char *str, struct trace_event_call *call, void **pentry) { struct ftrace_event_field *field; - unsigned long irq_flags; void *entry = NULL; int entry_size; u64 val = 0; @@ -203,9 +202,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) if (!entry) return -ENOMEM; - local_save_flags(irq_flags); - tracing_generic_entry_update(entry, call->event.type, irq_flags, - preempt_count()); + tracing_generic_entry_update(entry, call->event.type, + tracing_gen_ctx()); while ((len = parse_field(str, call, &field, &val)) > 0) { if (is_function_field(field)) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 5a8bc0b421f1..2979a96595b4 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -23,13 +23,14 @@ #undef ERRORS #define ERRORS \ C(BAD_NAME, "Illegal name"), \ - C(CMD_INCOMPLETE, "Incomplete command"), \ + C(INVALID_CMD, "Command must be of the form: <name> field[;field] ..."),\ + C(INVALID_DYN_CMD, "Command must be of the form: s or -:[synthetic/]<name> field[;field] ..."),\ C(EVENT_EXISTS, "Event already exists"), \ C(TOO_MANY_FIELDS, "Too many fields"), \ C(INCOMPLETE_TYPE, "Incomplete type"), \ C(INVALID_TYPE, "Invalid type"), \ - C(INVALID_FIELD, "Invalid field"), \ - C(CMD_TOO_LONG, "Command too long"), + C(INVALID_FIELD, "Invalid field"), \ + C(INVALID_ARRAY_SPEC, "Invalid array specification"), #undef C #define C(a, b) SYNTH_ERR_##a @@ -48,7 +49,7 @@ static int errpos(const char *str) return err_pos(last_cmd, str); } -static void last_cmd_set(char *str) +static void last_cmd_set(const char *str) { if (!str) return; @@ -62,7 +63,7 @@ static void synth_err(u8 err_type, u8 err_pos) err_type, err_pos); } -static int create_synth_event(int argc, const char **argv); +static int create_synth_event(const char *raw_command); static int synth_event_show(struct seq_file *m, struct dyn_event *ev); static int synth_event_release(struct dyn_event *ev); static bool synth_event_is_busy(struct dyn_event *ev); @@ -579,18 +580,32 @@ static void free_synth_field(struct synth_field *field) kfree(field); } -static struct synth_field *parse_synth_field(int argc, const char **argv, - int *consumed) +static int check_field_version(const char *prefix, const char *field_type, + const char *field_name) +{ + /* + * For backward compatibility, the old synthetic event command + * format did not require semicolons, and in order to not + * break user space, that old format must still work. If a new + * feature is added, then the format that uses the new feature + * will be required to have semicolons, as nothing that uses + * the old format would be using the new, yet to be created, + * feature. When a new feature is added, this will detect it, + * and return a number greater than 1, and require the format + * to use semicolons. + */ + return 1; +} + +static struct synth_field *parse_synth_field(int argc, char **argv, + int *consumed, int *field_version) { - struct synth_field *field; const char *prefix = NULL, *field_type = argv[0], *field_name, *array; + struct synth_field *field; int len, ret = -ENOMEM; struct seq_buf s; ssize_t size; - if (field_type[0] == ';') - field_type++; - if (!strcmp(field_type, "unsigned")) { if (argc < 3) { synth_err(SYNTH_ERR_INCOMPLETE_TYPE, errpos(field_type)); @@ -599,12 +614,19 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, prefix = "unsigned "; field_type = argv[1]; field_name = argv[2]; - *consumed = 3; + *consumed += 3; } else { field_name = argv[1]; - *consumed = 2; + *consumed += 2; } + if (!field_name) { + synth_err(SYNTH_ERR_INVALID_FIELD, errpos(field_type)); + return ERR_PTR(-EINVAL); + } + + *field_version = check_field_version(prefix, field_type, field_name); + field = kzalloc(sizeof(*field), GFP_KERNEL); if (!field) return ERR_PTR(-ENOMEM); @@ -613,8 +635,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, array = strchr(field_name, '['); if (array) len -= strlen(array); - else if (field_name[len - 1] == ';') - len--; field->name = kmemdup_nul(field_name, len, GFP_KERNEL); if (!field->name) @@ -626,8 +646,6 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, goto free; } - if (field_type[0] == ';') - field_type++; len = strlen(field_type) + 1; if (array) @@ -644,11 +662,8 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, if (prefix) seq_buf_puts(&s, prefix); seq_buf_puts(&s, field_type); - if (array) { + if (array) seq_buf_puts(&s, array); - if (s.buffer[s.len - 1] == ';') - s.len--; - } if (WARN_ON_ONCE(!seq_buf_buffer_left(&s))) goto free; @@ -656,7 +671,10 @@ static struct synth_field *parse_synth_field(int argc, const char **argv, size = synth_field_size(field->type); if (size < 0) { - synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); + if (array) + synth_err(SYNTH_ERR_INVALID_ARRAY_SPEC, errpos(field_name)); + else + synth_err(SYNTH_ERR_INVALID_TYPE, errpos(field_type)); ret = -EINVAL; goto free; } else if (size == 0) { @@ -1160,46 +1178,13 @@ int synth_event_gen_cmd_array_start(struct dynevent_cmd *cmd, const char *name, } EXPORT_SYMBOL_GPL(synth_event_gen_cmd_array_start); -static int save_cmdstr(int argc, const char *name, const char **argv) -{ - struct seq_buf s; - char *buf; - int i; - - buf = kzalloc(MAX_DYNEVENT_CMD_LEN, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - seq_buf_init(&s, buf, MAX_DYNEVENT_CMD_LEN); - - seq_buf_puts(&s, name); - - for (i = 0; i < argc; i++) { - seq_buf_putc(&s, ' '); - seq_buf_puts(&s, argv[i]); - } - - if (!seq_buf_buffer_left(&s)) { - synth_err(SYNTH_ERR_CMD_TOO_LONG, 0); - kfree(buf); - return -EINVAL; - } - buf[s.len] = 0; - last_cmd_set(buf); - - kfree(buf); - return 0; -} - -static int __create_synth_event(int argc, const char *name, const char **argv) +static int __create_synth_event(const char *name, const char *raw_fields) { + char **argv, *field_str, *tmp_fields, *saved_fields = NULL; struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; + int consumed, cmd_version = 1, n_fields_this_loop; + int i, argc, n_fields = 0, ret = 0; struct synth_event *event = NULL; - int i, consumed = 0, n_fields = 0, ret = 0; - - ret = save_cmdstr(argc, name, argv); - if (ret) - return ret; /* * Argument syntax: @@ -1208,46 +1193,99 @@ static int __create_synth_event(int argc, const char *name, const char **argv) * where 'field' = type field_name */ - if (name[0] == '\0' || argc < 1) { - synth_err(SYNTH_ERR_CMD_INCOMPLETE, 0); + if (name[0] == '\0') { + synth_err(SYNTH_ERR_INVALID_CMD, 0); return -EINVAL; } - mutex_lock(&event_mutex); - if (!is_good_name(name)) { synth_err(SYNTH_ERR_BAD_NAME, errpos(name)); - ret = -EINVAL; - goto out; + return -EINVAL; } + mutex_lock(&event_mutex); + event = find_synth_event(name); if (event) { synth_err(SYNTH_ERR_EVENT_EXISTS, errpos(name)); ret = -EEXIST; - goto out; + goto err; } - for (i = 0; i < argc - 1; i++) { - if (strcmp(argv[i], ";") == 0) - continue; - if (n_fields == SYNTH_FIELDS_MAX) { - synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); - ret = -EINVAL; + tmp_fields = saved_fields = kstrdup(raw_fields, GFP_KERNEL); + if (!tmp_fields) { + ret = -ENOMEM; + goto err; + } + + while ((field_str = strsep(&tmp_fields, ";")) != NULL) { + argv = argv_split(GFP_KERNEL, field_str, &argc); + if (!argv) { + ret = -ENOMEM; goto err; } - field = parse_synth_field(argc - i, &argv[i], &consumed); - if (IS_ERR(field)) { - ret = PTR_ERR(field); + if (!argc) + continue; + + n_fields_this_loop = 0; + consumed = 0; + while (argc > consumed) { + int field_version; + + field = parse_synth_field(argc - consumed, + argv + consumed, &consumed, + &field_version); + if (IS_ERR(field)) { + argv_free(argv); + ret = PTR_ERR(field); + goto err; + } + + /* + * Track the highest version of any field we + * found in the command. + */ + if (field_version > cmd_version) + cmd_version = field_version; + + /* + * Now sort out what is and isn't valid for + * each supported version. + * + * If we see more than 1 field per loop, it + * means we have multiple fields between + * semicolons, and that's something we no + * longer support in a version 2 or greater + * command. + */ + if (cmd_version > 1 && n_fields_this_loop >= 1) { + synth_err(SYNTH_ERR_INVALID_CMD, errpos(field_str)); + ret = -EINVAL; + goto err; + } + + fields[n_fields++] = field; + if (n_fields == SYNTH_FIELDS_MAX) { + synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); + ret = -EINVAL; + goto err; + } + + n_fields_this_loop++; + } + + if (consumed < argc) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + ret = -EINVAL; goto err; } - fields[n_fields++] = field; - i += consumed - 1; + + argv_free(argv); } - if (i < argc && strcmp(argv[i], ";") != 0) { - synth_err(SYNTH_ERR_INVALID_FIELD, errpos(argv[i])); + if (n_fields == 0) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); ret = -EINVAL; goto err; } @@ -1266,6 +1304,8 @@ static int __create_synth_event(int argc, const char *name, const char **argv) out: mutex_unlock(&event_mutex); + kfree(saved_fields); + return ret; err: for (i = 0; i < n_fields; i++) @@ -1383,19 +1423,79 @@ int synth_event_delete(const char *event_name) } EXPORT_SYMBOL_GPL(synth_event_delete); -static int create_or_delete_synth_event(int argc, char **argv) +static int check_command(const char *raw_command) { - const char *name = argv[0]; - int ret; + char **argv = NULL, *cmd, *saved_cmd, *name_and_field; + int argc, ret = 0; + + cmd = saved_cmd = kstrdup(raw_command, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + name_and_field = strsep(&cmd, ";"); + if (!name_and_field) { + ret = -EINVAL; + goto free; + } + + if (name_and_field[0] == '!') + goto free; + + argv = argv_split(GFP_KERNEL, name_and_field, &argc); + if (!argv) { + ret = -ENOMEM; + goto free; + } + argv_free(argv); + + if (argc < 3) + ret = -EINVAL; +free: + kfree(saved_cmd); + + return ret; +} + +static int create_or_delete_synth_event(const char *raw_command) +{ + char *name = NULL, *fields, *p; + int ret = 0; + + raw_command = skip_spaces(raw_command); + if (raw_command[0] == '\0') + return ret; + + last_cmd_set(raw_command); + + ret = check_command(raw_command); + if (ret) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + return ret; + } + + p = strpbrk(raw_command, " \t"); + if (!p && raw_command[0] != '!') { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + ret = -EINVAL; + goto free; + } + + name = kmemdup_nul(raw_command, p ? p - raw_command : strlen(raw_command), GFP_KERNEL); + if (!name) + return -ENOMEM; - /* trace_run_command() ensures argc != 0 */ if (name[0] == '!') { ret = synth_event_delete(name + 1); - return ret; + goto free; } - ret = __create_synth_event(argc - 1, name, (const char **)argv + 1); - return ret == -ECANCELED ? -EINVAL : ret; + fields = skip_spaces(p); + + ret = __create_synth_event(name, fields); +free: + kfree(name); + + return ret; } static int synth_event_run_command(struct dynevent_cmd *cmd) @@ -1403,7 +1503,7 @@ static int synth_event_run_command(struct dynevent_cmd *cmd) struct synth_event *se; int ret; - ret = trace_run_command(cmd->seq.buffer, create_or_delete_synth_event); + ret = create_or_delete_synth_event(cmd->seq.buffer); if (ret) return ret; @@ -1939,10 +2039,27 @@ int synth_event_trace_end(struct synth_event_trace_state *trace_state) } EXPORT_SYMBOL_GPL(synth_event_trace_end); -static int create_synth_event(int argc, const char **argv) +static int create_synth_event(const char *raw_command) { - const char *name = argv[0]; - int len; + char *fields, *p; + const char *name; + int len, ret = 0; + + raw_command = skip_spaces(raw_command); + if (raw_command[0] == '\0') + return ret; + + last_cmd_set(raw_command); + + p = strpbrk(raw_command, " \t"); + if (!p) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + return -EINVAL; + } + + fields = skip_spaces(p); + + name = raw_command; if (name[0] != 's' || name[1] != ':') return -ECANCELED; @@ -1951,11 +2068,30 @@ static int create_synth_event(int argc, const char **argv) /* This interface accepts group name prefix */ if (strchr(name, '/')) { len = str_has_prefix(name, SYNTH_SYSTEM "/"); - if (len == 0) + if (len == 0) { + synth_err(SYNTH_ERR_INVALID_DYN_CMD, 0); return -EINVAL; + } name += len; } - return __create_synth_event(argc - 1, name, argv + 1); + + len = name - raw_command; + + ret = check_command(raw_command + len); + if (ret) { + synth_err(SYNTH_ERR_INVALID_CMD, 0); + return ret; + } + + name = kmemdup_nul(raw_command + len, p - raw_command - len, GFP_KERNEL); + if (!name) + return -ENOMEM; + + ret = __create_synth_event(name, fields); + + kfree(name); + + return ret; } static int synth_event_release(struct dyn_event *ev) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c5095dd28e20..f93723ca66bc 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -106,8 +106,7 @@ static int function_trace_init(struct trace_array *tr) ftrace_init_array_ops(tr, func); - tr->array_buffer.cpu = get_cpu(); - put_cpu(); + tr->array_buffer.cpu = raw_smp_processor_id(); tracing_start_cmdline_record(); tracing_start_function_trace(tr); @@ -132,10 +131,9 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, { struct trace_array *tr = op->private; struct trace_array_cpu *data; - unsigned long flags; + unsigned int trace_ctx; int bit; int cpu; - int pc; if (unlikely(!tr->function_enabled)) return; @@ -144,15 +142,14 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); - if (!atomic_read(&data->disabled)) { - local_save_flags(flags); - trace_function(tr, ip, parent_ip, flags, pc); - } + if (!atomic_read(&data->disabled)) + trace_function(tr, ip, parent_ip, trace_ctx); + ftrace_test_recursion_unlock(bit); preempt_enable_notrace(); } @@ -184,7 +181,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, unsigned long flags; long disabled; int cpu; - int pc; + unsigned int trace_ctx; if (unlikely(!tr->function_enabled)) return; @@ -199,9 +196,9 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - trace_function(tr, ip, parent_ip, flags, pc); - __trace_stack(tr, flags, STACK_SKIP, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + trace_function(tr, ip, parent_ip, trace_ctx); + __trace_stack(tr, trace_ctx, STACK_SKIP); } atomic_dec(&data->disabled); @@ -404,13 +401,11 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, static __always_inline void trace_stack(struct trace_array *tr) { - unsigned long flags; - int pc; + unsigned int trace_ctx; - local_save_flags(flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); - __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc); + __trace_stack(tr, trace_ctx, FTRACE_STACK_SKIP); } static void diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d874dec87131..0aa6e6faa943 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -96,8 +96,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, - unsigned long flags, - int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; @@ -105,7 +104,7 @@ int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return 0; entry = ring_buffer_event_data(event); @@ -129,10 +128,10 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) struct trace_array *tr = graph_array; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; long disabled; int ret; int cpu; - int pc; if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) return 0; @@ -174,8 +173,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - ret = __trace_graph_entry(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + ret = __trace_graph_entry(tr, trace, trace_ctx); } else { ret = 0; } @@ -188,7 +187,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) static void __trace_graph_function(struct trace_array *tr, - unsigned long ip, unsigned long flags, int pc) + unsigned long ip, unsigned int trace_ctx) { u64 time = trace_clock_local(); struct ftrace_graph_ent ent = { @@ -202,22 +201,21 @@ __trace_graph_function(struct trace_array *tr, .rettime = time, }; - __trace_graph_entry(tr, &ent, flags, pc); - __trace_graph_return(tr, &ret, flags, pc); + __trace_graph_entry(tr, &ent, trace_ctx); + __trace_graph_return(tr, &ret, trace_ctx); } void trace_graph_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { - __trace_graph_function(tr, ip, flags, pc); + __trace_graph_function(tr, ip, trace_ctx); } void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, - unsigned long flags, - int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; @@ -225,7 +223,7 @@ void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -239,9 +237,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace) struct trace_array *tr = graph_array; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; long disabled; int cpu; - int pc; ftrace_graph_addr_finish(trace); @@ -255,8 +253,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) data = per_cpu_ptr(tr->array_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { - pc = preempt_count(); - __trace_graph_return(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + __trace_graph_return(tr, trace, trace_ctx); } atomic_dec(&data->disabled); local_irq_restore(flags); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index c0df9b97f147..34dc1a712dcb 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -108,14 +108,9 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct hwlat_entry *entry; - unsigned long flags; - int pc; - - pc = preempt_count(); - local_save_flags(flags); event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry), - flags, pc); + tracing_gen_ctx()); if (!event) return; entry = ring_buffer_event_data(event); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 6756379b661f..590b3d51afae 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -143,11 +143,14 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; if (!func_prolog_dec(tr, &data, &flags)) return; - trace_function(tr, ip, parent_ip, flags, preempt_count()); + trace_ctx = tracing_gen_ctx_flags(flags); + + trace_function(tr, ip, parent_ip, trace_ctx); atomic_dec(&data->disabled); } @@ -177,8 +180,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; + unsigned int trace_ctx; int ret; - int pc; if (ftrace_graph_ignore_func(trace)) return 0; @@ -195,8 +198,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) if (!func_prolog_dec(tr, &data, &flags)) return 0; - pc = preempt_count(); - ret = __trace_graph_entry(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + ret = __trace_graph_entry(tr, trace, trace_ctx); atomic_dec(&data->disabled); return ret; @@ -207,15 +210,15 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; unsigned long flags; - int pc; + unsigned int trace_ctx; ftrace_graph_addr_finish(trace); if (!func_prolog_dec(tr, &data, &flags)) return; - pc = preempt_count(); - __trace_graph_return(tr, trace, flags, pc); + trace_ctx = tracing_gen_ctx_flags(flags); + __trace_graph_return(tr, trace, trace_ctx); atomic_dec(&data->disabled); } @@ -267,12 +270,12 @@ static void irqsoff_print_header(struct seq_file *s) static void __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { if (is_graph(tr)) - trace_graph_function(tr, ip, parent_ip, flags, pc); + trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); } #else @@ -322,15 +325,13 @@ check_critical_timing(struct trace_array *tr, { u64 T0, T1, delta; unsigned long flags; - int pc; + unsigned int trace_ctx; T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); delta = T1-T0; - local_save_flags(flags); - - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); if (!report_latency(tr, delta)) goto out; @@ -341,9 +342,9 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(tr, delta)) goto out_unlock; - __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); /* Skip 5 functions to get to the irq/preempt enable function */ - __trace_stack(tr, flags, 5, pc); + __trace_stack(tr, trace_ctx, 5); if (data->critical_sequence != max_sequence) goto out_unlock; @@ -363,16 +364,15 @@ out_unlock: out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); - __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); } static nokprobe_inline void -start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) +start_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; - unsigned long flags; if (!tracer_enabled || !tracing_is_enabled()) return; @@ -393,9 +393,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) data->preempt_timestamp = ftrace_now(cpu); data->critical_start = parent_ip ? : ip; - local_save_flags(flags); - - __trace_function(tr, ip, parent_ip, flags, pc); + __trace_function(tr, ip, parent_ip, tracing_gen_ctx()); per_cpu(tracing_cpu, cpu) = 1; @@ -403,12 +401,12 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) } static nokprobe_inline void -stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) +stop_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; - unsigned long flags; + unsigned int trace_ctx; cpu = raw_smp_processor_id(); /* Always clear the tracing cpu on stopping the trace */ @@ -428,8 +426,8 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) atomic_inc(&data->disabled); - local_save_flags(flags); - __trace_function(tr, ip, parent_ip, flags, pc); + trace_ctx = tracing_gen_ctx(); + __trace_function(tr, ip, parent_ip, trace_ctx); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); @@ -438,20 +436,16 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) /* start and stop critical timings used to for stoppage (in idle) */ void start_critical_timings(void) { - int pc = preempt_count(); - - if (preempt_trace(pc) || irq_trace()) - start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); + if (preempt_trace(preempt_count()) || irq_trace()) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL_GPL(start_critical_timings); NOKPROBE_SYMBOL(start_critical_timings); void stop_critical_timings(void) { - int pc = preempt_count(); - - if (preempt_trace(pc) || irq_trace()) - stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); + if (preempt_trace(preempt_count()) || irq_trace()) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL_GPL(stop_critical_timings); NOKPROBE_SYMBOL(stop_critical_timings); @@ -613,19 +607,15 @@ static void irqsoff_tracer_stop(struct trace_array *tr) */ void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { - unsigned int pc = preempt_count(); - - if (!preempt_trace(pc) && irq_trace()) - stop_critical_timing(a0, a1, pc); + if (!preempt_trace(preempt_count()) && irq_trace()) + stop_critical_timing(a0, a1); } NOKPROBE_SYMBOL(tracer_hardirqs_on); void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { - unsigned int pc = preempt_count(); - - if (!preempt_trace(pc) && irq_trace()) - start_critical_timing(a0, a1, pc); + if (!preempt_trace(preempt_count()) && irq_trace()) + start_critical_timing(a0, a1); } NOKPROBE_SYMBOL(tracer_hardirqs_off); @@ -665,18 +655,14 @@ static struct tracer irqsoff_tracer __read_mostly = #ifdef CONFIG_PREEMPT_TRACER void tracer_preempt_on(unsigned long a0, unsigned long a1) { - int pc = preempt_count(); - - if (preempt_trace(pc) && !irq_trace()) - stop_critical_timing(a0, a1, pc); + if (preempt_trace(preempt_count()) && !irq_trace()) + stop_critical_timing(a0, a1); } void tracer_preempt_off(unsigned long a0, unsigned long a1) { - int pc = preempt_count(); - - if (preempt_trace(pc) && !irq_trace()) - start_critical_timing(a0, a1, pc); + if (preempt_trace(preempt_count()) && !irq_trace()) + start_critical_timing(a0, a1); } static int preemptoff_tracer_init(struct trace_array *tr) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 56c7fbff7bd7..8a1cb0878cbc 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -35,7 +35,7 @@ static int __init set_kprobe_boot_events(char *str) } __setup("kprobe_event=", set_kprobe_boot_events); -static int trace_kprobe_create(int argc, const char **argv); +static int trace_kprobe_create(const char *raw_command); static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_kprobe_release(struct dyn_event *ev); static bool trace_kprobe_is_busy(struct dyn_event *ev); @@ -711,7 +711,7 @@ static inline void sanitize_event_name(char *name) *name = '_'; } -static int trace_kprobe_create(int argc, const char *argv[]) +static int __trace_kprobe_create(int argc, const char *argv[]) { /* * Argument syntax: @@ -910,20 +910,25 @@ error: goto out; } -static int create_or_delete_trace_kprobe(int argc, char **argv) +static int trace_kprobe_create(const char *raw_command) +{ + return trace_probe_create(raw_command, __trace_kprobe_create); +} + +static int create_or_delete_trace_kprobe(const char *raw_command) { int ret; - if (argv[0][0] == '-') - return dyn_event_release(argc, argv, &trace_kprobe_ops); + if (raw_command[0] == '-') + return dyn_event_release(raw_command, &trace_kprobe_ops); - ret = trace_kprobe_create(argc, (const char **)argv); + ret = trace_kprobe_create(raw_command); return ret == -ECANCELED ? -EINVAL : ret; } static int trace_kprobe_run_command(struct dynevent_cmd *cmd) { - return trace_run_command(cmd->seq.buffer, create_or_delete_trace_kprobe); + return create_or_delete_trace_kprobe(cmd->seq.buffer); } /** @@ -1084,7 +1089,7 @@ int kprobe_event_delete(const char *name) snprintf(buf, MAX_EVENT_NAME_LEN, "-:%s", name); - return trace_run_command(buf, create_or_delete_trace_kprobe); + return create_or_delete_trace_kprobe(buf); } EXPORT_SYMBOL_GPL(kprobe_event_delete); @@ -1386,8 +1391,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(fbuffer.flags); - fbuffer.pc = preempt_count(); + fbuffer.trace_ctx = tracing_gen_ctx(); fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); @@ -1396,7 +1400,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, call->event.type, sizeof(*entry) + tk->tp.size + dsize, - fbuffer.flags, fbuffer.pc); + fbuffer.trace_ctx); if (!fbuffer.event) return; @@ -1434,8 +1438,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (trace_trigger_soft_disabled(trace_file)) return; - local_save_flags(fbuffer.flags); - fbuffer.pc = preempt_count(); + fbuffer.trace_ctx = tracing_gen_ctx(); fbuffer.trace_file = trace_file; dsize = __get_data_size(&tk->tp, regs); @@ -1443,7 +1446,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, call->event.type, sizeof(*entry) + tk->tp.size + dsize, - fbuffer.flags, fbuffer.pc); + fbuffer.trace_ctx); if (!fbuffer.event) return; @@ -1888,7 +1891,7 @@ static __init void setup_boot_kprobe_events(void) if (p) *p++ = '\0'; - ret = trace_run_command(cmd, create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe(cmd); if (ret) pr_warn("Failed to add event(%d): %s\n", ret, cmd); @@ -1982,8 +1985,7 @@ static __init int kprobe_trace_self_tests_init(void) pr_info("Testing kprobe tracing: "); - ret = trace_run_command("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)", - create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)"); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function entry.\n"); warn++; @@ -2004,8 +2006,7 @@ static __init int kprobe_trace_self_tests_init(void) } } - ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target $retval", - create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("r:testprobe2 kprobe_trace_selftest_target $retval"); if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function return.\n"); warn++; @@ -2078,13 +2079,13 @@ static __init int kprobe_trace_self_tests_init(void) trace_probe_event_call(&tk->tp), file); } - ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("-:testprobe"); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; } - ret = trace_run_command("-:testprobe2", create_or_delete_trace_kprobe); + ret = create_or_delete_trace_kprobe("-:testprobe2"); if (WARN_ON_ONCE(ret)) { pr_warn("error on deleting a probe.\n"); warn++; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 84582bf1ed5f..64e77b513697 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -5,8 +5,6 @@ * Copyright (C) 2008 Pekka Paalanen <pq@iki.fi> */ -#define DEBUG 1 - #include <linux/kernel.h> #include <linux/mmiotrace.h> #include <linux/pci.h> @@ -300,10 +298,11 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; - int pc = preempt_count(); + unsigned int trace_ctx; + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW, - sizeof(*entry), 0, pc); + sizeof(*entry), trace_ctx); if (!event) { atomic_inc(&dropped_count); return; @@ -312,7 +311,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, entry->rw = *rw; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -330,10 +329,11 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; - int pc = preempt_count(); + unsigned int trace_ctx; + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP, - sizeof(*entry), 0, pc); + sizeof(*entry), trace_ctx); if (!event) { atomic_inc(&dropped_count); return; @@ -342,7 +342,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, entry->map = *map; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 92b1575ae0ca..61255bad7e01 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -312,13 +312,23 @@ int trace_raw_output_prep(struct trace_iterator *iter, } EXPORT_SYMBOL(trace_raw_output_prep); +void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + trace_seq_vprintf(&iter->seq, trace_event_format(iter, fmt), ap); + va_end(ap); +} +EXPORT_SYMBOL(trace_event_printf); + static int trace_output_raw(struct trace_iterator *iter, char *name, char *fmt, va_list ap) { struct trace_seq *s = &iter->seq; trace_seq_printf(s, "%s: ", name); - trace_seq_vprintf(s, fmt, ap); + trace_seq_vprintf(s, trace_event_format(iter, fmt), ap); return trace_handle_return(s); } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d2867ccc6aca..ec589a4612df 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1134,3 +1134,20 @@ bool trace_probe_match_command_args(struct trace_probe *tp, } return true; } + +int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **)) +{ + int argc = 0, ret = 0; + char **argv; + + argv = argv_split(GFP_KERNEL, raw_command, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = createfn(argc, (const char **)argv); + + argv_free(argv); + + return ret; +} diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2f703a20c724..7ce4027089ee 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -341,6 +341,7 @@ struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp, int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b); bool trace_probe_match_command_args(struct trace_probe *tp, int argc, const char **argv); +int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **)); #define trace_probe_for_each_link(pos, tp) \ list_for_each_entry(pos, &(tp)->event->files, list) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index c0181066dbe9..e5778d1d7a5b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -67,7 +67,7 @@ static bool function_enabled; static int func_prolog_preempt_disable(struct trace_array *tr, struct trace_array_cpu **data, - int *pc) + unsigned int *trace_ctx) { long disabled; int cpu; @@ -75,7 +75,7 @@ func_prolog_preempt_disable(struct trace_array *tr, if (likely(!wakeup_task)) return 0; - *pc = preempt_count(); + *trace_ctx = tracing_gen_ctx(); preempt_disable_notrace(); cpu = raw_smp_processor_id(); @@ -116,8 +116,8 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; - unsigned long flags; - int pc, ret = 0; + unsigned int trace_ctx; + int ret = 0; if (ftrace_graph_ignore_func(trace)) return 0; @@ -131,11 +131,10 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) if (ftrace_graph_notrace_addr(trace->func)) return 1; - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return 0; - local_save_flags(flags); - ret = __trace_graph_entry(tr, trace, flags, pc); + ret = __trace_graph_entry(tr, trace, trace_ctx); atomic_dec(&data->disabled); preempt_enable_notrace(); @@ -146,16 +145,14 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; - unsigned long flags; - int pc; + unsigned int trace_ctx; ftrace_graph_addr_finish(trace); - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; - local_save_flags(flags); - __trace_graph_return(tr, trace, flags, pc); + __trace_graph_return(tr, trace, trace_ctx); atomic_dec(&data->disabled); preempt_enable_notrace(); @@ -217,13 +214,13 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; unsigned long flags; - int pc; + unsigned int trace_ctx; - if (!func_prolog_preempt_disable(tr, &data, &pc)) + if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; local_irq_save(flags); - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); local_irq_restore(flags); atomic_dec(&data->disabled); @@ -303,12 +300,12 @@ static void wakeup_print_header(struct seq_file *s) static void __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, - unsigned long flags, int pc) + unsigned int trace_ctx) { if (is_graph(tr)) - trace_graph_function(tr, ip, parent_ip, flags, pc); + trace_graph_function(tr, ip, parent_ip, trace_ctx); else - trace_function(tr, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, trace_ctx); } static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) @@ -375,7 +372,7 @@ static void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_context_switch; struct trace_buffer *buffer = tr->array_buffer.buffer; @@ -383,7 +380,7 @@ tracing_sched_switch_trace(struct trace_array *tr, struct ctx_switch_entry *entry; event = trace_buffer_lock_reserve(buffer, TRACE_CTX, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -396,14 +393,14 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *wakee, struct task_struct *curr, - unsigned long flags, int pc) + unsigned int trace_ctx) { struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; @@ -411,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_buffer *buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, - sizeof(*entry), flags, pc); + sizeof(*entry), trace_ctx); if (!event) return; entry = ring_buffer_event_data(event); @@ -424,7 +421,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(tr, buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } static void notrace @@ -436,7 +433,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, unsigned long flags; long disabled; int cpu; - int pc; + unsigned int trace_ctx; tracing_record_cmdline(prev); @@ -455,8 +452,6 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, if (next != wakeup_task) return; - pc = preempt_count(); - /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); @@ -464,6 +459,8 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, goto out; local_irq_save(flags); + trace_ctx = tracing_gen_ctx_flags(flags); + arch_spin_lock(&wakeup_lock); /* We could race with grabbing wakeup_lock */ @@ -473,9 +470,9 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, /* The task we are waiting for is waking up */ data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); - __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); - tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); - __trace_stack(wakeup_trace, flags, 0, pc); + __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, trace_ctx); + tracing_sched_switch_trace(wakeup_trace, prev, next, trace_ctx); + __trace_stack(wakeup_trace, trace_ctx, 0); T0 = data->preempt_timestamp; T1 = ftrace_now(cpu); @@ -527,9 +524,8 @@ probe_wakeup(void *ignore, struct task_struct *p) { struct trace_array_cpu *data; int cpu = smp_processor_id(); - unsigned long flags; long disabled; - int pc; + unsigned int trace_ctx; if (likely(!tracer_enabled)) return; @@ -550,11 +546,12 @@ probe_wakeup(void *ignore, struct task_struct *p) (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio))) return; - pc = preempt_count(); disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); if (unlikely(disabled != 1)) goto out; + trace_ctx = tracing_gen_ctx(); + /* interrupts should be off from try_to_wake_up */ arch_spin_lock(&wakeup_lock); @@ -581,19 +578,17 @@ probe_wakeup(void *ignore, struct task_struct *p) wakeup_task = get_task_struct(p); - local_save_flags(flags); - data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); data->preempt_timestamp = ftrace_now(cpu); - tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); - __trace_stack(wakeup_trace, flags, 0, pc); + tracing_sched_wakeup_trace(wakeup_trace, p, current, trace_ctx); + __trace_stack(wakeup_trace, trace_ctx, 0); /* * We must be careful in using CALLER_ADDR2. But since wake_up * is not called by an assembly function (where as schedule is) * it should be safe to use it here. */ - __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, trace_ctx); out_locked: arch_spin_unlock(&wakeup_lock); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index d85a2f0f316b..8bfcd3b09422 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -298,9 +298,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct trace_buffer *buffer; - unsigned long irq_flags; + unsigned int trace_ctx; unsigned long args[6]; - int pc; int syscall_nr; int size; @@ -322,12 +321,11 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - local_save_flags(irq_flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, - sys_data->enter_event->event.type, size, irq_flags, pc); + sys_data->enter_event->event.type, size, trace_ctx); if (!event) return; @@ -337,7 +335,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, - irq_flags, pc); + trace_ctx); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) @@ -348,8 +346,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct ring_buffer_event *event; struct trace_buffer *buffer; - unsigned long irq_flags; - int pc; + unsigned int trace_ctx; int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); @@ -368,13 +365,12 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) if (!sys_data) return; - local_save_flags(irq_flags); - pc = preempt_count(); + trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; event = trace_buffer_lock_reserve(buffer, sys_data->exit_event->event.type, sizeof(*entry), - irq_flags, pc); + trace_ctx); if (!event) return; @@ -383,7 +379,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) entry->ret = syscall_get_return_value(current, regs); event_trigger_unlock_commit(trace_file, buffer, event, entry, - irq_flags, pc); + trace_ctx); } static int reg_event_syscall_enter(struct trace_event_file *file, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3cf7128e1ad3..9b50869a5ddb 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -34,7 +34,7 @@ struct uprobe_trace_entry_head { #define DATAOF_TRACE_ENTRY(entry, is_return) \ ((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return)) -static int trace_uprobe_create(int argc, const char **argv); +static int trace_uprobe_create(const char *raw_command); static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_uprobe_release(struct dyn_event *ev); static bool trace_uprobe_is_busy(struct dyn_event *ev); @@ -530,7 +530,7 @@ end: * Argument syntax: * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS] */ -static int trace_uprobe_create(int argc, const char **argv) +static int __trace_uprobe_create(int argc, const char **argv) { struct trace_uprobe *tu; const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; @@ -716,14 +716,19 @@ fail_address_parse: return ret; } -static int create_or_delete_trace_uprobe(int argc, char **argv) +int trace_uprobe_create(const char *raw_command) +{ + return trace_probe_create(raw_command, __trace_uprobe_create); +} + +static int create_or_delete_trace_uprobe(const char *raw_command) { int ret; - if (argv[0][0] == '-') - return dyn_event_release(argc, argv, &trace_uprobe_ops); + if (raw_command[0] == '-') + return dyn_event_release(raw_command, &trace_uprobe_ops); - ret = trace_uprobe_create(argc, (const char **)argv); + ret = trace_uprobe_create(raw_command); return ret == -ECANCELED ? -EINVAL : ret; } @@ -961,7 +966,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, size, 0, 0); + call->event.type, size, 0); if (!event) return; @@ -977,7 +982,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0); + event_trigger_unlock_commit(trace_file, buffer, event, entry, 0); } /* uprobe handler */ @@ -1635,7 +1640,7 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call) } #endif /* CONFIG_PERF_EVENTS */ -/* Make a trace interface for controling probe points */ +/* Make a trace interface for controlling probe points */ static __init int init_uprobe_trace(void) { int ret; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 7261fa0f5e3c..9f478d29b926 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -53,6 +53,12 @@ struct tp_probes { struct tracepoint_func probes[]; }; +/* Called in removal of a func but failed to allocate a new tp_funcs */ +static void tp_stub_func(void) +{ + return; +} + static inline void *allocate_probes(int count) { struct tp_probes *p = kmalloc(struct_size(p, probes, count), @@ -130,8 +136,9 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, int prio) { struct tracepoint_func *old, *new; - int nr_probes = 0; - int pos = -1; + int iter_probes; /* Iterate over old probe array. */ + int nr_probes = 0; /* Counter for probes */ + int pos = -1; /* Insertion position into new array */ if (WARN_ON(!tp_func->func)) return ERR_PTR(-EINVAL); @@ -140,13 +147,13 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, old = *funcs; if (old) { /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - /* Insert before probes of lower priority */ - if (pos < 0 && old[nr_probes].prio < prio) - pos = nr_probes; - if (old[nr_probes].func == tp_func->func && - old[nr_probes].data == tp_func->data) + for (iter_probes = 0; old[iter_probes].func; iter_probes++) { + if (old[iter_probes].func == tp_stub_func) + continue; /* Skip stub functions. */ + if (old[iter_probes].func == tp_func->func && + old[iter_probes].data == tp_func->data) return ERR_PTR(-EEXIST); + nr_probes++; } } /* + 2 : one for new probe, one for NULL func */ @@ -154,20 +161,24 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, if (new == NULL) return ERR_PTR(-ENOMEM); if (old) { - if (pos < 0) { - pos = nr_probes; - memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); - } else { - /* Copy higher priority probes ahead of the new probe */ - memcpy(new, old, pos * sizeof(struct tracepoint_func)); - /* Copy the rest after it. */ - memcpy(new + pos + 1, old + pos, - (nr_probes - pos) * sizeof(struct tracepoint_func)); + nr_probes = 0; + for (iter_probes = 0; old[iter_probes].func; iter_probes++) { + if (old[iter_probes].func == tp_stub_func) + continue; + /* Insert before probes of lower priority */ + if (pos < 0 && old[iter_probes].prio < prio) + pos = nr_probes++; + new[nr_probes++] = old[iter_probes]; } - } else + if (pos < 0) + pos = nr_probes++; + /* nr_probes now points to the end of the new array */ + } else { pos = 0; + nr_probes = 1; /* must point at end of array */ + } new[pos] = *tp_func; - new[nr_probes + 1].func = NULL; + new[nr_probes].func = NULL; *funcs = new; debug_print_probes(*funcs); return old; @@ -188,8 +199,9 @@ static void *func_remove(struct tracepoint_func **funcs, /* (N -> M), (N > 1, M >= 0) probes */ if (tp_func->func) { for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if (old[nr_probes].func == tp_func->func && - old[nr_probes].data == tp_func->data) + if ((old[nr_probes].func == tp_func->func && + old[nr_probes].data == tp_func->data) || + old[nr_probes].func == tp_stub_func) nr_del++; } } @@ -208,14 +220,27 @@ static void *func_remove(struct tracepoint_func **funcs, /* N -> M, (N > 1, M > 0) */ /* + 1 for NULL */ new = allocate_probes(nr_probes - nr_del + 1); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if (old[i].func != tp_func->func - || old[i].data != tp_func->data) - new[j++] = old[i]; - new[nr_probes - nr_del].func = NULL; - *funcs = new; + if (new) { + for (i = 0; old[i].func; i++) { + if ((old[i].func != tp_func->func || + old[i].data != tp_func->data) && + old[i].func != tp_stub_func) + new[j++] = old[i]; + } + new[nr_probes - nr_del].func = NULL; + *funcs = new; + } else { + /* + * Failed to allocate, replace the old function + * with calls to tp_stub_func. + */ + for (i = 0; old[i].func; i++) { + if (old[i].func == tp_func->func && + old[i].data == tp_func->data) + WRITE_ONCE(old[i].func, tp_stub_func); + } + *funcs = old; + } } debug_print_probes(*funcs); return old; @@ -295,10 +320,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); old = func_remove(&tp_funcs, func); - if (IS_ERR(old)) { - WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM); + if (WARN_ON_ONCE(IS_ERR(old))) return PTR_ERR(old); - } + + if (tp_funcs == old) + /* Failed allocating new tp_funcs, replaced func with stub */ + return 0; if (!tp_funcs) { /* Removed last function */ diff --git a/tools/Makefile b/tools/Makefile index 85af6ebbce91..7e9d34ddd74c 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -31,6 +31,7 @@ help: @echo ' bootconfig - boot config tool' @echo ' spi - spi tools' @echo ' tmon - thermal monitoring and tuning tool' + @echo ' tracing - misc tracing tools' @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @echo ' usb - USB testing tools' @echo ' virtio - vhost test module' @@ -64,7 +65,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE +cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE $(call descend,$@) bpf/%: FORCE @@ -103,7 +104,7 @@ all: acpi cgroup cpupower gpio hv firewire liblockdep \ perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ - pci debugging + pci debugging tracing acpi_install: $(call descend,power/$(@:_install=),install) @@ -111,7 +112,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -137,7 +138,8 @@ install: acpi_install cgroup_install cpupower_install gpio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ - wmi_install pci_install debugging_install intel-speed-select_install + wmi_install pci_install debugging_install intel-speed-select_install \ + tracing_install acpi_clean: $(call descend,power/acpi,clean) @@ -145,7 +147,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean: +cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -184,6 +186,6 @@ clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \ - intel-speed-select_clean + intel-speed-select_clean tracing_clean .PHONY: FORCE diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc index 59216f3cfb12..2968cdc7df30 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -32,6 +32,10 @@ grep "myevent[[:space:]]u64 var1" synthetic_events # it is not possible to add same name event ! echo "myevent u64 var2" >> synthetic_events +# make sure !synthetic event doesn't require a field +echo "!myevent" >> synthetic_events +echo "myevent u64 var1" >> synthetic_events + # Non-append open will cleanup all events and add new one echo "myevent u64 var2" > synthetic_events diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc index ada594fe16cb..955e3ceea44b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc @@ -1,19 +1,38 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test synthetic_events syntax parser errors -# requires: synthetic_events error_log +# requires: synthetic_events error_log "char name[]' >> synthetic_events":README check_error() { # command-with-error-pos-by-^ ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events' } +check_dyn_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'synthetic_events' "$1" 'dynamic_events' +} + check_error 'myevent ^chr arg' # INVALID_TYPE -check_error 'myevent ^char str[];; int v' # INVALID_TYPE -check_error 'myevent char ^str]; int v' # INVALID_NAME -check_error 'myevent char ^str;[]' # INVALID_NAME -check_error 'myevent ^char str[; int v' # INVALID_TYPE -check_error '^mye;vent char str[]' # BAD_NAME -check_error 'myevent char str[]; ^int' # INVALID_FIELD -check_error '^myevent' # INCOMPLETE_CMD +check_error 'myevent ^unsigned arg' # INCOMPLETE_TYPE + +check_error 'myevent char ^str]; int v' # BAD_NAME +check_error '^mye-vent char str[]' # BAD_NAME +check_error 'myevent char ^st-r[]' # BAD_NAME + +check_error 'myevent char str;^[]' # INVALID_FIELD +check_error 'myevent char str; ^int' # INVALID_FIELD + +check_error 'myevent char ^str[; int v' # INVALID_ARRAY_SPEC +check_error 'myevent char ^str[kdjdk]' # INVALID_ARRAY_SPEC +check_error 'myevent char ^str[257]' # INVALID_ARRAY_SPEC + +check_error '^mye;vent char str[]' # INVALID_CMD +check_error '^myevent ; char str[]' # INVALID_CMD +check_error '^myevent; char str[]' # INVALID_CMD +check_error '^myevent ;char str[]' # INVALID_CMD +check_error '^; char str[]' # INVALID_CMD +check_error '^;myevent char str[]' # INVALID_CMD +check_error '^myevent' # INVALID_CMD + +check_dyn_error '^s:junk/myevent char str[' # INVALID_DYN_CMD exit 0 diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile new file mode 100644 index 000000000000..87e0ec48e2e7 --- /dev/null +++ b/tools/tracing/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../scripts/Makefile.include + +all: latency + +clean: latency_clean + +install: latency_install + +latency: + $(call descend,latency) + +latency_install: + $(call descend,latency,install) + +latency_clean: + $(call descend,latency,clean) + +.PHONY: all install clean latency latency_install latency_clean diff --git a/tools/tracing/latency/.gitignore b/tools/tracing/latency/.gitignore new file mode 100644 index 000000000000..0863960761e7 --- /dev/null +++ b/tools/tracing/latency/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +latency-collector diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile new file mode 100644 index 000000000000..40c4ddaf8be1 --- /dev/null +++ b/tools/tracing/latency/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for vm tools +# +VAR_CFLAGS := $(shell pkg-config --cflags libtracefs 2>/dev/null) +VAR_LDLIBS := $(shell pkg-config --libs libtracefs 2>/dev/null) + +TARGETS = latency-collector +CFLAGS = -Wall -Wextra -g -O2 $(VAR_CFLAGS) +LDFLAGS = -lpthread $(VAR_LDLIBS) + +all: $(TARGETS) + +%: %.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +clean: + $(RM) latency-collector + +prefix ?= /usr/local +sbindir ?= ${prefix}/sbin + +install: all + install -d $(DESTDIR)$(sbindir) + install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir) diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c new file mode 100644 index 000000000000..57b20802e71b --- /dev/null +++ b/tools/tracing/latency/latency-collector.c @@ -0,0 +1,2108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH + * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de) + */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 200809L + +#include <ctype.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <sched.h> +#include <linux/unistd.h> +#include <signal.h> +#include <sys/inotify.h> +#include <unistd.h> +#include <pthread.h> +#include <tracefs.h> + +static const char *prg_name; +static const char *prg_unknown = "unknown program name"; + +static int fd_stdout; + +static int sched_policy; +static bool sched_policy_set; + +static int sched_pri; +static bool sched_pri_set; + +static bool trace_enable = true; +static bool setup_ftrace = true; +static bool use_random_sleep; + +#define TRACE_OPTS \ + C(FUNC_TR, "function-trace"), \ + C(DISP_GR, "display-graph"), \ + C(NR, NULL) + +#undef C +#define C(a, b) OPTIDX_##a + +enum traceopt { + TRACE_OPTS +}; + +#undef C +#define C(a, b) b + +static const char *const optstr[] = { + TRACE_OPTS +}; + +enum errhandling { + ERR_EXIT = 0, + ERR_WARN, + ERR_CLEANUP, +}; + +static bool use_options[OPTIDX_NR]; + +static char inotify_buffer[655360]; + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define bool2str(x) (x ? "true":"false") + +#define DEFAULT_NR_PRINTER_THREADS (3) +static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS; + +#define DEFAULT_TABLE_SIZE (2) +static unsigned int table_startsize = DEFAULT_TABLE_SIZE; + +static int verbosity; + +#define verbose_sizechange() (verbosity >= 1) +#define verbose_lostevent() (verbosity >= 2) +#define verbose_ftrace() (verbosity >= 1) + +#define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0) +#define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0) + +static const char *debug_tracefile; +static const char *debug_tracefile_dflt; +static const char *debug_maxlat; +static const char *debug_maxlat_dflt; +static const char * const DEBUG_NOFILE = "[file not found]"; + +static const char * const TR_MAXLAT = "tracing_max_latency"; +static const char * const TR_THRESH = "tracing_thresh"; +static const char * const TR_CURRENT = "current_tracer"; +static const char * const TR_OPTIONS = "trace_options"; + +static const char * const NOP_TRACER = "nop"; + +static const char * const OPT_NO_PREFIX = "no"; + +#define DFLT_THRESHOLD_US "0" +static const char *threshold = DFLT_THRESHOLD_US; + +#define DEV_URANDOM "/dev/urandom" +#define RT_DEFAULT_PRI (99) +#define DEFAULT_PRI (0) + +#define USEC_PER_MSEC (1000L) +#define NSEC_PER_USEC (1000L) +#define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC) + +#define MSEC_PER_SEC (1000L) +#define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC) +#define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC) + +#define SLEEP_TIME_MS_DEFAULT (1000L) +#define TRY_PRINTMUTEX_MS (1000) + +static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT); + +static const char * const queue_full_warning = +"Could not queue trace for printing. It is likely that events happen faster\n" +"than what they can be printed. Probably partly because of random sleeping\n"; + +static const char * const no_tracer_msg = +"Could not find any tracers! Running this program as root may help!\n"; + +static const char * const no_latency_tr_msg = +"No latency tracers are supported by your kernel!\n"; + +struct policy { + const char *name; + int policy; + int default_pri; +}; + +static const struct policy policies[] = { + { "other", SCHED_OTHER, DEFAULT_PRI }, + { "batch", SCHED_BATCH, DEFAULT_PRI }, + { "idle", SCHED_IDLE, DEFAULT_PRI }, + { "rr", SCHED_RR, RT_DEFAULT_PRI }, + { "fifo", SCHED_FIFO, RT_DEFAULT_PRI }, + { NULL, 0, DEFAULT_PRI } +}; + +/* + * The default tracer will be the first on this list that is supported by the + * currently running Linux kernel. + */ +static const char * const relevant_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + "wakeup", + "wakeup_rt", + "wakeup_dl", + NULL +}; + +/* This is the list of tracers for which random sleep makes sense */ +static const char * const random_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + NULL +}; + +static const char *current_tracer; +static bool force_tracer; + +struct ftrace_state { + char *tracer; + char *thresh; + bool opt[OPTIDX_NR]; + bool opt_valid[OPTIDX_NR]; + pthread_mutex_t mutex; +}; + +struct entry { + int ticket; + int ticket_completed_ref; +}; + +struct print_state { + int ticket_counter; + int ticket_completed; + pthread_mutex_t mutex; + pthread_cond_t cond; + int cnt; + pthread_mutex_t cnt_mutex; +}; + +struct short_msg { + char buf[160]; + int len; +}; + +static struct print_state printstate; +static struct ftrace_state save_state; +volatile sig_atomic_t signal_flag; + +#define PROB_TABLE_MAX_SIZE (1000) + +int probabilities[PROB_TABLE_MAX_SIZE]; + +struct sleep_table { + int *table; + int size; + pthread_mutex_t mutex; +}; + +static struct sleep_table sleeptable; + +#define QUEUE_SIZE (10) + +struct queue { + struct entry entries[QUEUE_SIZE]; + int next_prod_idx; + int next_cons_idx; + pthread_mutex_t mutex; + pthread_cond_t cond; +}; + +#define MAX_THREADS (40) + +struct queue printqueue; +pthread_t printthread[MAX_THREADS]; +pthread_mutex_t print_mtx; +#define PRINT_BUFFER_SIZE (16 * 1024 * 1024) + +static void cleanup_exit(int status); +static int set_trace_opt(const char *opt, bool value); + +static __always_inline void *malloc_or_die(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) { + warn("malloc() failed"); + cleanup_exit(EXIT_FAILURE); + } + return ptr; +} + +static __always_inline void *malloc_or_die_nocleanup(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) + err(0, "malloc() failed"); + return ptr; +} + +static __always_inline void write_or_die(int fd, const char *buf, size_t count) +{ + ssize_t r; + + do { + r = write(fd, buf, count); + if (unlikely(r < 0)) { + if (errno == EINTR) + continue; + warn("write() failed"); + cleanup_exit(EXIT_FAILURE); + } + count -= r; + buf += r; + } while (count > 0); +} + +static __always_inline void clock_gettime_or_die(clockid_t clk_id, + struct timespec *tp) +{ + int r = clock_gettime(clk_id, tp); + + if (unlikely(r != 0)) + err(EXIT_FAILURE, "clock_gettime() failed"); +} + +static __always_inline void sigemptyset_or_die(sigset_t *s) +{ + if (unlikely(sigemptyset(s) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaddset_or_die(sigset_t *s, int signum) +{ + if (unlikely(sigaddset(s, signum) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaction_or_die(int signum, + const struct sigaction *act, + struct sigaction *oldact) +{ + if (unlikely(sigaction(signum, act, oldact) != 0)) { + warn("sigaction() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void open_stdout(void) +{ + if (setvbuf(stdout, NULL, _IONBF, 0) != 0) + err(EXIT_FAILURE, "setvbuf() failed"); + fd_stdout = fileno(stdout); + if (fd_stdout < 0) + err(EXIT_FAILURE, "fileno() failed"); +} + +/* + * It's not worth it to call cleanup_exit() from mutex functions because + * cleanup_exit() uses mutexes. + */ +static __always_inline void mutex_lock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_lock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_lock() failed"); +} + + +static __always_inline void mutex_unlock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_unlock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_unlock() failed"); +} + +static __always_inline void cond_signal(pthread_cond_t *cond) +{ + errno = pthread_cond_signal(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_signal() failed"); +} + +static __always_inline void cond_wait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex) +{ + errno = pthread_cond_wait(cond, mutex); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_wait() failed"); +} + +static __always_inline void cond_broadcast(pthread_cond_t *cond) +{ + errno = pthread_cond_broadcast(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_broadcast() failed"); +} + +static __always_inline void +mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) +{ + errno = pthread_mutex_init(mutex, attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutex_init() failed"); +} + +static __always_inline void mutexattr_init(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_init(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_init() failed"); +} + +static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed"); +} + +static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr, + int type) +{ + errno = pthread_mutexattr_settype(attr, type); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_settype() failed"); +} + +static __always_inline void condattr_init(pthread_condattr_t *attr) +{ + errno = pthread_condattr_init(attr); + if (errno) + err(EXIT_FAILURE, "pthread_condattr_init() failed"); +} + +static __always_inline void condattr_destroy(pthread_condattr_t *attr) +{ + errno = pthread_condattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_condattr_destroy() failed"); +} + +static __always_inline void condattr_setclock(pthread_condattr_t *attr, + clockid_t clock_id) +{ + errno = pthread_condattr_setclock(attr, clock_id); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_condattr_setclock() failed"); +} + +static __always_inline void cond_init(pthread_cond_t *cond, + const pthread_condattr_t *attr) +{ + errno = pthread_cond_init(cond, attr); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int +cond_timedwait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex, + const struct timespec *restrict abstime) +{ + errno = pthread_cond_timedwait(cond, mutex, abstime); + if (errno && errno != ETIMEDOUT) + err(EXIT_FAILURE, "pthread_cond_timedwait() failed"); + return errno; +} + +static void init_printstate(void) +{ + pthread_condattr_t cattr; + + printstate.ticket_counter = 0; + printstate.ticket_completed = 0; + printstate.cnt = 0; + + mutex_init(&printstate.mutex, NULL); + + condattr_init(&cattr); + condattr_setclock(&cattr, CLOCK_MONOTONIC); + cond_init(&printstate.cond, &cattr); + condattr_destroy(&cattr); +} + +static void init_print_mtx(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&print_mtx, &mattr); + mutexattr_destroy(&mattr); + +} + +static void signal_blocking(int how) +{ + sigset_t s; + + sigemptyset_or_die(&s); + sigaddset_or_die(&s, SIGHUP); + sigaddset_or_die(&s, SIGTERM); + sigaddset_or_die(&s, SIGINT); + + errno = pthread_sigmask(how, &s, NULL); + if (unlikely(errno)) { + warn("pthread_sigmask() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void signal_handler(int num) +{ + signal_flag = num; +} + +static void setup_sig_handler(void) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = signal_handler; + + sigaction_or_die(SIGHUP, &sa, NULL); + sigaction_or_die(SIGTERM, &sa, NULL); + sigaction_or_die(SIGINT, &sa, NULL); +} + +static void process_signal(int signal) +{ + char *name; + + name = strsignal(signal); + if (name == NULL) + printf("Received signal %d\n", signal); + else + printf("Received signal %d (%s)\n", signal, name); + cleanup_exit(EXIT_SUCCESS); +} + +static __always_inline void check_signals(void) +{ + int signal = signal_flag; + + if (unlikely(signal)) + process_signal(signal); +} + +static __always_inline void get_time_in_future(struct timespec *future, + long time_us) +{ + long nsec; + + clock_gettime_or_die(CLOCK_MONOTONIC, future); + future->tv_sec += time_us / USEC_PER_SEC; + nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC; + if (nsec >= NSEC_PER_SEC) { + future->tv_nsec = nsec % NSEC_PER_SEC; + future->tv_sec += 1; + } +} + +static __always_inline bool time_has_passed(const struct timespec *time) +{ + struct timespec now; + + clock_gettime_or_die(CLOCK_MONOTONIC, &now); + if (now.tv_sec > time->tv_sec) + return true; + if (now.tv_sec < time->tv_sec) + return false; + return (now.tv_nsec >= time->tv_nsec); +} + +static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms) +{ + long time_us = time_ms * USEC_PER_MSEC; + struct timespec limit; + + get_time_in_future(&limit, time_us); + do { + errno = pthread_mutex_trylock(mutex); + if (errno && errno != EBUSY) + err(EXIT_FAILURE, "pthread_mutex_trylock() failed"); + } while (errno && !time_has_passed(&limit)); + return errno == 0; +} + +static void restore_trace_opts(const struct ftrace_state *state, + const bool *cur) +{ + int i; + int r; + + for (i = 0; i < OPTIDX_NR; i++) + if (state->opt_valid[i] && state->opt[i] != cur[i]) { + r = set_trace_opt(optstr[i], state->opt[i]); + if (r < 0) + warnx("Failed to restore the %s option to %s", + optstr[i], bool2str(state->opt[i])); + else if (verbose_ftrace()) + printf("Restored the %s option in %s to %s\n", + optstr[i], TR_OPTIONS, + bool2str(state->opt[i])); + } +} + +static char *read_file(const char *file, enum errhandling h) +{ + int psize; + char *r; + static const char *emsg = "Failed to read the %s file"; + + r = tracefs_instance_file_read(NULL, file, &psize); + if (!r) { + if (h) { + warn(emsg, file); + if (h == ERR_CLEANUP) + cleanup_exit(EXIT_FAILURE); + } else + errx(EXIT_FAILURE, emsg, file); + } + + if (r && r[psize - 1] == '\n') + r[psize - 1] = '\0'; + return r; +} + +static void restore_file(const char *file, char **saved, const char *cur) +{ + if (*saved && was_changed(*saved, cur)) { + if (tracefs_instance_file_write(NULL, file, *saved) < 0) + warnx("Failed to restore %s to %s!", file, *saved); + else if (verbose_ftrace()) + printf("Restored %s to %s\n", file, *saved); + free(*saved); + *saved = NULL; + } +} + +static void restore_ftrace(void) +{ + mutex_lock(&save_state.mutex); + + restore_file(TR_CURRENT, &save_state.tracer, current_tracer); + restore_file(TR_THRESH, &save_state.thresh, threshold); + restore_trace_opts(&save_state, use_options); + + mutex_unlock(&save_state.mutex); +} + +static void cleanup_exit(int status) +{ + char *maxlat; + + if (!setup_ftrace) + exit(status); + + /* + * We try the print_mtx for 1 sec in order to avoid garbled + * output if possible, but if it cannot be obtained we proceed anyway. + */ + mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS); + + maxlat = read_file(TR_MAXLAT, ERR_WARN); + if (maxlat) { + printf("The maximum detected latency was: %sus\n", maxlat); + free(maxlat); + } + + restore_ftrace(); + /* + * We do not need to unlock the print_mtx here because we will exit at + * the end of this function. Unlocking print_mtx causes problems if a + * print thread happens to be waiting for the mutex because we have + * just changed the ftrace settings to the original and thus the + * print thread would output incorrect data from ftrace. + */ + exit(status); +} + +static void init_save_state(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&save_state.mutex, &mattr); + mutexattr_destroy(&mattr); + + save_state.tracer = NULL; + save_state.thresh = NULL; + save_state.opt_valid[OPTIDX_FUNC_TR] = false; + save_state.opt_valid[OPTIDX_DISP_GR] = false; +} + +static int printstate_next_ticket(struct entry *req) +{ + int r; + + r = ++(printstate.ticket_counter); + req->ticket = r; + req->ticket_completed_ref = printstate.ticket_completed; + cond_broadcast(&printstate.cond); + return r; +} + +static __always_inline +void printstate_mark_req_completed(const struct entry *req) +{ + if (req->ticket > printstate.ticket_completed) + printstate.ticket_completed = req->ticket; +} + +static __always_inline +bool printstate_has_new_req_arrived(const struct entry *req) +{ + return (printstate.ticket_counter != req->ticket); +} + +static __always_inline int printstate_cnt_inc(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = ++printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_dec(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = --printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_read(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline +bool prev_req_won_race(const struct entry *req) +{ + return (printstate.ticket_completed != req->ticket_completed_ref); +} + +static void sleeptable_resize(int size, bool printout, struct short_msg *msg) +{ + int bytes; + + if (printout) { + msg->len = 0; + if (unlikely(size > PROB_TABLE_MAX_SIZE)) + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Cannot increase probability table to %d (maximum size reached)\n", size); + else + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Increasing probability table to %d\n", size); + if (bytes < 0) + warn("snprintf() failed"); + else + msg->len = bytes; + } + + if (unlikely(size < 0)) { + /* Should never happen */ + warnx("Bad program state at %s:%d", __FILE__, __LINE__); + cleanup_exit(EXIT_FAILURE); + return; + } + sleeptable.size = size; + sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size]; +} + +static void init_probabilities(void) +{ + int i; + int j = 1000; + + for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) { + probabilities[i] = 1000 / j; + j--; + } + mutex_init(&sleeptable.mutex, NULL); +} + +static int table_get_probability(const struct entry *req, + struct short_msg *msg) +{ + int diff = req->ticket - req->ticket_completed_ref; + int rval = 0; + + msg->len = 0; + diff--; + /* Should never happen...*/ + if (unlikely(diff < 0)) { + warnx("Programmer assumption error at %s:%d\n", __FILE__, + __LINE__); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&sleeptable.mutex); + if (diff >= (sleeptable.size - 1)) { + rval = sleeptable.table[sleeptable.size - 1]; + sleeptable_resize(sleeptable.size + 1, verbose_sizechange(), + msg); + } else { + rval = sleeptable.table[diff]; + } + mutex_unlock(&sleeptable.mutex); + return rval; +} + +static void init_queue(struct queue *q) +{ + q->next_prod_idx = 0; + q->next_cons_idx = 0; + mutex_init(&q->mutex, NULL); + errno = pthread_cond_init(&q->cond, NULL); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int queue_len(const struct queue *q) +{ + if (q->next_prod_idx >= q->next_cons_idx) + return q->next_prod_idx - q->next_cons_idx; + else + return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx; +} + +static __always_inline int queue_nr_free(const struct queue *q) +{ + int nr_free = QUEUE_SIZE - queue_len(q); + + /* + * If there is only one slot left we will anyway lie and claim that the + * queue is full because adding an element will make it appear empty + */ + if (nr_free == 1) + nr_free = 0; + return nr_free; +} + +static __always_inline void queue_idx_inc(int *idx) +{ + *idx = (*idx + 1) % QUEUE_SIZE; +} + +static __always_inline void queue_push_to_back(struct queue *q, + const struct entry *e) +{ + q->entries[q->next_prod_idx] = *e; + queue_idx_inc(&q->next_prod_idx); +} + +static __always_inline struct entry queue_pop_from_front(struct queue *q) +{ + struct entry e = q->entries[q->next_cons_idx]; + + queue_idx_inc(&q->next_cons_idx); + return e; +} + +static __always_inline void queue_cond_signal(struct queue *q) +{ + cond_signal(&q->cond); +} + +static __always_inline void queue_cond_wait(struct queue *q) +{ + cond_wait(&q->cond, &q->mutex); +} + +static __always_inline int queue_try_to_add_entry(struct queue *q, + const struct entry *e) +{ + int r = 0; + + mutex_lock(&q->mutex); + if (queue_nr_free(q) > 0) { + queue_push_to_back(q, e); + cond_signal(&q->cond); + } else + r = -1; + mutex_unlock(&q->mutex); + return r; +} + +static struct entry queue_wait_for_entry(struct queue *q) +{ + struct entry e; + + mutex_lock(&q->mutex); + while (true) { + if (queue_len(&printqueue) > 0) { + e = queue_pop_from_front(q); + break; + } + queue_cond_wait(q); + } + mutex_unlock(&q->mutex); + + return e; +} + +static const struct policy *policy_from_name(const char *name) +{ + const struct policy *p = &policies[0]; + + while (p->name != NULL) { + if (!strcmp(name, p->name)) + return p; + p++; + } + return NULL; +} + +static const char *policy_name(int policy) +{ + const struct policy *p = &policies[0]; + static const char *rval = "unknown"; + + while (p->name != NULL) { + if (p->policy == policy) + return p->name; + p++; + } + return rval; +} + +static bool is_relevant_tracer(const char *name) +{ + unsigned int i; + + for (i = 0; relevant_tracers[i]; i++) + if (!strcmp(name, relevant_tracers[i])) + return true; + return false; +} + +static bool random_makes_sense(const char *name) +{ + unsigned int i; + + for (i = 0; random_tracers[i]; i++) + if (!strcmp(name, random_tracers[i])) + return true; + return false; +} + +static void show_available(void) +{ + char **tracers; + int found = 0; + int i; + + tracers = tracefs_tracers(NULL); + for (i = 0; tracers && tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + found++; + } + + if (!tracers) { + warnx(no_tracer_msg); + return; + } + + if (!found) { + warnx(no_latency_tr_msg); + tracefs_list_free(tracers); + return; + } + + printf("The following latency tracers are available on your system:\n"); + for (i = 0; tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + printf("%s\n", tracers[i]); + } + tracefs_list_free(tracers); +} + +static bool tracer_valid(const char *name, bool *notracer) +{ + char **tracers; + int i; + bool rval = false; + + *notracer = false; + tracers = tracefs_tracers(NULL); + if (!tracers) { + *notracer = true; + return false; + } + for (i = 0; tracers[i]; i++) + if (!strcmp(tracers[i], name)) { + rval = true; + break; + } + tracefs_list_free(tracers); + return rval; +} + +static const char *find_default_tracer(void) +{ + int i; + bool notracer; + bool valid; + + for (i = 0; relevant_tracers[i]; i++) { + valid = tracer_valid(relevant_tracers[i], ¬racer); + if (notracer) + errx(EXIT_FAILURE, no_tracer_msg); + if (valid) + return relevant_tracers[i]; + } + return NULL; +} + +static bool toss_coin(struct drand48_data *buffer, unsigned int prob) +{ + long r; + + if (unlikely(lrand48_r(buffer, &r))) { + warnx("lrand48_r() failed"); + cleanup_exit(EXIT_FAILURE); + } + r = r % 1000L; + if (r < prob) + return true; + else + return false; +} + + +static long go_to_sleep(const struct entry *req) +{ + struct timespec future; + long delay = sleep_time; + + get_time_in_future(&future, delay); + + mutex_lock(&printstate.mutex); + while (!printstate_has_new_req_arrived(req)) { + cond_timedwait(&printstate.cond, &printstate.mutex, &future); + if (time_has_passed(&future)) + break; + }; + + if (printstate_has_new_req_arrived(req)) + delay = -1; + mutex_unlock(&printstate.mutex); + + return delay; +} + + +static void set_priority(void) +{ + int r; + pid_t pid; + struct sched_param param; + + memset(¶m, 0, sizeof(param)); + param.sched_priority = sched_pri; + + pid = getpid(); + r = sched_setscheduler(pid, sched_policy, ¶m); + + if (r != 0) + err(EXIT_FAILURE, "sched_setscheduler() failed"); +} + +pid_t latency_collector_gettid(void) +{ + return (pid_t) syscall(__NR_gettid); +} + +static void print_priority(void) +{ + pid_t tid; + int policy; + int r; + struct sched_param param; + + tid = latency_collector_gettid(); + r = pthread_getschedparam(pthread_self(), &policy, ¶m); + if (r != 0) { + warn("pthread_getschedparam() failed"); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&print_mtx); + printf("Thread %d runs with scheduling policy %s and priority %d\n", + tid, policy_name(policy), param.sched_priority); + mutex_unlock(&print_mtx); +} + +static __always_inline +void __print_skipmessage(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, bool excuse, + const char *str) +{ + ssize_t bytes = 0; + char *p = &buffer[0]; + long us, sec; + int r; + + sec = timestamp->tv_sec; + us = timestamp->tv_nsec / 1000; + + if (resize_msg != NULL && resize_msg->len > 0) { + strncpy(p, resize_msg->buf, resize_msg->len); + bytes += resize_msg->len; + p += resize_msg->len; + bufspace -= resize_msg->len; + } + + if (excuse) + r = snprintf(p, bufspace, +"%ld.%06ld Latency %d printout skipped due to %s\n", + sec, us, req->ticket, str); + else + r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n", + sec, us, req->ticket); + + if (r < 0) + warn("snprintf() failed"); + else + bytes += r; + + /* These prints could happen concurrently */ + mutex_lock(&print_mtx); + write_or_die(fd_stdout, buffer, bytes); + mutex_unlock(&print_mtx); +} + +static void print_skipmessage(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, + bool excuse) +{ + __print_skipmessage(resize_msg, timestamp, buffer, bufspace, req, + excuse, "random delay"); +} + +static void print_lostmessage(const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, + const char *reason) +{ + __print_skipmessage(NULL, timestamp, buffer, bufspace, req, true, + reason); +} + +static void print_tracefile(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, long slept, + const struct entry *req) +{ + static const int reserve = 256; + char *p = &buffer[0]; + ssize_t bytes = 0; + ssize_t bytes_tot = 0; + long us, sec; + long slept_ms; + int trace_fd; + + /* Save some space for the final string and final null char */ + bufspace = bufspace - reserve - 1; + + if (resize_msg != NULL && resize_msg->len > 0) { + bytes = resize_msg->len; + strncpy(p, resize_msg->buf, bytes); + bytes_tot += bytes; + p += bytes; + bufspace -= bytes; + } + + trace_fd = open(debug_tracefile, O_RDONLY); + + if (trace_fd < 0) { + warn("open() failed on %s", debug_tracefile); + return; + } + + sec = timestamp->tv_sec; + us = timestamp->tv_nsec / 1000; + + if (slept != 0) { + slept_ms = slept / 1000; + bytes = snprintf(p, bufspace, +"%ld.%06ld Latency %d randomly sleep for %ld ms before print\n", + sec, us, req->ticket, slept_ms); + } else { + bytes = snprintf(p, bufspace, + "%ld.%06ld Latency %d immediate print\n", sec, + us, req->ticket); + } + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + + bytes = snprintf(p, bufspace, +">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n" + ); + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + + do { + bytes = read(trace_fd, p, bufspace); + if (bytes < 0) { + if (errno == EINTR) + continue; + warn("read() failed on %s", debug_tracefile); + if (unlikely(close(trace_fd) != 0)) + warn("close() failed on %s", debug_tracefile); + return; + } + if (bytes == 0) + break; + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + } while (true); + + if (unlikely(close(trace_fd) != 0)) + warn("close() failed on %s", debug_tracefile); + + printstate_cnt_dec(); + /* Add the reserve space back to the budget for the final string */ + bufspace += reserve; + + bytes = snprintf(p, bufspace, + ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n"); + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + + bytes_tot += bytes; + + /* These prints could happen concurrently */ + mutex_lock(&print_mtx); + write_or_die(fd_stdout, buffer, bytes_tot); + mutex_unlock(&print_mtx); +} + +static char *get_no_opt(const char *opt) +{ + char *no_opt; + int s; + + s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1; + /* We may be called from cleanup_exit() via set_trace_opt() */ + no_opt = malloc_or_die_nocleanup(s); + strcpy(no_opt, OPT_NO_PREFIX); + strcat(no_opt, opt); + return no_opt; +} + +static char *find_next_optstr(const char *allopt, const char **next) +{ + const char *begin; + const char *end; + char *r; + int s = 0; + + if (allopt == NULL) + return NULL; + + for (begin = allopt; *begin != '\0'; begin++) { + if (isgraph(*begin)) + break; + } + + if (*begin == '\0') + return NULL; + + for (end = begin; *end != '\0' && isgraph(*end); end++) + s++; + + r = malloc_or_die_nocleanup(s + 1); + strncpy(r, begin, s); + r[s] = '\0'; + *next = begin + s; + return r; +} + +static bool get_trace_opt(const char *allopt, const char *opt, bool *found) +{ + *found = false; + char *no_opt; + char *str; + const char *next = allopt; + bool rval = false; + + no_opt = get_no_opt(opt); + + do { + str = find_next_optstr(next, &next); + if (str == NULL) + break; + if (!strcmp(str, opt)) { + *found = true; + rval = true; + free(str); + break; + } + if (!strcmp(str, no_opt)) { + *found = true; + rval = false; + free(str); + break; + } + free(str); + } while (true); + free(no_opt); + + return rval; +} + +static int set_trace_opt(const char *opt, bool value) +{ + char *str; + int r; + + if (value) + str = strdup(opt); + else + str = get_no_opt(opt); + + r = tracefs_instance_file_write(NULL, TR_OPTIONS, str); + free(str); + return r; +} + +void save_trace_opts(struct ftrace_state *state) +{ + char *allopt; + int psize; + int i; + + allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize); + if (!allopt) + errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS); + + for (i = 0; i < OPTIDX_NR; i++) + state->opt[i] = get_trace_opt(allopt, optstr[i], + &state->opt_valid[i]); + + free(allopt); +} + +static void write_file(const char *file, const char *cur, const char *new, + enum errhandling h) +{ + int r; + static const char *emsg = "Failed to write to the %s file!"; + + /* Do nothing if we now that the current and new value are equal */ + if (cur && !needs_change(cur, new)) + return; + + r = tracefs_instance_file_write(NULL, file, new); + if (r < 0) { + if (h) { + warnx(emsg, file); + if (h == ERR_CLEANUP) + cleanup_exit(EXIT_FAILURE); + } else + errx(EXIT_FAILURE, emsg, file); + } + if (verbose_ftrace()) { + mutex_lock(&print_mtx); + printf("%s was set to %s\n", file, new); + mutex_unlock(&print_mtx); + } +} + +static void reset_max_latency(void) +{ + write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP); +} + +static void save_and_disable_tracer(void) +{ + char *orig_th; + char *tracer; + bool need_nop = false; + + mutex_lock(&save_state.mutex); + + save_trace_opts(&save_state); + tracer = read_file(TR_CURRENT, ERR_EXIT); + orig_th = read_file(TR_THRESH, ERR_EXIT); + + if (needs_change(tracer, NOP_TRACER)) { + mutex_lock(&print_mtx); + if (force_tracer) { + printf( + "The %s tracer is already in use but proceeding anyway!\n", + tracer); + } else { + printf( + "The %s tracer is already in use, cowardly bailing out!\n" + "This could indicate that another program or instance is tracing.\n" + "Use the -F [--force] option to disregard the current tracer.\n", tracer); + exit(0); + } + mutex_unlock(&print_mtx); + need_nop = true; + } + + save_state.tracer = tracer; + save_state.thresh = orig_th; + + if (need_nop) + write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT); + + mutex_unlock(&save_state.mutex); +} + +void set_trace_opts(struct ftrace_state *state, bool *new) +{ + int i; + int r; + + /* + * We only set options if we earlier detected that the option exists in + * the trace_options file and that the wanted setting is different from + * the one we saw in save_and_disable_tracer() + */ + for (i = 0; i < OPTIDX_NR; i++) + if (state->opt_valid[i] && + state->opt[i] != new[i]) { + r = set_trace_opt(optstr[i], new[i]); + if (r < 0) { + warnx("Failed to set the %s option to %s", + optstr[i], bool2str(new[i])); + cleanup_exit(EXIT_FAILURE); + } + if (verbose_ftrace()) { + mutex_lock(&print_mtx); + printf("%s in %s was set to %s\n", optstr[i], + TR_OPTIONS, bool2str(new[i])); + mutex_unlock(&print_mtx); + } + } +} + +static void enable_tracer(void) +{ + mutex_lock(&save_state.mutex); + set_trace_opts(&save_state, use_options); + + write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP); + write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP); + + mutex_unlock(&save_state.mutex); +} + +static void tracing_loop(void) +{ + int ifd = inotify_init(); + int wd; + const ssize_t bufsize = sizeof(inotify_buffer); + const ssize_t istructsize = sizeof(struct inotify_event); + char *buf = &inotify_buffer[0]; + ssize_t nr_read; + char *p; + int modified; + struct inotify_event *event; + struct entry req; + char *buffer; + const size_t bufspace = PRINT_BUFFER_SIZE; + struct timespec timestamp; + + print_priority(); + + buffer = malloc_or_die(bufspace); + + if (ifd < 0) + err(EXIT_FAILURE, "inotify_init() failed!"); + + + if (setup_ftrace) { + /* + * We must disable the tracer before resetting the max_latency + */ + save_and_disable_tracer(); + /* + * We must reset the max_latency before the inotify_add_watch() + * call. + */ + reset_max_latency(); + } + + wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY); + if (wd < 0) + err(EXIT_FAILURE, "inotify_add_watch() failed!"); + + if (setup_ftrace) + enable_tracer(); + + signal_blocking(SIG_UNBLOCK); + + while (true) { + modified = 0; + check_signals(); + nr_read = read(ifd, buf, bufsize); + check_signals(); + if (nr_read < 0) { + if (errno == EINTR) + continue; + warn("read() failed on inotify fd!"); + cleanup_exit(EXIT_FAILURE); + } + if (nr_read == bufsize) + warnx("inotify() buffer filled, skipping events"); + if (nr_read < istructsize) { + warnx("read() returned too few bytes on inotify fd"); + cleanup_exit(EXIT_FAILURE); + } + + for (p = buf; p < buf + nr_read;) { + event = (struct inotify_event *) p; + if ((event->mask & IN_MODIFY) != 0) + modified++; + p += istructsize + event->len; + } + while (modified > 0) { + check_signals(); + mutex_lock(&printstate.mutex); + check_signals(); + printstate_next_ticket(&req); + if (printstate_cnt_read() > 0) { + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + if (verbose_lostevent()) { + clock_gettime_or_die(CLOCK_MONOTONIC, + ×tamp); + print_lostmessage(×tamp, buffer, + bufspace, &req, + "inotify loop"); + } + break; + } + mutex_unlock(&printstate.mutex); + if (queue_try_to_add_entry(&printqueue, &req) != 0) { + /* These prints could happen concurrently */ + check_signals(); + mutex_lock(&print_mtx); + check_signals(); + write_or_die(fd_stdout, queue_full_warning, + sizeof(queue_full_warning)); + mutex_unlock(&print_mtx); + } + modified--; + } + } +} + +static void *do_printloop(void *arg) +{ + const size_t bufspace = PRINT_BUFFER_SIZE; + char *buffer; + long *rseed = (long *) arg; + struct drand48_data drandbuf; + long slept = 0; + struct entry req; + int prob = 0; + struct timespec timestamp; + struct short_msg resize_msg; + + print_priority(); + + if (srand48_r(*rseed, &drandbuf) != 0) { + warn("srand48_r() failed!\n"); + cleanup_exit(EXIT_FAILURE); + } + + buffer = malloc_or_die(bufspace); + + while (true) { + req = queue_wait_for_entry(&printqueue); + clock_gettime_or_die(CLOCK_MONOTONIC, ×tamp); + mutex_lock(&printstate.mutex); + if (prev_req_won_race(&req)) { + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + if (verbose_lostevent()) + print_lostmessage(×tamp, buffer, bufspace, + &req, "print loop"); + continue; + } + mutex_unlock(&printstate.mutex); + + /* + * Toss a coin to decide if we want to sleep before printing + * out the backtrace. The reason for this is that opening + * /sys/kernel/debug/tracing/trace will cause a blackout of + * hundreds of ms, where no latencies will be noted by the + * latency tracer. Thus by randomly sleeping we try to avoid + * missing traces systematically due to this. With this option + * we will sometimes get the first latency, some other times + * some of the later ones, in case of closely spaced traces. + */ + if (trace_enable && use_random_sleep) { + slept = 0; + prob = table_get_probability(&req, &resize_msg); + if (!toss_coin(&drandbuf, prob)) + slept = go_to_sleep(&req); + if (slept >= 0) { + /* A print is ongoing */ + printstate_cnt_inc(); + /* + * We will do the printout below so we have to + * mark it as completed while we still have the + * mutex. + */ + mutex_lock(&printstate.mutex); + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + } + } + if (trace_enable) { + /* + * slept < 0 means that we detected another + * notification in go_to_sleep() above + */ + if (slept >= 0) + /* + * N.B. printstate_cnt_dec(); will be called + * inside print_tracefile() + */ + print_tracefile(&resize_msg, ×tamp, buffer, + bufspace, slept, &req); + else + print_skipmessage(&resize_msg, ×tamp, + buffer, bufspace, &req, true); + } else { + print_skipmessage(&resize_msg, ×tamp, buffer, + bufspace, &req, false); + } + } + return NULL; +} + +static void start_printthread(void) +{ + unsigned int i; + long *seed; + int ufd; + + ufd = open(DEV_URANDOM, O_RDONLY); + if (nr_threads > MAX_THREADS) { + warnx( +"Number of requested print threads was %d, max number is %d\n", + nr_threads, MAX_THREADS); + nr_threads = MAX_THREADS; + } + for (i = 0; i < nr_threads; i++) { + seed = malloc_or_die(sizeof(*seed)); + if (ufd < 0 || + read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) { + printf( +"Warning! Using trivial random nummer seed, since %s not available\n", + DEV_URANDOM); + fflush(stdout); + *seed = i; + } + errno = pthread_create(&printthread[i], NULL, do_printloop, + seed); + if (errno) + err(EXIT_FAILURE, "pthread_create()"); + } + if (ufd > 0 && close(ufd) != 0) + warn("close() failed"); +} + +static void show_usage(void) +{ + printf( +"Usage: %s [OPTION]...\n\n" +"Collect closely occurring latencies from %s\n" +"with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, " +"wakeup,\nwakeup_dl, or wakeup_rt.\n\n" + +"The occurrence of a latency is detected by monitoring the file\n" +"%s with inotify.\n\n" + +"The following options are supported:\n\n" + +"-l, --list\t\tList the latency tracers that are supported by the\n" +"\t\t\tcurrently running Linux kernel. If you don't see the\n" +"\t\t\ttracer that you want, you will probably need to\n" +"\t\t\tchange your kernel config and build a new kernel.\n\n" + +"-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n" +"\t\t\ttracer that is supported by the kernel in the following\n" +"\t\t\torder of precedence:\n\n" +"\t\t\tpreemptirqsoff\n" +"\t\t\tpreemptoff\n" +"\t\t\tirqsoff\n" +"\t\t\twakeup\n" +"\t\t\twakeup_rt\n" +"\t\t\twakeup_dl\n" +"\n" +"\t\t\tIf TR is not on the list above, then a warning will be\n" +"\t\t\tprinted.\n\n" + +"-F, --force\t\tProceed even if another ftrace tracer is active. Without\n" +"\t\t\tthis option, the program will refuse to start tracing if\n" +"\t\t\tany other tracer than the nop tracer is active.\n\n" + +"-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n" +"\t\t\tfor the tracer. The default is 0, which means that\n" +"\t\t\ttracing_max_latency will be used. tracing_max_latency is\n" +"\t\t\tset to 0 when the program is started and contains the\n" +"\t\t\tmaximum of the latencies that have been encountered.\n\n" + +"-f, --function\t\tEnable the function-trace option in trace_options. With\n" +"\t\t\tthis option, ftrace will trace the functions that are\n" +"\t\t\texecuted during a latency, without it we only get the\n" +"\t\t\tbeginning, end, and backtrace.\n\n" + +"-g, --graph\t\tEnable the display-graph option in trace_option. This\n" +"\t\t\toption causes ftrace to show the functionph of how\n" +"\t\t\tfunctions are calling other functions.\n\n" + +"-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n" +"\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n" +"\t\t\tusing rr or fifo, remember that these policies may cause\n" +"\t\t\tother tasks to experience latencies.\n\n" + +"-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n" +"\t\t\tof PRI depends on the scheduling policy.\n\n" + +"-n, --notrace\t\tIf latency is detected, do not print out the content of\n" +"\t\t\tthe trace file to standard output\n\n" + +"-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" + +"-r, --random\t\tArbitrarily sleep a certain amount of time, default\n" +"\t\t\t%ld ms, before reading the trace file. The\n" +"\t\t\tprobabilities for sleep are chosen so that the\n" +"\t\t\tprobability of obtaining any of a cluster of closely\n" +"\t\t\toccurring latencies are equal, i.e. we will randomly\n" +"\t\t\tchoose which one we collect from the trace file.\n\n" +"\t\t\tThis option is probably only useful with the irqsoff,\n" +"\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n" + +"-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n" +"\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n" +"\t\t\tlatencies are detected during a run, this value will\n" +"\t\t\tautomatically be increased to NRLAT + 1 and then to\n" +"\t\t\tNRLAT + 2 and so on. The default is %d. This option\n" +"\t\t\timplies -r. We need to know this number in order to\n" +"\t\t\tbe able to calculate the probabilities of sleeping.\n" +"\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n" +"\t\t\tdo an immediate printout will be:\n\n" +"\t\t\t1/NRLAT 1/(NRLAT - 1) ... 1/3 1/2 1\n\n" +"\t\t\tThe probability of sleeping will be:\n\n" +"\t\t\t1 - P, where P is from the series above\n\n" +"\t\t\tThis descending probability will cause us to choose\n" +"\t\t\tan occurrence at random. Observe that the final\n" +"\t\t\tprobability is 0, it is when we reach this probability\n" +"\t\t\tthat we increase NRLAT automatically. As an example,\n" +"\t\t\twith the default value of 2, the probabilities will be:\n\n" +"\t\t\t1/2 0\n\n" +"\t\t\tThis means, when a latency is detected we will sleep\n" +"\t\t\twith 50%% probability. If we ever detect another latency\n" +"\t\t\tduring the sleep period, then the probability of sleep\n" +"\t\t\twill be 0%% and the table will be expanded to:\n\n" +"\t\t\t1/3 1/2 0\n\n" + +"-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n" +"\t\t\tthen print a message every time that the NRLAT value\n" +"\t\t\tis automatically increased. It also causes a message to\n" +"\t\t\tbe printed when the ftrace settings are changed. If this\n" +"\t\t\toption is given at least twice, then also print a\n" +"\t\t\twarning for lost events.\n\n" + +"-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n" +"\t\t\tprinting out the trace from the trace file. The default\n" +"\t\t\tis %ld ms. This option implies -r.\n\n" + +"-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n" +"\t\t\tconfigures the ftrace files in sysfs such as\n" +"\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n" + +"-i, --tracefile FILE\tUse FILE as trace file. The default is\n" +"\t\t\t%s.\n" +"\t\t\tThis options implies -x\n\n" + +"-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n" +"\t\t\t%s.\n" +"\t\t\tThis options implies -x\n\n" +, +prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS, +SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT, +debug_tracefile_dflt, debug_maxlat_dflt); +} + +static void find_tracefiles(void) +{ + debug_tracefile_dflt = tracefs_get_tracing_file("trace"); + if (debug_tracefile_dflt == NULL) { + /* This is needed in show_usage() */ + debug_tracefile_dflt = DEBUG_NOFILE; + } + + debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency"); + if (debug_maxlat_dflt == NULL) { + /* This is needed in show_usage() */ + debug_maxlat_dflt = DEBUG_NOFILE; + } + + debug_tracefile = debug_tracefile_dflt; + debug_maxlat = debug_maxlat_dflt; +} + +bool alldigits(const char *s) +{ + for (; *s != '\0'; s++) + if (!isdigit(*s)) + return false; + return true; +} + +void check_alldigits(const char *optarg, const char *argname) +{ + if (!alldigits(optarg)) + errx(EXIT_FAILURE, + "The %s parameter expects a decimal argument\n", argname); +} + +static void scan_arguments(int argc, char *argv[]) +{ + int c; + int i; + int option_idx = 0; + + static struct option long_options[] = { + { "list", no_argument, 0, 'l' }, + { "tracer", required_argument, 0, 't' }, + { "force", no_argument, 0, 'F' }, + { "threshold", required_argument, 0, 's' }, + { "function", no_argument, 0, 'f' }, + { "graph", no_argument, 0, 'g' }, + { "policy", required_argument, 0, 'c' }, + { "priority", required_argument, 0, 'p' }, + { "help", no_argument, 0, 'h' }, + { "notrace", no_argument, 0, 'n' }, + { "random", no_argument, 0, 'r' }, + { "nrlat", required_argument, 0, 'a' }, + { "threads", required_argument, 0, 'e' }, + { "time", required_argument, 0, 'u' }, + { "verbose", no_argument, 0, 'v' }, + { "no-ftrace", no_argument, 0, 'x' }, + { "tracefile", required_argument, 0, 'i' }, + { "max-lat", required_argument, 0, 'm' }, + { 0, 0, 0, 0 } + }; + const struct policy *p; + int max, min; + int value; + bool notracer, valid; + + /* + * We must do this before parsing the arguments because show_usage() + * needs to display these. + */ + find_tracefiles(); + + while (true) { + c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:", + long_options, &option_idx); + if (c == -1) + break; + + switch (c) { + case 'l': + show_available(); + exit(0); + break; + case 't': + current_tracer = strdup(optarg); + if (!is_relevant_tracer(current_tracer)) { + warnx("%s is not a known latency tracer!\n", + current_tracer); + } + valid = tracer_valid(current_tracer, ¬racer); + if (notracer) + errx(EXIT_FAILURE, no_tracer_msg); + if (!valid) + errx(EXIT_FAILURE, +"The tracer %s is not supported by your kernel!\n", current_tracer); + break; + case 'F': + force_tracer = true; + break; + case 's': + check_alldigits(optarg, "-s [--threshold]"); + threshold = strdup(optarg); + break; + case 'f': + use_options[OPTIDX_FUNC_TR] = true; + break; + case 'g': + use_options[OPTIDX_DISP_GR] = true; + break; + case 'c': + p = policy_from_name(optarg); + if (p != NULL) { + sched_policy = p->policy; + sched_policy_set = true; + if (!sched_pri_set) { + sched_pri = p->default_pri; + sched_pri_set = true; + } + } else { + warnx("Unknown scheduling %s\n", optarg); + show_usage(); + exit(0); + } + break; + case 'p': + check_alldigits(optarg, "-p [--priority]"); + sched_pri = atoi(optarg); + sched_pri_set = true; + break; + case 'h': + show_usage(); + exit(0); + break; + case 'n': + trace_enable = false; + use_random_sleep = false; + break; + case 'e': + check_alldigits(optarg, "-e [--threads]"); + value = atoi(optarg); + if (value > 0) + nr_threads = value; + else { + warnx("NRTHR must be > 0\n"); + show_usage(); + exit(0); + } + break; + case 'u': + check_alldigits(optarg, "-u [--time]"); + value = atoi(optarg); + if (value < 0) { + warnx("TIME must be >= 0\n"); + show_usage(); + ; + } + trace_enable = true; + use_random_sleep = true; + sleep_time = value * USEC_PER_MSEC; + break; + case 'v': + verbosity++; + break; + case 'r': + trace_enable = true; + use_random_sleep = true; + break; + case 'a': + check_alldigits(optarg, "-a [--nrlat]"); + value = atoi(optarg); + if (value <= 0) { + warnx("NRLAT must be > 0\n"); + show_usage(); + exit(0); + } + trace_enable = true; + use_random_sleep = true; + table_startsize = value; + break; + case 'x': + setup_ftrace = false; + break; + case 'i': + setup_ftrace = false; + debug_tracefile = strdup(optarg); + break; + case 'm': + setup_ftrace = false; + debug_maxlat = strdup(optarg); + break; + default: + show_usage(); + exit(0); + break; + } + } + + if (setup_ftrace) { + if (!current_tracer) { + current_tracer = find_default_tracer(); + if (!current_tracer) + errx(EXIT_FAILURE, +"No default tracer found and tracer not specified\n"); + } + + if (use_random_sleep && !random_makes_sense(current_tracer)) { + warnx("WARNING: The tracer is %s and random sleep has", + current_tracer); + fprintf(stderr, +"been enabled. Random sleep is intended for the following tracers:\n"); + for (i = 0; random_tracers[i]; i++) + fprintf(stderr, "%s\n", random_tracers[i]); + fprintf(stderr, "\n"); + } + } + + if (debug_tracefile == DEBUG_NOFILE || + debug_maxlat == DEBUG_NOFILE) + errx(EXIT_FAILURE, +"Could not find tracing directory e.g. /sys/kernel/tracing\n"); + + if (!sched_policy_set) { + sched_policy = SCHED_RR; + sched_policy_set = true; + if (!sched_pri_set) { + sched_pri = RT_DEFAULT_PRI; + sched_pri_set = true; + } + } + + max = sched_get_priority_max(sched_policy); + min = sched_get_priority_min(sched_policy); + + if (sched_pri < min) { + printf( +"ATTENTION: Increasing priority to minimum, which is %d\n", min); + sched_pri = min; + } + if (sched_pri > max) { + printf( +"ATTENTION: Reducing priority to maximum, which is %d\n", max); + sched_pri = max; + } +} + +static void show_params(void) +{ + printf( +"\n" +"Running with scheduling policy %s and priority %d. Using %d print threads.\n", + policy_name(sched_policy), sched_pri, nr_threads); + if (trace_enable) { + if (use_random_sleep) { + printf( +"%s will be printed with random delay\n" +"Start size of the probability table:\t\t\t%d\n" +"Print a message when the prob. table changes size:\t%s\n" +"Print a warning when an event has been lost:\t\t%s\n" +"Sleep time is:\t\t\t\t\t\t%ld ms\n", +debug_tracefile, +table_startsize, +bool2str(verbose_sizechange()), +bool2str(verbose_lostevent()), +sleep_time / USEC_PER_MSEC); + } else { + printf("%s will be printed immediately\n", + debug_tracefile); + } + } else { + printf("%s will not be printed\n", + debug_tracefile); + } + if (setup_ftrace) { + printf("Tracer:\t\t\t\t\t\t\t%s\n" + "%s option:\t\t\t\t\t%s\n" + "%s option:\t\t\t\t\t%s\n", + current_tracer, + optstr[OPTIDX_FUNC_TR], + bool2str(use_options[OPTIDX_FUNC_TR]), + optstr[OPTIDX_DISP_GR], + bool2str(use_options[OPTIDX_DISP_GR])); + if (!strcmp(threshold, "0")) + printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n"); + else + printf("Threshold:\t\t\t\t\t\t%s\n", threshold); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + init_save_state(); + signal_blocking(SIG_BLOCK); + setup_sig_handler(); + open_stdout(); + + if (argc >= 1) + prg_name = argv[0]; + else + prg_name = prg_unknown; + + scan_arguments(argc, argv); + show_params(); + + init_printstate(); + init_print_mtx(); + if (use_random_sleep) { + init_probabilities(); + if (verbose_sizechange()) + printf("Initializing probability table to %d\n", + table_startsize); + sleeptable_resize(table_startsize, false, NULL); + } + set_priority(); + init_queue(&printqueue); + start_printthread(); + tracing_loop(); + return 0; +} |