From 55ea4cf403800af2ce6b125bc3d853117e0c0456 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 27 Nov 2020 11:20:58 -0500 Subject: ring-buffer: Update write stamp with the correct ts The write stamp, used to calculate deltas between events, was updated with the stale "ts" value in the "info" structure, and not with the updated "ts" variable. This caused the deltas between events to be inaccurate, and when crossing into a new sub buffer, had time go backwards. Link: https://lkml.kernel.org/r/20201124223917.795844-1-elavila@google.com Cc: stable@vger.kernel.org Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Reported-by: "J. Avila" Tested-by: Daniel Mentz Tested-by: Will McVicker Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index dc83b3fa9fe7..bccaf88d3706 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3291,7 +3291,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* Nothing came after this event between C and E */ info->delta = ts - info->after; (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, info->ts); + info->after, ts); info->ts = ts; } else { /* -- cgit v1.2.3 From 8785f51a17083eee7c37606079c6447afc6ba102 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 28 Nov 2020 10:15:17 +0100 Subject: ring-buffer: Set the right timestamp in the slow path of __rb_reserve_next() In the slow path of __rb_reserve_next() a nested event(s) can happen between evaluating the timestamp delta of the current event and updating write_stamp via local_cmpxchg(); in this case the delta is not valid anymore and it should be set to 0 (same timestamp as the interrupting event), since the event that we are currently processing is not the last event in the buffer. Link: https://lkml.kernel.org/r/X8IVJcp1gRE+FJCJ@xps-13-7390 Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Link: https://lwn.net/Articles/831207 Fixes: a389d86f7fd0 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Andrea Righi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bccaf88d3706..35d91b20d47a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3287,11 +3287,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, ts = rb_time_stamp(cpu_buffer->buffer); barrier(); /*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && - info->after < ts) { + info->after < ts && + rb_time_cmpxchg(&cpu_buffer->write_stamp, + info->after, ts)) { /* Nothing came after this event between C and E */ info->delta = ts - info->after; - (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, ts); info->ts = ts; } else { /* -- cgit v1.2.3 From 310e3a4b5a4fc718a72201c1e4cf5c64ac6f5442 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 18 Nov 2020 15:05:20 +0300 Subject: tracing: Remove WARN_ON in start_thread() This patch reverts commit 978defee11a5 ("tracing: Do a WARN_ON() if start_thread() in hwlat is called when thread exists") .start hook can be legally called several times if according tracer is stopped screen window 1 [root@localhost ~]# echo 1 > /sys/kernel/tracing/events/kmem/kfree/enable [root@localhost ~]# echo 1 > /sys/kernel/tracing/options/pause-on-trace [root@localhost ~]# less -F /sys/kernel/tracing/trace screen window 2 [root@localhost ~]# cat /sys/kernel/debug/tracing/tracing_on 0 [root@localhost ~]# echo hwlat > /sys/kernel/debug/tracing/current_tracer [root@localhost ~]# echo 1 > /sys/kernel/debug/tracing/tracing_on [root@localhost ~]# cat /sys/kernel/debug/tracing/tracing_on 0 [root@localhost ~]# echo 2 > /sys/kernel/debug/tracing/tracing_on triggers warning in dmesg: WARNING: CPU: 3 PID: 1403 at kernel/trace/trace_hwlat.c:371 hwlat_tracer_start+0xc9/0xd0 Link: https://lkml.kernel.org/r/bd4d3e70-400d-9c82-7b73-a2d695e86b58@virtuozzo.com Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 978defee11a5 ("tracing: Do a WARN_ON() if start_thread() in hwlat is called when thread exists") Signed-off-by: Vasily Averin Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_hwlat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index c9ad5c6fbaad..d071fc271eef 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -368,7 +368,7 @@ static int start_kthread(struct trace_array *tr) struct task_struct *kthread; int next_cpu; - if (WARN_ON(hwlat_kthread)) + if (hwlat_kthread) return 0; /* Just pick the first CPU on first iteration */ -- cgit v1.2.3 From 8fa655a3a0013a0c2a2aada6f39a93ee6fc25549 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 25 Nov 2020 14:56:54 -0800 Subject: tracing: Fix alignment of static buffer With 5.9 kernel on ARM64, I found ftrace_dump output was broken but it had no problem with normal output "cat /sys/kernel/debug/tracing/trace". With investigation, it seems coping the data into temporal buffer seems to break the align binary printf expects if the static buffer is not aligned with 4-byte. IIUC, get_arg in bstr_printf expects that args has already right align to be decoded and seq_buf_bprintf says ``the arguments are saved in a 32bit word array that is defined by the format string constraints``. So if we don't keep the align under copy to temporal buffer, the output will be broken by shifting some bytes. This patch fixes it. Link: https://lkml.kernel.org/r/20201125225654.1618966-1-minchan@kernel.org Cc: Fixes: 8e99cf91b99bb ("tracing: Do not allocate buffer in trace_find_next_entry() in atomic") Signed-off-by: Namhyung Kim Signed-off-by: Minchan Kim Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 410cfeb16db5..7d53c5bdea3e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3534,7 +3534,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, } #define STATIC_TEMP_BUF_SIZE 128 -static char static_temp_buf[STATIC_TEMP_BUF_SIZE]; +static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); /* Find the next real entry, without updating the iterator itself */ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, -- cgit v1.2.3 From 4c75b0ff4e4bf7a45b5aef9639799719c28d0073 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 26 Nov 2020 23:38:38 +0530 Subject: ftrace: Fix updating FTRACE_FL_TRAMP On powerpc, kprobe-direct.tc triggered FTRACE_WARN_ON() in ftrace_get_addr_new() followed by the below message: Bad trampoline accounting at: 000000004222522f (wake_up_process+0xc/0x20) (f0000001) The set of steps leading to this involved: - modprobe ftrace-direct-too - enable_probe - modprobe ftrace-direct - rmmod ftrace-direct <-- trigger The problem turned out to be that we were not updating flags in the ftrace record properly. From the above message about the trampoline accounting being bad, it can be seen that the ftrace record still has FTRACE_FL_TRAMP set though ftrace-direct module is going away. This happens because we are checking if any ftrace_ops has the FTRACE_FL_TRAMP flag set _before_ updating the filter hash. The fix for this is to look for any _other_ ftrace_ops that also needs FTRACE_FL_TRAMP. Link: https://lkml.kernel.org/r/56c113aa9c3e10c19144a36d9684c7882bf09af5.1606412433.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: a124692b698b0 ("ftrace: Enable trampoline when rec count returns back to one") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8185f7240095..9c1bba8cc51b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1629,6 +1629,8 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude); +static struct ftrace_ops * ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, @@ -1778,7 +1780,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, * to it. */ if (ftrace_rec_count(rec) == 1 && - ftrace_find_tramp_ops_any(rec)) + ftrace_find_tramp_ops_any_other(rec, ops)) rec->flags |= FTRACE_FL_TRAMP; else rec->flags &= ~FTRACE_FL_TRAMP; @@ -2244,6 +2246,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) return NULL; } +static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude) +{ + struct ftrace_ops *op; + unsigned long ip = rec->ip; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + + if (op == op_exclude || !op->trampoline) + continue; + + if (hash_contains_ip(ip, op->func_hash)) + return op; + } while_for_each_ftrace_op(op); + + return NULL; +} + static struct ftrace_ops * ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *op) -- cgit v1.2.3 From 49a962c075dfa41c78e34784772329bc8784d217 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 26 Nov 2020 23:38:39 +0530 Subject: ftrace: Fix DYNAMIC_FTRACE_WITH_DIRECT_CALLS dependency DYNAMIC_FTRACE_WITH_DIRECT_CALLS should depend on DYNAMIC_FTRACE_WITH_REGS since we need ftrace_regs_caller(). Link: https://lkml.kernel.org/r/fc4b257ea8689a36f086d2389a9ed989496ca63a.1606412433.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: 763e34e74bb7d5c ("ftrace: Add register_ftrace_direct()") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a4020c0b4508..e1bf5228fb69 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -202,7 +202,7 @@ config DYNAMIC_FTRACE_WITH_REGS config DYNAMIC_FTRACE_WITH_DIRECT_CALLS def_bool y - depends on DYNAMIC_FTRACE + depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS config FUNCTION_PROFILER -- cgit v1.2.3 From 68e10d5ff512b503dcba1246ad5620f32035e135 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 30 Nov 2020 23:16:03 -0500 Subject: ring-buffer: Always check to put back before stamp when crossing pages The current ring buffer logic checks to see if the updating of the event buffer was interrupted, and if it is, it will try to fix up the before stamp with the write stamp to make them equal again. This logic is flawed, because if it is not interrupted, the two are guaranteed to be different, as the current event just updated the before stamp before allocation. This guarantees that the next event (this one or another interrupting one) will think it interrupted the time updates of a previous event and inject an absolute time stamp to compensate. The correct logic is to always update the timestamps when traversing to a new sub buffer. Cc: stable@vger.kernel.org Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 35d91b20d47a..a6268e09160a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3234,14 +3234,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) { - if (tail != w) { - /* before and after may now different, fix it up*/ - b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - if (a_ok && b_ok && info->before != info->after) - (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, - info->before, info->after); - } + /* before and after may now different, fix it up*/ + b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); + a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); + if (a_ok && b_ok && info->before != info->after) + (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, + info->before, info->after); return rb_move_tail(cpu_buffer, tail, info); } -- cgit v1.2.3