1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
|
// SPDX-License-Identifier: GPL-2.0
/*
* Stack trace utility functions etc.
*
* Copyright 2008 Christoph Hellwig, IBM Corp.
* Copyright 2018 SUSE Linux GmbH
* Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
*/
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <linux/ftrace.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
/*
 * Save stack-backtrace addresses into a stack_trace buffer.
 *
 * Starting at @sp, follow the frame back-chain (word 0 of each frame) for as
 * long as validate_sp() accepts the frame address, recording the saved LR
 * slot of every frame visited.
 *
 * @trace:     buffer to fill; the first ->skip eligible entries are dropped
 *             instead of being stored.
 * @sp:        address of the first stack frame to examine.
 * @tsk:       task owning the stack; passed to validate_sp() for each frame.
 * @savesched: when zero, addresses inside scheduler functions are left out
 *             (they neither get stored nor consume ->skip).
 */
static void save_context_stack(struct stack_trace *trace, unsigned long sp,
			struct task_struct *tsk, int savesched)
{
	/*
	 * The capacity check inside the loop only runs after an entry has
	 * been stored, so a buffer that is already full on entry (including
	 * max_entries == 0) would be written one slot past its end.
	 */
	if (trace->nr_entries >= trace->max_entries)
		return;

	for (;;) {
		unsigned long *stack = (unsigned long *) sp;
		unsigned long newsp, ip;

		/* Stop as soon as the frame address is not a valid stack slot. */
		if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
			return;

		newsp = stack[0];
		ip = stack[STACK_FRAME_LR_SAVE];

		if (savesched || !in_sched_functions(ip)) {
			if (!trace->skip)
				trace->entries[trace->nr_entries++] = ip;
			else
				trace->skip--;
		}

		/* Buffer is full: nothing more can be recorded. */
		if (trace->nr_entries >= trace->max_entries)
			return;

		sp = newsp;
	}
}
/*
 * Record the call chain of the running task into @trace, beginning at the
 * frame reported by current_stack_frame(). Scheduler functions are kept
 * in the trace (savesched is passed as 1).
 */
void save_stack_trace(struct stack_trace *trace)
{
	save_context_stack(trace, current_stack_frame(), current, 1);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
/*
 * Record the call chain of @tsk into @trace, leaving out addresses that lie
 * in scheduler functions (savesched is passed as 0).
 *
 * Nothing is recorded when a reference on the task's stack cannot be taken.
 * For the running task the walk starts at current_stack_frame(); for any
 * other task it starts at the stack pointer saved in tsk->thread.ksp.
 */
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
	unsigned long start_sp;

	if (!try_get_task_stack(tsk))
		return;

	start_sp = (tsk == current) ? current_stack_frame() : tsk->thread.ksp;
	save_context_stack(trace, start_sp, tsk, 0);

	put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
/*
 * Record a call chain into @trace using the value of gpr[1] held in @regs
 * as the starting stack pointer. The walk is validated against the running
 * task's stack and leaves out scheduler functions (savesched is 0).
 */
void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
	unsigned long start_sp = regs->gpr[1];

	save_context_stack(trace, start_sp, current, 0);
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
/*
* Walk the kernel stack of @tsk frame by frame and record the return
* addresses into @trace, giving up as soon as any frame fails a sanity check.
*
* This function returns an error if it detects any unreliable features of the
* stack. Otherwise it guarantees that the stack trace is reliable.
*
* If the task is not 'current', the caller *must* ensure the task is inactive.
*
* @tsk: task whose stack is walked.
* @trace: output buffer; the first ->skip recordable entries are dropped, the
* rest are appended to ->entries and counted in ->nr_entries.
*
* Return: 0 when the backlink chain ends exactly at the expected stack end,
* -EINVAL when a sanity check fails, -E2BIG when @trace is already full at
* the point where another frame is about to be recorded.
*/
static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
struct stack_trace *trace)
{
unsigned long sp;
unsigned long newsp;
unsigned long stack_page = (unsigned long)task_stack_page(tsk);
unsigned long stack_end;
int graph_idx = 0;
bool firstframe;
/*
* Start from the end of the task's stack area; the adjustment below turns
* this into the address the last backlink is expected to point to.
*/
stack_end = stack_page + THREAD_SIZE;
if (!is_idle_task(tsk)) {
/*
* For user tasks, this is the SP value loaded on
* kernel entry, see "PACAKSAVE(r13)" in _switch() and
* system_call_common()/EXCEPTION_PROLOG_COMMON().
*
* Likewise for non-swapper kernel threads,
* this also happens to be the top of the stack
* as setup by copy_thread().
*
* Note that stack backlinks are not properly setup by
* copy_thread() and thus, a forked task() will have
* an unreliable stack trace until it's been
* _switch()'ed to for the first time.
*/
stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
} else {
/*
* idle tasks have a custom stack layout,
* c.f. cpu_idle_thread_init().
*/
stack_end -= STACK_FRAME_OVERHEAD;
}
/* Pick the frame to start from: live SP for current, saved SP otherwise. */
if (tsk == current)
sp = current_stack_frame();
else
sp = tsk->thread.ksp;
/*
* Reject a starting SP outside the range
* [stack_page + sizeof(struct thread_struct),
* stack_end - STACK_FRAME_MIN_SIZE].
*/
if (sp < stack_page + sizeof(struct thread_struct) ||
sp > stack_end - STACK_FRAME_MIN_SIZE) {
return -EINVAL;
}
/*
* Follow the backlinks until one lands exactly on stack_end. Note that
* 'continue' still runs the loop's step expression, so sp advances to
* newsp and firstframe is cleared.
*/
for (firstframe = true; sp != stack_end;
firstframe = false, sp = newsp) {
unsigned long *stack = (unsigned long *) sp;
unsigned long ip;
/* sanity check: ABI requires SP to be aligned 16 bytes. */
if (sp & 0xF)
return -EINVAL;
/* Word 0 of the frame is the backlink to the next frame up. */
newsp = stack[0];
/* Stack grows downwards; unwinder may only go up. */
if (newsp <= sp)
return -EINVAL;
/*
* The next frame must either be the terminating one (stack_end) or
* leave room for at least a minimal frame below stack_end.
*/
if (newsp != stack_end &&
newsp > stack_end - STACK_FRAME_MIN_SIZE) {
return -EINVAL; /* invalid backlink, too far up. */
}
/*
* We can only trust the bottom frame's backlink, the
* rest of the frame may be uninitialized, continue to
* the next.
*/
if (firstframe)
continue;
/* Mark stacktraces with exception frames as unreliable. */
if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
return -EINVAL;
}
/* Examine the saved LR: it must point into kernel code. */
ip = stack[STACK_FRAME_LR_SAVE];
if (!__kernel_text_address(ip))
return -EINVAL;
/*
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
/*
* NOTE(review): presumably maps a function-graph tracer trampoline
* address back to the original return address - confirm against the
* ftrace documentation.
*/
ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
/*
* Mark stacktraces with kretprobed functions on them
* as unreliable.
*/
if (ip == (unsigned long)kretprobe_trampoline)
return -EINVAL;
#endif
/*
* No room left while frames remain: fail with -E2BIG rather than
* return a truncated trace. This check runs even for frames that
* would only have been consumed by ->skip.
*/
if (trace->nr_entries >= trace->max_entries)
return -E2BIG;
if (!trace->skip)
trace->entries[trace->nr_entries++] = ip;
else
trace->skip--;
}
return 0;
}
/*
 * Take a reference on the stack of @tsk, run the reliable unwinder on it and
 * drop the reference again.
 *
 * Return: the result of __save_stack_trace_tsk_reliable(), or 0 when no
 * stack reference could be taken: a task without a stack (e.g., a zombie)
 * has a "reliably" empty trace.
 */
int save_stack_trace_tsk_reliable(struct task_struct *tsk,
				  struct stack_trace *trace)
{
	int ret = 0;

	if (try_get_task_stack(tsk)) {
		ret = __save_stack_trace_tsk_reliable(tsk, trace);
		put_task_stack(tsk);
	}

	return ret;
}
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
/*
* Callback handed to smp_send_safe_nmi_ipi() by raise_backtrace_ipi(); it
* simply forwards @regs to nmi_cpu_backtrace(). raise_backtrace_ipi() also
* calls it directly with NULL @regs for the local CPU.
*/
static void handle_backtrace_ipi(struct pt_regs *regs)
{
nmi_cpu_backtrace(regs);
}
/*
 * Ask every CPU in @mask to print its backtrace, then report on the CPUs
 * that are still set in @mask afterwards.
 *
 * The local CPU runs the handler directly; every other CPU is sent an NMI IPI
 * with a 5 second timeout. Each CPU left in @mask is cleared from it and
 * diagnosed from its paca instead: soft-mask/MCE/NMI state, the task it
 * claims to be running, and a backtrace from its saved r1, which may be
 * stale.
 *
 * NOTE(review): this relies on responding CPUs removing themselves from
 * @mask (presumably inside nmi_cpu_backtrace()) - confirm.
 */
static void raise_backtrace_ipi(cpumask_t *mask)
{
	unsigned int cpu;

	/* Pass 1: request a backtrace from each CPU in the mask. */
	for_each_cpu(cpu, mask) {
		if (cpu != smp_processor_id())
			smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
		else
			handle_backtrace_ipi(NULL);
	}

	/* Pass 2: whatever is still set gets inspected via its paca. */
	for_each_cpu(cpu, mask) {
		struct paca_struct *paca = paca_ptrs[cpu];

		cpumask_clear_cpu(cpu, mask);

		pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
		if (!virt_addr_valid(paca)) {
			pr_warn("paca pointer appears corrupt? (%px)\n", paca);
			continue;
		}

		/* No trailing newline: one of the pr_cont() calls below ends the line. */
		pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
			paca->irq_soft_mask, paca->in_mce, paca->in_nmi);

		if (!virt_addr_valid(paca->__current))
			pr_cont(" current pointer corrupt? (%px)\n", paca->__current);
		else
			pr_cont(" current: %d (%s)\n", paca->__current->pid,
				paca->__current->comm);

		pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", paca->saved_r1);
		show_stack(paca->__current, (unsigned long *)paca->saved_r1, KERN_WARNING);
	}
}
/*
* Trigger backtraces on the CPUs in @mask by delegating to
* nmi_trigger_cpumask_backtrace(), with raise_backtrace_ipi() supplied as
* the callback that raises the IPIs. @exclude_self is passed through
* unchanged.
*/
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
}
#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
|