summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/syscall.c5
-rw-r--r--kernel/bpf/verifier.c145
-rw-r--r--kernel/fork.c8
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/kprobes.c8
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/sched/core.c25
-rw-r--r--kernel/sched/idle.c2
-rw-r--r--kernel/sched/sched.h2
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/ftrace.c12
-rw-r--r--kernel/trace/trace.c34
-rw-r--r--kernel/trace/trace.h5
-rw-r--r--kernel/trace/trace_kprobe.c5
14 files changed, 221 insertions, 38 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fd2411fd6914..265a0d854e33 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -783,7 +783,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD kern_version
+#define BPF_PROG_LOAD_LAST_FIELD prog_flags
static int bpf_prog_load(union bpf_attr *attr)
{
@@ -796,6 +796,9 @@ static int bpf_prog_load(union bpf_attr *attr)
if (CHECK_ATTR(BPF_PROG_LOAD))
return -EINVAL;
+ if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
+ return -EINVAL;
+
/* copy eBPF program license from user space */
if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
sizeof(license) - 1) < 0)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c5b56c92f8e2..1eddb713b815 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -140,7 +140,7 @@ struct bpf_verifier_stack_elem {
struct bpf_verifier_stack_elem *next;
};
-#define BPF_COMPLEXITY_LIMIT_INSNS 65536
+#define BPF_COMPLEXITY_LIMIT_INSNS 98304
#define BPF_COMPLEXITY_LIMIT_STACK 1024
#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
@@ -241,6 +241,12 @@ static void print_verifier_state(struct bpf_verifier_state *state)
if (reg->max_value != BPF_REGISTER_MAX_RANGE)
verbose(",max_value=%llu",
(unsigned long long)reg->max_value);
+ if (reg->min_align)
+ verbose(",min_align=%u", reg->min_align);
+ if (reg->aux_off)
+ verbose(",aux_off=%u", reg->aux_off);
+ if (reg->aux_off_align)
+ verbose(",aux_off_align=%u", reg->aux_off_align);
}
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] == STACK_SPILL)
@@ -466,6 +472,9 @@ static void init_reg_state(struct bpf_reg_state *regs)
regs[i].imm = 0;
regs[i].min_value = BPF_REGISTER_MIN_RANGE;
regs[i].max_value = BPF_REGISTER_MAX_RANGE;
+ regs[i].min_align = 0;
+ regs[i].aux_off = 0;
+ regs[i].aux_off_align = 0;
}
/* frame pointer */
@@ -492,6 +501,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
{
regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+ regs[regno].min_align = 0;
}
static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
@@ -779,17 +789,33 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
}
static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
- int off, int size)
+ int off, int size, bool strict)
{
- if (reg->id && size != 1) {
- verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
- return -EACCES;
+ int ip_align;
+ int reg_off;
+
+ /* Byte size accesses are always allowed. */
+ if (!strict || size == 1)
+ return 0;
+
+ reg_off = reg->off;
+ if (reg->id) {
+ if (reg->aux_off_align % size) {
+ verbose("Packet access is only %u byte aligned, %d byte access not allowed\n",
+ reg->aux_off_align, size);
+ return -EACCES;
+ }
+ reg_off += reg->aux_off;
}
- /* skb->data is NET_IP_ALIGN-ed */
- if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
+ /* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking
+ * we force this to 2 which is universally what architectures use
+ * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+ */
+ ip_align = strict ? 2 : NET_IP_ALIGN;
+ if ((ip_align + reg_off + off) % size != 0) {
verbose("misaligned packet access off %d+%d+%d size %d\n",
- NET_IP_ALIGN, reg->off, off, size);
+ ip_align, reg_off, off, size);
return -EACCES;
}
@@ -797,9 +823,9 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
}
static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
- int size)
+ int size, bool strict)
{
- if (size != 1) {
+ if (strict && size != 1) {
verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
return -EACCES;
}
@@ -807,16 +833,20 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
return 0;
}
-static int check_ptr_alignment(const struct bpf_reg_state *reg,
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
int off, int size)
{
+ bool strict = env->strict_alignment;
+
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+ strict = true;
+
switch (reg->type) {
case PTR_TO_PACKET:
- return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
- check_pkt_ptr_alignment(reg, off, size);
+ return check_pkt_ptr_alignment(reg, off, size, strict);
case PTR_TO_MAP_VALUE_ADJ:
- return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
- check_val_ptr_alignment(reg, size);
+ return check_val_ptr_alignment(reg, size, strict);
default:
if (off % size != 0) {
verbose("misaligned access off %d size %d\n",
@@ -849,7 +879,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
if (size < 0)
return size;
- err = check_ptr_alignment(reg, off, size);
+ err = check_ptr_alignment(env, reg, off, size);
if (err)
return err;
@@ -883,6 +913,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
value_regno);
/* note that reg.[id|off|range] == 0 */
state->regs[value_regno].type = reg_type;
+ state->regs[value_regno].aux_off = 0;
+ state->regs[value_regno].aux_off_align = 0;
}
} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
@@ -1455,6 +1487,8 @@ add_imm:
*/
dst_reg->off += imm;
} else {
+ bool had_id;
+
if (src_reg->type == PTR_TO_PACKET) {
/* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */
tmp_reg = *dst_reg; /* save r7 state */
@@ -1488,14 +1522,23 @@ add_imm:
src_reg->imm);
return -EACCES;
}
+
+ had_id = (dst_reg->id != 0);
+
/* dst_reg stays as pkt_ptr type and since some positive
* integer value was added to the pointer, increment its 'id'
*/
dst_reg->id = ++env->id_gen;
- /* something was added to pkt_ptr, set range and off to zero */
+ /* something was added to pkt_ptr, set range to zero */
+ dst_reg->aux_off += dst_reg->off;
dst_reg->off = 0;
dst_reg->range = 0;
+ if (had_id)
+ dst_reg->aux_off_align = min(dst_reg->aux_off_align,
+ src_reg->min_align);
+ else
+ dst_reg->aux_off_align = src_reg->min_align;
}
return 0;
}
@@ -1669,6 +1712,13 @@ static void check_reg_overflow(struct bpf_reg_state *reg)
reg->min_value = BPF_REGISTER_MIN_RANGE;
}
+static u32 calc_align(u32 imm)
+{
+ if (!imm)
+ return 1U << 31;
+ return imm - ((imm - 1) & imm);
+}
+
static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
struct bpf_insn *insn)
{
@@ -1676,8 +1726,10 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
s64 min_val = BPF_REGISTER_MIN_RANGE;
u64 max_val = BPF_REGISTER_MAX_RANGE;
u8 opcode = BPF_OP(insn->code);
+ u32 dst_align, src_align;
dst_reg = &regs[insn->dst_reg];
+ src_align = 0;
if (BPF_SRC(insn->code) == BPF_X) {
check_reg_overflow(&regs[insn->src_reg]);
min_val = regs[insn->src_reg].min_value;
@@ -1693,12 +1745,18 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
regs[insn->src_reg].type != UNKNOWN_VALUE) {
min_val = BPF_REGISTER_MIN_RANGE;
max_val = BPF_REGISTER_MAX_RANGE;
+ src_align = 0;
+ } else {
+ src_align = regs[insn->src_reg].min_align;
}
} else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
(s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
min_val = max_val = insn->imm;
+ src_align = calc_align(insn->imm);
}
+ dst_align = dst_reg->min_align;
+
/* We don't know anything about what was done to this register, mark it
* as unknown.
*/
@@ -1723,18 +1781,21 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
dst_reg->min_value += min_val;
if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
dst_reg->max_value += max_val;
+ dst_reg->min_align = min(src_align, dst_align);
break;
case BPF_SUB:
if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
dst_reg->min_value -= min_val;
if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
dst_reg->max_value -= max_val;
+ dst_reg->min_align = min(src_align, dst_align);
break;
case BPF_MUL:
if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
dst_reg->min_value *= min_val;
if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
dst_reg->max_value *= max_val;
+ dst_reg->min_align = max(src_align, dst_align);
break;
case BPF_AND:
/* Disallow AND'ing of negative numbers, ain't nobody got time
@@ -1746,17 +1807,23 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
else
dst_reg->min_value = 0;
dst_reg->max_value = max_val;
+ dst_reg->min_align = max(src_align, dst_align);
break;
case BPF_LSH:
/* Gotta have special overflow logic here, if we're shifting
* more than MAX_RANGE then just assume we have an invalid
* range.
*/
- if (min_val > ilog2(BPF_REGISTER_MAX_RANGE))
+ if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) {
dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
- else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
- dst_reg->min_value <<= min_val;
-
+ dst_reg->min_align = 1;
+ } else {
+ if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+ dst_reg->min_value <<= min_val;
+ if (!dst_reg->min_align)
+ dst_reg->min_align = 1;
+ dst_reg->min_align <<= min_val;
+ }
if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
@@ -1766,11 +1833,19 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
/* RSH by a negative number is undefined, and the BPF_RSH is an
* unsigned shift, so make the appropriate casts.
*/
- if (min_val < 0 || dst_reg->min_value < 0)
+ if (min_val < 0 || dst_reg->min_value < 0) {
dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
- else
+ } else {
dst_reg->min_value =
(u64)(dst_reg->min_value) >> min_val;
+ }
+ if (min_val < 0) {
+ dst_reg->min_align = 1;
+ } else {
+ dst_reg->min_align >>= (u64) min_val;
+ if (!dst_reg->min_align)
+ dst_reg->min_align = 1;
+ }
if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
dst_reg->max_value >>= max_val;
break;
@@ -1872,6 +1947,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
regs[insn->dst_reg].imm = insn->imm;
regs[insn->dst_reg].max_value = insn->imm;
regs[insn->dst_reg].min_value = insn->imm;
+ regs[insn->dst_reg].min_align = calc_align(insn->imm);
}
} else if (opcode > BPF_END) {
@@ -2564,6 +2640,7 @@ peek_stack:
env->explored_states[t + 1] = STATE_LIST_MARK;
} else {
/* conditional jump with two edges */
+ env->explored_states[t] = STATE_LIST_MARK;
ret = push_insn(t, t + 1, FALLTHROUGH, env);
if (ret == 1)
goto peek_stack;
@@ -2722,6 +2799,12 @@ static bool states_equal(struct bpf_verifier_env *env,
rcur->type != NOT_INIT))
continue;
+ /* Don't care about the reg->id in this case. */
+ if (rold->type == PTR_TO_MAP_VALUE_OR_NULL &&
+ rcur->type == PTR_TO_MAP_VALUE_OR_NULL &&
+ rold->map_ptr == rcur->map_ptr)
+ continue;
+
if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
compare_ptrs_to_packet(rold, rcur))
continue;
@@ -2856,8 +2939,15 @@ static int do_check(struct bpf_verifier_env *env)
goto process_bpf_exit;
}
- if (log_level && do_print_state) {
- verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
+ if (need_resched())
+ cond_resched();
+
+ if (log_level > 1 || (log_level && do_print_state)) {
+ if (log_level > 1)
+ verbose("%d:", insn_idx);
+ else
+ verbose("\nfrom %d to %d:",
+ prev_insn_idx, insn_idx);
print_verifier_state(&env->cur_state);
do_print_state = false;
}
@@ -3494,6 +3584,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
} else {
log_level = 0;
}
+ if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT)
+ env->strict_alignment = true;
+ else
+ env->strict_alignment = false;
ret = replace_map_fd_with_map_ptr(env);
if (ret < 0)
@@ -3599,6 +3693,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
mutex_lock(&bpf_verifier_lock);
log_level = 0;
+ env->strict_alignment = false;
env->explored_states = kcalloc(env->prog->len,
sizeof(struct bpf_verifier_state_list *),
diff --git a/kernel/fork.c b/kernel/fork.c
index 06d759ab4c62..aa1076c5e4a9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1845,11 +1845,13 @@ static __latent_entropy struct task_struct *copy_process(
*/
recalc_sigpending();
if (signal_pending(current)) {
- spin_unlock(&current->sighand->siglock);
- write_unlock_irq(&tasklist_lock);
retval = -ERESTARTNOINTR;
goto bad_fork_cancel_cgroup;
}
+ if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+ retval = -ENOMEM;
+ goto bad_fork_cancel_cgroup;
+ }
if (likely(p->pid)) {
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -1907,6 +1909,8 @@ static __latent_entropy struct task_struct *copy_process(
return p;
bad_fork_cancel_cgroup:
+ spin_unlock(&current->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
cgroup_cancel_fork(p);
bad_fork_free_pid:
cgroup_threadgroup_change_end(current);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 686be4b73018..c94da688ee9b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -880,8 +880,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
if (!desc)
return;
- __irq_do_set_handler(desc, handle, 1, NULL);
desc->irq_common_data.handler_data = data;
+ __irq_do_set_handler(desc, handle, 1, NULL);
irq_put_desc_busunlock(desc, flags);
}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7367e0ec6f81..2d2d3a568e4e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -595,7 +595,7 @@ static void kprobe_optimizer(struct work_struct *work)
}
/* Wait for completing optimization and unoptimization */
-static void wait_for_kprobe_optimizer(void)
+void wait_for_kprobe_optimizer(void)
{
mutex_lock(&kprobe_mutex);
@@ -2183,6 +2183,12 @@ static int kprobes_module_callback(struct notifier_block *nb,
* The vaddr this probe is installed will soon
* be vfreed buy not synced to disk. Hence,
* disarming the breakpoint isn't needed.
+ *
+ * Note, this will also move any optimized probes
+ * that are pending to be removed from their
+ * corresponding lists to the freeing_list and
+ * will not be touched by the delayed
+ * kprobe_optimizer work handler.
*/
kill_kprobe(p);
}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index d1f3e9f558b8..74a5a7255b4d 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -277,7 +277,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
* if reparented.
*/
for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE);
if (pid_ns->nr_hashed == init_pids)
break;
schedule();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 759f4bd52cd6..803c3bc274c4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3502,6 +3502,31 @@ asmlinkage __visible void __sched schedule(void)
}
EXPORT_SYMBOL(schedule);
+/*
+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted
+ * state (have scheduled out non-voluntarily) by making sure that all
+ * tasks have either left the run queue or have gone into user space.
+ * As idle tasks do not do either, they must not ever be preempted
+ * (schedule out non-voluntarily).
+ *
+ * schedule_idle() is similar to schedule_preempt_disable() except that it
+ * never enables preemption because it does not call sched_submit_work().
+ */
+void __sched schedule_idle(void)
+{
+ /*
+ * As this skips calling sched_submit_work(), which the idle task does
+ * regardless because that function is a nop when the task is in a
+ * TASK_RUNNING state, make sure this isn't used someplace that the
+ * current task can be in any other state. Note, idle is always in the
+ * TASK_RUNNING state.
+ */
+ WARN_ON_ONCE(current->state);
+ do {
+ __schedule(false);
+ } while (need_resched());
+}
+
#ifdef CONFIG_CONTEXT_TRACKING
asmlinkage __visible void __sched schedule_user(void)
{
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 2a25a9ec2c6e..ef63adce0c9c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -265,7 +265,7 @@ static void do_idle(void)
smp_mb__after_atomic();
sched_ttwu_pending();
- schedule_preempt_disabled();
+ schedule_idle();
if (unlikely(klp_patch_pending(current)))
klp_update_patch_state(current);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7808ab050599..6dda2aab731e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1467,6 +1467,8 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
}
#endif
+extern void schedule_idle(void);
+
extern void sysrq_sched_debug_show(void);
extern void sched_init_granularity(void);
extern void update_max_interval(void);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bd8ae8d5ae9c..193c5f5e3f79 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1662,14 +1662,14 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
goto out;
if (attr == &dev_attr_act_mask) {
- if (sscanf(buf, "%llx", &value) != 1) {
+ if (kstrtoull(buf, 0, &value)) {
/* Assume it is a list of trace category names */
ret = blk_trace_str2mask(buf);
if (ret < 0)
goto out;
value = ret;
}
- } else if (sscanf(buf, "%llu", &value) != 1)
+ } else if (kstrtoull(buf, 0, &value))
goto out;
ret = -ENXIO;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 39dca4e86a94..74fdfe9ed3db 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4144,9 +4144,9 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
int i, ret = -ENODEV;
int size;
- if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
+ if (!glob || !strlen(glob) || !strcmp(glob, "*"))
func_g.search = NULL;
- else if (glob) {
+ else {
int not;
func_g.type = filter_parse_regex(glob, strlen(glob),
@@ -4256,6 +4256,14 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
return ret;
}
+void clear_ftrace_function_probes(struct trace_array *tr)
+{
+ struct ftrace_func_probe *probe, *n;
+
+ list_for_each_entry_safe(probe, n, &tr->func_probes, list)
+ unregister_ftrace_function_probe_func(NULL, tr, probe->probe_ops);
+}
+
static LIST_HEAD(ftrace_commands);
static DEFINE_MUTEX(ftrace_cmd_mutex);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c4536c449021..1122f151466f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1558,7 +1558,7 @@ static __init int init_trace_selftests(void)
return 0;
}
-early_initcall(init_trace_selftests);
+core_initcall(init_trace_selftests);
#else
static inline int run_tracer_selftest(struct tracer *type)
{
@@ -2568,7 +2568,36 @@ static inline void ftrace_trace_stack(struct trace_array *tr,
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
int pc)
{
- __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
+ struct ring_buffer *buffer = tr->trace_buffer.buffer;
+
+ if (rcu_is_watching()) {
+ __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
+ return;
+ }
+
+ /*
+ * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
+ * but if the above rcu_is_watching() failed, then the NMI
+ * triggered someplace critical, and rcu_irq_enter() should
+ * not be called from NMI.
+ */
+ if (unlikely(in_nmi()))
+ return;
+
+ /*
+ * It is possible that a function is being traced in a
+ * location that RCU is not watching. A call to
+ * rcu_irq_enter() will make sure that it is, but there's
+ * a few internal rcu functions that could be traced
+ * where that wont work either. In those cases, we just
+ * do nothing.
+ */
+ if (unlikely(rcu_irq_enter_disabled()))
+ return;
+
+ rcu_irq_enter_irqson();
+ __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
+ rcu_irq_exit_irqson();
}
/**
@@ -7550,6 +7579,7 @@ static int instance_rmdir(const char *name)
}
tracing_set_nop(tr);
+ clear_ftrace_function_probes(tr);
event_trace_del_tracer(tr);
ftrace_clear_pids(tr);
ftrace_destroy_function_files(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 291a1bca5748..39fd77330aab 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -980,6 +980,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
extern int
unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
struct ftrace_probe_ops *ops);
+extern void clear_ftrace_function_probes(struct trace_array *tr);
int register_ftrace_command(struct ftrace_func_command *cmd);
int unregister_ftrace_command(struct ftrace_func_command *cmd);
@@ -998,6 +999,10 @@ static inline __init int unregister_ftrace_command(char *cmd_name)
{
return -EINVAL;
}
+static inline void clear_ftrace_function_probes(struct trace_array *tr)
+{
+}
+
/*
* The ops parameter passed in is usually undefined.
* This must be a macro.
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8485f6738a87..c129fca6ec99 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1535,6 +1535,11 @@ static __init int kprobe_trace_self_tests_init(void)
end:
release_all_trace_kprobes();
+ /*
+ * Wait for the optimizer work to finish. Otherwise it might fiddle
+ * with probes in already freed __init text.
+ */
+ wait_for_kprobe_optimizer();
if (warn)
pr_cont("NG: Some tests are failed. Please check them.\n");
else