Diffstat (limited to 'kernel')
-rw-r--r--   kernel/bpf/syscall.c             |  22
-rw-r--r--   kernel/bpf/verifier.c            | 154
-rw-r--r--   kernel/ptrace.c                  |  15
-rw-r--r--   kernel/trace/ftrace.c            |  12
-rw-r--r--   kernel/trace/trace_dynevent.c    |   2
-rw-r--r--   kernel/trace/trace_events_hist.c |   1
-rw-r--r--   kernel/watchdog.c                |   6
7 files changed, 130 insertions, 82 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 62f6bced3a3c..afca36f53c49 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -136,21 +136,29 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 
 void *bpf_map_area_alloc(size_t size, int numa_node)
 {
-	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
-	 * trigger under memory pressure as we really just want to
-	 * fail instead.
+	/* We really just want to fail instead of triggering OOM killer
+	 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
+	 * which is used for lower order allocation requests.
+	 *
+	 * It has been observed that higher order allocation requests done by
+	 * vmalloc with __GFP_NORETRY being set might fail due to not trying
+	 * to reclaim memory from the page cache, thus we set
+	 * __GFP_RETRY_MAYFAIL to avoid such situations.
 	 */
-	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+
+	const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
 	void *area;
 
 	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-		area = kmalloc_node(size, GFP_USER | flags, numa_node);
+		area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
+				    numa_node);
 		if (area != NULL)
 			return area;
 	}
 
-	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
-					   __builtin_return_address(0));
+	return __vmalloc_node_flags_caller(size, numa_node,
+					   GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+					   flags, __builtin_return_address(0));
 }
 
 void bpf_map_area_free(void *area)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ce166a002d16..fd502c1f71eb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
 	int access_size;
 	s64 msize_smax_value;
 	u64 msize_umax_value;
-	int ptr_id;
+	int ref_obj_id;
 	int func_id;
 };
 
@@ -346,35 +346,23 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
 	       type == PTR_TO_TCP_SOCK_OR_NULL;
 }
 
-static bool type_is_refcounted(enum bpf_reg_type type)
-{
-	return type == PTR_TO_SOCKET;
-}
-
-static bool type_is_refcounted_or_null(enum bpf_reg_type type)
-{
-	return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
-}
-
-static bool reg_is_refcounted(const struct bpf_reg_state *reg)
-{
-	return type_is_refcounted(reg->type);
-}
-
 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 {
 	return reg->type == PTR_TO_MAP_VALUE &&
 		map_value_has_spin_lock(reg->map_ptr);
 }
 
-static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
+static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 {
-	return type_is_refcounted_or_null(reg->type);
+	return type == PTR_TO_SOCKET ||
+		type == PTR_TO_SOCKET_OR_NULL ||
+		type == PTR_TO_TCP_SOCK ||
+		type == PTR_TO_TCP_SOCK_OR_NULL;
 }
 
-static bool arg_type_is_refcounted(enum bpf_arg_type type)
+static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
 {
-	return type == ARG_PTR_TO_SOCKET;
+	return type == ARG_PTR_TO_SOCK_COMMON;
 }
 
 /* Determine whether the function releases some resources allocated by another
@@ -392,6 +380,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
 		func_id == BPF_FUNC_sk_lookup_udp;
 }
 
+static bool is_ptr_cast_function(enum bpf_func_id func_id)
+{
+	return func_id == BPF_FUNC_tcp_sock ||
+	       func_id == BPF_FUNC_sk_fullsock;
+}
+
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
 	[NOT_INIT]		= "?",
@@ -466,6 +460,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 			verbose(env, ",call_%d", func(env, reg)->callsite);
 		} else {
 			verbose(env, "(id=%d", reg->id);
+			if (reg_type_may_be_refcounted_or_null(t))
+				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
 			if (t != SCALAR_VALUE)
 				verbose(env, ",off=%d", reg->off);
 			if (type_is_pkt_pointer(t))
@@ -2414,16 +2410,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
 		if (!type_is_sk_pointer(type))
 			goto err_type;
-	} else if (arg_type == ARG_PTR_TO_SOCKET) {
-		expected_type = PTR_TO_SOCKET;
-		if (type != expected_type)
-			goto err_type;
-		if (meta->ptr_id || !reg->id) {
-			verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
-				meta->ptr_id, reg->id);
-			return -EFAULT;
+		if (reg->ref_obj_id) {
+			if (meta->ref_obj_id) {
+				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+					regno, reg->ref_obj_id,
+					meta->ref_obj_id);
+				return -EFAULT;
+			}
+			meta->ref_obj_id = reg->ref_obj_id;
 		}
-		meta->ptr_id = reg->id;
 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
 		if (meta->func_id == BPF_FUNC_spin_lock) {
 			if (process_spin_lock(env, regno, true))
@@ -2740,32 +2735,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
 	return true;
 }
 
-static bool check_refcount_ok(const struct bpf_func_proto *fn)
+static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
 {
 	int count = 0;
 
-	if (arg_type_is_refcounted(fn->arg1_type))
+	if (arg_type_may_be_refcounted(fn->arg1_type))
 		count++;
-	if (arg_type_is_refcounted(fn->arg2_type))
+	if (arg_type_may_be_refcounted(fn->arg2_type))
 		count++;
-	if (arg_type_is_refcounted(fn->arg3_type))
+	if (arg_type_may_be_refcounted(fn->arg3_type))
 		count++;
-	if (arg_type_is_refcounted(fn->arg4_type))
+	if (arg_type_may_be_refcounted(fn->arg4_type))
 		count++;
-	if (arg_type_is_refcounted(fn->arg5_type))
+	if (arg_type_may_be_refcounted(fn->arg5_type))
 		count++;
 
+	/* A reference acquiring function cannot acquire
+	 * another refcounted ptr.
+	 */
+	if (is_acquire_function(func_id) && count)
+		return false;
+
 	/* We only support one arg being unreferenced at the moment,
 	 * which is sufficient for the helper functions we have right now.
 	 */
 	return count <= 1;
 }
 
-static int check_func_proto(const struct bpf_func_proto *fn)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
 {
 	return check_raw_mode_ok(fn) &&
 	       check_arg_pair_ok(fn) &&
-	       check_refcount_ok(fn) ? 0 : -EINVAL;
+	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
 }
 
 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2799,19 +2800,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 }
 
 static void release_reg_references(struct bpf_verifier_env *env,
-				   struct bpf_func_state *state, int id)
+				   struct bpf_func_state *state,
+				   int ref_obj_id)
 {
 	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++)
-		if (regs[i].id == id)
+		if (regs[i].ref_obj_id == ref_obj_id)
 			mark_reg_unknown(env, regs, i);
 
 	bpf_for_each_spilled_reg(i, state, reg) {
 		if (!reg)
 			continue;
-		if (reg_is_refcounted(reg) && reg->id == id)
+		if (reg->ref_obj_id == ref_obj_id)
 			__mark_reg_unknown(reg);
 	}
 }
@@ -2820,15 +2822,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
  * resources. Identify all copies of the same pointer and clear the reference.
  */
 static int release_reference(struct bpf_verifier_env *env,
-			     struct bpf_call_arg_meta *meta)
+			     int ref_obj_id)
 {
 	struct bpf_verifier_state *vstate = env->cur_state;
+	int err;
 	int i;
 
+	err = release_reference_state(cur_func(env), ref_obj_id);
+	if (err)
+		return err;
+
 	for (i = 0; i <= vstate->curframe; i++)
-		release_reg_references(env, vstate->frame[i], meta->ptr_id);
+		release_reg_references(env, vstate->frame[i], ref_obj_id);
 
-	return release_reference_state(cur_func(env), meta->ptr_id);
+	return 0;
 }
 
 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3047,7 +3054,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	memset(&meta, 0, sizeof(meta));
 	meta.pkt_access = fn->pkt_access;
 
-	err = check_func_proto(fn);
+	err = check_func_proto(fn, func_id);
 	if (err) {
 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
 			func_id_name(func_id), func_id);
@@ -3093,7 +3100,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 			return err;
 		}
 	} else if (is_release_function(func_id)) {
-		err = release_reference(env, &meta);
+		err = release_reference(env, meta.ref_obj_id);
 		if (err) {
 			verbose(env, "func %s#%d reference has not been acquired before\n",
 				func_id_name(func_id), func_id);
@@ -3154,8 +3161,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 
 		if (id < 0)
 			return id;
-		/* For release_reference() */
+		/* For mark_ptr_or_null_reg() */
 		regs[BPF_REG_0].id = id;
+		/* For release_reference() */
+		regs[BPF_REG_0].ref_obj_id = id;
 	} else {
 		/* For mark_ptr_or_null_reg() */
 		regs[BPF_REG_0].id = ++env->id_gen;
@@ -3170,6 +3179,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		return -EINVAL;
 	}
 
+	if (is_ptr_cast_function(func_id))
+		/* For release_reference() */
+		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+
 	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
 
 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
@@ -3368,7 +3381,7 @@ do_sim:
 		*dst_reg = *ptr_reg;
 	}
 	ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
-	if (!ptr_is_dst_reg)
+	if (!ptr_is_dst_reg && ret)
 		*dst_reg = tmp;
 	return !ret ? -EFAULT : 0;
 }
@@ -4665,11 +4678,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
 			reg->type = PTR_TO_TCP_SOCK;
 		}
-		if (is_null || !(reg_is_refcounted(reg) ||
-				 reg_may_point_to_spin_lock(reg))) {
-			/* We don't need id from this point onwards anymore,
-			 * thus we should better reset it, so that state
-			 * pruning has chances to take effect.
+		if (is_null) {
+			/* We don't need id and ref_obj_id from this point
+			 * onwards anymore, thus we should better reset it,
+			 * so that state pruning has chances to take effect.
+			 */
+			reg->id = 0;
+			reg->ref_obj_id = 0;
+		} else if (!reg_may_point_to_spin_lock(reg)) {
+			/* For not-NULL ptr, reg->ref_obj_id will be reset
+			 * in release_reg_references().
+			 *
+			 * reg->id is still used by spin_lock ptr. Other
+			 * than spin_lock ptr type, reg->id can be reset.
 			 */
 			reg->id = 0;
 		}
@@ -4684,11 +4705,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
 {
 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 	struct bpf_reg_state *reg, *regs = state->regs;
+	u32 ref_obj_id = regs[regno].ref_obj_id;
 	u32 id = regs[regno].id;
 	int i, j;
 
-	if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
-		release_reference_state(state, id);
+	if (ref_obj_id && ref_obj_id == id && is_null)
+		/* regs[regno] is in the " == NULL" branch.
+		 * No one could have freed the reference state before
+		 * doing the NULL check.
+		 */
+		WARN_ON_ONCE(release_reference_state(state, id));
 
 	for (i = 0; i < MAX_BPF_REG; i++)
 		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@@ -6052,15 +6078,17 @@ static int propagate_liveness(struct bpf_verifier_env *env,
 	}
 	/* Propagate read liveness of registers... */
 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
-	/* We don't need to worry about FP liveness because it's read-only */
-	for (i = 0; i < BPF_REG_FP; i++) {
-		if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
-			continue;
-		if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
-			err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
-					    &vparent->frame[vstate->curframe]->regs[i]);
-			if (err)
-				return err;
+	for (frame = 0; frame <= vstate->curframe; frame++) {
+		/* We don't need to worry about FP liveness, it's read-only */
+		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
+			if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
+				continue;
+			if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
+				err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
+						    &vparent->frame[frame]->regs[i]);
+				if (err)
+					return err;
+			}
 		}
 	}
 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 771e93f9c43f..6f357f4fc859 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -29,6 +29,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/cn_proc.h>
 #include <linux/compat.h>
+#include <linux/sched/signal.h>
 
 /*
  * Access another process' address space via ptrace.
@@ -924,18 +925,26 @@ int ptrace_request(struct task_struct *child, long request,
 		ret = ptrace_setsiginfo(child, &siginfo);
 		break;
 
-	case PTRACE_GETSIGMASK:
+	case PTRACE_GETSIGMASK: {
+		sigset_t *mask;
+
 		if (addr != sizeof(sigset_t)) {
 			ret = -EINVAL;
 			break;
 		}
 
-		if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t)))
+		if (test_tsk_restore_sigmask(child))
+			mask = &child->saved_sigmask;
+		else
+			mask = &child->blocked;
+
+		if (copy_to_user(datavp, mask, sizeof(sigset_t)))
 			ret = -EFAULT;
 		else
 			ret = 0;
 		break;
+	}
 
 	case PTRACE_SETSIGMASK: {
 		sigset_t new_set;
 
@@ -961,6 +970,8 @@ int ptrace_request(struct task_struct *child, long request,
 		child->blocked = new_set;
 		spin_unlock_irq(&child->sighand->siglock);
 
+		clear_tsk_restore_sigmask(child);
+
 		ret = 0;
 		break;
 	}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa79323331b2..26c8ca9bd06b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1992,7 +1992,7 @@ static void print_bug_type(void)
  * modifying the code. @failed should be one of either:
  * EFAULT - if the problem happens on reading the @ip address
  * EINVAL - if what is read at @ip is not what was expected
- * EPERM - if the problem happens on writting to the @ip address
+ * EPERM - if the problem happens on writing to the @ip address
  */
 void ftrace_bug(int failed, struct dyn_ftrace *rec)
 {
@@ -2391,7 +2391,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 		return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
 	}
 
-	return -1; /* unknow ftrace bug */
+	return -1; /* unknown ftrace bug */
 }
 
 void __weak ftrace_replace_code(int mod_flags)
@@ -3004,7 +3004,7 @@ ftrace_allocate_pages(unsigned long num_to_init)
 	int cnt;
 
 	if (!num_to_init)
-		return 0;
+		return NULL;
 
 	start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
 	if (!pg)
@@ -4755,7 +4755,7 @@ static int
 ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
 		int reset, int enable)
 {
-	return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
+	return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable);
 }
 
 /**
@@ -5463,7 +5463,7 @@ void ftrace_create_filter_files(struct ftrace_ops *ops,
 
 /*
  * The name "destroy_filter_files" is really a misnomer. Although
- * in the future, it may actualy delete the files, but this is
+ * in the future, it may actually delete the files, but this is
  * really intended to make sure the ops passed in are disabled
 * and that when this function returns, the caller is free to
 * free the ops.
@@ -5786,7 +5786,7 @@ void ftrace_module_enable(struct module *mod)
 		/*
 		 * If the tracing is enabled, go ahead and enable the record.
 		 *
-		 * The reason not to enable the record immediatelly is the
+		 * The reason not to enable the record immediately is the
 		 * inherent check of ftrace_make_nop/ftrace_make_call for
 		 * correct previous instructions. Making first the NOP
 		 * conversion puts the module to the correct state, thus
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index dd1f43588d70..fa100ed3b4de 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -74,7 +74,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type)
 static int create_dyn_event(int argc, char **argv)
 {
 	struct dyn_event_operations *ops;
-	int ret;
+	int ret = -ENODEV;
 
 	if (argv[0][0] == '-' || argv[0][0] == '!')
 		return dyn_event_release(argc, argv, NULL);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index ca46339f3009..795aa2038377 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3713,7 +3713,6 @@ static void track_data_destroy(struct hist_trigger_data *hist_data,
 	struct trace_event_file *file = hist_data->event_file;
 
 	destroy_hist_field(data->track_data.track_var, 0);
-	destroy_hist_field(data->track_data.var_ref, 0);
 
 	if (data->action == ACTION_SNAPSHOT) {
 		struct track_data *track_data;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 403c9bd90413..6a5787233113 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -554,13 +554,15 @@ static void softlockup_start_all(void)
 
 int lockup_detector_online_cpu(unsigned int cpu)
 {
-	watchdog_enable(cpu);
+	if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+		watchdog_enable(cpu);
 	return 0;
 }
 
 int lockup_detector_offline_cpu(unsigned int cpu)
 {
-	watchdog_disable(cpu);
+	if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+		watchdog_disable(cpu);
 	return 0;
 }
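
For context, a minimal sketch of the usage pattern the ref_obj_id tracking in the verifier changes above is meant to support: the socket reference acquired by bpf_sk_lookup_tcp() survives the bpf_tcp_sock() "cast" and must still be released exactly once via bpf_sk_release(). This sketch is not taken from the patch; it assumes a clang/libbpf build and a program type (for example tc/SCHED_CLS) where all three helpers are available, and the lookup tuple values are placeholders.

/* Sketch only: acquire a socket reference, cast it, read a field, release it. */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("classifier")
int read_snd_cwnd(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = {};
	struct bpf_tcp_sock *tp;
	struct bpf_sock *sk;
	__u32 snd_cwnd = 0;

	tuple.ipv4.daddr = bpf_htonl(0x7f000001);	/* 127.0.0.1, placeholder */
	tuple.ipv4.dport = bpf_htons(80);		/* placeholder port */

	/* Acquires a reference; the verifier records it as ref_obj_id on sk. */
	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
			       BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return TC_ACT_OK;

	/* bpf_tcp_sock() only casts the pointer; with this patch the result
	 * inherits sk's ref_obj_id (see is_ptr_cast_function() above).
	 */
	tp = bpf_tcp_sock(sk);
	if (tp)
		snd_cwnd = tp->snd_cwnd;

	bpf_printk("snd_cwnd=%u\n", snd_cwnd);

	/* Exactly one release, on the originally acquired pointer. */
	bpf_sk_release(sk);
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";

Because release_reg_references() now invalidates every register sharing the same ref_obj_id, releasing sk also invalidates tp, so the pointer obtained from the cast cannot be used after the release.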
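Similarly, the ptrace.c change makes PTRACE_GETSIGMASK report the tracee's saved_sigmask when its restore-sigmask flag is set (for example while it is stopped inside a sigsuspend()-style call) instead of the temporarily modified blocked mask, and PTRACE_SETSIGMASK now clears that flag. A minimal userspace sketch of issuing the request follows; it is not part of the patch, assumes x86-64 (where the kernel's sigset_t is 8 bytes), and expects the target to already be attached and in ptrace-stop.

/* Sketch only: read a ptrace-stopped tracee's signal mask. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/ptrace.h>

#ifndef PTRACE_GETSIGMASK
#define PTRACE_GETSIGMASK 0x420a
#endif

#define KERNEL_SIGSET_BYTES 8	/* sizeof(sigset_t) inside the kernel on x86-64 */

int main(int argc, char **argv)
{
	uint64_t mask = 0;
	pid_t pid;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <pid of stopped tracee>\n", argv[0]);
		return 1;
	}
	pid = (pid_t)atoi(argv[1]);

	/* addr carries the sigset size, data points at the destination buffer. */
	if (ptrace(PTRACE_GETSIGMASK, pid, (void *)KERNEL_SIGSET_BYTES, &mask) == -1) {
		perror("PTRACE_GETSIGMASK");
		return 1;
	}

	printf("signal mask: %#llx\n", (unsigned long long)mask);
	return 0;
}

Note that the addr argument must match the kernel's internal sigset_t size (the check `addr != sizeof(sigset_t)` in the hunk above), which is why the sketch passes 8 rather than sizeof(sigset_t) from the C library headers, whose sigset_t is larger.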