Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r--  kernel/bpf/verifier.c | 1649
1 file changed, 1153 insertions(+), 496 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1cc945daa9c8..04c6630cc18f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19,6 +19,8 @@
 #include <linux/sort.h>
 #include <linux/perf_event.h>
 #include <linux/ctype.h>
+#include <linux/error-injection.h>
+#include <linux/bpf_lsm.h>
 
 #include "disasm.h"
 
@@ -227,8 +229,7 @@ struct bpf_call_arg_meta {
 	bool pkt_access;
 	int regno;
 	int access_size;
-	s64 msize_smax_value;
-	u64 msize_umax_value;
+	u64 msize_max_value;
 	int ref_obj_id;
 	int func_id;
 	u32 btf_id;
@@ -549,6 +550,22 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 				tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 				verbose(env, ",var_off=%s", tn_buf);
 			}
+			if (reg->s32_min_value != reg->smin_value &&
+			    reg->s32_min_value != S32_MIN)
+				verbose(env, ",s32_min_value=%d",
+					(int)(reg->s32_min_value));
+			if (reg->s32_max_value != reg->smax_value &&
+			    reg->s32_max_value != S32_MAX)
+				verbose(env, ",s32_max_value=%d",
+					(int)(reg->s32_max_value));
+			if (reg->u32_min_value != reg->umin_value &&
+			    reg->u32_min_value != U32_MIN)
+				verbose(env, ",u32_min_value=%d",
+					(int)(reg->u32_min_value));
+			if (reg->u32_max_value != reg->umax_value &&
+			    reg->u32_max_value != U32_MAX)
+				verbose(env, ",u32_max_value=%d",
+					(int)(reg->u32_max_value));
 		}
 		verbose(env, ")");
 	}
@@ -923,6 +940,20 @@ static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
 	reg->smax_value = (s64)imm;
 	reg->umin_value = imm;
 	reg->umax_value = imm;
+
+	reg->s32_min_value = (s32)imm;
+	reg->s32_max_value = (s32)imm;
+	reg->u32_min_value = (u32)imm;
+	reg->u32_max_value = (u32)imm;
+}
+
+static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
+{
+	reg->var_off = tnum_const_subreg(reg->var_off, imm);
+	reg->s32_min_value = (s32)imm;
+	reg->s32_max_value = (s32)imm;
+	reg->u32_min_value = (u32)imm;
+	reg->u32_max_value = (u32)imm;
+}
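A quick standalone C illustration (not part of the patch) of what the casts in __mark_reg_known() above do: a single 64-bit immediate seeds the new 32-bit bound fields with only its low subregister.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t imm = 0xffffffff00000001ULL;

		/* The 64-bit bounds collapse to the full constant... */
		printf("umin = umax = %#llx\n", (unsigned long long)imm);
		/* ...while the 32-bit bounds see only the low subregister */
		printf("u32 bounds = %u, s32 bounds = %d\n",
		       (uint32_t)imm, (int32_t)imm);
		return 0;
	}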
 /* Mark the 'variable offset' part of a register as zero.  This should be
@@ -977,8 +1008,52 @@ static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
 	       tnum_equals_const(reg->var_off, 0);
 }
 
-/* Attempts to improve min/max values based on var_off information */
-static void __update_reg_bounds(struct bpf_reg_state *reg)
+/* Reset the min/max bounds of a register */
+static void __mark_reg_unbounded(struct bpf_reg_state *reg)
+{
+	reg->smin_value = S64_MIN;
+	reg->smax_value = S64_MAX;
+	reg->umin_value = 0;
+	reg->umax_value = U64_MAX;
+
+	reg->s32_min_value = S32_MIN;
+	reg->s32_max_value = S32_MAX;
+	reg->u32_min_value = 0;
+	reg->u32_max_value = U32_MAX;
+}
+
+static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
+{
+	reg->smin_value = S64_MIN;
+	reg->smax_value = S64_MAX;
+	reg->umin_value = 0;
+	reg->umax_value = U64_MAX;
+}
+
+static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
+{
+	reg->s32_min_value = S32_MIN;
+	reg->s32_max_value = S32_MAX;
+	reg->u32_min_value = 0;
+	reg->u32_max_value = U32_MAX;
+}
+
+static void __update_reg32_bounds(struct bpf_reg_state *reg)
+{
+	struct tnum var32_off = tnum_subreg(reg->var_off);
+
+	/* min signed is max(sign bit) | min(other bits) */
+	reg->s32_min_value = max_t(s32, reg->s32_min_value,
+			var32_off.value | (var32_off.mask & S32_MIN));
+	/* max signed is min(sign bit) | max(other bits) */
+	reg->s32_max_value = min_t(s32, reg->s32_max_value,
+			var32_off.value | (var32_off.mask & S32_MAX));
+	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
+	reg->u32_max_value = min(reg->u32_max_value,
+				 (u32)(var32_off.value | var32_off.mask));
+}
+
+static void __update_reg64_bounds(struct bpf_reg_state *reg)
 {
 	/* min signed is max(sign bit) | min(other bits) */
 	reg->smin_value = max_t(s64, reg->smin_value,
@@ -991,8 +1066,48 @@ static void __update_reg_bounds(struct bpf_reg_state *reg)
 			reg->var_off.value | reg->var_off.mask);
 }
 
+static void __update_reg_bounds(struct bpf_reg_state *reg)
+{
+	__update_reg32_bounds(reg);
+	__update_reg64_bounds(reg);
+}
+
 /* Uses signed min/max values to inform unsigned, and vice-versa */
-static void __reg_deduce_bounds(struct bpf_reg_state *reg)
+static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
+{
+	/* Learn sign from signed bounds.
+	 * If we cannot cross the sign boundary, then signed and unsigned bounds
+	 * are the same, so combine.  This works even in the negative case, e.g.
+	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
+	 */
+	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
+		reg->s32_min_value = reg->u32_min_value =
+			max_t(u32, reg->s32_min_value, reg->u32_min_value);
+		reg->s32_max_value = reg->u32_max_value =
+			min_t(u32, reg->s32_max_value, reg->u32_max_value);
+		return;
+	}
+	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
+	 * boundary, so we must be careful.
+	 */
+	if ((s32)reg->u32_max_value >= 0) {
+		/* Positive.  We can't learn anything from the smin, but smax
+		 * is positive, hence safe.
+		 */
+		reg->s32_min_value = reg->u32_min_value;
+		reg->s32_max_value = reg->u32_max_value =
+			min_t(u32, reg->s32_max_value, reg->u32_max_value);
+	} else if ((s32)reg->u32_min_value < 0) {
+		/* Negative.  We can't learn anything from the smax, but smin
+		 * is negative, hence safe.
+		 */
+		reg->s32_min_value = reg->u32_min_value =
			max_t(u32, reg->s32_min_value, reg->u32_min_value);
+		reg->s32_max_value = reg->u32_max_value;
+	}
+}
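The sign-boundary trick in __reg32_deduce_bounds() can be checked in plain C: when the signed range does not cross zero, the same two's-complement bit patterns serve as the unsigned bounds, even for all-negative ranges.

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int32_t smin = -3, smax = -1;	/* -3 s<= x s<= -1 */

		/* ...implies 0xfffffffd u<= x u<= 0xffffffff */
		printf("umin = 0x%" PRIx32 ", umax = 0x%" PRIx32 "\n",
		       (uint32_t)smin, (uint32_t)smax);
		return 0;
	}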
+static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
 {
 	/* Learn sign from signed bounds.
 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
@@ -1026,32 +1141,106 @@ static void __reg_deduce_bounds(struct bpf_reg_state *reg)
 	}
 }
 
+static void __reg_deduce_bounds(struct bpf_reg_state *reg)
+{
+	__reg32_deduce_bounds(reg);
+	__reg64_deduce_bounds(reg);
+}
+
 /* Attempts to improve var_off based on unsigned min/max information */
 static void __reg_bound_offset(struct bpf_reg_state *reg)
 {
-	reg->var_off = tnum_intersect(reg->var_off,
-				      tnum_range(reg->umin_value,
-						 reg->umax_value));
+	struct tnum var64_off = tnum_intersect(reg->var_off,
+					       tnum_range(reg->umin_value,
+							  reg->umax_value));
+	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
+					       tnum_range(reg->u32_min_value,
+							  reg->u32_max_value));
+
+	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
+}
+
+static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
+{
+	reg->umin_value = reg->u32_min_value;
+	reg->umax_value = reg->u32_max_value;
+	/* Attempt to pull the 32-bit signed bounds into the 64-bit bounds,
+	 * but they must be positive; otherwise set to worst-case bounds
+	 * and refine later from the tnum.
+	 */
+	if (reg->s32_min_value > 0)
+		reg->smin_value = reg->s32_min_value;
+	else
+		reg->smin_value = 0;
+	if (reg->s32_max_value > 0)
+		reg->smax_value = reg->s32_max_value;
+	else
+		reg->smax_value = U32_MAX;
 }
 
-static void __reg_bound_offset32(struct bpf_reg_state *reg)
+static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
 {
-	u64 mask = 0xffffFFFF;
-	struct tnum range = tnum_range(reg->umin_value & mask,
-				       reg->umax_value & mask);
-	struct tnum lo32 = tnum_cast(reg->var_off, 4);
-	struct tnum hi32 = tnum_lshift(tnum_rshift(reg->var_off, 32), 32);
+	/* Special case when the 64-bit register has its upper 32 bits
+	 * zeroed.  Typically happens after a zext or a <<32, >>32 sequence,
+	 * allowing us to use the 32-bit bounds directly.
+	 */
+	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
+		__reg_assign_32_into_64(reg);
+	} else {
+		/* Otherwise the best we can do is push the lower 32-bit known
+		 * and unknown bits into the register (var_off set from jmp
+		 * logic) and then learn as much as possible from the 64-bit
+		 * tnum known and unknown bits.  The previous smin/smax bounds
+		 * are invalid here because of the jmp32 compare, so mark them
+		 * unknown so they do not impact the tnum bounds calculation.
+		 */
+		__mark_reg64_unbounded(reg);
+		__update_reg_bounds(reg);
+	}
 
-	reg->var_off = tnum_or(hi32, tnum_intersect(lo32, range));
+	/* Intersecting with the old var_off might have improved our bounds
+	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+	 * then the new var_off is (0; 0x7f...fc) which improves our umax.
+	 */
+	__reg_deduce_bounds(reg);
+	__reg_bound_offset(reg);
+	__update_reg_bounds(reg);
+}
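A sketch of why __reg_combine_32_into_64() only trusts the 32-bit bounds when the upper half is known zero: with unknown high bits, a bounded low subregister says nothing about the full 64-bit value.

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* Suppose a jmp32 check established 5 <= w0 <= 9, but the
		 * high half of r0 is unknown (no zext happened).
		 */
		uint64_t r0 = 0x100000007ULL;

		assert((uint32_t)r0 >= 5 && (uint32_t)r0 <= 9); /* 32-bit bounds hold */
		assert(r0 > 9);	/* ...yet they say nothing about the 64-bit value */
		return 0;
	}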
-/* Reset the min/max bounds of a register */
-static void __mark_reg_unbounded(struct bpf_reg_state *reg)
+static bool __reg64_bound_s32(s64 a)
 {
-	reg->smin_value = S64_MIN;
-	reg->smax_value = S64_MAX;
-	reg->umin_value = 0;
-	reg->umax_value = U64_MAX;
+	if (a > S32_MIN && a < S32_MAX)
+		return true;
+	return false;
+}
+
+static bool __reg64_bound_u32(u64 a)
+{
+	if (a > U32_MIN && a < U32_MAX)
+		return true;
+	return false;
+}
+
+static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
+{
+	__mark_reg32_unbounded(reg);
+
+	if (__reg64_bound_s32(reg->smin_value))
+		reg->s32_min_value = (s32)reg->smin_value;
+	if (__reg64_bound_s32(reg->smax_value))
+		reg->s32_max_value = (s32)reg->smax_value;
+	if (__reg64_bound_u32(reg->umin_value))
+		reg->u32_min_value = (u32)reg->umin_value;
+	if (__reg64_bound_u32(reg->umax_value))
+		reg->u32_max_value = (u32)reg->umax_value;
+
+	/* Intersecting with the old var_off might have improved our bounds
+	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+	 * then the new var_off is (0; 0x7f...fc) which improves our umax.
+	 */
+	__reg_deduce_bounds(reg);
+	__reg_bound_offset(reg);
+	__update_reg_bounds(reg);
 }
 
 /* Mark a register as having a completely unknown (scalar) value. */
@@ -2784,6 +2973,12 @@ static int check_tp_buffer_access(struct bpf_verifier_env *env,
 	return 0;
 }
 
+/* BPF architecture zero extends alu32 ops into 64-bit registers */
+static void zext_32_to_64(struct bpf_reg_state *reg)
+{
+	reg->var_off = tnum_subreg(reg->var_off);
+	__reg_assign_32_into_64(reg);
+}
 
 /* truncate register to smaller size (in bytes)
  * must be called with size < BPF_REG_SIZE
@@ -2806,6 +3001,14 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
 	}
 	reg->smin_value = reg->umin_value;
 	reg->smax_value = reg->umax_value;
+
+	/* If size is smaller than the 32-bit register, the 32-bit register
+	 * values are also truncated, so we push the 64-bit bounds into the
+	 * 32-bit bounds.  Sizes < 32 bits were already truncated above.
+	 */
+	if (size >= 4)
+		return;
+	__reg_combine_64_into_32(reg);
 }
 
 static bool bpf_map_is_rdonly(const struct bpf_map *map)
@@ -3460,13 +3663,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		expected_type = CONST_PTR_TO_MAP;
 		if (type != expected_type)
 			goto err_type;
-	} else if (arg_type == ARG_PTR_TO_CTX) {
+	} else if (arg_type == ARG_PTR_TO_CTX ||
+		   arg_type == ARG_PTR_TO_CTX_OR_NULL) {
 		expected_type = PTR_TO_CTX;
-		if (type != expected_type)
-			goto err_type;
-		err = check_ctx_reg(env, reg, regno);
-		if (err < 0)
-			return err;
+		if (!(register_is_null(reg) &&
+		      arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
+			if (type != expected_type)
+				goto err_type;
+			err = check_ctx_reg(env, reg, regno);
+			if (err < 0)
+				return err;
+		}
 	} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
 		expected_type = PTR_TO_SOCK_COMMON;
 		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
@@ -3576,11 +3783,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 	} else if (arg_type_is_mem_size(arg_type)) {
 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
-		/* remember the mem_size which may be used later
-		 * to refine return values.
+		/* This is used to refine r0 return value bounds for helpers
+		 * that enforce this value as an upper bound on return values.
+		 * See do_refine_retval_range() for helpers that can refine
+		 * the return value.  The C type of the helper is u32, so we
+		 * pull the register bound from umax_value; if it is negative,
+		 * the verifier errors out.  Only upper bounds can be learned
+		 * because the retval is an int type and negative retvals are
+		 * allowed.
 		 */
-		meta->msize_smax_value = reg->smax_value;
-		meta->msize_umax_value = reg->umax_value;
+		meta->msize_max_value = reg->umax_value;
 
 		/* The register is SCALAR_VALUE; the access check
 		 * happens using its boundaries.
@@ -3649,7 +3860,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (func_id != BPF_FUNC_perf_event_read &&
 		    func_id != BPF_FUNC_perf_event_output &&
 		    func_id != BPF_FUNC_skb_output &&
-		    func_id != BPF_FUNC_perf_event_read_value)
+		    func_id != BPF_FUNC_perf_event_read_value &&
+		    func_id != BPF_FUNC_xdp_output)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_STACK_TRACE:
@@ -3693,14 +3905,16 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (func_id != BPF_FUNC_sk_redirect_map &&
 		    func_id != BPF_FUNC_sock_map_update &&
 		    func_id != BPF_FUNC_map_delete_elem &&
-		    func_id != BPF_FUNC_msg_redirect_map)
+		    func_id != BPF_FUNC_msg_redirect_map &&
+		    func_id != BPF_FUNC_sk_select_reuseport)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_SOCKHASH:
 		if (func_id != BPF_FUNC_sk_redirect_hash &&
 		    func_id != BPF_FUNC_sock_hash_update &&
 		    func_id != BPF_FUNC_map_delete_elem &&
-		    func_id != BPF_FUNC_msg_redirect_hash)
+		    func_id != BPF_FUNC_msg_redirect_hash &&
+		    func_id != BPF_FUNC_sk_select_reuseport)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
@@ -3737,6 +3951,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_FUNC_perf_event_output:
 	case BPF_FUNC_perf_event_read_value:
 	case BPF_FUNC_skb_output:
+	case BPF_FUNC_xdp_output:
 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
 			goto error;
 		break;
@@ -3774,7 +3989,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 			goto error;
 		break;
 	case BPF_FUNC_sk_select_reuseport:
-		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
+		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
+		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
 			goto error;
 		break;
 	case BPF_FUNC_map_peek_elem:
@@ -4117,10 +4334,11 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
 		     func_id != BPF_FUNC_probe_read_str))
 		return;
 
-	ret_reg->smax_value = meta->msize_smax_value;
-	ret_reg->umax_value = meta->msize_umax_value;
+	ret_reg->smax_value = meta->msize_max_value;
+	ret_reg->s32_max_value = meta->msize_max_value;
 	__reg_deduce_bounds(ret_reg);
 	__reg_bound_offset(ret_reg);
+	__update_reg_bounds(ret_reg);
 }
 
 static int
@@ -4427,7 +4645,17 @@ static bool signed_add_overflows(s64 a, s64 b)
 	return res < a;
 }
 
-static bool signed_sub_overflows(s64 a, s64 b)
+static bool signed_add32_overflows(s32 a, s32 b)
+{
+	/* Do the add in u32, where overflow is well-defined */
+	s32 res = (s32)((u32)a + (u32)b);
+
+	if (b < 0)
+		return res > a;
+	return res < a;
+}
+
+static bool signed_sub_overflows(s64 a, s64 b)
 {
 	/* Do the sub in u64, where overflow is well-defined */
 	s64 res = (s64)((u64)a - (u64)b);
@@ -4437,6 +4665,16 @@ static bool signed_sub_overflows(s64 a, s64 b)
 	return res > a;
 }
 
+static bool signed_sub32_overflows(s32 a, s32 b)
+{
+	/* Do the sub in u32, where overflow is well-defined */
+	s32 res = (s32)((u32)a - (u32)b);
+
+	if (b < 0)
+		return res < a;
+	return res > a;
+}
+
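The wrap-detection idiom in signed_add32_overflows() can be exercised standalone (relying, like the kernel, on two's-complement conversion): the add is done with well-defined unsigned wrap-around, then the result's relation to the operand reveals overflow.

	#include <stdint.h>
	#include <stdio.h>

	static int add32_overflows(int32_t a, int32_t b)
	{
		/* Do the add in u32, where wrap-around is well-defined */
		int32_t res = (int32_t)((uint32_t)a + (uint32_t)b);

		return b < 0 ? res > a : res < a;
	}

	int main(void)
	{
		printf("%d\n", add32_overflows(INT32_MAX, 1));	/* 1: overflows */
		printf("%d\n", add32_overflows(5, -7));		/* 0: fine */
		return 0;
	}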
 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
 				  const struct bpf_reg_state *reg,
 				  enum bpf_reg_type type)
@@ -4673,6 +4911,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
 		return -EINVAL;
 
+	/* pointer types do not carry 32-bit bounds at the moment. */
+	__mark_reg32_unbounded(dst_reg);
+
 	switch (opcode) {
 	case BPF_ADD:
 		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg,
				       smin_val < 0);
@@ -4836,6 +5077,518 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
+				 struct bpf_reg_state *src_reg)
+{
+	s32 smin_val = src_reg->s32_min_value;
+	s32 smax_val = src_reg->s32_max_value;
+	u32 umin_val = src_reg->u32_min_value;
+	u32 umax_val = src_reg->u32_max_value;
+
+	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
+	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
+		dst_reg->s32_min_value = S32_MIN;
+		dst_reg->s32_max_value = S32_MAX;
+	} else {
+		dst_reg->s32_min_value += smin_val;
+		dst_reg->s32_max_value += smax_val;
+	}
+	if (dst_reg->u32_min_value + umin_val < umin_val ||
+	    dst_reg->u32_max_value + umax_val < umax_val) {
+		dst_reg->u32_min_value = 0;
+		dst_reg->u32_max_value = U32_MAX;
+	} else {
+		dst_reg->u32_min_value += umin_val;
+		dst_reg->u32_max_value += umax_val;
+	}
+}
+
+static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
+			       struct bpf_reg_state *src_reg)
+{
+	s64 smin_val = src_reg->smin_value;
+	s64 smax_val = src_reg->smax_value;
+	u64 umin_val = src_reg->umin_value;
+	u64 umax_val = src_reg->umax_value;
+
+	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
+	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
+		dst_reg->smin_value = S64_MIN;
+		dst_reg->smax_value = S64_MAX;
+	} else {
+		dst_reg->smin_value += smin_val;
+		dst_reg->smax_value += smax_val;
+	}
+	if (dst_reg->umin_value + umin_val < umin_val ||
+	    dst_reg->umax_value + umax_val < umax_val) {
+		dst_reg->umin_value = 0;
+		dst_reg->umax_value = U64_MAX;
+	} else {
+		dst_reg->umin_value += umin_val;
+		dst_reg->umax_value += umax_val;
+	}
+}
+
+static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
+				 struct bpf_reg_state *src_reg)
+{
+	s32 smin_val = src_reg->s32_min_value;
+	s32 smax_val = src_reg->s32_max_value;
+	u32 umin_val = src_reg->u32_min_value;
+	u32 umax_val = src_reg->u32_max_value;
+
+	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
+	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
+		/* Overflow possible, we know nothing */
+		dst_reg->s32_min_value = S32_MIN;
+		dst_reg->s32_max_value = S32_MAX;
+	} else {
+		dst_reg->s32_min_value -= smax_val;
+		dst_reg->s32_max_value -= smin_val;
+	}
+	if (dst_reg->u32_min_value < umax_val) {
+		/* Overflow possible, we know nothing */
+		dst_reg->u32_min_value = 0;
+		dst_reg->u32_max_value = U32_MAX;
+	} else {
+		/* Cannot overflow (as long as bounds are consistent) */
+		dst_reg->u32_min_value -= umax_val;
+		dst_reg->u32_max_value -= umin_val;
+	}
+}
+
+static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
+			       struct bpf_reg_state *src_reg)
+{
+	s64 smin_val = src_reg->smin_value;
+	s64 smax_val = src_reg->smax_value;
+	u64 umin_val = src_reg->umin_value;
+	u64 umax_val = src_reg->umax_value;
+
+	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
+	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
+		/* Overflow possible, we know nothing */
+		dst_reg->smin_value = S64_MIN;
+		dst_reg->smax_value = S64_MAX;
+	} else {
+		dst_reg->smin_value -= smax_val;
+		dst_reg->smax_value -= smin_val;
+	}
+	if (dst_reg->umin_value < umax_val) {
+		/* Overflow possible, we know nothing */
+		dst_reg->umin_value = 0;
+		dst_reg->umax_value = U64_MAX;
+	} else {
+		/* Cannot overflow (as long as bounds are consistent) */
+		dst_reg->umin_value -= umax_val;
+		dst_reg->umax_value -= umin_val;
+	}
+}
+
+static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
+				 struct bpf_reg_state *src_reg)
+{
+	s32 smin_val = src_reg->s32_min_value;
+	u32 umin_val = src_reg->u32_min_value;
+	u32 umax_val = src_reg->u32_max_value;
+
+	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
+		/* Ain't nobody got time to multiply that sign */
+		__mark_reg32_unbounded(dst_reg);
+		return;
+	}
+	/* Both values are positive, so we can work with unsigned and
+	 * copy the result to signed (unless it exceeds S32_MAX).
+	 */
+	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
+		/* Potential overflow, we know nothing */
+		__mark_reg32_unbounded(dst_reg);
+		return;
+	}
+	dst_reg->u32_min_value *= umin_val;
+	dst_reg->u32_max_value *= umax_val;
+	if (dst_reg->u32_max_value > S32_MAX) {
+		/* Overflow possible, we know nothing */
+		dst_reg->s32_min_value = S32_MIN;
+		dst_reg->s32_max_value = S32_MAX;
+	} else {
+		dst_reg->s32_min_value = dst_reg->u32_min_value;
+		dst_reg->s32_max_value = dst_reg->u32_max_value;
+	}
+}
+
+static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
+			       struct bpf_reg_state *src_reg)
+{
+	s64 smin_val = src_reg->smin_value;
+	u64 umin_val = src_reg->umin_value;
+	u64 umax_val = src_reg->umax_value;
+
+	if (smin_val < 0 || dst_reg->smin_value < 0) {
+		/* Ain't nobody got time to multiply that sign */
+		__mark_reg64_unbounded(dst_reg);
+		return;
+	}
+	/* Both values are positive, so we can work with unsigned and
+	 * copy the result to signed (unless it exceeds S64_MAX).
+	 */
+	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
+		/* Potential overflow, we know nothing */
+		__mark_reg64_unbounded(dst_reg);
+		return;
+	}
+	dst_reg->umin_value *= umin_val;
+	dst_reg->umax_value *= umax_val;
+	if (dst_reg->umax_value > S64_MAX) {
+		/* Overflow possible, we know nothing */
+		dst_reg->smin_value = S64_MIN;
+		dst_reg->smax_value = S64_MAX;
+	} else {
+		dst_reg->smin_value = dst_reg->umin_value;
+		dst_reg->smax_value = dst_reg->umax_value;
+	}
+}
+
+static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
+				 struct bpf_reg_state *src_reg)
+{
+	bool src_known = tnum_subreg_is_const(src_reg->var_off);
+	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
+	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
+	s32 smin_val = src_reg->s32_min_value;
+	u32 umax_val = src_reg->u32_max_value;
+
+	/* Assuming scalar_min_max_and() will also be called, so it's safe
+	 * to skip updating the register for the known 32-bit case.
+	 */
+	if (src_known && dst_known)
+		return;
+
+	/* We get our minimum from the var_off, since that's inherently
+	 * bitwise.  Our maximum is the minimum of the operands' maxima.
+	 */
+	dst_reg->u32_min_value = var32_off.value;
+	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
+	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
+		/* Lose signed bounds when ANDing negative numbers,
+		 * ain't nobody got time for that.
+		 */
+		dst_reg->s32_min_value = S32_MIN;
+		dst_reg->s32_max_value = S32_MAX;
+	} else {
+		/* ANDing two positives gives a positive, so safe to
+		 * cast result into s32.
+		 */
+		dst_reg->s32_min_value = dst_reg->u32_min_value;
+		dst_reg->s32_max_value = dst_reg->u32_max_value;
+	}
+}
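The umax rule shared by the two AND helpers follows from AND only ever clearing bits, so the result can never exceed either operand. A standalone check:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t x = 0x1234, y = 0x00ff;

		assert((x & y) <= x && (x & y) <= y);	/* umax = min of maxima */
		assert((x & y) == 0x34);
		return 0;
	}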
+static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
+			       struct bpf_reg_state *src_reg)
+{
+	bool src_known = tnum_is_const(src_reg->var_off);
+	bool dst_known = tnum_is_const(dst_reg->var_off);
+	s64 smin_val = src_reg->smin_value;
+	u64 umax_val = src_reg->umax_value;
+
+	if (src_known && dst_known) {
+		__mark_reg_known(dst_reg, dst_reg->var_off.value &
+					  src_reg->var_off.value);
+		return;
+	}
+
+	/* We get our minimum from the var_off, since that's inherently
+	 * bitwise.  Our maximum is the minimum of the operands' maxima.
+	 */
+	dst_reg->umin_value = dst_reg->var_off.value;
+	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
+	if (dst_reg->smin_value < 0 || smin_val < 0) {
+		/* Lose signed bounds when ANDing negative numbers,
+		 * ain't nobody got time for that.
+		 */
+		dst_reg->smin_value = S64_MIN;
+		dst_reg->smax_value = S64_MAX;
+	} else {
+		/* ANDing two positives gives a positive, so safe to
+		 * cast result into s64.
+		 */
+		dst_reg->smin_value = dst_reg->umin_value;
+		dst_reg->smax_value = dst_reg->umax_value;
+	}
+	/* We may learn something more from the var_off */
+	__update_reg_bounds(dst_reg);
+}
+
+static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
+				struct bpf_reg_state *src_reg)
+{
+	bool src_known = tnum_subreg_is_const(src_reg->var_off);
+	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
+	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
+	s32 smin_val = src_reg->s32_min_value;
+	u32 umin_val = src_reg->u32_min_value;
+
+	/* Assuming scalar_min_max_or() will also be called, so it is safe
+	 * to skip updating the register for the known case.
+	 */
+	if (src_known && dst_known)
+		return;
+
+	/* We get our maximum from the var_off, and our minimum is the
+	 * maximum of the operands' minima
+	 */
+	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
+	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
+	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
+		/* Lose signed bounds when ORing negative numbers,
+		 * ain't nobody got time for that.
+		 */
+		dst_reg->s32_min_value = S32_MIN;
+		dst_reg->s32_max_value = S32_MAX;
+	} else {
+		/* ORing two positives gives a positive, so safe to
+		 * cast result into s32.
+		 */
+		dst_reg->s32_min_value = dst_reg->u32_min_value;
+		dst_reg->s32_max_value = dst_reg->u32_max_value;
+	}
+}
+
+static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
+			      struct bpf_reg_state *src_reg)
+{
+	bool src_known = tnum_is_const(src_reg->var_off);
+	bool dst_known = tnum_is_const(dst_reg->var_off);
+	s64 smin_val = src_reg->smin_value;
+	u64 umin_val = src_reg->umin_value;
+
+	if (src_known && dst_known) {
+		__mark_reg_known(dst_reg, dst_reg->var_off.value |
+					  src_reg->var_off.value);
+		return;
+	}
+
+	/* We get our maximum from the var_off, and our minimum is the
+	 * maximum of the operands' minima
+	 */
+	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
+	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
+	if (dst_reg->smin_value < 0 || smin_val < 0) {
+		/* Lose signed bounds when ORing negative numbers,
+		 * ain't nobody got time for that.
+		 */
+		dst_reg->smin_value = S64_MIN;
+		dst_reg->smax_value = S64_MAX;
+	} else {
+		/* ORing two positives gives a positive, so safe to
+		 * cast result into s64.
+		 */
+		dst_reg->smin_value = dst_reg->umin_value;
+		dst_reg->smax_value = dst_reg->umax_value;
+	}
+	/* We may learn something more from the var_off */
+	__update_reg_bounds(dst_reg);
+}
+
+static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
+				   u64 umin_val, u64 umax_val)
+{
+	/* We lose all sign bit information (except what we can pick
+	 * up from var_off)
+	 */
+	dst_reg->s32_min_value = S32_MIN;
+	dst_reg->s32_max_value = S32_MAX;
+	/* If we might shift our top bit out, then we know nothing */
+	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
+		dst_reg->u32_min_value = 0;
+		dst_reg->u32_max_value = U32_MAX;
+	} else {
+		dst_reg->u32_min_value <<= umin_val;
+		dst_reg->u32_max_value <<= umax_val;
+	}
+}
+
+static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
+				 struct bpf_reg_state *src_reg)
+{
+	u32 umax_val = src_reg->u32_max_value;
+	u32 umin_val = src_reg->u32_min_value;
+	/* u32 alu operation will zext upper bits */
+	struct tnum subreg = tnum_subreg(dst_reg->var_off);
+
+	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
+	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
+	/* Not strictly required, but mark the reg64 bounds as unknown so
+	 * that we are forced to pick them up from the tnum and zext later;
+	 * if some path skips this step we are still safe.
+	 */
+	__mark_reg64_unbounded(dst_reg);
+	__update_reg32_bounds(dst_reg);
+}
+
+static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
+				   u64 umin_val, u64 umax_val)
+{
+	/* Special case <<32 because it is a common compiler pattern to sign
+	 * extend a subreg by doing <<32 s>>32.  In this case, if the 32-bit
+	 * bounds are positive we know this shift will also be positive, so
+	 * we can track bounds correctly.  Otherwise we lose all sign bit
+	 * information except what we can pick up from var_off.  Perhaps we
+	 * can generalize this later to shifts of any length.
+	 */
+	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
+		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
+	else
+		dst_reg->smax_value = S64_MAX;
+
+	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
+		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
+	else
+		dst_reg->smin_value = S64_MIN;
+
+	/* If we might shift our top bit out, then we know nothing */
+	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
+		dst_reg->umin_value = 0;
+		dst_reg->umax_value = U64_MAX;
+	} else {
+		dst_reg->umin_value <<= umin_val;
+		dst_reg->umax_value <<= umax_val;
+	}
+}
+
+static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
+			       struct bpf_reg_state *src_reg)
+{
+	u64 umax_val = src_reg->umax_value;
+	u64 umin_val = src_reg->umin_value;
+
+	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
+	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
+	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
+
+	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
+	/* We may learn something more from the var_off */
+	__update_reg_bounds(dst_reg);
+}
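The "<<32, s>>32" compiler idiom that __scalar64_min_max_lsh() special-cases, in plain C (assuming the usual arithmetic right shift for signed values, as the kernel does):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* Sign-extend a 32-bit value held in a 64-bit register */
		int64_t r = (uint32_t)0x80000001u;	/* negative as s32 */
		r = ((int64_t)((uint64_t)r << 32)) >> 32;
		assert(r == (int64_t)0xffffffff80000001ULL);

		/* With nonnegative 32-bit bounds the result stays
		 * nonnegative, which is what the special case tracks.
		 */
		int64_t p = ((int64_t)((uint64_t)7 << 32)) >> 32;
		assert(p == 7);
		return 0;
	}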
+static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
+				 struct bpf_reg_state *src_reg)
+{
+	struct tnum subreg = tnum_subreg(dst_reg->var_off);
+	u32 umax_val = src_reg->u32_max_value;
+	u32 umin_val = src_reg->u32_min_value;
+
+	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
+	 * be negative, then either:
+	 * 1) src_reg might be zero, so the sign bit of the result is
+	 *    unknown, so we lose our signed bounds
+	 * 2) it's known negative, thus the unsigned bounds capture the
+	 *    signed bounds
+	 * 3) the signed bounds cross zero, so they tell us nothing
+	 *    about the result
+	 * If the value in dst_reg is known nonnegative, then again the
+	 * unsigned bounds capture the signed bounds.
+	 * Thus, in all cases it suffices to blow away our signed bounds
+	 * and rely on inferring new ones from the unsigned bounds and
+	 * var_off of the result.
+	 */
+	dst_reg->s32_min_value = S32_MIN;
+	dst_reg->s32_max_value = S32_MAX;
+
+	dst_reg->var_off = tnum_rshift(subreg, umin_val);
+	dst_reg->u32_min_value >>= umax_val;
+	dst_reg->u32_max_value >>= umin_val;
+
+	__mark_reg64_unbounded(dst_reg);
+	__update_reg32_bounds(dst_reg);
+}
+
+static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
+			       struct bpf_reg_state *src_reg)
+{
+	u64 umax_val = src_reg->umax_value;
+	u64 umin_val = src_reg->umin_value;
+
+	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
+	 * be negative, then either:
+	 * 1) src_reg might be zero, so the sign bit of the result is
+	 *    unknown, so we lose our signed bounds
+	 * 2) it's known negative, thus the unsigned bounds capture the
+	 *    signed bounds
+	 * 3) the signed bounds cross zero, so they tell us nothing
+	 *    about the result
+	 * If the value in dst_reg is known nonnegative, then again the
+	 * unsigned bounds capture the signed bounds.
+	 * Thus, in all cases it suffices to blow away our signed bounds
+	 * and rely on inferring new ones from the unsigned bounds and
+	 * var_off of the result.
+	 */
+	dst_reg->smin_value = S64_MIN;
+	dst_reg->smax_value = S64_MAX;
+	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
+	dst_reg->umin_value >>= umax_val;
+	dst_reg->umax_value >>= umin_val;
+
+	/* It's not easy to operate on alu32 bounds here because it depends
+	 * on bits being shifted in.  Take the easy way out and mark it
+	 * unbounded so we can recalculate later from the tnum.
+	 */
+	__mark_reg32_unbounded(dst_reg);
+	__update_reg_bounds(dst_reg);
+}
+
+static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
+				  struct bpf_reg_state *src_reg)
+{
+	u64 umin_val = src_reg->u32_min_value;
+
+	/* Upon reaching here, src_known is true and
+	 * umax_val is equal to umin_val.
+	 */
+	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
+	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
+
+	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
+
+	/* blow away the dst_reg umin_value/umax_value and rely on
+	 * dst_reg var_off to refine the result.
+	 */
+	dst_reg->u32_min_value = 0;
+	dst_reg->u32_max_value = U32_MAX;
+
+	__mark_reg64_unbounded(dst_reg);
+	__update_reg32_bounds(dst_reg);
+}
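The distinction the rsh/arsh helpers above preserve, shown in plain C (signed right shift is arithmetic on the platforms the kernel supports):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		int32_t v = -8;

		assert((v >> 1) == -4);	/* BPF_ARSH: sign bit copied in */
		assert(((uint32_t)v >> 1) == 0x7ffffffcu); /* BPF_RSH: zeros shifted in */
		return 0;
	}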
+static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
+				struct bpf_reg_state *src_reg)
+{
+	u64 umin_val = src_reg->umin_value;
+
+	/* Upon reaching here, src_known is true and umax_val is equal
+	 * to umin_val.
+	 */
+	dst_reg->smin_value >>= umin_val;
+	dst_reg->smax_value >>= umin_val;
+
+	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
+
+	/* blow away the dst_reg umin_value/umax_value and rely on
+	 * dst_reg var_off to refine the result.
+	 */
+	dst_reg->umin_value = 0;
+	dst_reg->umax_value = U64_MAX;
+
+	/* It's not easy to operate on alu32 bounds here because it depends
+	 * on bits being shifted in from the upper 32 bits.  Take the easy
+	 * way out and mark it unbounded so we can recalculate later from
+	 * the tnum.
+	 */
+	__mark_reg32_unbounded(dst_reg);
+	__update_reg_bounds(dst_reg);
+}
+
 /* WARNING: This function does calculations on 64-bit values, but the actual
  * execution may occur on 32-bit values. Therefore, things like bitshifts
  * need extra checks in the 32-bit case.
@@ -4850,33 +5603,47 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 	bool src_known, dst_known;
 	s64 smin_val, smax_val;
 	u64 umin_val, umax_val;
+	s32 s32_min_val, s32_max_val;
+	u32 u32_min_val, u32_max_val;
 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
 	u32 dst = insn->dst_reg;
 	int ret;
-
-	if (insn_bitness == 32) {
-		/* Relevant for 32-bit RSH: Information can propagate towards
-		 * LSB, so it isn't sufficient to only truncate the output to
-		 * 32 bits.
-		 */
-		coerce_reg_to_size(dst_reg, 4);
-		coerce_reg_to_size(&src_reg, 4);
-	}
+	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
 
 	smin_val = src_reg.smin_value;
 	smax_val = src_reg.smax_value;
 	umin_val = src_reg.umin_value;
 	umax_val = src_reg.umax_value;
 
-	src_known = tnum_is_const(src_reg.var_off);
-	dst_known = tnum_is_const(dst_reg->var_off);
-	if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
-	    smin_val > smax_val || umin_val > umax_val) {
-		/* Taint dst register if offset had invalid bounds derived from
-		 * e.g. dead branches.
-		 */
-		__mark_reg_unknown(env, dst_reg);
-		return 0;
+	s32_min_val = src_reg.s32_min_value;
+	s32_max_val = src_reg.s32_max_value;
+	u32_min_val = src_reg.u32_min_value;
+	u32_max_val = src_reg.u32_max_value;
+
+	if (alu32) {
+		src_known = tnum_subreg_is_const(src_reg.var_off);
+		dst_known = tnum_subreg_is_const(dst_reg->var_off);
+		if ((src_known &&
+		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
+		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
+			/* Taint dst register if offset had invalid bounds
+			 * derived from e.g. dead branches.
+			 */
+			__mark_reg_unknown(env, dst_reg);
+			return 0;
+		}
+	} else {
+		src_known = tnum_is_const(src_reg.var_off);
+		dst_known = tnum_is_const(dst_reg->var_off);
+		if ((src_known &&
+		     (smin_val != smax_val || umin_val != umax_val)) ||
+		    smin_val > smax_val || umin_val > umax_val) {
+			/* Taint dst register if offset had invalid bounds
+			 * derived from e.g. dead branches.
+			 */
+			__mark_reg_unknown(env, dst_reg);
+			return 0;
+		}
 	}
 
 	if (!src_known &&
@@ -4885,6 +5652,20 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		return 0;
 	}
 
+	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
+	 * There are two classes of instructions: for the first class we track
+	 * both alu32 and alu64 sign/unsigned bounds independently; this
+	 * provides the greatest amount of precision when alu operations are
+	 * mixed with jmp32 operations.  These operations are BPF_ADD, BPF_SUB,
+	 * BPF_MUL, BPF_AND, and BPF_OR.  This is possible because these ops
+	 * have fairly easy-to-understand and -calculate behavior in both
+	 * 32-bit and 64-bit alu ops.  See the alu32 verifier tests for
+	 * examples.  The second class of operations, BPF_LSH, BPF_RSH, and
+	 * BPF_ARSH, however, are not so easy with regard to tracking
+	 * sign/unsigned bounds because the bits may cross subreg boundaries
+	 * in the alu64 case.  When this happens we mark the reg unbounded in
+	 * the subreg bound space and use the resulting tnum to calculate an
+	 * approximation of the sign/unsigned bounds.
+	 */
 	switch (opcode) {
 	case BPF_ADD:
 		ret = sanitize_val_alu(env, insn);
@@ -4892,22 +5673,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
 			return ret;
 		}
-		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
-		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
-			dst_reg->smin_value = S64_MIN;
-			dst_reg->smax_value = S64_MAX;
-		} else {
-			dst_reg->smin_value += smin_val;
-			dst_reg->smax_value += smax_val;
-		}
-		if (dst_reg->umin_value + umin_val < umin_val ||
-		    dst_reg->umax_value + umax_val < umax_val) {
-			dst_reg->umin_value = 0;
-			dst_reg->umax_value = U64_MAX;
-		} else {
-			dst_reg->umin_value += umin_val;
-			dst_reg->umax_value += umax_val;
-		}
+		scalar32_min_max_add(dst_reg, &src_reg);
+		scalar_min_max_add(dst_reg, &src_reg);
 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
 		break;
 	case BPF_SUB:
@@ -4916,111 +5683,24 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
 			return ret;
 		}
-		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
-		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
-			/* Overflow possible, we know nothing */
-			dst_reg->smin_value = S64_MIN;
-			dst_reg->smax_value = S64_MAX;
-		} else {
-			dst_reg->smin_value -= smax_val;
-			dst_reg->smax_value -= smin_val;
-		}
-		if (dst_reg->umin_value < umax_val) {
-			/* Overflow possible, we know nothing */
-			dst_reg->umin_value = 0;
-			dst_reg->umax_value = U64_MAX;
-		} else {
-			/* Cannot overflow (as long as bounds are consistent) */
-			dst_reg->umin_value -= umax_val;
-			dst_reg->umax_value -= umin_val;
-		}
+		scalar32_min_max_sub(dst_reg, &src_reg);
+		scalar_min_max_sub(dst_reg, &src_reg);
 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
 		break;
 	case BPF_MUL:
 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
-		if (smin_val < 0 || dst_reg->smin_value < 0) {
-			/* Ain't nobody got time to multiply that sign */
-			__mark_reg_unbounded(dst_reg);
-			__update_reg_bounds(dst_reg);
-			break;
-		}
-		/* Both values are positive, so we can work with unsigned and
-		 * copy the result to signed (unless it exceeds S64_MAX).
-		 */
-		if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
-			/* Potential overflow, we know nothing */
-			__mark_reg_unbounded(dst_reg);
-			/* (except what we can learn from the var_off) */
-			__update_reg_bounds(dst_reg);
-			break;
-		}
-		dst_reg->umin_value *= umin_val;
-		dst_reg->umax_value *= umax_val;
-		if (dst_reg->umax_value > S64_MAX) {
-			/* Overflow possible, we know nothing */
-			dst_reg->smin_value = S64_MIN;
-			dst_reg->smax_value = S64_MAX;
-		} else {
-			dst_reg->smin_value = dst_reg->umin_value;
-			dst_reg->smax_value = dst_reg->umax_value;
-		}
+		scalar32_min_max_mul(dst_reg, &src_reg);
+		scalar_min_max_mul(dst_reg, &src_reg);
 		break;
 	case BPF_AND:
-		if (src_known && dst_known) {
-			__mark_reg_known(dst_reg, dst_reg->var_off.value &
-						  src_reg.var_off.value);
-			break;
-		}
-		/* We get our minimum from the var_off, since that's inherently
-		 * bitwise.  Our maximum is the minimum of the operands' maxima.
-		 */
 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
-		dst_reg->umin_value = dst_reg->var_off.value;
-		dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
-		if (dst_reg->smin_value < 0 || smin_val < 0) {
-			/* Lose signed bounds when ANDing negative numbers,
-			 * ain't nobody got time for that.
-			 */
-			dst_reg->smin_value = S64_MIN;
-			dst_reg->smax_value = S64_MAX;
-		} else {
-			/* ANDing two positives gives a positive, so safe to
-			 * cast result into s64.
-			 */
-			dst_reg->smin_value = dst_reg->umin_value;
-			dst_reg->smax_value = dst_reg->umax_value;
-		}
-		/* We may learn something more from the var_off */
-		__update_reg_bounds(dst_reg);
+		scalar32_min_max_and(dst_reg, &src_reg);
+		scalar_min_max_and(dst_reg, &src_reg);
 		break;
 	case BPF_OR:
-		if (src_known && dst_known) {
-			__mark_reg_known(dst_reg, dst_reg->var_off.value |
-						  src_reg.var_off.value);
-			break;
-		}
-		/* We get our maximum from the var_off, and our minimum is the
-		 * maximum of the operands' minima
-		 */
 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
-		dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
-		dst_reg->umax_value = dst_reg->var_off.value |
-				      dst_reg->var_off.mask;
-		if (dst_reg->smin_value < 0 || smin_val < 0) {
-			/* Lose signed bounds when ORing negative numbers,
-			 * ain't nobody got time for that.
-			 */
-			dst_reg->smin_value = S64_MIN;
-			dst_reg->smax_value = S64_MAX;
-		} else {
-			/* ORing two positives gives a positive, so safe to
-			 * cast result into s64.
-			 */
-			dst_reg->smin_value = dst_reg->umin_value;
-			dst_reg->smax_value = dst_reg->umax_value;
-		}
-		/* We may learn something more from the var_off */
-		__update_reg_bounds(dst_reg);
+		scalar32_min_max_or(dst_reg, &src_reg);
+		scalar_min_max_or(dst_reg, &src_reg);
 		break;
 	case BPF_LSH:
 		if (umax_val >= insn_bitness) {
@@ -5030,22 +5710,10 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 			mark_reg_unknown(env, regs, insn->dst_reg);
 			break;
 		}
-		/* We lose all sign bit information (except what we can pick
-		 * up from var_off)
-		 */
-		dst_reg->smin_value = S64_MIN;
-		dst_reg->smax_value = S64_MAX;
-		/* If we might shift our top bit out, then we know nothing */
-		if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
-			dst_reg->umin_value = 0;
-			dst_reg->umax_value = U64_MAX;
-		} else {
-			dst_reg->umin_value <<= umin_val;
-			dst_reg->umax_value <<= umax_val;
-		}
-		dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
-		/* We may learn something more from the var_off */
-		__update_reg_bounds(dst_reg);
+		if (alu32)
+			scalar32_min_max_lsh(dst_reg, &src_reg);
+		else
+			scalar_min_max_lsh(dst_reg, &src_reg);
 		break;
 	case BPF_RSH:
		if (umax_val >= insn_bitness) {
@@ -5055,27 +5723,10 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 			mark_reg_unknown(env, regs, insn->dst_reg);
 			break;
 		}
-		/* BPF_RSH is an unsigned shift. If the value in dst_reg might
-		 * be negative, then either:
-		 * 1) src_reg might be zero, so the sign bit of the result is
-		 *    unknown, so we lose our signed bounds
-		 * 2) it's known negative, thus the unsigned bounds capture the
-		 *    signed bounds
-		 * 3) the signed bounds cross zero, so they tell us nothing
-		 *    about the result
-		 * If the value in dst_reg is known nonnegative, then again the
-		 * unsigned bounts capture the signed bounds.
-		 * Thus, in all cases it suffices to blow away our signed bounds
-		 * and rely on inferring new ones from the unsigned bounds and
-		 * var_off of the result.
-		 */
-		dst_reg->smin_value = S64_MIN;
-		dst_reg->smax_value = S64_MAX;
-		dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
-		dst_reg->umin_value >>= umax_val;
-		dst_reg->umax_value >>= umin_val;
-		/* We may learn something more from the var_off */
-		__update_reg_bounds(dst_reg);
+		if (alu32)
+			scalar32_min_max_rsh(dst_reg, &src_reg);
+		else
+			scalar_min_max_rsh(dst_reg, &src_reg);
 		break;
 	case BPF_ARSH:
 		if (umax_val >= insn_bitness) {
@@ -5085,38 +5736,21 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 			mark_reg_unknown(env, regs, insn->dst_reg);
 			break;
 		}
-
-		/* Upon reaching here, src_known is true and
-		 * umax_val is equal to umin_val.
-		 */
-		if (insn_bitness == 32) {
-			dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val);
-			dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val);
-		} else {
-			dst_reg->smin_value >>= umin_val;
-			dst_reg->smax_value >>= umin_val;
-		}
-
-		dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val,
-						insn_bitness);
-
-		/* blow away the dst_reg umin_value/umax_value and rely on
-		 * dst_reg var_off to refine the result.
-		 */
-		dst_reg->umin_value = 0;
-		dst_reg->umax_value = U64_MAX;
-		__update_reg_bounds(dst_reg);
+		if (alu32)
+			scalar32_min_max_arsh(dst_reg, &src_reg);
+		else
+			scalar_min_max_arsh(dst_reg, &src_reg);
 		break;
 	default:
 		mark_reg_unknown(env, regs, insn->dst_reg);
 		break;
 	}
 
-	if (BPF_CLASS(insn->code) != BPF_ALU64) {
-		/* 32-bit ALU ops are (32,32)->32 */
-		coerce_reg_to_size(dst_reg, 4);
-	}
+	/* ALU32 ops are zero extended into 64-bit register */
+	if (alu32)
+		zext_32_to_64(dst_reg);
 
+	__update_reg_bounds(dst_reg);
 	__reg_deduce_bounds(dst_reg);
 	__reg_bound_offset(dst_reg);
 	return 0;
@@ -5290,7 +5924,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 				mark_reg_unknown(env, regs,
 						 insn->dst_reg);
 			}
-			coerce_reg_to_size(dst_reg, 4);
+			zext_32_to_64(dst_reg);
 		}
 	} else {
 		/* case: R = imm
@@ -5460,55 +6094,83 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
 					 new_range);
 }
 
-/* compute branch direction of the expression "if (reg opcode val) goto target;"
- * and return:
- *  1 - branch will be taken and "goto target" will be executed
- *  0 - branch will not be taken and fall-through to next insn
- * -1 - unknown.  Example: "if (reg < 5)" is unknown when register value range [0,10]
- */
-static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
-			   bool is_jmp32)
+static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
 {
-	struct bpf_reg_state reg_lo;
-	s64 sval;
+	struct tnum subreg = tnum_subreg(reg->var_off);
+	s32 sval = (s32)val;
 
-	if (__is_pointer_value(false, reg))
-		return -1;
+	switch (opcode) {
+	case BPF_JEQ:
+		if (tnum_is_const(subreg))
+			return !!tnum_equals_const(subreg, val);
+		break;
+	case BPF_JNE:
+		if (tnum_is_const(subreg))
+			return !tnum_equals_const(subreg, val);
+		break;
+	case BPF_JSET:
+		if ((~subreg.mask & subreg.value) & val)
+			return 1;
+		if (!((subreg.mask | subreg.value) & val))
+			return 0;
+		break;
+	case BPF_JGT:
+		if (reg->u32_min_value > val)
+			return 1;
+		else if (reg->u32_max_value <= val)
+			return 0;
+		break;
+	case BPF_JSGT:
+		if (reg->s32_min_value > sval)
+			return 1;
+		else if (reg->s32_max_value < sval)
+			return 0;
+		break;
+	case BPF_JLT:
+		if (reg->u32_max_value < val)
+			return 1;
+		else if (reg->u32_min_value >= val)
+			return 0;
+		break;
+	case BPF_JSLT:
+		if (reg->s32_max_value < sval)
+			return 1;
+		else if (reg->s32_min_value >= sval)
+			return 0;
+		break;
+	case BPF_JGE:
+		if (reg->u32_min_value >= val)
+			return 1;
+		else if (reg->u32_max_value < val)
+			return 0;
+		break;
+	case BPF_JSGE:
+		if (reg->s32_min_value >= sval)
+			return 1;
+		else if (reg->s32_max_value < sval)
+			return 0;
+		break;
+	case BPF_JLE:
+		if (reg->u32_max_value <= val)
+			return 1;
+		else if (reg->u32_min_value > val)
+			return 0;
+		break;
+	case BPF_JSLE:
+		if (reg->s32_max_value <= sval)
+			return 1;
+		else if (reg->s32_min_value > sval)
+			return 0;
+		break;
+	}
 
-	if (is_jmp32) {
-		reg_lo = *reg;
-		reg = &reg_lo;
-		/* For JMP32, only low 32 bits are compared, coerce_reg_to_size
-		 * could truncate high bits and update umin/umax according to
-		 * information of low bits.
-		 */
-		coerce_reg_to_size(reg, 4);
-		/* smin/smax need special handling. For example, after coerce,
-		 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
-		 * used as operand to JMP32. It is a negative number from s32's
-		 * point of view, while it is a positive number when seen as
-		 * s64. The smin/smax are kept as s64, therefore, when used with
-		 * JMP32, they need to be transformed into s32, then sign
-		 * extended back to s64.
-		 *
-		 * Also, smin/smax were copied from umin/umax. If umin/umax has
-		 * different sign bit, then min/max relationship doesn't
-		 * maintain after casting into s32, for this case, set smin/smax
-		 * to safest range.
-		 */
-		if ((reg->umax_value ^ reg->umin_value) &
-		    (1ULL << 31)) {
-			reg->smin_value = S32_MIN;
-			reg->smax_value = S32_MAX;
-		}
-		reg->smin_value = (s64)(s32)reg->smin_value;
-		reg->smax_value = (s64)(s32)reg->smax_value;
-
-		val = (u32)val;
-		sval = (s64)(s32)val;
-	} else {
-		sval = (s64)val;
-	}
+	return -1;
+}
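What is_branch32_taken() encodes: a BPF_JMP32 comparison reads only the low subregister, so it can be decidable even when the corresponding 64-bit comparison is not. A standalone illustration:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t r1 = 0x500000003ULL;	/* low subregister == 3 */

		assert((uint32_t)r1 == 3);	/* jmp32 JEQ 3: taken */
		assert(r1 != 3);		/* 64-bit JEQ 3: not taken */
		return 0;
	}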
+static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
+{
+	s64 sval = (s64)val;
 
 	switch (opcode) {
 	case BPF_JEQ:
@@ -5578,27 +6240,22 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
 	return -1;
 }
 
-/* Generate min value of the high 32-bit from TNUM info. */
-static u64 gen_hi_min(struct tnum var)
-{
-	return var.value & ~0xffffffffULL;
-}
-
-/* Generate max value of the high 32-bit from TNUM info. */
-static u64 gen_hi_max(struct tnum var)
-{
-	return (var.value | var.mask) & ~0xffffffffULL;
-}
-
-/* Return true if VAL is compared with a s64 sign extended from s32, and they
- * are with the same signedness.
+/* compute branch direction of the expression "if (reg opcode val) goto target;"
+ * and return:
+ *  1 - branch will be taken and "goto target" will be executed
+ *  0 - branch will not be taken and fall-through to next insn
+ * -1 - unknown.  Example: "if (reg < 5)" is unknown when register value
+ *      range [0,10]
 */
-static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
+static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
+			   bool is_jmp32)
 {
-	return ((s32)sval >= 0 &&
-		reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
-	       ((s32)sval < 0 &&
-		reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
+	if (__is_pointer_value(false, reg))
+		return -1;
+
+	if (is_jmp32)
+		return is_branch32_taken(reg, val, opcode);
+	return is_branch64_taken(reg, val, opcode);
 }
 
 /* Adjusts the register min/max values in the case that the dst_reg is the
@@ -5607,10 +6264,16 @@ static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
  * In JEQ/JNE cases we also adjust the var_off values.
  */
 static void reg_set_min_max(struct bpf_reg_state *true_reg,
-			    struct bpf_reg_state *false_reg, u64 val,
+			    struct bpf_reg_state *false_reg,
+			    u64 val, u32 val32,
 			    u8 opcode, bool is_jmp32)
 {
-	s64 sval;
+	struct tnum false_32off = tnum_subreg(false_reg->var_off);
+	struct tnum false_64off = false_reg->var_off;
+	struct tnum true_32off = tnum_subreg(true_reg->var_off);
+	struct tnum true_64off = true_reg->var_off;
+	s64 sval = (s64)val;
+	s32 sval32 = (s32)val32;
 
 	/* If the dst_reg is a pointer, we can't learn anything about its
 	 * variable offset from the compare (unless src_reg were a pointer into
@@ -5621,9 +6284,6 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 	if (__is_pointer_value(false, false_reg))
 		return;
 
-	val = is_jmp32 ? (u32)val : val;
-	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
-
 	switch (opcode) {
 	case BPF_JEQ:
 	case BPF_JNE:
 	{
@@ -5635,211 +6295,150 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		 * if it is true we know the value for sure. Likewise for
 		 * BPF_JNE.
 		 */
-		if (is_jmp32) {
-			u64 old_v = reg->var_off.value;
-			u64 hi_mask = ~0xffffffffULL;
-
-			reg->var_off.value = (old_v & hi_mask) | val;
-			reg->var_off.mask &= hi_mask;
-		} else {
+		if (is_jmp32)
+			__mark_reg32_known(reg, val32);
+		else
 			__mark_reg_known(reg, val);
-		}
 		break;
 	}
 	case BPF_JSET:
-		false_reg->var_off = tnum_and(false_reg->var_off,
-					      tnum_const(~val));
-		if (is_power_of_2(val))
-			true_reg->var_off = tnum_or(true_reg->var_off,
-						    tnum_const(val));
+		if (is_jmp32) {
+			false_32off = tnum_and(false_32off, tnum_const(~val32));
+			if (is_power_of_2(val32))
+				true_32off = tnum_or(true_32off,
+						     tnum_const(val32));
+		} else {
+			false_64off = tnum_and(false_64off, tnum_const(~val));
+			if (is_power_of_2(val))
+				true_64off = tnum_or(true_64off,
+						     tnum_const(val));
+		}
 		break;
 	case BPF_JGE:
 	case BPF_JGT:
 	{
-		u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
-		u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
-
 		if (is_jmp32) {
-			false_umax += gen_hi_max(false_reg->var_off);
-			true_umin += gen_hi_min(true_reg->var_off);
+			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
+			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
+
+			false_reg->u32_max_value = min(false_reg->u32_max_value,
+						       false_umax);
+			true_reg->u32_min_value = max(true_reg->u32_min_value,
+						      true_umin);
+		} else {
+			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
+			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
+
+			false_reg->umax_value = min(false_reg->umax_value, false_umax);
+			true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		}
-		false_reg->umax_value = min(false_reg->umax_value, false_umax);
-		true_reg->umin_value = max(true_reg->umin_value, true_umin);
 		break;
 	}
 	case BPF_JSGE:
 	case BPF_JSGT:
 	{
-		s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
-		s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
+		if (is_jmp32) {
+			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
+			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
 
-		/* If the full s64 was not sign-extended from s32 then don't
-		 * deduct further info.
-		 */
-		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
-			break;
-		false_reg->smax_value = min(false_reg->smax_value, false_smax);
-		true_reg->smin_value = max(true_reg->smin_value, true_smin);
+			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
+			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
+		} else {
+			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
+			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
+
+			false_reg->smax_value = min(false_reg->smax_value, false_smax);
+			true_reg->smin_value = max(true_reg->smin_value, true_smin);
+		}
 		break;
 	}
 	case BPF_JLE:
 	case BPF_JLT:
 	{
-		u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
-		u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
-
 		if (is_jmp32) {
-			false_umin += gen_hi_min(false_reg->var_off);
-			true_umax += gen_hi_max(true_reg->var_off);
+			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
+			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
+
+			false_reg->u32_min_value = max(false_reg->u32_min_value,
+						       false_umin);
+			true_reg->u32_max_value = min(true_reg->u32_max_value,
+						      true_umax);
+		} else {
+			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
+			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
+
+			false_reg->umin_value = max(false_reg->umin_value, false_umin);
+			true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		}
-		false_reg->umin_value = max(false_reg->umin_value, false_umin);
-		true_reg->umax_value = min(true_reg->umax_value, true_umax);
 		break;
 	}
 	case BPF_JSLE:
 	case BPF_JSLT:
 	{
-		s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
-		s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
+		if (is_jmp32) {
+			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
+			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
 
-		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
-			break;
-		false_reg->smin_value = max(false_reg->smin_value, false_smin);
-		true_reg->smax_value = min(true_reg->smax_value, true_smax);
+			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
+			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
+		} else {
+			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
+			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
+
+			false_reg->smin_value = max(false_reg->smin_value, false_smin);
+			true_reg->smax_value = min(true_reg->smax_value, true_smax);
+		}
 		break;
 	}
 	default:
-		break;
+		return;
 	}
 
-	__reg_deduce_bounds(false_reg);
-	__reg_deduce_bounds(true_reg);
-	/* We might have learned some bits from the bounds. */
-	__reg_bound_offset(false_reg);
-	__reg_bound_offset(true_reg);
 	if (is_jmp32) {
-		__reg_bound_offset32(false_reg);
-		__reg_bound_offset32(true_reg);
+		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
+					     tnum_subreg(false_32off));
+		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
+					    tnum_subreg(true_32off));
+		__reg_combine_32_into_64(false_reg);
+		__reg_combine_32_into_64(true_reg);
+	} else {
+		false_reg->var_off = false_64off;
+		true_reg->var_off = true_64off;
+		__reg_combine_64_into_32(false_reg);
+		__reg_combine_64_into_32(true_reg);
 	}
-	/* Intersecting with the old var_off might have improved our bounds
-	 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
-	 * then new var_off is (0; 0x7f...fc) which improves our umax.
-	 */
-	__update_reg_bounds(false_reg);
-	__update_reg_bounds(true_reg);
 }
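Concretely, the BPF_JGT arm of reg_set_min_max() above splits a known range at the comparison value; for "if r1 > 10" with r1 in [0, 100]:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t val = 10;
		uint64_t true_umin = val + 1;	/* branch taken:     r1 in [11, 100] */
		uint64_t false_umax = val;	/* branch not taken: r1 in [0, 10] */

		assert(true_umin == 11 && false_umax == 10);
		return 0;
	}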
@@ -6128,13 +6727,22 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	dst_reg = &regs[insn->dst_reg];
 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
 
-	if (BPF_SRC(insn->code) == BPF_K)
-		pred = is_branch_taken(dst_reg, insn->imm,
-				       opcode, is_jmp32);
-	else if (src_reg->type == SCALAR_VALUE &&
-		 tnum_is_const(src_reg->var_off))
-		pred = is_branch_taken(dst_reg, src_reg->var_off.value,
-				       opcode, is_jmp32);
+	if (BPF_SRC(insn->code) == BPF_K) {
+		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
+	} else if (src_reg->type == SCALAR_VALUE &&
+		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
+		pred = is_branch_taken(dst_reg,
+				       tnum_subreg(src_reg->var_off).value,
+				       opcode,
+				       is_jmp32);
+	} else if (src_reg->type == SCALAR_VALUE &&
+		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
+		pred = is_branch_taken(dst_reg,
+				       src_reg->var_off.value,
+				       opcode,
+				       is_jmp32);
+	}
+
 	if (pred >= 0) {
 		err = mark_chain_precision(env, insn->dst_reg);
 		if (BPF_SRC(insn->code) == BPF_X && !err)
@@ -6168,32 +6776,24 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	 */
 	if (BPF_SRC(insn->code) == BPF_X) {
 		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
-		struct bpf_reg_state lo_reg0 = *dst_reg;
-		struct bpf_reg_state lo_reg1 = *src_reg;
-		struct bpf_reg_state *src_lo, *dst_lo;
-
-		dst_lo = &lo_reg0;
-		src_lo = &lo_reg1;
-		coerce_reg_to_size(dst_lo, 4);
-		coerce_reg_to_size(src_lo, 4);
 
 		if (dst_reg->type == SCALAR_VALUE &&
 		    src_reg->type == SCALAR_VALUE) {
 			if (tnum_is_const(src_reg->var_off) ||
-			    (is_jmp32 && tnum_is_const(src_lo->var_off)))
+			    (is_jmp32 &&
+			     tnum_is_const(tnum_subreg(src_reg->var_off))))
 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
 						dst_reg,
-						is_jmp32
-						? src_lo->var_off.value
-						: src_reg->var_off.value,
+						src_reg->var_off.value,
+						tnum_subreg(src_reg->var_off).value,
 						opcode, is_jmp32);
 			else if (tnum_is_const(dst_reg->var_off) ||
-				 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
+				 (is_jmp32 &&
+				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
 						    src_reg,
-						    is_jmp32
-						    ? dst_lo->var_off.value
-						    : dst_reg->var_off.value,
+						    dst_reg->var_off.value,
+						    tnum_subreg(dst_reg->var_off).value,
 						    opcode, is_jmp32);
 			else if (!is_jmp32 &&
 				 (opcode == BPF_JEQ || opcode == BPF_JNE))
@@ -6204,7 +6804,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		}
 	} else if (dst_reg->type == SCALAR_VALUE) {
 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
-					dst_reg, insn->imm, opcode, is_jmp32);
+					dst_reg, insn->imm, (u32)insn->imm,
+					opcode, is_jmp32);
 	}
 
 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
@@ -6405,8 +7006,9 @@ static int check_return_code(struct bpf_verifier_env *env)
 	struct tnum range = tnum_range(0, 1);
 	int err;
 
-	/* The struct_ops func-ptr's return type could be "void" */
-	if (env->prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
+	/* LSM and struct_ops func-ptr's return type could be "void" */
+	if ((env->prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+	     env->prog->type == BPF_PROG_TYPE_LSM) &&
 	    !prog->aux->attach_func_proto->type)
 		return 0;
 
@@ -8139,26 +8741,48 @@ static bool is_tracing_prog_type(enum bpf_prog_type type)
 	}
 }
 
+static bool is_preallocated_map(struct bpf_map *map)
+{
+	if (!check_map_prealloc(map))
+		return false;
+	if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
+		return false;
+	return true;
+}
+
 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 					struct bpf_map *map,
 					struct bpf_prog *prog)
 {
-	/* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
-	 * preallocated hash maps, since doing memory allocation
-	 * in overflow_handler can crash depending on where nmi got
-	 * triggered.
+	/*
+	 * Validate that trace type programs use preallocated hash maps.
+	 *
+	 * For programs attached to PERF events this is mandatory as the
+	 * perf NMI can hit any arbitrary code sequence.
+	 *
+	 * All other trace types using non-preallocated hash maps are unsafe
+	 * as well because tracepoint or kprobes can be inside locked regions
+	 * of the memory allocator or at a place where a recursion into the
+	 * memory allocator would see inconsistent state.
+	 *
+	 * On RT enabled kernels run-time allocation of all trace type
+	 * programs is strictly prohibited due to lock type constraints. On
+	 * !RT kernels it is allowed for backwards compatibility reasons for
+	 * now, but warnings are emitted so developers are made aware of
+	 * the unsafety and can fix their programs before this is enforced.
 	 */
-	if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
-		if (!check_map_prealloc(map)) {
+	if (is_tracing_prog_type(prog->type) && !is_preallocated_map(map)) {
+		if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
 			verbose(env, "perf_event programs can only use preallocated hash map\n");
 			return -EINVAL;
 		}
-		if (map->inner_map_meta &&
-		    !check_map_prealloc(map->inner_map_meta)) {
-			verbose(env, "perf_event programs can only use preallocated inner hash map\n");
+		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+			verbose(env, "trace type programs can only use preallocated hash map\n");
 			return -EINVAL;
 		}
+		WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
+		verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
 	}
 
 	if ((is_tracing_prog_type(prog->type) ||
@@ -9774,6 +10398,26 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
 	return 0;
 }
 
+#define SECURITY_PREFIX "security_"
+
+static int check_attach_modify_return(struct bpf_verifier_env *env)
+{
+	struct bpf_prog *prog = env->prog;
+	unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr;
+
+	/* This is expected to be cleaned up in the future by the KRSI effort,
+	 * which introduces the LSM_HOOK macro for lsm_hooks.h.
+	 */
+	if (within_error_injection_list(addr) ||
+	    !strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
+		     sizeof(SECURITY_PREFIX) - 1))
+		return 0;
+
+	verbose(env, "fmod_ret attach_btf_id %u (%s) is not modifiable\n",
+		prog->aux->attach_btf_id, prog->aux->attach_func_name);
+
+	return -EINVAL;
+}
 
 static int check_attach_btf_id(struct bpf_verifier_env *env)
 {
@@ -9794,7 +10438,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
 		return check_struct_ops_btf_id(env);
 
-	if (prog->type != BPF_PROG_TYPE_TRACING && !prog_extension)
+	if (prog->type != BPF_PROG_TYPE_TRACING &&
+	    prog->type != BPF_PROG_TYPE_LSM &&
+	    !prog_extension)
 		return 0;
 
 	if (!btf_id) {
@@ -9924,8 +10570,17 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		if (!prog_extension)
 			return -EINVAL;
 		/* fallthrough */
+	case BPF_MODIFY_RETURN:
+	case BPF_LSM_MAC:
 	case BPF_TRACE_FENTRY:
 	case BPF_TRACE_FEXIT:
+		prog->aux->attach_func_name = tname;
+		if (prog->type == BPF_PROG_TYPE_LSM) {
+			ret = bpf_lsm_verify_prog(&env->log, prog);
+			if (ret < 0)
+				return ret;
+		}
+
 		if (!btf_type_is_func(t)) {
 			verbose(env, "attach_btf_id %u is not a function\n",
 				btf_id);
@@ -9940,7 +10595,6 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		tr = bpf_trampoline_lookup(key);
 		if (!tr)
 			return -ENOMEM;
-		prog->aux->attach_func_name = tname;
 		/* t is either vmlinux type or another program's type */
 		prog->aux->attach_func_proto = t;
 		mutex_lock(&tr->mutex);
@@ -9973,6 +10627,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		}
 		tr->func.addr = (void *)addr;
 		prog->aux->trampoline = tr;
+
+		if (prog->expected_attach_type == BPF_MODIFY_RETURN)
+			ret = check_attach_modify_return(env);
 out:
 		mutex_unlock(&tr->mutex);
 		if (ret)
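For reference, a sketch of a program that check_attach_modify_return() would accept, following the conventions of the kernel's BPF selftests: bpf_modify_return_test() is a kernel test function on the error injection list (attaching to a security_* hook is the other accepted class). Section and macro names are libbpf's; treat this as an illustrative skeleton, not a tested program:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* fmod_ret programs see the traced function's arguments followed by its
 * (tentative) return value as the last parameter. */
SEC("fmod_ret/bpf_modify_return_test")
int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
{
	/* returning non-zero overrides the function's return value */
	return ret ? ret : -22 /* -EINVAL */;
}

char _license[] SEC("license") = "GPL";

Because a BPF_MODIFY_RETURN program can change a kernel function's return value, the verifier restricts the attach point to functions that already tolerate injected errors or to LSM hooks, which is exactly what the SECURITY_PREFIX and within_error_injection_list() checks above enforce.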