From 06ef0ccb5a36e1feba9b413ff59a04ecc4407c1c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 18 Dec 2017 10:13:44 -0800 Subject: bpf/cgroup: fix a verification error for a CGROUP_DEVICE type prog The tools/testing/selftests/bpf test program test_dev_cgroup fails with the following error when compiled with llvm 6.0. (I did not try with earlier versions.) libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: 0: (61) r2 = *(u32 *)(r1 +4) 1: (b7) r0 = 0 2: (55) if r2 != 0x1 goto pc+8 R0=inv0 R1=ctx(id=0,off=0,imm=0) R2=inv1 R10=fp0 3: (69) r2 = *(u16 *)(r1 +0) invalid bpf_context access off=0 size=2 ... The culprit is the following statement in dev_cgroup.c: short type = ctx->access_type & 0xFFFF; This code is typical as the ctx->access_type is assigned as below in kernel/bpf/cgroup.c: struct bpf_cgroup_dev_ctx ctx = { .access_type = (access << 16) | dev_type, .major = major, .minor = minor, }; The compiler converts it to u16 access while the verifier cgroup_dev_is_valid_access rejects any non u32 access. This patch permits the field access_type to be accessible with type u16 and u8 as well. Signed-off-by: Yonghong Song Tested-by: Roman Gushchin Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d01f1cb3cfc0..69eabfcb9bdb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1012,7 +1012,8 @@ struct bpf_perf_event_value { #define BPF_DEVCG_DEV_CHAR (1ULL << 1) struct bpf_cgroup_dev_ctx { - __u32 access_type; /* (access << 16) | type */ + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; __u32 major; __u32 minor; }; -- cgit v1.2.3 From 7105e828c087de970fcb5a9509db51bfe6bd7894 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Dec 2017 13:42:57 +0100 Subject: bpf: allow for correlation of maps and helpers in dump Currently a dump of an xlated prog (post verifier stage) doesn't correlate used helpers as well as maps. The prog info lists involved map ids, however there's no correlation of where in the program they are used as of today. Likewise, bpftool does not correlate helper calls with the target functions. The latter can be done w/o any kernel changes through kallsyms, and also has the advantage that this works with inlined helpers and BPF calls. Example, via interpreter: # tc filter show dev foo ingress filter protocol all pref 49152 bpf chain 0 filter protocol all pref 49152 bpf chain 0 handle 0x1 foo.o:[ingress] \ direct-action not_in_hw id 1 tag c74773051b364165 <-- prog id:1 * Output before patch (calls/maps remain unclear): # bpftool prog dump xlated id 1 <-- dump prog id:1 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = 0xffff95c47a8d4800 6: (85) call unknown#73040 7: (15) if r0 == 0x0 goto pc+18 8: (bf) r2 = r10 9: (07) r2 += -4 10: (bf) r1 = r0 11: (85) call unknown#73040 12: (15) if r0 == 0x0 goto pc+23 [...] * Output after patch: # bpftool prog dump xlated id 1 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = map[id:2] <-- map id:2 6: (85) call bpf_map_lookup_elem#73424 <-- helper call 7: (15) if r0 == 0x0 goto pc+18 8: (bf) r2 = r10 9: (07) r2 += -4 10: (bf) r1 = r0 11: (85) call bpf_map_lookup_elem#73424 12: (15) if r0 == 0x0 goto pc+23 [...] # bpftool map show id 2 <-- show/dump/etc map id:2 2: hash_of_maps flags 0x0 key 4B value 4B max_entries 3 memlock 4096B Example, JITed, same prog: # tc filter show dev foo ingress filter protocol all pref 49152 bpf chain 0 filter protocol all pref 49152 bpf chain 0 handle 0x1 foo.o:[ingress] \ direct-action not_in_hw id 3 tag c74773051b364165 jited # bpftool prog show id 3 3: sched_cls tag c74773051b364165 loaded_at Dec 19/13:48 uid 0 xlated 384B jited 257B memlock 4096B map_ids 2 # bpftool prog dump xlated id 3 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = map[id:2] <-- map id:2 6: (85) call __htab_map_lookup_elem#77408 <-+ inlined rewrite 7: (15) if r0 == 0x0 goto pc+2 | 8: (07) r0 += 56 | 9: (79) r0 = *(u64 *)(r0 +0) <-+ 10: (15) if r0 == 0x0 goto pc+24 11: (bf) r2 = r10 12: (07) r2 += -4 [...] Example, same prog, but kallsyms disabled (in that case we are also not allowed to pass any relative offsets, etc, so prog becomes pointer sanitized on dump): # sysctl kernel.kptr_restrict=2 kernel.kptr_restrict = 2 # bpftool prog dump xlated id 3 0: (b7) r1 = 2 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 3: (07) r2 += -4 4: (18) r1 = map[id:2] 6: (85) call bpf_unspec#0 7: (15) if r0 == 0x0 goto pc+2 [...] Example, BPF calls via interpreter: # bpftool prog dump xlated id 1 0: (85) call pc+2#__bpf_prog_run_args32 1: (b7) r0 = 1 2: (95) exit 3: (b7) r0 = 2 4: (95) exit Example, BPF calls via JIT: # sysctl net.core.bpf_jit_enable=1 net.core.bpf_jit_enable = 1 # sysctl net.core.bpf_jit_kallsyms=1 net.core.bpf_jit_kallsyms = 1 # bpftool prog dump xlated id 1 0: (85) call pc+2#bpf_prog_3b185187f1855c4c_F 1: (b7) r0 = 1 2: (95) exit 3: (b7) r0 = 2 4: (95) exit And finally, an example for tail calls that is now working as well wrt correlation: # bpftool prog dump xlated id 2 [...] 10: (b7) r2 = 8 11: (85) call bpf_trace_printk#-41312 12: (bf) r1 = r6 13: (18) r2 = map[id:1] 15: (b7) r3 = 0 16: (85) call bpf_tail_call#12 17: (b7) r1 = 42 18: (6b) *(u16 *)(r6 +46) = r1 19: (b7) r0 = 0 20: (95) exit # bpftool map show id 1 1: prog_array flags 0x0 key 4B value 4B max_entries 1 memlock 4096B Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 9 +++ kernel/bpf/core.c | 4 +- kernel/bpf/disasm.c | 65 ++++++++++++++--- kernel/bpf/disasm.h | 29 ++++++-- kernel/bpf/syscall.c | 87 +++++++++++++++++++++-- kernel/bpf/verifier.c | 30 ++++++-- tools/bpf/bpftool/prog.c | 181 ++++++++++++++++++++++++++++++++++++++++++++--- 7 files changed, 370 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index e872b4ebaa57..2b0df2703671 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -724,6 +725,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_helper_changes_pkt_data(void *func); +static inline bool bpf_dump_raw_ok(void) +{ + /* Reconstruction of call-sites is dependent on kallsyms, + * thus make dump the same restriction. + */ + return kallsyms_show_value() == 1; +} + struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 768e0a02d8c8..70a534549cd3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -771,7 +771,9 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs - * anyway later on, so do not let the compiler omit it. + * anyway later on, so do not let the compiler omit it. This also needs + * to go into kallsyms for correlation from e.g. bpftool, so naming + * must not change. */ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 883f88fa5bfc..8740406df2cd 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -21,10 +21,39 @@ static const char * const func_id_str[] = { }; #undef __BPF_FUNC_STR_FN -const char *func_id_name(int id) +static const char *__func_get_name(const struct bpf_insn_cbs *cbs, + const struct bpf_insn *insn, + char *buff, size_t len) { BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); + if (insn->src_reg != BPF_PSEUDO_CALL && + insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID && + func_id_str[insn->imm]) + return func_id_str[insn->imm]; + + if (cbs && cbs->cb_call) + return cbs->cb_call(cbs->private_data, insn); + + if (insn->src_reg == BPF_PSEUDO_CALL) + snprintf(buff, len, "%+d", insn->imm); + + return buff; +} + +static const char *__func_imm_name(const struct bpf_insn_cbs *cbs, + const struct bpf_insn *insn, + u64 full_imm, char *buff, size_t len) +{ + if (cbs && cbs->cb_imm) + return cbs->cb_imm(cbs->private_data, insn, full_imm); + + snprintf(buff, len, "0x%llx", (unsigned long long)full_imm); + return buff; +} + +const char *func_id_name(int id) +{ if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id]) return func_id_str[id]; else @@ -83,7 +112,7 @@ static const char *const bpf_jmp_string[16] = { [BPF_EXIT >> 4] = "exit", }; -static void print_bpf_end_insn(bpf_insn_print_cb verbose, +static void print_bpf_end_insn(bpf_insn_print_t verbose, struct bpf_verifier_env *env, const struct bpf_insn *insn) { @@ -92,9 +121,12 @@ static void print_bpf_end_insn(bpf_insn_print_cb verbose, insn->imm, insn->dst_reg); } -void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, - const struct bpf_insn *insn, bool allow_ptr_leaks) +void print_bpf_insn(const struct bpf_insn_cbs *cbs, + struct bpf_verifier_env *env, + const struct bpf_insn *insn, + bool allow_ptr_leaks) { + const bpf_insn_print_t verbose = cbs->cb_print; u8 class = BPF_CLASS(insn->code); if (class == BPF_ALU || class == BPF_ALU64) { @@ -175,12 +207,15 @@ void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, */ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; + char tmp[64]; if (map_ptr && !allow_ptr_leaks) imm = 0; - verbose(env, "(%02x) r%d = 0x%llx\n", insn->code, - insn->dst_reg, (unsigned long long)imm); + verbose(env, "(%02x) r%d = %s\n", + insn->code, insn->dst_reg, + __func_imm_name(cbs, insn, imm, + tmp, sizeof(tmp))); } else { verbose(env, "BUG_ld_%02x\n", insn->code); return; @@ -189,12 +224,20 @@ void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, u8 opcode = BPF_OP(insn->code); if (opcode == BPF_CALL) { - if (insn->src_reg == BPF_PSEUDO_CALL) - verbose(env, "(%02x) call pc%+d\n", insn->code, - insn->imm); - else + char tmp[64]; + + if (insn->src_reg == BPF_PSEUDO_CALL) { + verbose(env, "(%02x) call pc%s\n", + insn->code, + __func_get_name(cbs, insn, + tmp, sizeof(tmp))); + } else { + strcpy(tmp, "unknown"); verbose(env, "(%02x) call %s#%d\n", insn->code, - func_id_name(insn->imm), insn->imm); + __func_get_name(cbs, insn, + tmp, sizeof(tmp)), + insn->imm); + } } else if (insn->code == (BPF_JMP | BPF_JA)) { verbose(env, "(%02x) goto pc%+d\n", insn->code, insn->off); diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h index 8de977e420b6..e0857d016f89 100644 --- a/kernel/bpf/disasm.h +++ b/kernel/bpf/disasm.h @@ -17,16 +17,35 @@ #include #include #include +#ifndef __KERNEL__ +#include +#include +#endif + +struct bpf_verifier_env; extern const char *const bpf_alu_string[16]; extern const char *const bpf_class_string[8]; const char *func_id_name(int id); -struct bpf_verifier_env; -typedef void (*bpf_insn_print_cb)(struct bpf_verifier_env *env, - const char *, ...); -void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env, - const struct bpf_insn *insn, bool allow_ptr_leaks); +typedef void (*bpf_insn_print_t)(struct bpf_verifier_env *env, + const char *, ...); +typedef const char *(*bpf_insn_revmap_call_t)(void *private_data, + const struct bpf_insn *insn); +typedef const char *(*bpf_insn_print_imm_t)(void *private_data, + const struct bpf_insn *insn, + __u64 full_imm); + +struct bpf_insn_cbs { + bpf_insn_print_t cb_print; + bpf_insn_revmap_call_t cb_call; + bpf_insn_print_imm_t cb_imm; + void *private_data; +}; +void print_bpf_insn(const struct bpf_insn_cbs *cbs, + struct bpf_verifier_env *env, + const struct bpf_insn *insn, + bool allow_ptr_leaks); #endif diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 30e728dcd35d..007802c5ca7d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1558,6 +1558,67 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) return fd; } +static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, + unsigned long addr) +{ + int i; + + for (i = 0; i < prog->aux->used_map_cnt; i++) + if (prog->aux->used_maps[i] == (void *)addr) + return prog->aux->used_maps[i]; + return NULL; +} + +static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) +{ + const struct bpf_map *map; + struct bpf_insn *insns; + u64 imm; + int i; + + insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), + GFP_USER); + if (!insns) + return insns; + + for (i = 0; i < prog->len; i++) { + if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) { + insns[i].code = BPF_JMP | BPF_CALL; + insns[i].imm = BPF_FUNC_tail_call; + /* fall-through */ + } + if (insns[i].code == (BPF_JMP | BPF_CALL) || + insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) { + if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) + insns[i].code = BPF_JMP | BPF_CALL; + if (!bpf_dump_raw_ok()) + insns[i].imm = 0; + continue; + } + + if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW)) + continue; + + imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; + map = bpf_map_from_imm(prog, imm); + if (map) { + insns[i].src_reg = BPF_PSEUDO_MAP_FD; + insns[i].imm = map->id; + insns[i + 1].imm = 0; + continue; + } + + if (!bpf_dump_raw_ok() && + imm == (unsigned long)prog->aux) { + insns[i].imm = 0; + insns[i + 1].imm = 0; + continue; + } + } + + return insns; +} + static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, const union bpf_attr *attr, union bpf_attr __user *uattr) @@ -1608,18 +1669,34 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ulen = info.jited_prog_len; info.jited_prog_len = prog->jited_len; if (info.jited_prog_len && ulen) { - uinsns = u64_to_user_ptr(info.jited_prog_insns); - ulen = min_t(u32, info.jited_prog_len, ulen); - if (copy_to_user(uinsns, prog->bpf_func, ulen)) - return -EFAULT; + if (bpf_dump_raw_ok()) { + uinsns = u64_to_user_ptr(info.jited_prog_insns); + ulen = min_t(u32, info.jited_prog_len, ulen); + if (copy_to_user(uinsns, prog->bpf_func, ulen)) + return -EFAULT; + } else { + info.jited_prog_insns = 0; + } } ulen = info.xlated_prog_len; info.xlated_prog_len = bpf_prog_insn_size(prog); if (info.xlated_prog_len && ulen) { + struct bpf_insn *insns_sanitized; + bool fault; + + if (prog->blinded && !bpf_dump_raw_ok()) { + info.xlated_prog_insns = 0; + goto done; + } + insns_sanitized = bpf_insn_prepare_dump(prog); + if (!insns_sanitized) + return -ENOMEM; uinsns = u64_to_user_ptr(info.xlated_prog_insns); ulen = min_t(u32, info.xlated_prog_len, ulen); - if (copy_to_user(uinsns, prog->insnsi, ulen)) + fault = copy_to_user(uinsns, insns_sanitized, ulen); + kfree(insns_sanitized); + if (fault) return -EFAULT; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3c3eec58b3e8..4ae46b2cba88 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4427,9 +4427,12 @@ static int do_check(struct bpf_verifier_env *env) } if (env->log.level) { + const struct bpf_insn_cbs cbs = { + .cb_print = verbose, + }; + verbose(env, "%d: ", insn_idx); - print_bpf_insn(verbose, env, insn, - env->allow_ptr_leaks); + print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks); } err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); @@ -5017,14 +5020,14 @@ static int jit_subprogs(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog, **func, *tmp; int i, j, subprog_start, subprog_end = 0, len, subprog; - struct bpf_insn *insn = prog->insnsi; + struct bpf_insn *insn; void *old_bpf_func; int err = -ENOMEM; if (env->subprog_cnt == 0) return 0; - for (i = 0; i < prog->len; i++, insn++) { + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) continue; @@ -5116,6 +5119,25 @@ static int jit_subprogs(struct bpf_verifier_env *env) bpf_prog_lock_ro(func[i]); bpf_prog_kallsyms_add(func[i]); } + + /* Last step: make now unused interpreter insns from main + * prog consistent for later dump requests, so they can + * later look the same as if they were interpreted only. + */ + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + unsigned long addr; + + if (insn->code != (BPF_JMP | BPF_CALL) || + insn->src_reg != BPF_PSEUDO_CALL) + continue; + insn->off = env->insn_aux_data[i].call_imm; + subprog = find_subprog(env, i + insn->off + 1); + addr = (unsigned long)func[subprog + 1]->bpf_func; + addr &= PAGE_MASK; + insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) + addr - __bpf_call_base; + } + prog->jited = 1; prog->bpf_func = func[0]->bpf_func; prog->aux->func = func; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 037484ceaeaf..42ee8892549c 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -401,6 +401,88 @@ static int do_show(int argc, char **argv) return err; } +#define SYM_MAX_NAME 256 + +struct kernel_sym { + unsigned long address; + char name[SYM_MAX_NAME]; +}; + +struct dump_data { + unsigned long address_call_base; + struct kernel_sym *sym_mapping; + __u32 sym_count; + char scratch_buff[SYM_MAX_NAME]; +}; + +static int kernel_syms_cmp(const void *sym_a, const void *sym_b) +{ + return ((struct kernel_sym *)sym_a)->address - + ((struct kernel_sym *)sym_b)->address; +} + +static void kernel_syms_load(struct dump_data *dd) +{ + struct kernel_sym *sym; + char buff[256]; + void *tmp, *address; + FILE *fp; + + fp = fopen("/proc/kallsyms", "r"); + if (!fp) + return; + + while (!feof(fp)) { + if (!fgets(buff, sizeof(buff), fp)) + break; + tmp = realloc(dd->sym_mapping, + (dd->sym_count + 1) * + sizeof(*dd->sym_mapping)); + if (!tmp) { +out: + free(dd->sym_mapping); + dd->sym_mapping = NULL; + fclose(fp); + return; + } + dd->sym_mapping = tmp; + sym = &dd->sym_mapping[dd->sym_count]; + if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2) + continue; + sym->address = (unsigned long)address; + if (!strcmp(sym->name, "__bpf_call_base")) { + dd->address_call_base = sym->address; + /* sysctl kernel.kptr_restrict was set */ + if (!sym->address) + goto out; + } + if (sym->address) + dd->sym_count++; + } + + fclose(fp); + + qsort(dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp); +} + +static void kernel_syms_destroy(struct dump_data *dd) +{ + free(dd->sym_mapping); +} + +static struct kernel_sym *kernel_syms_search(struct dump_data *dd, + unsigned long key) +{ + struct kernel_sym sym = { + .address = key, + }; + + return dd->sym_mapping ? + bsearch(&sym, dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL; +} + static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) { va_list args; @@ -410,8 +492,71 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) +static const char *print_call_pcrel(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address, + const struct bpf_insn *insn) { + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#%s", insn->off, sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#0x%lx", insn->off, address); + return dd->scratch_buff; +} + +static const char *print_call_helper(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address) +{ + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%s", sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%lx", address); + return dd->scratch_buff; +} + +static const char *print_call(void *private_data, + const struct bpf_insn *insn) +{ + struct dump_data *dd = private_data; + unsigned long address = dd->address_call_base + insn->imm; + struct kernel_sym *sym; + + sym = kernel_syms_search(dd, address); + if (insn->src_reg == BPF_PSEUDO_CALL) + return print_call_pcrel(dd, sym, address, insn); + else + return print_call_helper(dd, sym, address); +} + +static const char *print_imm(void *private_data, + const struct bpf_insn *insn, + __u64 full_imm) +{ + struct dump_data *dd = private_data; + + if (insn->src_reg == BPF_PSEUDO_MAP_FD) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "map[id:%u]", insn->imm); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%llx", (unsigned long long)full_imm); + return dd->scratch_buff; +} + +static void dump_xlated_plain(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) +{ + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -425,7 +570,7 @@ static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); printf("% 4d: ", i); - print_bpf_insn(print_insn, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { printf(" "); @@ -454,8 +599,15 @@ static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) +static void dump_xlated_json(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) { + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn_json, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -470,7 +622,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) jsonw_start_object(json_wtr); jsonw_name(json_wtr, "disasm"); - print_bpf_insn(print_insn_json, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { jsonw_name(json_wtr, "opcodes"); @@ -505,6 +657,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) static int do_dump(int argc, char **argv) { struct bpf_prog_info info = {}; + struct dump_data dd = {}; __u32 len = sizeof(info); unsigned int buf_size; char *filepath = NULL; @@ -592,6 +745,14 @@ static int do_dump(int argc, char **argv) goto err_free; } + if ((member_len == &info.jited_prog_len && + info.jited_prog_insns == 0) || + (member_len == &info.xlated_prog_len && + info.xlated_prog_insns == 0)) { + p_err("error retrieving insn dump: kernel.kptr_restrict set?"); + goto err_free; + } + if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { @@ -608,17 +769,19 @@ static int do_dump(int argc, char **argv) goto err_free; } } else { - if (member_len == &info.jited_prog_len) + if (member_len == &info.jited_prog_len) { disasm_print_insn(buf, *member_len, opcodes); - else + } else { + kernel_syms_load(&dd); if (json_output) - dump_xlated_json(buf, *member_len, opcodes); + dump_xlated_json(&dd, buf, *member_len, opcodes); else - dump_xlated_plain(buf, *member_len, opcodes); + dump_xlated_plain(&dd, buf, *member_len, opcodes); + kernel_syms_destroy(&dd); + } } free(buf); - return 0; err_free: -- cgit v1.2.3 From 70a87ffea8acc390ae315fa1930e849f656bdb88 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 25 Dec 2017 13:15:40 -0800 Subject: bpf: fix maximum stack depth tracking logic Instead of computing max stack depth for current call chain during the main verifier pass track stack depth of each function independently and after do_check() is done do another pass over all instructions analyzing depth of all possible call stacks. Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 82 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 67 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index aaac589e490c..94a02ceb1246 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -194,6 +194,7 @@ struct bpf_verifier_env { struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ struct bpf_verifer_log log; u32 subprog_starts[BPF_MAX_SUBPROGS]; + /* computes the stack depth of each bpf function */ u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1]; u32 subprog_cnt; }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 82ae580440b8..738e919efdf0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1430,33 +1430,80 @@ static int update_stack_depth(struct bpf_verifier_env *env, const struct bpf_func_state *func, int off) { - u16 stack = env->subprog_stack_depth[func->subprogno], total = 0; - struct bpf_verifier_state *cur = env->cur_state; - int i; + u16 stack = env->subprog_stack_depth[func->subprogno]; if (stack >= -off) return 0; /* update known max for given subprogram */ env->subprog_stack_depth[func->subprogno] = -off; + return 0; +} - /* compute the total for current call chain */ - for (i = 0; i <= cur->curframe; i++) { - u32 depth = env->subprog_stack_depth[cur->frame[i]->subprogno]; - - /* round up to 32-bytes, since this is granularity - * of interpreter stack sizes - */ - depth = round_up(depth, 32); - total += depth; - } +/* starting from main bpf function walk all instructions of the function + * and recursively walk all callees that given function can call. + * Ignore jump and exit insns. + * Since recursion is prevented by check_cfg() this algorithm + * only needs a local stack of MAX_CALL_FRAMES to remember callsites + */ +static int check_max_stack_depth(struct bpf_verifier_env *env) +{ + int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + int ret_insn[MAX_CALL_FRAMES]; + int ret_prog[MAX_CALL_FRAMES]; - if (total > MAX_BPF_STACK) { +process_func: + /* round up to 32-bytes, since this is granularity + * of interpreter stack size + */ + depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); + if (depth > MAX_BPF_STACK) { verbose(env, "combined stack size of %d calls is %d. Too large\n", - cur->curframe, total); + frame + 1, depth); return -EACCES; } - return 0; +continue_func: + if (env->subprog_cnt == subprog) + subprog_end = insn_cnt; + else + subprog_end = env->subprog_starts[subprog]; + for (; i < subprog_end; i++) { + if (insn[i].code != (BPF_JMP | BPF_CALL)) + continue; + if (insn[i].src_reg != BPF_PSEUDO_CALL) + continue; + /* remember insn and function to return to */ + ret_insn[frame] = i + 1; + ret_prog[frame] = subprog; + + /* find the callee */ + i = i + insn[i].imm + 1; + subprog = find_subprog(env, i); + if (subprog < 0) { + WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", + i); + return -EFAULT; + } + subprog++; + frame++; + if (frame >= MAX_CALL_FRAMES) { + WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); + return -EFAULT; + } + goto process_func; + } + /* end of for() loop means the last insn of the 'subprog' + * was reached. Doesn't matter whether it was JA or EXIT + */ + if (frame == 0) + return 0; + depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); + frame--; + i = ret_insn[frame]; + subprog = ret_prog[frame]; + goto continue_func; } static int get_callee_stack_depth(struct bpf_verifier_env *env, @@ -5403,6 +5450,9 @@ skip_full_check: if (ret == 0) sanitize_dead_code(env); + if (ret == 0) + ret = check_max_stack_depth(env); + if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); -- cgit v1.2.3