diff options
author | David S. Miller <davem@davemloft.net> | 2019-03-04 10:14:31 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-03-04 10:14:31 -0800 |
commit | f7fb7c1a1c8f86005d34f28278524213c521f761 (patch) | |
tree | 05a3b21c5e0b1667b106153fc0f0eb88cd980ab2 /kernel | |
parent | 8c4238df4d0cc3420c5ee14b54d200d74267cfe5 (diff) | |
parent | 87dab7c3d54ce0f1ff6b54840bf7279d0944bc6a (diff) | |
download | linux-f7fb7c1a1c8f86005d34f28278524213c521f761.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2019-03-04
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Add AF_XDP support to libbpf. Rationale is to facilitate writing
AF_XDP applications by offering higher-level APIs that hide many
of the details of the AF_XDP uapi. Sample programs are converted
over to this new interface as well, from Magnus.
2) Introduce a new cant_sleep() macro for annotation of functions
that cannot sleep and use it in BPF_PROG_RUN() to assert that
BPF programs run under preemption disabled context, from Peter.
3) Introduce per BPF prog stats in order to monitor the usage
of BPF; this is controlled by kernel.bpf_stats_enabled sysctl
knob where monitoring tools can make use of this to efficiently
determine the average cost of programs, from Alexei.
4) Split up BPF selftest's test_progs similarly as we already
did with test_verifier. This allows to further reduce merge
conflicts in future and to get more structure into our
quickly growing BPF selftest suite, from Stanislav.
5) Fix a bug in BTF's dedup algorithm which can cause an infinite
loop in some circumstances; also various BPF doc fixes and
improvements, from Andrii.
6) Various BPF sample cleanups and migration to libbpf in order
to further isolate the old sample loader code (so we can get
rid of it at some point), from Jakub.
7) Add a new BPF helper for BPF cgroup skb progs that allows
to set ECN CE code point and a Host Bandwidth Manager (HBM)
sample program for limiting the bandwidth used by v2 cgroups,
from Lawrence.
8) Enable write access to skb->queue_mapping from tc BPF egress
programs in order to let BPF pick TX queue, from Jesper.
9) Fix a bug in BPF spinlock handling for map-in-map which did
not propagate spin_lock_off to the meta map, from Yonghong.
10) Fix a bug in the new per-CPU BPF prog counters to properly
initialize stats for each CPU, from Eric.
11) Add various BPF helper prototypes to selftest's bpf_helpers.h,
from Willem.
12) Fix various BPF samples bugs in XDP and tracing progs,
from Toke, Daniel and Yonghong.
13) Silence preemption splat in test_bpf after BPF_PROG_RUN()
enforces it now everywhere, from Anders.
14) Fix a signedness bug in libbpf's btf_dedup_ref_type() to
get error handling working, from Dan.
15) Fix bpftool documentation and auto-completion with regards
to stream_{verdict,parser} attach types, from Alban.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/core.c | 37 | ||||
-rw-r--r-- | kernel/bpf/map_in_map.c | 1 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 39 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 7 | ||||
-rw-r--r-- | kernel/sched/core.c | 28 | ||||
-rw-r--r-- | kernel/seccomp.c | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 37 |
7 files changed, 146 insertions, 5 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ef88b167959d..3f08c257858e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -78,7 +78,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } -struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags) { gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog_aux *aux; @@ -104,6 +104,32 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) return fp; } + +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; + struct bpf_prog *prog; + int cpu; + + prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags); + if (!prog) + return NULL; + + prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags); + if (!prog->aux->stats) { + kfree(prog->aux); + vfree(prog); + return NULL; + } + + for_each_possible_cpu(cpu) { + struct bpf_prog_stats *pstats; + + pstats = per_cpu_ptr(prog->aux->stats, cpu); + u64_stats_init(&pstats->syncp); + } + return prog; +} EXPORT_SYMBOL_GPL(bpf_prog_alloc); int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) @@ -231,7 +257,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, void __bpf_prog_free(struct bpf_prog *fp) { - kfree(fp->aux); + if (fp->aux) { + free_percpu(fp->aux->stats); + kfree(fp->aux); + } vfree(fp); } @@ -2069,6 +2098,10 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, return -EFAULT; } +DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); +EXPORT_SYMBOL(bpf_stats_enabled_key); +int sysctl_bpf_stats_enabled __read_mostly; + /* All definitions of tracepoints related to BPF. */ #define CREATE_TRACE_POINTS #include <linux/bpf_trace.h> diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 583346a0ab29..3dff41403583 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -58,6 +58,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->value_size = inner_map->value_size; inner_map_meta->map_flags = inner_map->map_flags; inner_map_meta->max_entries = inner_map->max_entries; + inner_map_meta->spin_lock_off = inner_map->spin_lock_off; /* Misc members not needed in bpf_map_meta_equal() check. */ inner_map_meta->ops = inner_map->ops; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 797a99c7493e..bc34cf9fe9ee 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1283,24 +1283,54 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) return 0; } +static void bpf_prog_get_stats(const struct bpf_prog *prog, + struct bpf_prog_stats *stats) +{ + u64 nsecs = 0, cnt = 0; + int cpu; + + for_each_possible_cpu(cpu) { + const struct bpf_prog_stats *st; + unsigned int start; + u64 tnsecs, tcnt; + + st = per_cpu_ptr(prog->aux->stats, cpu); + do { + start = u64_stats_fetch_begin_irq(&st->syncp); + tnsecs = st->nsecs; + tcnt = st->cnt; + } while (u64_stats_fetch_retry_irq(&st->syncp, start)); + nsecs += tnsecs; + cnt += tcnt; + } + stats->nsecs = nsecs; + stats->cnt = cnt; +} + #ifdef CONFIG_PROC_FS static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; + struct bpf_prog_stats stats; + bpf_prog_get_stats(prog, &stats); bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "prog_type:\t%u\n" "prog_jited:\t%u\n" "prog_tag:\t%s\n" "memlock:\t%llu\n" - "prog_id:\t%u\n", + "prog_id:\t%u\n" + "run_time_ns:\t%llu\n" + "run_cnt:\t%llu\n", prog->type, prog->jited, prog_tag, prog->pages * 1ULL << PAGE_SHIFT, - prog->aux->id); + prog->aux->id, + stats.nsecs, + stats.cnt); } #endif @@ -2122,6 +2152,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); struct bpf_prog_info info = {}; u32 info_len = attr->info.info_len; + struct bpf_prog_stats stats; char __user *uinsns; u32 ulen; int err; @@ -2161,6 +2192,10 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, if (err) return err; + bpf_prog_get_stats(prog, &stats); + info.run_time_ns = stats.nsecs; + info.run_cnt = stats.cnt; + if (!capable(CAP_SYS_ADMIN)) { info.jited_prog_len = 0; info.xlated_prog_len = 0; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ebc3b264aa4d..a7b96bf0e654 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7320,7 +7320,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) subprog_end = env->subprog_info[i + 1].start; len = subprog_end - subprog_start; - func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); + /* BPF_PROG_RUN doesn't call subprogs directly, + * hence main prog stats include the runtime of subprogs. + * subprogs don't have IDs and not reachable via prog_get_next_id + * func[i]->aux->stats will never be accessed and stays NULL + */ + func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); if (!func[i]) goto out_free; memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d8d76a65cfdd..7cbb5658be80 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6162,6 +6162,34 @@ void ___might_sleep(const char *file, int line, int preempt_offset) add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } EXPORT_SYMBOL(___might_sleep); + +void __cant_sleep(const char *file, int line, int preempt_offset) +{ + static unsigned long prev_jiffy; + + if (irqs_disabled()) + return; + + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) + return; + + if (preempt_count() > preempt_offset) + return; + + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) + return; + prev_jiffy = jiffies; + + printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); + printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), + current->pid, current->comm); + + debug_show_held_locks(current); + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +} +EXPORT_SYMBOL_GPL(__cant_sleep); #endif #ifdef CONFIG_MAGIC_SYSRQ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e815781ed751..a43c601ac252 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -267,6 +267,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ + preempt_disable(); for (; f; f = f->prev) { u32 cur_ret = BPF_PROG_RUN(f->prog, sd); @@ -275,6 +276,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, *match = f; } } + preempt_enable(); return ret; } #endif /* CONFIG_SECCOMP_FILTER */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ba4d9e85feb8..7578e21a711b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -224,6 +224,11 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, #endif static int proc_dopipe_max_size(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_BPF_SYSCALL +static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses its own private copy */ @@ -1229,6 +1234,15 @@ static struct ctl_table kern_table[] = { .extra1 = &one, .extra2 = &one, }, + { + .procname = "bpf_stats_enabled", + .data = &sysctl_bpf_stats_enabled, + .maxlen = sizeof(sysctl_bpf_stats_enabled), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_bpf_stats, + .extra1 = &zero, + .extra2 = &one, + }, #endif #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) { @@ -3260,6 +3274,29 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, #endif /* CONFIG_PROC_SYSCTL */ +#ifdef CONFIG_BPF_SYSCALL +static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret, bpf_stats = *(int *)table->data; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &bpf_stats; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + *(int *)table->data = bpf_stats; + if (bpf_stats) + static_branch_enable(&bpf_stats_enabled_key); + else + static_branch_disable(&bpf_stats_enabled_key); + } + return ret; +} +#endif /* * No sense putting this after each symbol definition, twice, * exception granted :-) |