diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 10:05:34 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 10:05:34 -0700 |
| commit | 1200b6809dfd9d73bc4c7db76d288c35fa4b2ebe (patch) | |
| tree | 552e03de245cdbd0780ca1215914edc4a26540f7 /kernel/bpf/syscall.c | |
| parent | 6b5f04b6cf8ebab9a65d9c0026c650bb2538fd0f (diff) | |
| parent | fe30937b65354c7fec244caebbdaae68e28ca797 (diff) | |
| download | linux-1200b6809dfd9d73bc4c7db76d288c35fa4b2ebe.tar.bz2 | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Support more Realtek wireless chips, from Jes Sorenson.
2) New BPF types for per-cpu hash and arrap maps, from Alexei
Starovoitov.
3) Make several TCP sysctls per-namespace, from Nikolay Borisov.
4) Allow the use of SO_REUSEPORT in order to do per-thread processing
of incoming TCP/UDP connections. The muxing can be done using a
BPF program which hashes the incoming packet. From Craig Gallek.
5) Add a multiplexer for TCP streams, to provide a messaged based
interface. BPF programs can be used to determine the message
boundaries. From Tom Herbert.
6) Add 802.1AE MACSEC support, from Sabrina Dubroca.
7) Avoid factorial complexity when taking down an inetdev interface
with lots of configured addresses. We were doing things like
traversing the entire address less for each address removed, and
flushing the entire netfilter conntrack table for every address as
well.
8) Add and use SKB bulk free infrastructure, from Jesper Brouer.
9) Allow offloading u32 classifiers to hardware, and implement for
ixgbe, from John Fastabend.
10) Allow configuring IRQ coalescing parameters on a per-queue basis,
from Kan Liang.
11) Extend ethtool so that larger link mode masks can be supported.
From David Decotigny.
12) Introduce devlink, which can be used to configure port link types
(ethernet vs Infiniband, etc.), port splitting, and switch device
level attributes as a whole. From Jiri Pirko.
13) Hardware offload support for flower classifiers, from Amir Vadai.
14) Add "Local Checksum Offload". Basically, for a tunneled packet
the checksum of the outer header is 'constant' (because with the
checksum field filled into the inner protocol header, the payload
of the outer frame checksums to 'zero'), and we can take advantage
of that in various ways. From Edward Cree"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1548 commits)
bonding: fix bond_get_stats()
net: bcmgenet: fix dma api length mismatch
net/mlx4_core: Fix backward compatibility on VFs
phy: mdio-thunder: Fix some Kconfig typos
lan78xx: add ndo_get_stats64
lan78xx: handle statistics counter rollover
RDS: TCP: Remove unused constant
RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
net: smc911x: convert pxa dma to dmaengine
team: remove duplicate set of flag IFF_MULTICAST
bonding: remove duplicate set of flag IFF_MULTICAST
net: fix a comment typo
ethernet: micrel: fix some error codes
ip_tunnels, bpf: define IP_TUNNEL_OPTS_MAX and use it
bpf, dst: add and use dst_tclassid helper
bpf: make skb->tc_classid also readable
net: mvneta: bm: clarify dependencies
cls_bpf: reset class and reuse major in da
ldmvsw: Checkpatch sunvnet.c and sunvnet_common.c
ldmvsw: Add ldmvsw.c driver code
...
Diffstat (limited to 'kernel/bpf/syscall.c')
| -rw-r--r-- | kernel/bpf/syscall.c | 90 |
1 files changed, 73 insertions, 17 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 637397059f76..2a2efe1bc76c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -18,6 +18,8 @@ #include <linux/filter.h> #include <linux/version.h> +DEFINE_PER_CPU(int, bpf_prog_active); + int sysctl_unprivileged_bpf_disabled __read_mostly; static LIST_HEAD(bpf_map_types); @@ -46,6 +48,19 @@ void bpf_register_map_type(struct bpf_map_type_list *tl) list_add(&tl->list_node, &bpf_map_types); } +int bpf_map_precharge_memlock(u32 pages) +{ + struct user_struct *user = get_current_user(); + unsigned long memlock_limit, cur; + + memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + cur = atomic_long_read(&user->locked_vm); + free_uid(user); + if (cur + pages > memlock_limit) + return -EPERM; + return 0; +} + static int bpf_map_charge_memlock(struct bpf_map *map) { struct user_struct *user = get_current_user(); @@ -151,7 +166,7 @@ int bpf_map_new_fd(struct bpf_map *map) offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ sizeof(attr->CMD##_LAST_FIELD)) != NULL -#define BPF_MAP_CREATE_LAST_FIELD max_entries +#define BPF_MAP_CREATE_LAST_FIELD map_flags /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -229,6 +244,11 @@ static void __user *u64_to_ptr(__u64 val) return (void __user *) (unsigned long) val; } +int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) +{ + return -ENOTSUPP; +} + /* last field in 'union bpf_attr' used by this command */ #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value @@ -239,6 +259,7 @@ static int map_lookup_elem(union bpf_attr *attr) int ufd = attr->map_fd; struct bpf_map *map; void *key, *value, *ptr; + u32 value_size; struct fd f; int err; @@ -259,23 +280,37 @@ static int map_lookup_elem(union bpf_attr *attr) if (copy_from_user(key, ukey, map->key_size) != 0) goto free_key; + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + value_size = round_up(map->value_size, 8) * num_possible_cpus(); + else + value_size = map->value_size; + err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; - rcu_read_lock(); - ptr = map->ops->map_lookup_elem(map, key); - if (ptr) - memcpy(value, ptr, map->value_size); - rcu_read_unlock(); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) { + err = bpf_percpu_hash_copy(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + err = bpf_percpu_array_copy(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { + err = bpf_stackmap_copy(map, key, value); + } else { + rcu_read_lock(); + ptr = map->ops->map_lookup_elem(map, key); + if (ptr) + memcpy(value, ptr, value_size); + rcu_read_unlock(); + err = ptr ? 0 : -ENOENT; + } - err = -ENOENT; - if (!ptr) + if (err) goto free_value; err = -EFAULT; - if (copy_to_user(uvalue, value, map->value_size) != 0) + if (copy_to_user(uvalue, value, value_size) != 0) goto free_value; err = 0; @@ -298,6 +333,7 @@ static int map_update_elem(union bpf_attr *attr) int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; + u32 value_size; struct fd f; int err; @@ -318,21 +354,37 @@ static int map_update_elem(union bpf_attr *attr) if (copy_from_user(key, ukey, map->key_size) != 0) goto free_key; + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + value_size = round_up(map->value_size, 8) * num_possible_cpus(); + else + value_size = map->value_size; + err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; err = -EFAULT; - if (copy_from_user(value, uvalue, map->value_size) != 0) + if (copy_from_user(value, uvalue, value_size) != 0) goto free_value; - /* eBPF program that use maps are running under rcu_read_lock(), - * therefore all map accessors rely on this fact, so do the same here + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from + * inside bpf map update or delete otherwise deadlocks are possible */ - rcu_read_lock(); - err = map->ops->map_update_elem(map, key, value, attr->flags); - rcu_read_unlock(); + preempt_disable(); + __this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) { + err = bpf_percpu_hash_update(map, key, value, attr->flags); + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + err = bpf_percpu_array_update(map, key, value, attr->flags); + } else { + rcu_read_lock(); + err = map->ops->map_update_elem(map, key, value, attr->flags); + rcu_read_unlock(); + } + __this_cpu_dec(bpf_prog_active); + preempt_enable(); free_value: kfree(value); @@ -371,9 +423,13 @@ static int map_delete_elem(union bpf_attr *attr) if (copy_from_user(key, ukey, map->key_size) != 0) goto free_key; + preempt_disable(); + __this_cpu_inc(bpf_prog_active); rcu_read_lock(); err = map->ops->map_delete_elem(map, key); rcu_read_unlock(); + __this_cpu_dec(bpf_prog_active); + preempt_enable(); free_key: kfree(key); |