From f56a653c1fd13a197076dec4461c656fd2adec73 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Apr 2018 15:56:01 -0700 Subject: bpf: btf: Add BPF_BTF_LOAD command This patch adds a BPF_BTF_LOAD command which 1) loads and verifies the BTF (implemented in earlier patches) and 2) returns a BTF fd to userspace. In the next patch, the BTF fd can be specified during BPF_MAP_CREATE. It is currently limited to CAP_SYS_ADMIN. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/syscall.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4ca46df19c9a..cd8ebadc66eb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include @@ -2023,6 +2024,19 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } +#define BPF_BTF_LOAD_LAST_FIELD btf_log_level + +static int bpf_btf_load(const union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_BTF_LOAD)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return btf_new_fd(attr); +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -2103,6 +2117,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_RAW_TRACEPOINT_OPEN: err = bpf_raw_tracepoint_open(&attr); break; + case BPF_BTF_LOAD: + err = bpf_btf_load(&attr); + break; default: err = -EINVAL; break; -- cgit v1.2.3 From 60197cfb6e11ffc03aa0ed23765b2f7e70b2e2d4 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Apr 2018 15:56:02 -0700 Subject: bpf: btf: Add BPF_OBJ_GET_INFO_BY_FD support to BTF fd This patch adds BPF_OBJ_GET_INFO_BY_FD support to the BTF fd. The original BTF data, which was used to create the BTF fd during the earlier BPF_BTF_LOAD call, will be returned. Userspace is expected to allocate a buffer for info.info and to set the buffer size in info.info_len before calling BPF_OBJ_GET_INFO_BY_FD. The original BTF data is copied to the userspace buffer (info.info). Only up to the user-specified info.info_len bytes will be copied. The original BTF data size is then written back to info.info_len. Userspace needs to check whether it is bigger than its allocated buffer size; if it is, userspace should realloc the buffer with the kernel-returned info.info_len and call BPF_OBJ_GET_INFO_BY_FD again. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/btf.h | 5 +++++ kernel/bpf/btf.c | 17 ++++++++++++++++- kernel/bpf/syscall.c | 2 ++ 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'kernel/bpf/syscall.c') diff --git a/include/linux/btf.h b/include/linux/btf.h index a7c7072535ea..a966dc6d61ee 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -10,9 +10,14 @@ struct btf; struct btf_type; union bpf_attr; +extern const struct file_operations btf_fops; + void btf_put(struct btf *btf); int btf_new_fd(const union bpf_attr *attr); struct btf *btf_get_by_fd(int fd); +int btf_get_info_by_fd(const struct btf *btf, + const union bpf_attr *attr, + union bpf_attr __user *uattr); /* Figure out the size of a type_id. If type_id is a modifier * (e.g. const), it will be resolved to find out the type with size. 
* diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2322340694cf..eb56ac760547 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2002,7 +2002,7 @@ static int btf_release(struct inode *inode, struct file *filp) return 0; } -static const struct file_operations btf_fops = { +const struct file_operations btf_fops = { .release = btf_release, }; @@ -2047,3 +2047,18 @@ struct btf *btf_get_by_fd(int fd) return btf; } + +int btf_get_info_by_fd(const struct btf *btf, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + void __user *udata = u64_to_user_ptr(attr->info.info); + u32 copy_len = min_t(u32, btf->data_size, + attr->info.info_len); + + if (copy_to_user(udata, btf->data, copy_len) || + put_user(btf->data_size, &uattr->info.info_len)) + return -EFAULT; + + return 0; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cd8ebadc66eb..0a4924a0a8da 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2017,6 +2017,8 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, else if (f.file->f_op == &bpf_map_fops) err = bpf_map_get_info_by_fd(f.file->private_data, attr, uattr); + else if (f.file->f_op == &btf_fops) + err = btf_get_info_by_fd(f.file->private_data, attr, uattr); else err = -EINVAL; -- cgit v1.2.3 From a26ca7c982cb576749cbdd01e8ecde4bf010d60a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Apr 2018 15:56:03 -0700 Subject: bpf: btf: Add pretty print support to the basic arraymap This patch adds pretty print support to the basic arraymap. Support for other bpf maps can be added later. This patch adds new attrs to the BPF_MAP_CREATE command to allow specifying the btf_fd, btf_key_id and btf_value_id. BPF_MAP_CREATE can then associate the BTF with the map if the map being created supports BTF. A BTF-supported map needs to implement two new map ops, map_seq_show_elem() and map_check_btf(). This patch has implemented these new map ops for the basic arraymap. It also adds file_operations, bpffs_map_fops, to the pinned map such that the pinned map can be opened and read. After that, the user has an intuitive way to do "cat bpffs/pathto/a-pinned-map" instead of getting an error. bpffs_map_fops should not be extended further to support other operations. Other operations (e.g. write/key-lookup...) should be realized by the userspace tools (e.g. bpftool) through BPF_OBJ_GET_INFO_BY_FD, the map's lookup/update interface, etc. Follow-up patches will allow the userspace to obtain the BTF from a map-fd. Here is a sample output when reading a pinned arraymap with the following map value: struct map_value { int count_a; int count_b; }; cat /sys/fs/bpf/pinned_array_map: 0: {1,2} 1: {3,4} 2: {5,6} ... 
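As an illustration only, here is a minimal, untested userspace sketch of creating an arraymap with the new attrs. It assumes btf_fd came from an earlier BPF_BTF_LOAD, and key_type_id/value_type_id (hypothetical names) are the BTF type_ids of the u32 key and of the value struct (e.g. struct map_value above); error handling is elided:

	/* Sketch (untested): create an arraymap with BTF attached. */
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static int create_array_map_with_btf(int btf_fd, __u32 key_type_id,
					     __u32 value_type_id, __u32 value_size)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.map_type = BPF_MAP_TYPE_ARRAY;
		attr.key_size = 4;			/* arraymap keys must be a 32-bit int */
		attr.value_size = value_size;
		attr.max_entries = 3;
		attr.btf_fd = btf_fd;			/* fd returned by BPF_BTF_LOAD */
		attr.btf_key_id = key_type_id;		/* BTF type_id of the u32 key */
		attr.btf_value_id = value_type_id;	/* BTF type_id of the value type */

		return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	}

The returned map fd can then be pinned under bpffs and read back with cat as shown above.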
Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 20 +++++- include/uapi/linux/bpf.h | 3 + kernel/bpf/arraymap.c | 50 +++++++++++++++ kernel/bpf/inode.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 32 +++++++++- 5 files changed, 254 insertions(+), 7 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 95a7abd0ee92..ee5275e7d4df 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -22,6 +22,8 @@ struct perf_event; struct bpf_prog; struct bpf_map; struct sock; +struct seq_file; +struct btf; /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { @@ -43,10 +45,14 @@ struct bpf_map_ops { void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); + void (*map_seq_show_elem)(struct bpf_map *map, void *key, + struct seq_file *m); + int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf, + u32 key_type_id, u32 value_type_id); }; struct bpf_map { - /* 1st cacheline with read-mostly members of which some + /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). */ const struct bpf_map_ops *ops ____cacheline_aligned; @@ -62,10 +68,13 @@ struct bpf_map { u32 pages; u32 id; int numa_node; + u32 btf_key_id; + u32 btf_value_id; + struct btf *btf; bool unpriv_array; - /* 7 bytes hole */ + /* 55 bytes hole */ - /* 2nd cacheline with misc members to avoid false sharing + /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ struct user_struct *user ____cacheline_aligned; @@ -100,6 +109,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map) return container_of(map, struct bpf_offloaded_map, map); } +static inline bool bpf_map_support_seq_show(const struct bpf_map *map) +{ + return map->ops->map_seq_show_elem && map->ops->map_check_btf; +} + extern const struct bpf_map_ops bpf_map_offload_ops; /* function argument constraints */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 795bcd577750..c8383a289f7b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -280,6 +280,9 @@ union bpf_attr { */ char map_name[BPF_OBJ_NAME_LEN]; __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_id; /* BTF type_id of the key */ + __u32 btf_value_id; /* BTF type_id of the value */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 14750e7c5ee4..02a189339381 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -11,11 +11,13 @@ * General Public License for more details. 
*/ #include +#include #include #include #include #include #include +#include #include "map_in_map.h" @@ -336,6 +338,52 @@ static void array_map_free(struct bpf_map *map) bpf_map_area_free(array); } +static void array_map_seq_show_elem(struct bpf_map *map, void *key, + struct seq_file *m) +{ + void *value; + + rcu_read_lock(); + + value = array_map_lookup_elem(map, key); + if (!value) { + rcu_read_unlock(); + return; + } + + seq_printf(m, "%u: ", *(u32 *)key); + btf_type_seq_show(map->btf, map->btf_value_id, value, m); + seq_puts(m, "\n"); + + rcu_read_unlock(); +} + +static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf, + u32 btf_key_id, u32 btf_value_id) +{ + const struct btf_type *key_type, *value_type; + u32 key_size, value_size; + u32 int_data; + + key_type = btf_type_id_size(btf, &btf_key_id, &key_size); + if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) + return -EINVAL; + + int_data = *(u32 *)(key_type + 1); + /* bpf array can only take a u32 key. This check makes + * sure that the btf matches the attr used during map_create. + */ + if (BTF_INT_BITS(int_data) != 32 || key_size != 4 || + BTF_INT_OFFSET(int_data)) + return -EINVAL; + + value_type = btf_type_id_size(btf, &btf_value_id, &value_size); + if (!value_type || value_size > map->value_size) + return -EINVAL; + + return 0; +} + const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, @@ -345,6 +393,8 @@ const struct bpf_map_ops array_map_ops = { .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_gen_lookup = array_map_gen_lookup, + .map_seq_show_elem = array_map_seq_show_elem, + .map_check_btf = array_map_check_btf, }; const struct bpf_map_ops percpu_array_map_ops = { diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index bf6da59ae0d0..a41343009ccc 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -150,8 +150,154 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return 0; } +struct map_iter { + void *key; + bool done; +}; + +static struct map_iter *map_iter(struct seq_file *m) +{ + return m->private; +} + +static struct bpf_map *seq_file_to_map(struct seq_file *m) +{ + return file_inode(m->file)->i_private; +} + +static void map_iter_free(struct map_iter *iter) +{ + if (iter) { + kfree(iter->key); + kfree(iter); + } +} + +static struct map_iter *map_iter_alloc(struct bpf_map *map) +{ + struct map_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); + if (!iter) + goto error; + + iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); + if (!iter->key) + goto error; + + return iter; + +error: + map_iter_free(iter); + return NULL; +} + +static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct bpf_map *map = seq_file_to_map(m); + void *key = map_iter(m)->key; + + if (map_iter(m)->done) + return NULL; + + if (unlikely(v == SEQ_START_TOKEN)) + goto done; + + if (map->ops->map_get_next_key(map, key, key)) { + map_iter(m)->done = true; + return NULL; + } + +done: + ++(*pos); + return key; +} + +static void *map_seq_start(struct seq_file *m, loff_t *pos) +{ + if (map_iter(m)->done) + return NULL; + + return *pos ? 
map_iter(m)->key : SEQ_START_TOKEN; +} + +static void map_seq_stop(struct seq_file *m, void *v) +{ +} + +static int map_seq_show(struct seq_file *m, void *v) +{ + struct bpf_map *map = seq_file_to_map(m); + void *key = map_iter(m)->key; + + if (unlikely(v == SEQ_START_TOKEN)) { + seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); + seq_puts(m, "# WARNING!! The output format will change\n"); + } else { + map->ops->map_seq_show_elem(map, key, m); + } + + return 0; +} + +static const struct seq_operations bpffs_map_seq_ops = { + .start = map_seq_start, + .next = map_seq_next, + .show = map_seq_show, + .stop = map_seq_stop, +}; + +static int bpffs_map_open(struct inode *inode, struct file *file) +{ + struct bpf_map *map = inode->i_private; + struct map_iter *iter; + struct seq_file *m; + int err; + + iter = map_iter_alloc(map); + if (!iter) + return -ENOMEM; + + err = seq_open(file, &bpffs_map_seq_ops); + if (err) { + map_iter_free(iter); + return err; + } + + m = file->private_data; + m->private = iter; + + return 0; +} + +static int bpffs_map_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + + map_iter_free(map_iter(m)); + + return seq_release(inode, file); +} + +/* bpffs_map_fops should only implement the basic + * read operation for a BPF map. The purpose is to + * provide a simple user intuitive way to do + * "cat bpffs/pathto/a-pinned-map". + * + * Other operations (e.g. write, lookup...) should be realized by + * the userspace tools (e.g. bpftool) through the + * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update + * interface. + */ +static const struct file_operations bpffs_map_fops = { + .open = bpffs_map_open, + .read = seq_read, + .release = bpffs_map_release, +}; + static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, - const struct inode_operations *iops) + const struct inode_operations *iops, + const struct file_operations *fops) { struct inode *dir = dentry->d_parent->d_inode; struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); @@ -159,6 +305,7 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, return PTR_ERR(inode); inode->i_op = iops; + inode->i_fop = fops; inode->i_private = raw; bpf_dentry_finalize(dentry, inode, dir); @@ -167,12 +314,15 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) { - return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops); + return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, NULL); } static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) { - return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops); + struct bpf_map *map = arg; + + return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, + map->btf ? 
&bpffs_map_fops : NULL); } static struct dentry * diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0a4924a0a8da..fe23dc5a3ec4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -27,6 +27,7 @@ #include #include #include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ @@ -251,6 +252,7 @@ static void bpf_map_free_deferred(struct work_struct *work) bpf_map_uncharge_memlock(map); security_bpf_map_free(map); + btf_put(map->btf); /* implementation dependent freeing */ map->ops->map_free(map); } @@ -416,7 +418,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src) return 0; } -#define BPF_MAP_CREATE_LAST_FIELD map_ifindex +#define BPF_MAP_CREATE_LAST_FIELD btf_value_id /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -450,6 +452,33 @@ static int map_create(union bpf_attr *attr) atomic_set(&map->refcnt, 1); atomic_set(&map->usercnt, 1); + if (bpf_map_support_seq_show(map) && + (attr->btf_key_id || attr->btf_value_id)) { + struct btf *btf; + + if (!attr->btf_key_id || !attr->btf_value_id) { + err = -EINVAL; + goto free_map_nouncharge; + } + + btf = btf_get_by_fd(attr->btf_fd); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); + goto free_map_nouncharge; + } + + err = map->ops->map_check_btf(map, btf, attr->btf_key_id, + attr->btf_value_id); + if (err) { + btf_put(btf); + goto free_map_nouncharge; + } + + map->btf = btf; + map->btf_key_id = attr->btf_key_id; + map->btf_value_id = attr->btf_value_id; + } + err = security_bpf_map_alloc(map); if (err) goto free_map_nouncharge; @@ -482,6 +511,7 @@ free_map: free_map_sec: security_bpf_map_free(map); free_map_nouncharge: + btf_put(map->btf); map->ops->map_free(map); return err; } -- cgit v1.2.3 From b85fab0e67b162014cd328cb4e2a8e8ae382cb8a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 25 Apr 2018 19:41:06 +0200 Subject: bpf: Add gpl_compatible flag to struct bpf_prog_info Add a gpl_compatible flag to struct bpf_prog_info so it can be dumped via bpf_prog_get_info_by_fd and displayed via bpftool prog dump. Alexei noticed a 4-byte hole in struct bpf_prog_info, so we put the u32 flags field in there, and we can keep adding bit fields in there without breaking user space. 
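For illustration, an untested sketch of how userspace might query the new bit (prog_fd is assumed to be a valid bpf prog fd; error handling elided):

	/* Sketch (untested): read the gpl_compatible bit of a loaded prog. */
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static void show_gpl_compatible(int prog_fd)
	{
		struct bpf_prog_info info;
		union bpf_attr attr;

		memset(&info, 0, sizeof(info));
		memset(&attr, 0, sizeof(attr));
		attr.info.bpf_fd = prog_fd;
		attr.info.info_len = sizeof(info);
		attr.info.info = (__u64)(unsigned long)&info;

		if (!syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
			printf("gpl_compatible: %u\n", info.gpl_compatible);
	}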
Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 1 + 2 files changed, 2 insertions(+) (limited to 'kernel/bpf/syscall.c') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e6679393b687..da8801860c7d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1060,6 +1060,7 @@ struct bpf_prog_info { __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 gpl_compatible:1; __u64 netns_dev; __u64 netns_ino; } __attribute__((aligned(8))); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index fe23dc5a3ec4..7bb4ff1c770a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1914,6 +1914,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info.load_time = prog->aux->load_time; info.created_by_uid = from_kuid_munged(current_user_ns(), prog->aux->user->uid); + info.gpl_compatible = prog->gpl_compatible; memcpy(info.tag, prog->tag, sizeof(prog->tag)); memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); -- cgit v1.2.3 From 4d220ed0f8140c478ab7b0a14d96821da639b646 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sat, 28 Apr 2018 19:56:37 -0700 Subject: bpf: remove tracepoints from bpf core Tracepoints in the bpf core were added as a way to provide introspection into bpf programs and maps, but after some time it became clear that this approach is inadequate, so prog_id, map_id and the corresponding get_next_id, get_fd_by_id, get_info_by_fd, prog_query APIs were introduced and fully adopted by bpftool and other applications. The tracepoints in the bpf core started to rot, causing syzbot warnings: WARNING: CPU: 0 PID: 3008 at kernel/trace/trace_event_perf.c:274 Kernel panic - not syncing: panic_on_warn set ... perf_trace_bpf_map_keyval+0x260/0xbd0 include/trace/events/bpf.h:228 trace_bpf_map_update_elem include/trace/events/bpf.h:274 [inline] map_update_elem kernel/bpf/syscall.c:597 [inline] SYSC_bpf kernel/bpf/syscall.c:1478 [inline] Hence this patch deletes the tracepoints in the bpf core. Reported-by: Eric Biggers Reported-by: syzbot Signed-off-by: Alexei Starovoitov Acked-by: David S. 
Miller Signed-off-by: Daniel Borkmann --- MAINTAINERS | 1 - include/linux/bpf_trace.h | 1 - include/trace/events/bpf.h | 355 --------------------------------------------- kernel/bpf/core.c | 6 - kernel/bpf/inode.c | 16 +- kernel/bpf/syscall.c | 15 +- 6 files changed, 2 insertions(+), 392 deletions(-) delete mode 100644 include/trace/events/bpf.h (limited to 'kernel/bpf/syscall.c') diff --git a/MAINTAINERS b/MAINTAINERS index a52800867850..537fd17a211b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2727,7 +2727,6 @@ F: Documentation/networking/filter.txt F: Documentation/bpf/ F: include/linux/bpf* F: include/linux/filter.h -F: include/trace/events/bpf.h F: include/trace/events/xdp.h F: include/uapi/linux/bpf* F: include/uapi/linux/filter.h diff --git a/include/linux/bpf_trace.h b/include/linux/bpf_trace.h index e6fe98ae3794..ddf896abcfb6 100644 --- a/include/linux/bpf_trace.h +++ b/include/linux/bpf_trace.h @@ -2,7 +2,6 @@ #ifndef __LINUX_BPF_TRACE_H__ #define __LINUX_BPF_TRACE_H__ -#include #include #endif /* __LINUX_BPF_TRACE_H__ */ diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h deleted file mode 100644 index 150185647e6b..000000000000 --- a/include/trace/events/bpf.h +++ /dev/null @@ -1,355 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM bpf - -#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_BPF_H - -/* These are only used within the BPF_SYSCALL code */ -#ifdef CONFIG_BPF_SYSCALL - -#include -#include -#include -#include - -#define __PROG_TYPE_MAP(FN) \ - FN(SOCKET_FILTER) \ - FN(KPROBE) \ - FN(SCHED_CLS) \ - FN(SCHED_ACT) \ - FN(TRACEPOINT) \ - FN(XDP) \ - FN(PERF_EVENT) \ - FN(CGROUP_SKB) \ - FN(CGROUP_SOCK) \ - FN(LWT_IN) \ - FN(LWT_OUT) \ - FN(LWT_XMIT) - -#define __MAP_TYPE_MAP(FN) \ - FN(HASH) \ - FN(ARRAY) \ - FN(PROG_ARRAY) \ - FN(PERF_EVENT_ARRAY) \ - FN(PERCPU_HASH) \ - FN(PERCPU_ARRAY) \ - FN(STACK_TRACE) \ - FN(CGROUP_ARRAY) \ - FN(LRU_HASH) \ - FN(LRU_PERCPU_HASH) \ - FN(LPM_TRIE) - -#define __PROG_TYPE_TP_FN(x) \ - TRACE_DEFINE_ENUM(BPF_PROG_TYPE_##x); -#define __PROG_TYPE_SYM_FN(x) \ - { BPF_PROG_TYPE_##x, #x }, -#define __PROG_TYPE_SYM_TAB \ - __PROG_TYPE_MAP(__PROG_TYPE_SYM_FN) { -1, 0 } -__PROG_TYPE_MAP(__PROG_TYPE_TP_FN) - -#define __MAP_TYPE_TP_FN(x) \ - TRACE_DEFINE_ENUM(BPF_MAP_TYPE_##x); -#define __MAP_TYPE_SYM_FN(x) \ - { BPF_MAP_TYPE_##x, #x }, -#define __MAP_TYPE_SYM_TAB \ - __MAP_TYPE_MAP(__MAP_TYPE_SYM_FN) { -1, 0 } -__MAP_TYPE_MAP(__MAP_TYPE_TP_FN) - -DECLARE_EVENT_CLASS(bpf_prog_event, - - TP_PROTO(const struct bpf_prog *prg), - - TP_ARGS(prg), - - TP_STRUCT__entry( - __array(u8, prog_tag, 8) - __field(u32, type) - ), - - TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); - memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); - __entry->type = prg->type; - ), - - TP_printk("prog=%s type=%s", - __print_hex_str(__entry->prog_tag, 8), - __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB)) -); - -DEFINE_EVENT(bpf_prog_event, bpf_prog_get_type, - - TP_PROTO(const struct bpf_prog *prg), - - TP_ARGS(prg) -); - -DEFINE_EVENT(bpf_prog_event, bpf_prog_put_rcu, - - TP_PROTO(const struct bpf_prog *prg), - - TP_ARGS(prg) -); - -TRACE_EVENT(bpf_prog_load, - - TP_PROTO(const struct bpf_prog *prg, int ufd), - - TP_ARGS(prg, ufd), - - TP_STRUCT__entry( - __array(u8, prog_tag, 8) - __field(u32, type) - __field(int, ufd) - ), - - TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); - memcpy(__entry->prog_tag, prg->tag, 
sizeof(prg->tag)); - __entry->type = prg->type; - __entry->ufd = ufd; - ), - - TP_printk("prog=%s type=%s ufd=%d", - __print_hex_str(__entry->prog_tag, 8), - __print_symbolic(__entry->type, __PROG_TYPE_SYM_TAB), - __entry->ufd) -); - -TRACE_EVENT(bpf_map_create, - - TP_PROTO(const struct bpf_map *map, int ufd), - - TP_ARGS(map, ufd), - - TP_STRUCT__entry( - __field(u32, type) - __field(u32, size_key) - __field(u32, size_value) - __field(u32, max_entries) - __field(u32, flags) - __field(int, ufd) - ), - - TP_fast_assign( - __entry->type = map->map_type; - __entry->size_key = map->key_size; - __entry->size_value = map->value_size; - __entry->max_entries = map->max_entries; - __entry->flags = map->map_flags; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d key=%u val=%u max=%u flags=%x", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, __entry->size_key, __entry->size_value, - __entry->max_entries, __entry->flags) -); - -DECLARE_EVENT_CLASS(bpf_obj_prog, - - TP_PROTO(const struct bpf_prog *prg, int ufd, - const struct filename *pname), - - TP_ARGS(prg, ufd, pname), - - TP_STRUCT__entry( - __array(u8, prog_tag, 8) - __field(int, ufd) - __string(path, pname->name) - ), - - TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(prg->tag)); - memcpy(__entry->prog_tag, prg->tag, sizeof(prg->tag)); - __assign_str(path, pname->name); - __entry->ufd = ufd; - ), - - TP_printk("prog=%s path=%s ufd=%d", - __print_hex_str(__entry->prog_tag, 8), - __get_str(path), __entry->ufd) -); - -DEFINE_EVENT(bpf_obj_prog, bpf_obj_pin_prog, - - TP_PROTO(const struct bpf_prog *prg, int ufd, - const struct filename *pname), - - TP_ARGS(prg, ufd, pname) -); - -DEFINE_EVENT(bpf_obj_prog, bpf_obj_get_prog, - - TP_PROTO(const struct bpf_prog *prg, int ufd, - const struct filename *pname), - - TP_ARGS(prg, ufd, pname) -); - -DECLARE_EVENT_CLASS(bpf_obj_map, - - TP_PROTO(const struct bpf_map *map, int ufd, - const struct filename *pname), - - TP_ARGS(map, ufd, pname), - - TP_STRUCT__entry( - __field(u32, type) - __field(int, ufd) - __string(path, pname->name) - ), - - TP_fast_assign( - __assign_str(path, pname->name); - __entry->type = map->map_type; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d path=%s", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, __get_str(path)) -); - -DEFINE_EVENT(bpf_obj_map, bpf_obj_pin_map, - - TP_PROTO(const struct bpf_map *map, int ufd, - const struct filename *pname), - - TP_ARGS(map, ufd, pname) -); - -DEFINE_EVENT(bpf_obj_map, bpf_obj_get_map, - - TP_PROTO(const struct bpf_map *map, int ufd, - const struct filename *pname), - - TP_ARGS(map, ufd, pname) -); - -DECLARE_EVENT_CLASS(bpf_map_keyval, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key, const void *val), - - TP_ARGS(map, ufd, key, val), - - TP_STRUCT__entry( - __field(u32, type) - __field(u32, key_len) - __dynamic_array(u8, key, map->key_size) - __field(bool, key_trunc) - __field(u32, val_len) - __dynamic_array(u8, val, map->value_size) - __field(bool, val_trunc) - __field(int, ufd) - ), - - TP_fast_assign( - memcpy(__get_dynamic_array(key), key, map->key_size); - memcpy(__get_dynamic_array(val), val, map->value_size); - __entry->type = map->map_type; - __entry->key_len = min(map->key_size, 16U); - __entry->key_trunc = map->key_size != __entry->key_len; - __entry->val_len = min(map->value_size, 16U); - __entry->val_trunc = map->value_size != __entry->val_len; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d key=[%s%s] 
val=[%s%s]", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, - __print_hex(__get_dynamic_array(key), __entry->key_len), - __entry->key_trunc ? " ..." : "", - __print_hex(__get_dynamic_array(val), __entry->val_len), - __entry->val_trunc ? " ..." : "") -); - -DEFINE_EVENT(bpf_map_keyval, bpf_map_lookup_elem, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key, const void *val), - - TP_ARGS(map, ufd, key, val) -); - -DEFINE_EVENT(bpf_map_keyval, bpf_map_update_elem, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key, const void *val), - - TP_ARGS(map, ufd, key, val) -); - -TRACE_EVENT(bpf_map_delete_elem, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key), - - TP_ARGS(map, ufd, key), - - TP_STRUCT__entry( - __field(u32, type) - __field(u32, key_len) - __dynamic_array(u8, key, map->key_size) - __field(bool, key_trunc) - __field(int, ufd) - ), - - TP_fast_assign( - memcpy(__get_dynamic_array(key), key, map->key_size); - __entry->type = map->map_type; - __entry->key_len = min(map->key_size, 16U); - __entry->key_trunc = map->key_size != __entry->key_len; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d key=[%s%s]", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, - __print_hex(__get_dynamic_array(key), __entry->key_len), - __entry->key_trunc ? " ..." : "") -); - -TRACE_EVENT(bpf_map_next_key, - - TP_PROTO(const struct bpf_map *map, int ufd, - const void *key, const void *key_next), - - TP_ARGS(map, ufd, key, key_next), - - TP_STRUCT__entry( - __field(u32, type) - __field(u32, key_len) - __dynamic_array(u8, key, map->key_size) - __dynamic_array(u8, nxt, map->key_size) - __field(bool, key_trunc) - __field(bool, key_null) - __field(int, ufd) - ), - - TP_fast_assign( - if (key) - memcpy(__get_dynamic_array(key), key, map->key_size); - __entry->key_null = !key; - memcpy(__get_dynamic_array(nxt), key_next, map->key_size); - __entry->type = map->map_type; - __entry->key_len = min(map->key_size, 16U); - __entry->key_trunc = map->key_size != __entry->key_len; - __entry->ufd = ufd; - ), - - TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]", - __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), - __entry->ufd, - __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key), - __entry->key_len), - __entry->key_trunc && !__entry->key_null ? " ..." : "", - __print_hex(__get_dynamic_array(nxt), __entry->key_len), - __entry->key_trunc ? " ..." 
: "") -); -#endif /* CONFIG_BPF_SYSCALL */ -#endif /* _TRACE_BPF_H */ - -#include diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9349a5db3cf2..90feeba3a1a1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1845,9 +1845,3 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, #include EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); - -/* These are only used within the BPF_SYSCALL code */ -#ifdef CONFIG_BPF_SYSCALL -EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type); -EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu); -#endif diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index a41343009ccc..ed13645bd80c 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -429,13 +429,6 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) ret = bpf_obj_do_pin(pname, raw, type); if (ret != 0) bpf_any_put(raw, type); - if ((trace_bpf_obj_pin_prog_enabled() || - trace_bpf_obj_pin_map_enabled()) && !ret) { - if (type == BPF_TYPE_PROG) - trace_bpf_obj_pin_prog(raw, ufd, pname); - if (type == BPF_TYPE_MAP) - trace_bpf_obj_pin_map(raw, ufd, pname); - } out: putname(pname); return ret; @@ -502,15 +495,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags) else goto out; - if (ret < 0) { + if (ret < 0) bpf_any_put(raw, type); - } else if (trace_bpf_obj_get_prog_enabled() || - trace_bpf_obj_get_map_enabled()) { - if (type == BPF_TYPE_PROG) - trace_bpf_obj_get_prog(raw, ret, pname); - if (type == BPF_TYPE_MAP) - trace_bpf_obj_get_map(raw, ret, pname); - } out: putname(pname); return ret; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0bd2944eafb9..263e13ede029 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -503,7 +503,6 @@ static int map_create(union bpf_attr *attr) return err; } - trace_bpf_map_create(map, err); return err; free_map: @@ -663,7 +662,6 @@ static int map_lookup_elem(union bpf_attr *attr) if (copy_to_user(uvalue, value, value_size) != 0) goto free_value; - trace_bpf_map_lookup_elem(map, ufd, key, value); err = 0; free_value: @@ -760,8 +758,6 @@ static int map_update_elem(union bpf_attr *attr) __this_cpu_dec(bpf_prog_active); preempt_enable(); out: - if (!err) - trace_bpf_map_update_elem(map, ufd, key, value); free_value: kfree(value); free_key: @@ -814,8 +810,6 @@ static int map_delete_elem(union bpf_attr *attr) __this_cpu_dec(bpf_prog_active); preempt_enable(); out: - if (!err) - trace_bpf_map_delete_elem(map, ufd, key); kfree(key); err_put: fdput(f); @@ -879,7 +873,6 @@ out: if (copy_to_user(unext_key, next_key, map->key_size) != 0) goto free_next_key; - trace_bpf_map_next_key(map, ufd, key, next_key); err = 0; free_next_key: @@ -1027,7 +1020,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) if (atomic_dec_and_test(&prog->aux->refcnt)) { int i; - trace_bpf_prog_put_rcu(prog); /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); @@ -1194,11 +1186,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv) { - struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv); - - if (!IS_ERR(prog)) - trace_bpf_prog_get_type(prog); - return prog; + return __bpf_prog_get(ufd, &type, attach_drv); } EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); @@ -1373,7 +1361,6 @@ static int bpf_prog_load(union bpf_attr *attr) } bpf_prog_kallsyms_add(prog); - trace_bpf_prog_load(prog, err); return err; free_used_maps: -- cgit v1.2.3 From 630a4d3874a06aa9f9481cbfc688981aad7a834c Mon Sep 17 00:00:00 2001 From: 
Jakub Kicinski Date: Thu, 3 May 2018 18:37:09 -0700 Subject: nfp: bpf: record offload neutral maps in the driver For asynchronous events originating from the device, like perf event output, we need to be able to make sure that objects being referred to by the FW message are valid on the host. FW events can get queued and reordered. Even if we had a FW message "barrier", we should still protect ourselves from bogus FW output. Add a reverse-mapping hash table and record in it all raw map pointers FW may refer to. Only record neutral maps, i.e. perf event arrays. These are currently the only objects FW can refer to. Use RCU protection on the read side; the update side is under RTNL. Since program vs map destruction order is slightly painful for offload, simply take an extra reference on all the recorded maps to make sure they don't disappear. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 25 ++++- drivers/net/ethernet/netronome/nfp/bpf/main.h | 20 +++- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 125 ++++++++++++++++++++++- drivers/net/ethernet/netronome/nfp/nfp_app.c | 2 +- kernel/bpf/syscall.c | 2 + 5 files changed, 168 insertions(+), 6 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 1dc424685f4e..f65df341c557 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017 Netronome Systems, Inc. + * Copyright (C) 2017-2018 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -43,6 +43,14 @@ #include "fw.h" #include "main.h" +const struct rhashtable_params nfp_bpf_maps_neutral_params = { + .nelem_hint = 4, + .key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr), + .key_offset = offsetof(struct nfp_bpf_neutral_map, ptr), + .head_offset = offsetof(struct nfp_bpf_neutral_map, l), + .automatic_shrinking = true, +}; + static bool nfp_net_ebpf_capable(struct nfp_net *nn) { #ifdef __LITTLE_ENDIAN @@ -401,17 +409,28 @@ static int nfp_bpf_init(struct nfp_app *app) init_waitqueue_head(&bpf->cmsg_wq); INIT_LIST_HEAD(&bpf->map_list); - err = nfp_bpf_parse_capabilities(app); + err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params); if (err) goto err_free_bpf; + err = nfp_bpf_parse_capabilities(app); + if (err) + goto err_free_neutral_maps; + return 0; +err_free_neutral_maps: + rhashtable_destroy(&bpf->maps_neutral); err_free_bpf: kfree(bpf); return err; } +static void nfp_check_rhashtable_empty(void *ptr, void *arg) +{ + WARN_ON_ONCE(1); +} + static void nfp_bpf_clean(struct nfp_app *app) { struct nfp_app_bpf *bpf = app->priv; @@ -419,6 +438,8 @@ static void nfp_bpf_clean(struct nfp_app *app) WARN_ON(!skb_queue_empty(&bpf->cmsg_replies)); WARN_ON(!list_empty(&bpf->map_list)); WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use); + rhashtable_free_and_destroy(&bpf->maps_neutral, + nfp_check_rhashtable_empty, NULL); kfree(bpf); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 68b5d326483d..5db6284a44ba 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2017 Netronome Systems, Inc. 
+ * Copyright (C) 2016-2018 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +115,8 @@ enum pkt_vec { * @maps_in_use: number of currently offloaded maps * @map_elems_in_use: number of elements allocated to offloaded maps * + * @maps_neutral: hash table of offload-neutral maps (on pointer) + * * @adjust_head: adjust head capability * @adjust_head.flags: extra flags for adjust head * @adjust_head.off_min: minimal packet offset within buffer required @@ -150,6 +153,8 @@ struct nfp_app_bpf { unsigned int maps_in_use; unsigned int map_elems_in_use; + struct rhashtable maps_neutral; + struct nfp_bpf_cap_adjust_head { u32 flags; int off_min; @@ -199,6 +204,14 @@ struct nfp_bpf_map { enum nfp_bpf_map_use use_map[]; }; +struct nfp_bpf_neutral_map { + struct rhash_head l; + struct bpf_map *ptr; + u32 count; +}; + +extern const struct rhashtable_params nfp_bpf_maps_neutral_params; + struct nfp_prog; struct nfp_insn_meta; typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); @@ -367,6 +380,8 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) * @error: error code if something went wrong * @stack_depth: max stack depth from the verifier * @adjust_head_location: if program has single adjust head call - the insn no. + * @map_records_cnt: the number of map pointers recorded for this prog + * @map_records: the map record pointers from bpf->maps_neutral * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) */ struct nfp_prog { @@ -390,6 +405,9 @@ struct nfp_prog { unsigned int stack_depth; unsigned int adjust_head_location; + unsigned int map_records_cnt; + struct nfp_bpf_neutral_map **map_records; + struct list_head insns; }; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 42d98792bd25..e3ead906cc60 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2017 Netronome Systems, Inc. + * Copyright (C) 2016-2018 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -56,6 +56,126 @@ #include "../nfp_net_ctrl.h" #include "../nfp_net.h" +static int +nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, + struct bpf_map *map) +{ + struct nfp_bpf_neutral_map *record; + int err; + + /* Map record paths are entered via ndo, update side is protected. */ + ASSERT_RTNL(); + + /* Reuse path - other offloaded program is already tracking this map. */ + record = rhashtable_lookup_fast(&bpf->maps_neutral, &map, + nfp_bpf_maps_neutral_params); + if (record) { + nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; + record->count++; + return 0; + } + + /* Grab a single ref to the map for our record. The prog destroy ndo + * happens after free_used_maps(). 
+ */ + map = bpf_map_inc(map, false); + if (IS_ERR(map)) + return PTR_ERR(map); + + record = kmalloc(sizeof(*record), GFP_KERNEL); + if (!record) { + err = -ENOMEM; + goto err_map_put; + } + + record->ptr = map; + record->count = 1; + + err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l, + nfp_bpf_maps_neutral_params); + if (err) + goto err_free_rec; + + nfp_prog->map_records[nfp_prog->map_records_cnt++] = record; + + return 0; + +err_free_rec: + kfree(record); +err_map_put: + bpf_map_put(map); + return err; +} + +static void +nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog) +{ + bool freed = false; + int i; + + ASSERT_RTNL(); + + for (i = 0; i < nfp_prog->map_records_cnt; i++) { + if (--nfp_prog->map_records[i]->count) { + nfp_prog->map_records[i] = NULL; + continue; + } + + WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral, + &nfp_prog->map_records[i]->l, + nfp_bpf_maps_neutral_params)); + freed = true; + } + + if (freed) { + synchronize_rcu(); + + for (i = 0; i < nfp_prog->map_records_cnt; i++) + if (nfp_prog->map_records[i]) { + bpf_map_put(nfp_prog->map_records[i]->ptr); + kfree(nfp_prog->map_records[i]); + } + } + + kfree(nfp_prog->map_records); + nfp_prog->map_records = NULL; + nfp_prog->map_records_cnt = 0; +} + +static int +nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog, + struct bpf_prog *prog) +{ + int i, cnt, err; + + /* Quickly count the maps we will have to remember */ + cnt = 0; + for (i = 0; i < prog->aux->used_map_cnt; i++) + if (bpf_map_offload_neutral(prog->aux->used_maps[i])) + cnt++; + if (!cnt) + return 0; + + nfp_prog->map_records = kmalloc_array(cnt, + sizeof(nfp_prog->map_records[0]), + GFP_KERNEL); + if (!nfp_prog->map_records) + return -ENOMEM; + + for (i = 0; i < prog->aux->used_map_cnt; i++) + if (bpf_map_offload_neutral(prog->aux->used_maps[i])) { + err = nfp_map_ptr_record(bpf, nfp_prog, + prog->aux->used_maps[i]); + if (err) { + nfp_map_ptrs_forget(bpf, nfp_prog); + return err; + } + } + WARN_ON(cnt != nfp_prog->map_records_cnt); + + return 0; +} + static int nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, unsigned int cnt) @@ -151,7 +271,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); prog->aux->offload->jited_image = nfp_prog->prog; - return 0; + return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog); } static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) @@ -159,6 +279,7 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; kvfree(nfp_prog->prog); + nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog); nfp_prog_free(nfp_prog); return 0; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 6aedef0ad433..0e0253c7e17b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017 Netronome Systems, Inc. + * Copyright (C) 2017-2018 Netronome Systems, Inc. 
* * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 263e13ede029..9b87198deea2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -282,6 +282,7 @@ void bpf_map_put(struct bpf_map *map) { __bpf_map_put(map, true); } +EXPORT_SYMBOL_GPL(bpf_map_put); void bpf_map_put_with_uref(struct bpf_map *map) { @@ -543,6 +544,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) atomic_inc(&map->usercnt); return map; } +EXPORT_SYMBOL_GPL(bpf_map_inc); struct bpf_map *bpf_map_get_with_uref(u32 ufd) { -- cgit v1.2.3 From 78958fca7ead2f81b60a6827881c4866d1ed0c52 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 4 May 2018 14:49:51 -0700 Subject: bpf: btf: Introduce BTF ID This patch gives an ID to each loaded BTF. The ID is allocated by the idr like the existing prog-id and map-id. The btf_put(map->btf) is moved to __bpf_map_put() so that userspace can stop seeing the BTF ID as soon as the last BTF refcnt is gone. It also makes BTF accessible from userspace through: 1. the new BPF_BTF_GET_FD_BY_ID command. It is limited to CAP_SYS_ADMIN, which is in line with the BPF_BTF_LOAD cmd and the existing BPF_[MAP|PROG]_GET_FD_BY_ID cmds. 2. the new btf_id (and btf_key_id + btf_value_id) in "struct bpf_map_info". Once the BTF ID handle is accessible from userspace, freeing a BTF object has to go through an RCU grace period. The BPF_BTF_GET_FD_BY_ID cmd can then be done under rcu_read_lock() instead of taking a spin_lock. [Note: A similar rcu usage can be applied to the existing bpf_prog_get_fd_by_id() in a follow-up patch] When processing the BPF_BTF_GET_FD_BY_ID cmd, refcount_inc_not_zero() is needed because the BTF object could already be on the rcu dead row. btf_get() is removed since its usage is currently limited to btf.c alone; refcount_inc() is used directly instead. 
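As an illustration only, an untested sketch of re-opening a BTF object from the id that a later patch reports in bpf_map_info:

	/* Sketch (untested): turn a BTF object id back into a usable fd. */
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static int btf_fd_from_id(__u32 btf_id)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.btf_id = btf_id;	/* e.g. taken from bpf_map_info.btf_id */

		/* On failure, errno is ENOENT if the ID is already gone, or
		 * EPERM without CAP_SYS_ADMIN, per the kernel side above.
		 */
		return syscall(__NR_bpf, BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
	}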
Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/btf.h | 2 + include/uapi/linux/bpf.h | 5 +++ kernel/bpf/btf.c | 108 ++++++++++++++++++++++++++++++++++++++++++----- kernel/bpf/syscall.c | 24 ++++++++++- 4 files changed, 128 insertions(+), 11 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/include/linux/btf.h b/include/linux/btf.h index a966dc6d61ee..e076c4697049 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -44,5 +44,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, u32 *ret_size); void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m); +int btf_get_fd_by_id(u32 id); +u32 btf_id(const struct btf *btf); #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 93d5a4eeec2a..6106f23a9a8a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -96,6 +96,7 @@ enum bpf_cmd { BPF_PROG_QUERY, BPF_RAW_TRACEPOINT_OPEN, BPF_BTF_LOAD, + BPF_BTF_GET_FD_BY_ID, }; enum bpf_map_type { @@ -344,6 +345,7 @@ union bpf_attr { __u32 start_id; __u32 prog_id; __u32 map_id; + __u32 btf_id; }; __u32 next_id; __u32 open_flags; @@ -2130,6 +2132,9 @@ struct bpf_map_info { __u32 ifindex; __u64 netns_dev; __u64 netns_ino; + __u32 btf_id; + __u32 btf_key_id; + __u32 btf_value_id; } __attribute__((aligned(8))); /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index fa0dce0452e7..40950b6bf395 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -179,6 +180,9 @@ i < btf_type_vlen(struct_type); \ i++, member++) +static DEFINE_IDR(btf_idr); +static DEFINE_SPINLOCK(btf_idr_lock); + struct btf { union { struct btf_header *hdr; @@ -193,6 +197,8 @@ struct btf { u32 types_size; u32 data_size; refcount_t refcnt; + u32 id; + struct rcu_head rcu; }; enum verifier_phase { @@ -598,6 +604,42 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) return 0; } +static int btf_alloc_id(struct btf *btf) +{ + int id; + + idr_preload(GFP_KERNEL); + spin_lock_bh(&btf_idr_lock); + id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC); + if (id > 0) + btf->id = id; + spin_unlock_bh(&btf_idr_lock); + idr_preload_end(); + + if (WARN_ON_ONCE(!id)) + return -ENOSPC; + + return id > 0 ? 0 : id; +} + +static void btf_free_id(struct btf *btf) +{ + unsigned long flags; + + /* + * In map-in-map, calling map_delete_elem() on outer + * map will call bpf_map_put on the inner map. + * It will then eventually call btf_free_id() + * on the inner map. Some of the map_delete_elem() + * implementation may have irq disabled, so + * we need to use the _irqsave() version instead + * of the _bh() version. 
+ */ + spin_lock_irqsave(&btf_idr_lock, flags); + idr_remove(&btf_idr, btf->id); + spin_unlock_irqrestore(&btf_idr_lock, flags); +} + static void btf_free(struct btf *btf) { kvfree(btf->types); @@ -607,15 +649,19 @@ static void btf_free(struct btf *btf) kfree(btf); } -static void btf_get(struct btf *btf) +static void btf_free_rcu(struct rcu_head *rcu) { - refcount_inc(&btf->refcnt); + struct btf *btf = container_of(rcu, struct btf, rcu); + + btf_free(btf); } void btf_put(struct btf *btf) { - if (btf && refcount_dec_and_test(&btf->refcnt)) - btf_free(btf); + if (btf && refcount_dec_and_test(&btf->refcnt)) { + btf_free_id(btf); + call_rcu(&btf->rcu, btf_free_rcu); + } } static int env_resolve_init(struct btf_verifier_env *env) @@ -2006,10 +2052,15 @@ const struct file_operations btf_fops = { .release = btf_release, }; +static int __btf_new_fd(struct btf *btf) +{ + return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC); +} + int btf_new_fd(const union bpf_attr *attr) { struct btf *btf; - int fd; + int ret; btf = btf_parse(u64_to_user_ptr(attr->btf), attr->btf_size, attr->btf_log_level, @@ -2018,12 +2069,23 @@ int btf_new_fd(const union bpf_attr *attr) if (IS_ERR(btf)) return PTR_ERR(btf); - fd = anon_inode_getfd("btf", &btf_fops, btf, - O_RDONLY | O_CLOEXEC); - if (fd < 0) + ret = btf_alloc_id(btf); + if (ret) { + btf_free(btf); + return ret; + } + + /* + * The BTF ID is published to the userspace. + * All BTF free must go through call_rcu() from + * now on (i.e. free by calling btf_put()). + */ + + ret = __btf_new_fd(btf); + if (ret < 0) btf_put(btf); - return fd; + return ret; } struct btf *btf_get_by_fd(int fd) @@ -2042,7 +2104,7 @@ struct btf *btf_get_by_fd(int fd) } btf = f.file->private_data; - btf_get(btf); + refcount_inc(&btf->refcnt); fdput(f); return btf; @@ -2062,3 +2124,29 @@ int btf_get_info_by_fd(const struct btf *btf, return 0; } + +int btf_get_fd_by_id(u32 id) +{ + struct btf *btf; + int fd; + + rcu_read_lock(); + btf = idr_find(&btf_idr, id); + if (!btf || !refcount_inc_not_zero(&btf->refcnt)) + btf = ERR_PTR(-ENOENT); + rcu_read_unlock(); + + if (IS_ERR(btf)) + return PTR_ERR(btf); + + fd = __btf_new_fd(btf); + if (fd < 0) + btf_put(btf); + + return fd; +} + +u32 btf_id(const struct btf *btf) +{ + return btf->id; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9b87198deea2..31c4092da277 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -252,7 +252,6 @@ static void bpf_map_free_deferred(struct work_struct *work) bpf_map_uncharge_memlock(map); security_bpf_map_free(map); - btf_put(map->btf); /* implementation dependent freeing */ map->ops->map_free(map); } @@ -273,6 +272,7 @@ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) if (atomic_dec_and_test(&map->refcnt)) { /* bpf_map_free_id() must be called first */ bpf_map_free_id(map, do_idr_lock); + btf_put(map->btf); INIT_WORK(&map->work, bpf_map_free_deferred); schedule_work(&map->work); } @@ -2002,6 +2002,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, info.map_flags = map->map_flags; memcpy(info.name, map->name, sizeof(map->name)); + if (map->btf) { + info.btf_id = btf_id(map->btf); + info.btf_key_id = map->btf_key_id; + info.btf_value_id = map->btf_value_id; + } + if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_info_fill(&info, map); if (err) @@ -2059,6 +2065,19 @@ static int bpf_btf_load(const union bpf_attr *attr) return btf_new_fd(attr); } +#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id + +static int bpf_btf_get_fd_by_id(const union bpf_attr 
*attr) +{ + if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return btf_get_fd_by_id(attr->btf_id); +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -2142,6 +2161,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_BTF_LOAD: err = bpf_btf_load(&attr); break; + case BPF_BTF_GET_FD_BY_ID: + err = bpf_btf_get_fd_by_id(&attr); + break; default: err = -EINVAL; break; -- cgit v1.2.3 From 62dab84c81a487d946a5fc37c6df541dd95cca38 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 4 May 2018 14:49:52 -0700 Subject: bpf: btf: Add struct bpf_btf_info During BPF_OBJ_GET_INFO_BY_FD on a btf_fd, the current bpf_attr's info.info is directly filled with the BTF binary data. It is not extensible. In this case, we want to add the BTF ID. This patch adds "struct bpf_btf_info" which has the BTF ID as one of its members. The BTF binary data itself is exposed through the "btf" and "btf_size" members. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 ++++++ kernel/bpf/btf.c | 26 +++++++++++++++++++++----- kernel/bpf/syscall.c | 17 ++++++++++++++++- 3 files changed, 43 insertions(+), 6 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6106f23a9a8a..d615c777b573 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2137,6 +2137,12 @@ struct bpf_map_info { __u32 btf_value_id; } __attribute__((aligned(8))); +struct bpf_btf_info { + __aligned_u64 btf; + __u32 btf_size; + __u32 id; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach attach type). 
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 40950b6bf395..ded10ab47b8a 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2114,12 +2114,28 @@ int btf_get_info_by_fd(const struct btf *btf, const union bpf_attr *attr, union bpf_attr __user *uattr) { - void __user *udata = u64_to_user_ptr(attr->info.info); - u32 copy_len = min_t(u32, btf->data_size, - attr->info.info_len); + struct bpf_btf_info __user *uinfo; + struct bpf_btf_info info = {}; + u32 info_copy, btf_copy; + void __user *ubtf; + u32 uinfo_len; - if (copy_to_user(udata, btf->data, copy_len) || - put_user(btf->data_size, &uattr->info.info_len)) + uinfo = u64_to_user_ptr(attr->info.info); + uinfo_len = attr->info.info_len; + + info_copy = min_t(u32, uinfo_len, sizeof(info)); + if (copy_from_user(&info, uinfo, info_copy)) + return -EFAULT; + + info.id = btf->id; + ubtf = u64_to_user_ptr(info.btf); + btf_copy = min_t(u32, btf->data_size, info.btf_size); + if (copy_to_user(ubtf, btf->data, btf_copy)) + return -EFAULT; + info.btf_size = btf->data_size; + + if (copy_to_user(uinfo, &info, info_copy) || + put_user(info_copy, &uattr->info.info_len)) return -EFAULT; return 0; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 31c4092da277..e2aeb5e89f44 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2021,6 +2021,21 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, return 0; } +static int bpf_btf_get_info_by_fd(struct btf *btf, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); + u32 info_len = attr->info.info_len; + int err; + + err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len); + if (err) + return err; + + return btf_get_info_by_fd(btf, attr, uattr); +} + #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, @@ -2044,7 +2059,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, err = bpf_map_get_info_by_fd(f.file->private_data, attr, uattr); else if (f.file->f_op == &btf_fops) - err = btf_get_info_by_fd(f.file->private_data, attr, uattr); + err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr); else err = -EINVAL; -- cgit v1.2.3 From dcab51f19b291d5ee23724c51b0a3a597c16451a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 22 May 2018 15:03:31 -0700 Subject: bpf: Expose check_uarg_tail_zero() This patch exposes check_uarg_tail_zero() which will be reused by a later BTF patch. Its name is changed to bpf_check_uarg_tail_zero(). Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ed0122b45b63..f6fe3c719ca8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); int bpf_get_file_flag(int flags); +int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size, + size_t actual_size); /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index bfcde949c7f8..2b29ef84ded3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -65,9 +65,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = { * copy_from_user() call. However, this is not a concern since this function is * meant to be a future-proofing of bits. */ -static int check_uarg_tail_zero(void __user *uaddr, - size_t expected_size, - size_t actual_size) +int bpf_check_uarg_tail_zero(void __user *uaddr, + size_t expected_size, + size_t actual_size) { unsigned char __user *addr; unsigned char __user *end; @@ -1899,7 +1899,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, u32 ulen; int err; - err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); + err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); if (err) return err; info_len = min_t(u32, sizeof(info), info_len); @@ -1998,7 +1998,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, u32 info_len = attr->info.info_len; int err; - err = check_uarg_tail_zero(uinfo, sizeof(info), info_len); + err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); if (err) return err; info_len = min_t(u32, sizeof(info), info_len); @@ -2038,7 +2038,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf, u32 info_len = attr->info.info_len; int err; - err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len); + err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len); if (err) return err; @@ -2110,7 +2110,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) return -EPERM; - err = check_uarg_tail_zero(uattr, sizeof(attr), size); + err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); if (err) return err; size = min_t(u32, size, sizeof(attr)); -- cgit v1.2.3 From 9b2cf328b2eccf761537a06bef914d2a0700fba7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 22 May 2018 14:57:21 -0700 Subject: bpf: btf: Rename btf_key_id and btf_value_id in bpf_map_info In "struct bpf_map_info", the names "btf_id", "btf_key_id" and "btf_value_id" could cause confusion because the "id" of "btf_id" means the BPF obj id given to the BTF object while "btf_key_id" and "btf_value_id" mean the BTF type id within that BTF object. To make this clear, btf_key_id and btf_value_id are renamed to btf_key_type_id and btf_value_type_id. 
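To illustrate the distinction, here is an untested sketch (map_fd assumed valid) of reading all three ids after the rename; btf_id names the BTF object itself, while the *_type_id fields are type ids inside that BTF:

	/* Sketch (untested): object id vs. type id in bpf_map_info. */
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static void show_map_btf_ids(int map_fd)
	{
		struct bpf_map_info info;
		union bpf_attr attr;

		memset(&info, 0, sizeof(info));
		memset(&attr, 0, sizeof(attr));
		attr.info.bpf_fd = map_fd;
		attr.info.info_len = sizeof(info);
		attr.info.info = (__u64)(unsigned long)&info;

		if (syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
			return;

		/* btf_id is a BPF object id; the other two are type ids in that BTF */
		printf("btf_id %u btf_key_type_id %u btf_value_type_id %u\n",
		       info.btf_id, info.btf_key_type_id, info.btf_value_type_id);
	}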
Suggested-by: Daniel Borkmann Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 4 ++-- include/uapi/linux/bpf.h | 8 ++++---- kernel/bpf/arraymap.c | 2 +- kernel/bpf/syscall.c | 18 +++++++++--------- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f6fe3c719ca8..1795eeee846c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -69,8 +69,8 @@ struct bpf_map { u32 pages; u32 id; int numa_node; - u32 btf_key_id; - u32 btf_value_id; + u32 btf_key_type_id; + u32 btf_value_type_id; struct btf *btf; bool unpriv_array; /* 55 bytes hole */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 97446bbe2ca5..c3e502d06bc3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -284,8 +284,8 @@ union bpf_attr { char map_name[BPF_OBJ_NAME_LEN]; __u32 map_ifindex; /* ifindex of netdev to create on */ __u32 btf_fd; /* fd pointing to a BTF type data */ - __u32 btf_key_id; /* BTF type_id of the key */ - __u32 btf_value_id; /* BTF type_id of the value */ + __u32 btf_key_type_id; /* BTF type_id of the key */ + __u32 btf_value_type_id; /* BTF type_id of the value */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -2219,8 +2219,8 @@ struct bpf_map_info { __u64 netns_dev; __u64 netns_ino; __u32 btf_id; - __u32 btf_key_id; - __u32 btf_value_id; + __u32 btf_key_type_id; + __u32 btf_value_type_id; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 0fd8d8f1a398..544e58f5f642 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key, } seq_printf(m, "%u: ", *(u32 *)key); - btf_type_seq_show(map->btf, map->btf_value_id, value, m); + btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); seq_puts(m, "\n"); rcu_read_unlock(); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2b29ef84ded3..0b4c94551001 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -422,7 +422,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src) return 0; } -#define BPF_MAP_CREATE_LAST_FIELD btf_value_id +#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -457,10 +457,10 @@ static int map_create(union bpf_attr *attr) atomic_set(&map->usercnt, 1); if (bpf_map_support_seq_show(map) && - (attr->btf_key_id || attr->btf_value_id)) { + (attr->btf_key_type_id || attr->btf_value_type_id)) { struct btf *btf; - if (!attr->btf_key_id || !attr->btf_value_id) { + if (!attr->btf_key_type_id || !attr->btf_value_type_id) { err = -EINVAL; goto free_map_nouncharge; } @@ -471,16 +471,16 @@ static int map_create(union bpf_attr *attr) goto free_map_nouncharge; } - err = map->ops->map_check_btf(map, btf, attr->btf_key_id, - attr->btf_value_id); + err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id, + attr->btf_value_type_id); if (err) { btf_put(btf); goto free_map_nouncharge; } map->btf = btf; - map->btf_key_id = attr->btf_key_id; - map->btf_value_id = attr->btf_value_id; + map->btf_key_type_id = attr->btf_key_type_id; + map->btf_value_type_id = attr->btf_value_type_id; } err = security_bpf_map_alloc(map); @@ -2013,8 +2013,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, if (map->btf) { info.btf_id = btf_id(map->btf); - info.btf_key_id = map->btf_key_id; - 
info.btf_value_id = map->btf_value_id; + info.btf_key_type_id = map->btf_key_type_id; + info.btf_value_type_id = map->btf_value_type_id; } if (bpf_map_is_dev_bound(map)) { -- cgit v1.2.3 From dbecd7388476aedeb66389febea84d5450d28773 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 24 May 2018 12:26:48 +0530 Subject: bpf: get kernel symbol addresses via syscall This adds two new fields to struct bpf_prog_info. For multi-function programs, these fields can be used to pass a list of kernel symbol addresses for all functions in a given program to userspace using the bpf system call with the BPF_OBJ_GET_INFO_BY_FD command. When bpf_jit_kallsyms is enabled, we can get the address of the corresponding kernel symbol for a callee function and resolve the symbol's name. The address is determined by adding the value of the call instruction's imm field to __bpf_call_base. This offset gets assigned to the imm field by the verifier. For some architectures, such as powerpc64, the imm field is not large enough to hold this offset. We resolve this by: [1] Assigning the subprog id to the imm field of a call instruction in the verifier instead of the offset of the callee's symbol's address from __bpf_call_base. [2] Determining the address of a callee's corresponding symbol by using the imm field as an index for the list of kernel symbol addresses now available from the program info. Suggested-by: Daniel Borkmann Signed-off-by: Sandipan Das Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 25 +++++++++++++++++++++++++ kernel/bpf/verifier.c | 7 +------ 3 files changed, 28 insertions(+), 6 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c3e502d06bc3..0be90965867d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2205,6 +2205,8 @@ struct bpf_prog_info { __u32 gpl_compatible:1; __u64 netns_dev; __u64 netns_ino; + __u32 nr_jited_ksyms; + __aligned_u64 jited_ksyms; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0b4c94551001..068a4fc79ddb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1933,6 +1933,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, if (!capable(CAP_SYS_ADMIN)) { info.jited_prog_len = 0; info.xlated_prog_len = 0; + info.nr_jited_ksyms = 0; goto done; } @@ -1981,6 +1982,30 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } } + ulen = info.nr_jited_ksyms; + info.nr_jited_ksyms = prog->aux->func_cnt; + if (info.nr_jited_ksyms && ulen) { + if (bpf_dump_raw_ok()) { + u64 __user *user_ksyms; + ulong ksym_addr; + u32 i; + + /* copy the address of the kernel symbol + * corresponding to each function + */ + ulen = min_t(u32, info.nr_jited_ksyms, ulen); + user_ksyms = u64_to_user_ptr(info.jited_ksyms); + for (i = 0; i < ulen; i++) { + ksym_addr = (ulong) prog->aux->func[i]->bpf_func; + ksym_addr &= PAGE_MASK; + if (put_user((u64) ksym_addr, &user_ksyms[i])) + return -EFAULT; + } + } else { + info.jited_ksyms = 0; + } + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 559cb74ba29e..8c4d9d0fd3ab 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5426,17 +5426,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) * later look the same as if they were interpreted only.
*/ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { - unsigned long addr; - if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) continue; insn->off = env->insn_aux_data[i].call_imm; subprog = find_subprog(env, i + insn->off + 1); - addr = (unsigned long)func[subprog]->bpf_func; - addr &= PAGE_MASK; - insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) - addr - __bpf_call_base; + insn->imm = subprog; } prog->jited = 1; -- cgit v1.2.3 From 4d56a76ead2fcd856e677cdc9445ad331a409b8c Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 24 May 2018 12:26:51 +0530 Subject: bpf: fix multi-function JITed dump obtained via syscall Currently, for multi-function programs, we cannot get the JITed instructions using the bpf system call's BPF_OBJ_GET_INFO_BY_FD command. Because of this, userspace tools such as bpftool fail to identify a multi-function program as being JITed or not. With the JIT enabled and the test program running, this can be verified as follows: # cat /proc/sys/net/core/bpf_jit_enable 1 Before applying this patch: # bpftool prog list 1: kprobe name foo tag b811aab41a39ad3d gpl loaded_at 2018-05-16T11:43:38+0530 uid 0 xlated 216B not jited memlock 65536B ... # bpftool prog dump jited id 1 no instructions returned After applying this patch: # bpftool prog list 1: kprobe name foo tag b811aab41a39ad3d gpl loaded_at 2018-05-16T12:13:01+0530 uid 0 xlated 216B jited 308B memlock 65536B ... # bpftool prog dump jited id 1 0: nop 4: nop 8: mflr r0 c: std r0,16(r1) 10: stdu r1,-112(r1) 14: std r31,104(r1) 18: addi r31,r1,48 1c: li r3,10 ... Signed-off-by: Sandipan Das Signed-off-by: Daniel Borkmann --- kernel/bpf/syscall.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 068a4fc79ddb..c8e987a612b5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1970,13 +1970,44 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, * for offload. */ ulen = info.jited_prog_len; - info.jited_prog_len = prog->jited_len; + if (prog->aux->func_cnt) { + u32 i; + + info.jited_prog_len = 0; + for (i = 0; i < prog->aux->func_cnt; i++) + info.jited_prog_len += prog->aux->func[i]->jited_len; + } else { + info.jited_prog_len = prog->jited_len; + } + if (info.jited_prog_len && ulen) { if (bpf_dump_raw_ok()) { uinsns = u64_to_user_ptr(info.jited_prog_insns); ulen = min_t(u32, info.jited_prog_len, ulen); - if (copy_to_user(uinsns, prog->bpf_func, ulen)) - return -EFAULT; + + /* for multi-function programs, copy the JITed + * instructions for all the functions + */ + if (prog->aux->func_cnt) { + u32 len, free, i; + u8 *img; + + free = ulen; + for (i = 0; i < prog->aux->func_cnt; i++) { + len = prog->aux->func[i]->jited_len; + len = min_t(u32, len, free); + img = (u8 *) prog->aux->func[i]->bpf_func; + if (copy_to_user(uinsns, img, len)) + return -EFAULT; + uinsns += len; + free -= len; + if (!free) + break; + } + } else { + if (copy_to_user(uinsns, prog->bpf_func, ulen)) + return -EFAULT; + } } else { info.jited_prog_insns = 0; } -- cgit v1.2.3 From 815581c11cc29f74af252b6306ea1ec94160231a Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 24 May 2018 12:26:52 +0530 Subject: bpf: get JITed image lengths of functions via syscall This adds two new fields to struct bpf_prog_info.
For multi-function programs, these fields can be used to pass a list of the JITed image lengths of each function for a given program to userspace using the bpf system call with the BPF_OBJ_GET_INFO_BY_FD command. This can be used by userspace applications like bpftool to split up the contiguous JITed dump, also obtained via the system call, into more relatable chunks corresponding to each function. Signed-off-by: Sandipan Das Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'kernel/bpf/syscall.c') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0be90965867d..344d2ddcef49 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2206,7 +2206,9 @@ struct bpf_prog_info { __u64 netns_dev; __u64 netns_ino; __u32 nr_jited_ksyms; + __u32 nr_jited_func_lens; __aligned_u64 jited_ksyms; + __aligned_u64 jited_func_lens; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c8e987a612b5..788456c18617 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2037,6 +2037,26 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } } + ulen = info.nr_jited_func_lens; + info.nr_jited_func_lens = prog->aux->func_cnt; + if (info.nr_jited_func_lens && ulen) { + if (bpf_dump_raw_ok()) { + u32 __user *user_lens; + u32 func_len, i; + + /* copy the JITed image lengths for each function */ + ulen = min_t(u32, info.nr_jited_func_lens, ulen); + user_lens = u64_to_user_ptr(info.jited_func_lens); + for (i = 0; i < ulen; i++) { + func_len = prog->aux->func[i]->jited_len; + if (put_user(func_len, &user_lens[i])) + return -EFAULT; + } + } else { + info.jited_func_lens = 0; + } + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) -- cgit v1.2.3 From 41bdc4b40ed6fb26c6acc655ed9a243a348709c9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 24 May 2018 11:21:09 -0700 Subject: bpf: introduce bpf subcommand BPF_TASK_FD_QUERY Currently, suppose a userspace application has loaded a bpf program and attached it to a tracepoint/kprobe/uprobe, and a bpf introspection tool, e.g., bpftool, wants to show which bpf program is attached to which tracepoint/kprobe/uprobe. Such attachment information will be really useful to understand the overall bpf deployment in the system. There is a name field (16 bytes) for each program, which could be used to encode the attachment point. There are some drawbacks to this approach. First, a bpftool user (e.g., an admin) may not really understand the association between the name and the attachment point. Second, if one program is attached to multiple places, encoding a proper name which can imply all these attachments becomes difficult. This patch introduces a new bpf subcommand BPF_TASK_FD_QUERY. Given a pid and fd, if the <pid, fd> is associated with a tracepoint/kprobe/uprobe perf event, BPF_TASK_FD_QUERY will return . prog_id . tracepoint name, or . k[ret]probe funcname + offset or kernel addr, or . u[ret]probe filename + offset to the userspace. The user can use "bpftool prog" to find more information about the bpf program itself with prog_id.
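A rough sketch of how an introspection tool might drive the new subcommand (hypothetical helper, not from the patch; pid is the target process, target_fd a perf-event or raw-tracepoint fd in that process, and the raw syscall(2) interface is used):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int task_fd_query(int pid, int target_fd)
{
	union bpf_attr attr;
	char buf[256];
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = target_fd;
	attr.task_fd_query.buf = (__u64)(unsigned long)buf;
	attr.task_fd_query.buf_len = sizeof(buf);

	err = syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr));
	/* If the buffer was too small, the syscall fails with ENOSPC but the
	 * outputs are still filled in, and buf_len reports the full name
	 * length for a retry.
	 */
	if (err && errno != ENOSPC)
		return -1;

	printf("prog_id %u fd_type %u name %s offset 0x%llx addr 0x%llx\n",
	       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type, buf,
	       (unsigned long long)attr.task_fd_query.probe_offset,
	       (unsigned long long)attr.task_fd_query.probe_addr);
	return 0;
}
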
Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/trace_events.h | 17 ++++++ include/uapi/linux/bpf.h | 26 +++++++++ kernel/bpf/syscall.c | 131 +++++++++++++++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 48 ++++++++++++++++ kernel/trace/trace_kprobe.c | 29 ++++++++++ kernel/trace/trace_uprobe.c | 22 ++++++++ 6 files changed, 273 insertions(+) (limited to 'kernel/bpf/syscall.c') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2bde3eff564c..d34144a3c5b5 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info); int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog); int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog); struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name); +int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, + u32 *fd_type, const char **buf, + u64 *probe_offset, u64 *probe_addr); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name { return NULL; } +static inline int bpf_get_perf_event_info(const struct perf_event *event, + u32 *prog_id, u32 *fd_type, + const char **buf, u64 *probe_offset, + u64 *probe_addr) +{ + return -EOPNOTSUPP; +} #endif enum { @@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags); #ifdef CONFIG_KPROBE_EVENTS extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe); extern void perf_kprobe_destroy(struct perf_event *event); +extern int bpf_get_kprobe_info(const struct perf_event *event, + u32 *fd_type, const char **symbol, + u64 *probe_offset, u64 *probe_addr, + bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe); extern void perf_uprobe_destroy(struct perf_event *event); +extern int bpf_get_uprobe_info(const struct perf_event *event, + u32 *fd_type, const char **filename, + u64 *probe_offset, bool perf_type_tracepoint); #endif extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e95fec90c2c1..9b8c6e310e9a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -97,6 +97,7 @@ enum bpf_cmd { BPF_RAW_TRACEPOINT_OPEN, BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, + BPF_TASK_FD_QUERY, }; enum bpf_map_type { @@ -380,6 +381,22 @@ union bpf_attr { __u32 btf_log_size; __u32 btf_log_level; }; + + struct { + __u32 pid; /* input: pid */ + __u32 fd; /* input: fd */ + __u32 flags; /* input: flags */ + __u32 buf_len; /* input/output: buf len */ + __aligned_u64 buf; /* input/output: + * tp_name for tracepoint + * symbol for kprobe + * filename for uprobe + */ + __u32 prog_id; /* output: prod_id */ + __u32 fd_type; /* output: BPF_FD_TYPE_* */ + __u64 probe_offset; /* output: probe_offset */ + __u64 probe_addr; /* output: probe_addr */ + } task_fd_query; } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -2557,4 +2574,13 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +enum bpf_task_fd_type { + BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr 
*/ + BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ + BPF_FD_TYPE_UPROBE, /* filename + offset */ + BPF_FD_TYPE_URETPROBE, /* filename + offset */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 788456c18617..388d4feda348 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -2178,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) return btf_get_fd_by_id(attr->btf_id); } +static int bpf_task_fd_query_copy(const union bpf_attr *attr, + union bpf_attr __user *uattr, + u32 prog_id, u32 fd_type, + const char *buf, u64 probe_offset, + u64 probe_addr) +{ + char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); + u32 len = buf ? strlen(buf) : 0, input_len; + int err = 0; + + if (put_user(len, &uattr->task_fd_query.buf_len)) + return -EFAULT; + input_len = attr->task_fd_query.buf_len; + if (input_len && ubuf) { + if (!len) { + /* nothing to copy, just make ubuf NULL terminated */ + char zero = '\0'; + + if (put_user(zero, ubuf)) + return -EFAULT; + } else if (input_len >= len + 1) { + /* ubuf can hold the string with NULL terminator */ + if (copy_to_user(ubuf, buf, len + 1)) + return -EFAULT; + } else { + /* ubuf cannot hold the string with NULL terminator, + * do a partial copy with NULL terminator. + */ + char zero = '\0'; + + err = -ENOSPC; + if (copy_to_user(ubuf, buf, input_len - 1)) + return -EFAULT; + if (put_user(zero, ubuf + input_len - 1)) + return -EFAULT; + } + } + + if (put_user(prog_id, &uattr->task_fd_query.prog_id) || + put_user(fd_type, &uattr->task_fd_query.fd_type) || + put_user(probe_offset, &uattr->task_fd_query.probe_offset) || + put_user(probe_addr, &uattr->task_fd_query.probe_addr)) + return -EFAULT; + + return err; +} + +#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr + +static int bpf_task_fd_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + pid_t pid = attr->task_fd_query.pid; + u32 fd = attr->task_fd_query.fd; + const struct perf_event *event; + struct files_struct *files; + struct task_struct *task; + struct file *file; + int err; + + if (CHECK_ATTR(BPF_TASK_FD_QUERY)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (attr->task_fd_query.flags != 0) + return -EINVAL; + + task = get_pid_task(find_vpid(pid), PIDTYPE_PID); + if (!task) + return -ENOENT; + + files = get_files_struct(task); + put_task_struct(task); + if (!files) + return -ENOENT; + + err = 0; + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (!file) + err = -EBADF; + else + get_file(file); + spin_unlock(&files->file_lock); + put_files_struct(files); + + if (err) + goto out; + + if (file->f_op == &bpf_raw_tp_fops) { + struct bpf_raw_tracepoint *raw_tp = file->private_data; + struct bpf_raw_event_map *btp = raw_tp->btp; + + err = bpf_task_fd_query_copy(attr, uattr, + raw_tp->prog->aux->id, + BPF_FD_TYPE_RAW_TRACEPOINT, + btp->tp->name, 0, 0); + goto put_file; + } + + event = perf_get_event(file); + if (!IS_ERR(event)) { + u64 probe_offset, probe_addr; + u32 prog_id, fd_type; + const char *buf; + + err = bpf_get_perf_event_info(event, &prog_id, &fd_type, + &buf, &probe_offset, + &probe_addr); + if (!err) + err = bpf_task_fd_query_copy(attr, uattr, prog_id, + fd_type, buf, + probe_offset, + probe_addr); + goto put_file; + } + + err = -ENOTSUPP; +put_file: + fput(file); +out: + return err; +} + SYSCALL_DEFINE3(bpf, int, cmd, union 
bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -2264,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_BTF_GET_FD_BY_ID: err = bpf_btf_get_fd_by_id(&attr); break; + case BPF_TASK_FD_QUERY: + err = bpf_task_fd_query(&attr, uattr); + break; default: err = -EINVAL; break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ce2cbbff27e4..81fdf2fc94ac 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "trace_probe.h" @@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) mutex_unlock(&bpf_event_mutex); return err; } + +int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, + u32 *fd_type, const char **buf, + u64 *probe_offset, u64 *probe_addr) +{ + bool is_tracepoint, is_syscall_tp; + struct bpf_prog *prog; + int flags, err = 0; + + prog = event->prog; + if (!prog) + return -ENOENT; + + /* not supporting BPF_PROG_TYPE_PERF_EVENT yet */ + if (prog->type == BPF_PROG_TYPE_PERF_EVENT) + return -EOPNOTSUPP; + + *prog_id = prog->aux->id; + flags = event->tp_event->flags; + is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT; + is_syscall_tp = is_syscall_trace_event(event->tp_event); + + if (is_tracepoint || is_syscall_tp) { + *buf = is_tracepoint ? event->tp_event->tp->name + : event->tp_event->name; + *fd_type = BPF_FD_TYPE_TRACEPOINT; + *probe_offset = 0x0; + *probe_addr = 0x0; + } else { + /* kprobe/uprobe */ + err = -EOPNOTSUPP; +#ifdef CONFIG_KPROBE_EVENTS + if (flags & TRACE_EVENT_FL_KPROBE) + err = bpf_get_kprobe_info(event, fd_type, buf, + probe_offset, probe_addr, + event->attr.type == PERF_TYPE_TRACEPOINT); +#endif +#ifdef CONFIG_UPROBE_EVENTS + if (flags & TRACE_EVENT_FL_UPROBE) + err = bpf_get_uprobe_info(event, fd_type, buf, + probe_offset, + event->attr.type == PERF_TYPE_TRACEPOINT); +#endif + } + + return err; +} diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 02aed76e0978..daa81571b22a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, head, NULL); } NOKPROBE_SYMBOL(kretprobe_perf_func); + +int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, + const char **symbol, u64 *probe_offset, + u64 *probe_addr, bool perf_type_tracepoint) +{ + const char *pevent = trace_event_name(event->tp_event); + const char *group = event->tp_event->class->system; + struct trace_kprobe *tk; + + if (perf_type_tracepoint) + tk = find_trace_kprobe(pevent, group); + else + tk = event->tp_event->data; + if (!tk) + return -EINVAL; + + *fd_type = trace_kprobe_is_return(tk) ? 
BPF_FD_TYPE_KRETPROBE + : BPF_FD_TYPE_KPROBE; + if (tk->symbol) { + *symbol = tk->symbol; + *probe_offset = tk->rp.kp.offset; + *probe_addr = 0; + } else { + *symbol = NULL; + *probe_offset = 0; + *probe_addr = (unsigned long)tk->rp.kp.addr; + } + return 0; +} #endif /* CONFIG_PERF_EVENTS */ /* diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index ac892878dbe6..bf89a51e740d 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, { __uprobe_perf_func(tu, func, regs, ucb, dsize); } + +int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, + const char **filename, u64 *probe_offset, + bool perf_type_tracepoint) +{ + const char *pevent = trace_event_name(event->tp_event); + const char *group = event->tp_event->class->system; + struct trace_uprobe *tu; + + if (perf_type_tracepoint) + tu = find_probe_event(pevent, group); + else + tu = event->tp_event->data; + if (!tu) + return -EINVAL; + + *fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE + : BPF_FD_TYPE_UPROBE; + *filename = tu->filename; + *probe_offset = tu->offset; + return 0; +} #endif /* CONFIG_PERF_EVENTS */ static int -- cgit v1.2.3 From 1cedee13d25ab118d325f95588c1a084e9317229 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 25 May 2018 08:55:23 -0700 Subject: bpf: Hooks for sys_sendmsg In addition to the already existing BPF hooks for sys_bind and sys_connect, the patch provides new hooks for sys_sendmsg. It leverages the existing BPF program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that provides access to the socket itself (properties like family, type, protocol) and the user-passed `struct sockaddr *` so that a BPF program can override the destination IP and port for system calls such as sendto(2) or sendmsg(2) and/or assign a source IP to the socket. The hooks are implemented as two new attach types: `BPF_CGROUP_UDP4_SENDMSG` and `BPF_CGROUP_UDP6_SENDMSG` for UDPv4 and UDPv6 respectively. Separate attach types are used for UDPv4 and UDPv6 for the same reason as for the sys_bind and sys_connect hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields when the user passes a sockaddr_in, since it'd be out-of-bounds. The difference from the already existing hooks is that the sys_sendmsg hooks are implemented only for unconnected UDP. For TCP it doesn't make sense to change the user-provided `struct sockaddr *` at sendto(2)/sendmsg(2) time, since the socket either was already connected and has source/destination set, or wasn't connected and a call to sendto(2)/sendmsg(2) would lead to ENOTCONN anyway. Connected UDP is already handled by the sys_connect hooks, which can override source/destination at connect time and use the fast-path later, i.e. these hooks don't affect the UDP fast-path. Rewriting the source IP is implemented differently than in the sys_connect hooks. When sys_sendmsg is used with unconnected UDP it doesn't work to just bind the socket to the desired local IP address, since the source IP can be set on a per-packet basis by using ancillary data (cmsg(3)). So no matter if the socket is bound or not, the source IP has to be rewritten on every call to sys_sendmsg. To do so, two new fields are added to the UAPI `struct bpf_sock_addr`: * `msg_src_ip4` to set the source IPv4 for UDPv4; * `msg_src_ip6` to set the source IPv6 for UDPv6.
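For illustration, a minimal program for the new attach type could look like the sketch below. It is not from the patch: the SEC() and byte-swap macros are local stand-ins for the usual selftest helpers (a little-endian host is assumed), and all addresses/ports are made-up example values.

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))
/* stand-ins for the selftests' bpf_endian.h; little-endian host assumed */
#define bpf_htons(x) __builtin_bswap16(x)
#define bpf_htonl(x) __builtin_bswap32(x)

SEC("cgroup/sendmsg4")
int sendmsg4_rewrite(struct bpf_sock_addr *ctx)
{
	/* Redirect unconnected UDP datagrams aimed at 10.0.0.1:53 to
	 * 127.0.0.1:5353, and pin the source IP via the new msg_src_ip4.
	 */
	if (ctx->user_ip4 == bpf_htonl(0x0a000001) &&
	    ctx->user_port == bpf_htons(53)) {
		ctx->user_ip4 = bpf_htonl(0x7f000001);
		ctx->user_port = bpf_htons(5353);
		ctx->msg_src_ip4 = bpf_htonl(0x7f000001);
	}

	return 1;	/* 1 = let the syscall proceed, 0 = reject with EPERM */
}

char _license[] SEC("license") = "GPL";
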
Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 23 +++++++++++++++++------ include/linux/filter.h | 1 + include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/cgroup.c | 11 ++++++++++- kernel/bpf/syscall.c | 8 ++++++++ net/core/filter.c | 39 +++++++++++++++++++++++++++++++++++++++ net/ipv4/udp.c | 20 ++++++++++++++++++-- net/ipv6/udp.c | 24 ++++++++++++++++++++++++ 8 files changed, 125 insertions(+), 9 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index de8e89a3758b..975fb4cf1bb7 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -66,7 +66,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, - enum bpf_attach_type type); + enum bpf_attach_type type, + void *t_ctx); int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct bpf_sock_ops_kern *sock_ops, @@ -120,16 +121,18 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ + NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ + t_ctx); \ release_sock(sk); \ } \ __ret; \ @@ -151,10 +154,16 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT) + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT) + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL) + +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx) + +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ({ \ @@ -198,6 +207,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) diff --git a/include/linux/filter.h b/include/linux/filter.h index d358d1815c16..d90abdaea94b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1010,6 +1010,7 @@ struct bpf_sock_addr_kern { * only two (src and dst) are available at convert_ctx_access time */ u64 tmp_reg; + void *t_ctx; /* Attach type specific context. 
*/ }; struct bpf_sock_ops_kern { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9b8c6e310e9a..cc68787f2d97 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -160,6 +160,8 @@ enum bpf_attach_type { BPF_CGROUP_INET6_CONNECT, BPF_CGROUP_INET4_POST_BIND, BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, __MAX_BPF_ATTACH_TYPE }; @@ -2363,6 +2365,12 @@ struct bpf_sock_addr { __u32 family; /* Allows 4-byte read, but no write */ __u32 type; /* Allows 4-byte read, but no write */ __u32 protocol; /* Allows 4-byte read, but no write */ + __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write. + * Stored in network byte order. + */ + __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + * Stored in network byte order. + */ }; /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 43171a0bb02b..f7c00bd6f8e4 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -500,6 +500,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user * @type: The type of program to be exectuted + * @t_ctx: Pointer to attach type specific context * * socket is expected to be of type INET or INET6. * @@ -508,12 +509,15 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, - enum bpf_attach_type type) + enum bpf_attach_type type, + void *t_ctx) { struct bpf_sock_addr_kern ctx = { .sk = sk, .uaddr = uaddr, + .t_ctx = t_ctx, }; + struct sockaddr_storage unspec; struct cgroup *cgrp; int ret; @@ -523,6 +527,11 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) return 0; + if (!ctx.uaddr) { + memset(&unspec, 0, sizeof(unspec)); + ctx.uaddr = (struct sockaddr *)&unspec; + } + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 388d4feda348..e254526d6744 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1249,6 +1249,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: return 0; default: return -EINVAL; @@ -1565,6 +1567,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -1635,6 +1639,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -1692,6 +1698,8 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET6_POST_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: break; diff --git a/net/core/filter.c b/net/core/filter.c index acf1f4fb99d1..24e6ce8be567 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5299,6 
+5299,7 @@ static bool sock_addr_is_valid_access(int off, int size, switch (prog->expected_attach_type) { case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: break; default: return false; @@ -5308,6 +5309,24 @@ static bool sock_addr_is_valid_access(int off, int size, switch (prog->expected_attach_type) { case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP6_SENDMSG: + break; + default: + return false; + } + break; + case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): + switch (prog->expected_attach_type) { + case BPF_CGROUP_UDP4_SENDMSG: + break; + default: + return false; + } + break; + case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], + msg_src_ip6[3]): + switch (prog->expected_attach_type) { + case BPF_CGROUP_UDP6_SENDMSG: break; default: return false; @@ -5318,6 +5337,9 @@ static bool sock_addr_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct bpf_sock_addr, user_ip4): case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): + case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): + case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], + msg_src_ip6[3]): /* Only narrow read access allowed for now. */ if (type == BPF_READ) { bpf_ctx_record_field_size(info, size_default); @@ -6072,6 +6094,23 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); break; + + case offsetof(struct bpf_sock_addr, msg_src_ip4): + /* Treat t_ctx as struct in_addr for msg_src_ip4. */ + SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( + struct bpf_sock_addr_kern, struct in_addr, t_ctx, + s_addr, BPF_SIZE(si->code), 0, tmp_reg); + break; + + case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], + msg_src_ip6[3]): + off = si->off; + off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]); + /* Treat t_ctx as struct in6_addr for msg_src_ip6. */ + SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( + struct bpf_sock_addr_kern, struct in6_addr, t_ctx, + s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg); + break; } return insn - insn_buf; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d71f1f3e1155..3c27d00b5730 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -901,6 +901,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); + DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); struct flowi4 fl4_stack; struct flowi4 *fl4; int ulen = len; @@ -955,8 +956,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* * Get and verify the address. */ - if (msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); + if (usin) { if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) { @@ -1010,6 +1010,22 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rcu_read_unlock(); } + if (cgroup_bpf_enabled && !connected) { + err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, + (struct sockaddr *)usin, &ipc.addr); + if (err) + goto out_free; + if (usin) { + if (usin->sin_port == 0) { + /* BPF program set invalid port. Reject it. 
*/ + err = -EINVAL; + goto out_free; + } + daddr = usin->sin_addr.s_addr; + dport = usin->sin_port; + } + } + saddr = ipc.addr; ipc.addr = faddr = daddr; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 426c9d2b418d..9f729a7b8cf0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1316,6 +1316,29 @@ do_udp_sendmsg: fl6.saddr = np->saddr; fl6.fl6_sport = inet->inet_sport; + if (cgroup_bpf_enabled && !connected) { + err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, + (struct sockaddr *)sin6, &fl6.saddr); + if (err) + goto out_no_dst; + if (sin6) { + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { + /* BPF program rewrote IPv6-only by IPv4-mapped + * IPv6. It's currently unsupported. + */ + err = -ENOTSUPP; + goto out_no_dst; + } + if (sin6->sin6_port == 0) { + /* BPF program set invalid port. Reject it. */ + err = -EINVAL; + goto out_no_dst; + } + fl6.fl6_dport = sin6->sin6_port; + fl6.daddr = sin6->sin6_addr; + } + } + final_p = fl6_update_dst(&fl6, opt, &final); if (final_p) connected = false; @@ -1395,6 +1418,7 @@ do_append_data: out: dst_release(dst); +out_no_dst: fl6_sock_release(flowlabel); txopt_put(opt_to_free); if (!err) -- cgit v1.2.3 From f4364dcfc86df7c1ca47b256eaf6b6d0cdd0d936 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 27 May 2018 12:24:09 +0100 Subject: media: rc: introduce BPF_PROG_LIRC_MODE2 Add support for BPF_PROG_LIRC_MODE2. This type of BPF program can call rc_keydown() to report decoded IR scancodes, or rc_repeat() to report that the last key should be repeated. The bpf program can be attached using the bpf(BPF_PROG_ATTACH) syscall; the target_fd must be the /dev/lircN device. Acked-by: Yonghong Song Signed-off-by: Sean Young Signed-off-by: Daniel Borkmann --- drivers/media/rc/Kconfig | 13 ++ drivers/media/rc/Makefile | 1 + drivers/media/rc/bpf-lirc.c | 313 ++++++++++++++++++++++++++++++++++++++++ drivers/media/rc/lirc_dev.c | 30 ++++ drivers/media/rc/rc-core-priv.h | 21 +++ drivers/media/rc/rc-ir-raw.c | 12 +- include/linux/bpf_lirc.h | 29 ++++ include/linux/bpf_types.h | 3 + include/uapi/linux/bpf.h | 53 ++++++- kernel/bpf/syscall.c | 7 + 10 files changed, 479 insertions(+), 3 deletions(-) create mode 100644 drivers/media/rc/bpf-lirc.c create mode 100644 include/linux/bpf_lirc.h (limited to 'kernel/bpf/syscall.c') diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig index eb2c3b6eca7f..d5b35a6ba899 100644 --- a/drivers/media/rc/Kconfig +++ b/drivers/media/rc/Kconfig @@ -25,6 +25,19 @@ config LIRC passes raw IR to and from userspace, which is needed for IR transmitting (aka "blasting") and for the lirc daemon. +config BPF_LIRC_MODE2 + bool "Support for eBPF programs attached to lirc devices" + depends on BPF_SYSCALL + depends on RC_CORE=y + depends on LIRC + help + Allow attaching eBPF programs to a lirc device using the bpf(2) + syscall command BPF_PROG_ATTACH. This is supported for raw IR + receivers. + + These eBPF programs can be used to decode IR into scancodes, for + IR protocols not supported by the kernel decoders.
+ menuconfig RC_DECODERS bool "Remote controller decoders" depends on RC_CORE diff --git a/drivers/media/rc/Makefile b/drivers/media/rc/Makefile index 2e1c87066f6c..e0340d043fe8 100644 --- a/drivers/media/rc/Makefile +++ b/drivers/media/rc/Makefile @@ -5,6 +5,7 @@ obj-y += keymaps/ obj-$(CONFIG_RC_CORE) += rc-core.o rc-core-y := rc-main.o rc-ir-raw.o rc-core-$(CONFIG_LIRC) += lirc_dev.o +rc-core-$(CONFIG_BPF_LIRC_MODE2) += bpf-lirc.o obj-$(CONFIG_IR_NEC_DECODER) += ir-nec-decoder.o obj-$(CONFIG_IR_RC5_DECODER) += ir-rc5-decoder.o obj-$(CONFIG_IR_RC6_DECODER) += ir-rc6-decoder.o diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c new file mode 100644 index 000000000000..40826bba06b6 --- /dev/null +++ b/drivers/media/rc/bpf-lirc.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0 +// bpf-lirc.c - handles bpf +// +// Copyright (C) 2018 Sean Young + +#include +#include +#include +#include "rc-core-priv.h" + +/* + * BPF interface for raw IR + */ +const struct bpf_prog_ops lirc_mode2_prog_ops = { +}; + +BPF_CALL_1(bpf_rc_repeat, u32*, sample) +{ + struct ir_raw_event_ctrl *ctrl; + + ctrl = container_of(sample, struct ir_raw_event_ctrl, bpf_sample); + + rc_repeat(ctrl->dev); + + return 0; +} + +static const struct bpf_func_proto rc_repeat_proto = { + .func = bpf_rc_repeat, + .gpl_only = true, /* rc_repeat is EXPORT_SYMBOL_GPL */ + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +/* + * Currently rc-core does not support 64-bit scancodes, but there are many + * known protocols with more than 32 bits. So, define the interface as u64 + * as a future-proof. + */ +BPF_CALL_4(bpf_rc_keydown, u32*, sample, u32, protocol, u64, scancode, + u32, toggle) +{ + struct ir_raw_event_ctrl *ctrl; + + ctrl = container_of(sample, struct ir_raw_event_ctrl, bpf_sample); + + rc_keydown(ctrl->dev, protocol, scancode, toggle != 0); + + return 0; +} + +static const struct bpf_func_proto rc_keydown_proto = { + .func = bpf_rc_keydown, + .gpl_only = true, /* rc_keydown is EXPORT_SYMBOL_GPL */ + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto * +lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_rc_repeat: + return &rc_repeat_proto; + case BPF_FUNC_rc_keydown: + return &rc_keydown_proto; + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; + case BPF_FUNC_trace_printk: + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); + /* fall through */ + default: + return NULL; + } +} + +static bool lirc_mode2_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + /* We have one field of u32 */ + return type == BPF_READ && off == 0 && size == sizeof(u32); +} + +const struct bpf_verifier_ops lirc_mode2_verifier_ops = { + .get_func_proto = lirc_mode2_func_proto, + .is_valid_access = lirc_mode2_is_valid_access +}; + +#define BPF_MAX_PROGS 64 + +static int lirc_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog) +{ + struct bpf_prog_array __rcu *old_array; + 
struct bpf_prog_array *new_array; + struct ir_raw_event_ctrl *raw; + int ret; + + if (rcdev->driver_type != RC_DRIVER_IR_RAW) + return -EINVAL; + + ret = mutex_lock_interruptible(&ir_raw_handler_lock); + if (ret) + return ret; + + raw = rcdev->raw; + if (!raw) { + ret = -ENODEV; + goto unlock; + } + + if (raw->progs && bpf_prog_array_length(raw->progs) >= BPF_MAX_PROGS) { + ret = -E2BIG; + goto unlock; + } + + old_array = raw->progs; + ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); + if (ret < 0) + goto unlock; + + rcu_assign_pointer(raw->progs, new_array); + bpf_prog_array_free(old_array); + +unlock: + mutex_unlock(&ir_raw_handler_lock); + return ret; +} + +static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog) +{ + struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *new_array; + struct ir_raw_event_ctrl *raw; + int ret; + + if (rcdev->driver_type != RC_DRIVER_IR_RAW) + return -EINVAL; + + ret = mutex_lock_interruptible(&ir_raw_handler_lock); + if (ret) + return ret; + + raw = rcdev->raw; + if (!raw) { + ret = -ENODEV; + goto unlock; + } + + old_array = raw->progs; + ret = bpf_prog_array_copy(old_array, prog, NULL, &new_array); + /* + * Do not use bpf_prog_array_delete_safe() as we would end up + * with a dummy entry in the array, and the we would free the + * dummy in lirc_bpf_free() + */ + if (ret) + goto unlock; + + rcu_assign_pointer(raw->progs, new_array); + bpf_prog_array_free(old_array); +unlock: + mutex_unlock(&ir_raw_handler_lock); + return ret; +} + +void lirc_bpf_run(struct rc_dev *rcdev, u32 sample) +{ + struct ir_raw_event_ctrl *raw = rcdev->raw; + + raw->bpf_sample = sample; + + if (raw->progs) + BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, BPF_PROG_RUN); +} + +/* + * This should be called once the rc thread has been stopped, so there can be + * no concurrent bpf execution. 
+ */ +void lirc_bpf_free(struct rc_dev *rcdev) +{ + struct bpf_prog **progs; + + if (!rcdev->raw->progs) + return; + + progs = rcu_dereference(rcdev->raw->progs)->progs; + while (*progs) + bpf_prog_put(*progs++); + + bpf_prog_array_free(rcdev->raw->progs); +} + +int lirc_prog_attach(const union bpf_attr *attr) +{ + struct bpf_prog *prog; + struct rc_dev *rcdev; + int ret; + + if (attr->attach_flags) + return -EINVAL; + + prog = bpf_prog_get_type(attr->attach_bpf_fd, + BPF_PROG_TYPE_LIRC_MODE2); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + rcdev = rc_dev_get_from_fd(attr->target_fd); + if (IS_ERR(rcdev)) { + bpf_prog_put(prog); + return PTR_ERR(rcdev); + } + + ret = lirc_bpf_attach(rcdev, prog); + if (ret) + bpf_prog_put(prog); + + put_device(&rcdev->dev); + + return ret; +} + +int lirc_prog_detach(const union bpf_attr *attr) +{ + struct bpf_prog *prog; + struct rc_dev *rcdev; + int ret; + + if (attr->attach_flags) + return -EINVAL; + + prog = bpf_prog_get_type(attr->attach_bpf_fd, + BPF_PROG_TYPE_LIRC_MODE2); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + rcdev = rc_dev_get_from_fd(attr->target_fd); + if (IS_ERR(rcdev)) { + bpf_prog_put(prog); + return PTR_ERR(rcdev); + } + + ret = lirc_bpf_detach(rcdev, prog); + + bpf_prog_put(prog); + put_device(&rcdev->dev); + + return ret; +} + +int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + struct bpf_prog_array __rcu *progs; + struct rc_dev *rcdev; + u32 cnt, flags = 0; + int ret; + + if (attr->query.query_flags) + return -EINVAL; + + rcdev = rc_dev_get_from_fd(attr->query.target_fd); + if (IS_ERR(rcdev)) + return PTR_ERR(rcdev); + + if (rcdev->driver_type != RC_DRIVER_IR_RAW) { + ret = -EINVAL; + goto put; + } + + ret = mutex_lock_interruptible(&ir_raw_handler_lock); + if (ret) + goto put; + + progs = rcdev->raw->progs; + cnt = progs ? 
bpf_prog_array_length(progs) : 0; + + if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) { + ret = -EFAULT; + goto unlock; + } + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) { + ret = -EFAULT; + goto unlock; + } + + if (attr->query.prog_cnt != 0 && prog_ids && cnt) + ret = bpf_prog_array_copy_to_user(progs, prog_ids, cnt); + +unlock: + mutex_unlock(&ir_raw_handler_lock); +put: + put_device(&rcdev->dev); + + return ret; +} diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 24e9fbb80e81..da7013a12a58 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -104,6 +105,12 @@ void ir_lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev) TO_US(ev.duration), TO_STR(ev.pulse)); } + /* + * bpf does not care about the gap generated above; that exists + * for backwards compatibility + */ + lirc_bpf_run(dev, sample); + spin_lock_irqsave(&dev->lirc_fh_lock, flags); list_for_each_entry(fh, &dev->lirc_fh, list) { if (LIRC_IS_TIMEOUT(sample) && !fh->send_timeout_reports) @@ -816,4 +823,27 @@ void __exit lirc_dev_exit(void) unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX); } +struct rc_dev *rc_dev_get_from_fd(int fd) +{ + struct fd f = fdget(fd); + struct lirc_fh *fh; + struct rc_dev *dev; + + if (!f.file) + return ERR_PTR(-EBADF); + + if (f.file->f_op != &lirc_fops) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + fh = f.file->private_data; + dev = fh->rc; + + get_device(&dev->dev); + fdput(f); + + return dev; +} + MODULE_ALIAS("lirc_dev"); diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h index e0e6a17460f6..eb004757038b 100644 --- a/drivers/media/rc/rc-core-priv.h +++ b/drivers/media/rc/rc-core-priv.h @@ -13,6 +13,7 @@ #define MAX_IR_EVENT_SIZE 512 #include +#include #include /** @@ -57,6 +58,11 @@ struct ir_raw_event_ctrl { /* raw decoder state follows */ struct ir_raw_event prev_ev; struct ir_raw_event this_ev; + +#ifdef CONFIG_BPF_LIRC_MODE2 + u32 bpf_sample; + struct bpf_prog_array __rcu *progs; +#endif struct nec_dec { int state; unsigned count; @@ -126,6 +132,9 @@ struct ir_raw_event_ctrl { } imon; }; +/* Mutex for locking raw IR processing and handler change */ +extern struct mutex ir_raw_handler_lock; + /* macros for IR decoders */ static inline bool geq_margin(unsigned d1, unsigned d2, unsigned margin) { @@ -288,6 +297,7 @@ void ir_lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev); void ir_lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc); int ir_lirc_register(struct rc_dev *dev); void ir_lirc_unregister(struct rc_dev *dev); +struct rc_dev *rc_dev_get_from_fd(int fd); #else static inline int lirc_dev_init(void) { return 0; } static inline void lirc_dev_exit(void) {} @@ -299,4 +309,15 @@ static inline int ir_lirc_register(struct rc_dev *dev) { return 0; } static inline void ir_lirc_unregister(struct rc_dev *dev) { } #endif +/* + * bpf interface + */ +#ifdef CONFIG_BPF_LIRC_MODE2 +void lirc_bpf_free(struct rc_dev *dev); +void lirc_bpf_run(struct rc_dev *dev, u32 sample); +#else +static inline void lirc_bpf_free(struct rc_dev *dev) { } +static inline void lirc_bpf_run(struct rc_dev *dev, u32 sample) { } +#endif + #endif /* _RC_CORE_PRIV */ diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c index 374f83105a23..7675b7ee5bc7 100644 --- a/drivers/media/rc/rc-ir-raw.c +++ b/drivers/media/rc/rc-ir-raw.c @@ -14,7 +14,7 @@ static 
LIST_HEAD(ir_raw_client_list); /* Used to handle IR raw handler extensions */ -static DEFINE_MUTEX(ir_raw_handler_lock); +DEFINE_MUTEX(ir_raw_handler_lock); static LIST_HEAD(ir_raw_handler_list); static atomic64_t available_protocols = ATOMIC64_INIT(0); @@ -621,9 +621,17 @@ void ir_raw_event_unregister(struct rc_dev *dev) list_for_each_entry(handler, &ir_raw_handler_list, list) if (handler->raw_unregister) handler->raw_unregister(dev); - mutex_unlock(&ir_raw_handler_lock); + + lirc_bpf_free(dev); ir_raw_event_free(dev); + + /* + * A user can be calling bpf(BPF_PROG_{QUERY|ATTACH|DETACH}), so + * ensure that the raw member is null on unlock; this is how + * "device gone" is checked. + */ + mutex_unlock(&ir_raw_handler_lock); } /* diff --git a/include/linux/bpf_lirc.h b/include/linux/bpf_lirc.h new file mode 100644 index 000000000000..5f8a4283092d --- /dev/null +++ b/include/linux/bpf_lirc.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BPF_LIRC_H +#define _BPF_LIRC_H + +#include + +#ifdef CONFIG_BPF_LIRC_MODE2 +int lirc_prog_attach(const union bpf_attr *attr); +int lirc_prog_detach(const union bpf_attr *attr); +int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +#else +static inline int lirc_prog_attach(const union bpf_attr *attr) +{ + return -EINVAL; +} + +static inline int lirc_prog_detach(const union bpf_attr *attr) +{ + return -EINVAL; +} + +static inline int lirc_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EINVAL; +} +#endif + +#endif /* _BPF_LIRC_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index b161e506dcfc..c5700c2d5549 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -26,6 +26,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) #endif +#ifdef CONFIG_BPF_LIRC_MODE2 +BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 33f37eb0b6bf..64ac0f7a689e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -143,6 +143,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_RAW_TRACEPOINT, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_PROG_TYPE_LWT_SEG6LOCAL, + BPF_PROG_TYPE_LIRC_MODE2, }; enum bpf_attach_type { @@ -162,6 +163,7 @@ enum bpf_attach_type { BPF_CGROUP_INET6_POST_BIND, BPF_CGROUP_UDP4_SENDMSG, BPF_CGROUP_UDP6_SENDMSG, + BPF_LIRC_MODE2, __MAX_BPF_ATTACH_TYPE }; @@ -2005,6 +2007,53 @@ union bpf_attr { * direct packet access. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown** () again with the same values, or calling + * **bpf_rc_repeat** (). + * + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. + * + * The *ctx* should point to the lirc sample as passed into + * the program. 
+ * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * + * This helper is only available if the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * + * Return + * 0 + * + * int bpf_rc_repeat(void *ctx) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for the previously generated + * key down event. + * + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available if the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * + * Return + * 0 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2083,7 +2132,9 @@ union bpf_attr { FN(lwt_push_encap), \ FN(lwt_seg6_store_bytes), \ FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), + FN(lwt_seg6_action), \ + FN(rc_repeat), \ + FN(rc_keydown), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e254526d6744..7365d79ae00d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include @@ -1582,6 +1583,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true); + case BPF_LIRC_MODE2: + return lirc_prog_attach(attr); default: return -EINVAL; } @@ -1654,6 +1657,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false); + case BPF_LIRC_MODE2: + return lirc_prog_detach(attr); default: return -EINVAL; } @@ -1703,6 +1708,8 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: break; + case BPF_LIRC_MODE2: + return lirc_prog_query(attr, uattr); default: return -EINVAL; } -- cgit v1.2.3 From 4316b40914ecde3738968225af56e650e8b61938 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 2 Jun 2018 23:06:34 +0200 Subject: bpf: show prog and map id in fdinfo It's trivial and straightforward to expose these ids for scripts that can then use them along with bpftool in order to inspect an individual application's used maps and progs. Right now we dump some basic information in the fdinfo file, but with the help of the map/prog id, full introspection becomes possible.
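Given the two format strings above, a tool can recover the id without any bpf(2) call; a sketch (not from the patch) for map fds, with "prog_id:" working the same way for program fds:

#include <stdio.h>

/* Return the map_id advertised in /proc/self/fdinfo/<fd>, or -1. */
static int map_id_from_fd(int fd)
{
	char path[64], line[256];
	FILE *f;
	int id = -1;

	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
	f = fopen(path, "r");
	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		/* the kernel prints "map_id:\t%u\n" */
		if (sscanf(line, "map_id: %d", &id) == 1)
			break;
	}
	fclose(f);
	return id;
}
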
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel/bpf/syscall.c') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7365d79ae00d..0fa20624707f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -327,13 +327,15 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "value_size:\t%u\n" "max_entries:\t%u\n" "map_flags:\t%#x\n" - "memlock:\t%llu\n", + "memlock:\t%llu\n" + "map_id:\t%u\n", map->map_type, map->key_size, map->value_size, map->max_entries, map->map_flags, - map->pages * 1ULL << PAGE_SHIFT); + map->pages * 1ULL << PAGE_SHIFT, + map->id); if (owner_prog_type) { seq_printf(m, "owner_prog_type:\t%u\n", @@ -1070,11 +1072,13 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) "prog_type:\t%u\n" "prog_jited:\t%u\n" "prog_tag:\t%s\n" - "memlock:\t%llu\n", + "memlock:\t%llu\n" + "prog_id:\t%u\n", prog->type, prog->jited, prog_tag, - prog->pages * 1ULL << PAGE_SHIFT); + prog->pages * 1ULL << PAGE_SHIFT, + prog->aux->id); } #endif -- cgit v1.2.3
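Tying together the bpf_prog_info extensions from earlier in this series (nr_jited_ksyms/jited_ksyms and nr_jited_func_lens/jited_func_lens), a dump tool could fetch both arrays as sketched below. This is not from the patches; it assumes CAP_SYS_ADMIN and bpf_dump_raw_ok(), since the kernel otherwise zeroes or skips these fields:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int dump_func_layout(int prog_fd)
{
	struct bpf_prog_info info;
	union bpf_attr attr;
	__u64 *ksyms;
	__u32 *lens, n, i;

	memset(&info, 0, sizeof(info));
	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = prog_fd;
	attr.info.info = (__u64)(unsigned long)&info;
	attr.info.info_len = sizeof(info);

	/* 1st call: learn the function count (0 for single-func programs). */
	if (syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
		return -1;
	n = info.nr_jited_ksyms;
	if (!n)
		return 0;

	ksyms = calloc(n, sizeof(*ksyms));
	lens = calloc(n, sizeof(*lens));
	if (!ksyms || !lens) {
		free(ksyms);
		free(lens);
		return -1;
	}

	/* 2nd call: hand the kernel both arrays to fill in. Note that the
	 * reported symbol addresses are page-masked by the kernel.
	 */
	memset(&info, 0, sizeof(info));
	info.nr_jited_ksyms = n;
	info.jited_ksyms = (__u64)(unsigned long)ksyms;
	info.nr_jited_func_lens = n;
	info.jited_func_lens = (__u64)(unsigned long)lens;
	if (!syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
		for (i = 0; i < n; i++)
			printf("func %u: addr 0x%llx jited_len %u\n", i,
			       (unsigned long long)ksyms[i], lens[i]);

	free(ksyms);
	free(lens);
	return 0;
}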