diff options
author | Alexei Starovoitov <ast@kernel.org> | 2019-04-09 17:05:48 -0700 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2019-04-09 17:05:48 -0700 |
commit | 6316f78306c171f5a857a2442dbeebc7baab3566 (patch) | |
tree | 43130bc503436a74453dc7c28732a138cc9b8ca8 /kernel/bpf/syscall.c | |
parent | ff466b58055f2d28d8ddc1388af312e87a693efe (diff) | |
parent | c861168b7c219838637aaa8c3acc81707aa495f6 (diff) | |
download | linux-6316f78306c171f5a857a2442dbeebc7baab3566.tar.bz2 |
Merge branch 'support-global-data'
Daniel Borkmann says:
====================
This series is a major rework of previously submitted libbpf
patches [0] in order to add global data support for BPF. The
kernel has been extended to add proper infrastructure that allows
for full .bss/.data/.rodata sections on BPF loader side based
upon feedback from LPC discussions [1]. Latter support is then
also added into libbpf in this series which allows for more
natural C-like programming of BPF programs. For more information
on loader, please refer to 'bpf, libbpf: support global data/bss/
rodata sections' patch in this series.
Thanks a lot!
v5 -> v6:
- Removed synchronize_rcu() from map freeze (Jann)
- Rest as-is
v4 -> v5:
- Removed index selection again for ldimm64 (Alexei)
- Adapted related test cases and added new ones to test
rejection of off != 0
v3 -> v4:
- Various fixes in BTF verification e.g. to disallow
Var and DataSec to be an intermediate type during resolve (Martin)
- More BTF test cases added
- Few cleanups in key-less BTF commit (Martin)
- Bump libbpf minor version from 2 to 3
- Renamed and simplified read-only locking
- Various minor improvements all over the place
v2 -> v3:
- Implement BTF support in kernel, libbpf, bpftool, add tests
- Fix idx + off conversion (Andrii)
- Document lower / higher bits for direct value access (Andrii)
- Add tests with small value size (Andrii)
- Add index selection into ldimm64 (Andrii)
- Fix missing fdput() (Jann)
- Reject invalid flags in BPF_F_*_PROG (Jakub)
- Complete rework of libbpf support, includes:
- Add objname to map name (Stanislav)
- Make .rodata map full read-only after setup (Andrii)
- Merge relocation handling into single one (Andrii)
- Store global maps into obj->maps array (Andrii, Alexei)
- Debug message when skipping section (Andrii)
- Reject non-static global data till we have
semantics for sharing them (Yonghong, Andrii, Alexei)
- More test cases and completely reworked prog test (Alexei)
- Fixes, cleanups, etc all over the set
- Not yet addressed:
- Make BTF mandatory for these maps (Alexei)
-> Waiting till BTF support for these lands first
v1 -> v2:
- Instead of 32-bit static data, implement full global
data support (Alexei)
[0] https://patchwork.ozlabs.org/cover/1040290/
[1] http://vger.kernel.org/lpc-bpf2018.html#session-3
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r-- | kernel/bpf/syscall.c | 131 |
1 files changed, 104 insertions, 27 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1d65e56594db..438199e2eca4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -166,13 +166,25 @@ void bpf_map_area_free(void *area) kvfree(area); } +static u32 bpf_map_flags_retain_permanent(u32 flags) +{ + /* Some map creation flags are not tied to the map object but + * rather to the map fd instead, so they have no meaning upon + * map object inspection since multiple file descriptors with + * different (access) properties can exist here. Thus, given + * this has zero meaning for the map itself, lets clear these + * from here. + */ + return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); +} + void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) { map->map_type = attr->map_type; map->key_size = attr->key_size; map->value_size = attr->value_size; map->max_entries = attr->max_entries; - map->map_flags = attr->map_flags; + map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); map->numa_node = bpf_map_attr_numa_node(attr); } @@ -343,6 +355,18 @@ static int bpf_map_release(struct inode *inode, struct file *filp) return 0; } +static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) +{ + fmode_t mode = f.file->f_mode; + + /* Our file permissions may have been overridden by global + * map permissions facing syscall side. + */ + if (READ_ONCE(map->frozen)) + mode &= ~FMODE_CAN_WRITE; + return mode; +} + #ifdef CONFIG_PROC_FS static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) { @@ -364,14 +388,16 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "max_entries:\t%u\n" "map_flags:\t%#x\n" "memlock:\t%llu\n" - "map_id:\t%u\n", + "map_id:\t%u\n" + "frozen:\t%u\n", map->map_type, map->key_size, map->value_size, map->max_entries, map->map_flags, map->pages * 1ULL << PAGE_SHIFT, - map->id); + map->id, + READ_ONCE(map->frozen)); if (owner_prog_type) { seq_printf(m, "owner_prog_type:\t%u\n", @@ -448,10 +474,10 @@ static int bpf_obj_name_cpy(char *dst, const char *src) const char *end = src + BPF_OBJ_NAME_LEN; memset(dst, 0, BPF_OBJ_NAME_LEN); - - /* Copy all isalnum() and '_' char */ + /* Copy all isalnum(), '_' and '.' chars. */ while (src < end && *src) { - if (!isalnum(*src) && *src != '_') + if (!isalnum(*src) && + *src != '_' && *src != '.') return -EINVAL; *dst++ = *src++; } @@ -478,9 +504,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 key_size, value_size; int ret = 0; - key_type = btf_type_id_size(btf, &btf_key_id, &key_size); - if (!key_type || key_size != map->key_size) - return -EINVAL; + /* Some maps allow key to be unspecified. */ + if (btf_key_id) { + key_type = btf_type_id_size(btf, &btf_key_id, &key_size); + if (!key_type || key_size != map->key_size) + return -EINVAL; + } else { + key_type = btf_type_by_id(btf, 0); + if (!map->ops->map_check_btf) + return -EINVAL; + } value_type = btf_type_id_size(btf, &btf_value_id, &value_size); if (!value_type || value_size != map->value_size) @@ -489,6 +522,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->spin_lock_off = btf_find_spin_lock(btf, value_type); if (map_value_has_spin_lock(map)) { + if (map->map_flags & BPF_F_RDONLY_PROG) + return -EACCES; if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_ARRAY && map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) @@ -545,7 +580,7 @@ static int map_create(union bpf_attr *attr) if (attr->btf_key_type_id || attr->btf_value_type_id) { struct btf *btf; - if (!attr->btf_key_type_id || !attr->btf_value_type_id) { + if (!attr->btf_value_type_id) { err = -EINVAL; goto free_map_nouncharge; } @@ -713,8 +748,7 @@ static int map_lookup_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_READ)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { err = -EPERM; goto err_put; } @@ -843,8 +877,7 @@ static int map_update_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; } @@ -955,8 +988,7 @@ static int map_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; } @@ -1007,8 +1039,7 @@ static int map_get_next_key(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_READ)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { err = -EPERM; goto err_put; } @@ -1075,8 +1106,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; } @@ -1118,6 +1148,36 @@ err_put: return err; } +#define BPF_MAP_FREEZE_LAST_FIELD map_fd + +static int map_freeze(const union bpf_attr *attr) +{ + int err = 0, ufd = attr->map_fd; + struct bpf_map *map; + struct fd f; + + if (CHECK_ATTR(BPF_MAP_FREEZE)) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + if (READ_ONCE(map->frozen)) { + err = -EBUSY; + goto err_put; + } + if (!capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto err_put; + } + + WRITE_ONCE(map->frozen, true); +err_put: + fdput(f); + return err; +} + static const struct bpf_prog_ops * const bpf_prog_types[] = { #define BPF_PROG_TYPE(_id, _name) \ [_id] = & _name ## _prog_ops, @@ -2072,13 +2132,26 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) } static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, - unsigned long addr) + unsigned long addr, u32 *off, + u32 *type) { + const struct bpf_map *map; int i; - for (i = 0; i < prog->aux->used_map_cnt; i++) - if (prog->aux->used_maps[i] == (void *)addr) - return prog->aux->used_maps[i]; + for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { + map = prog->aux->used_maps[i]; + if (map == (void *)addr) { + *type = BPF_PSEUDO_MAP_FD; + return map; + } + if (!map->ops->map_direct_value_meta) + continue; + if (!map->ops->map_direct_value_meta(map, addr, off)) { + *type = BPF_PSEUDO_MAP_VALUE; + return map; + } + } + return NULL; } @@ -2086,6 +2159,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) { const struct bpf_map *map; struct bpf_insn *insns; + u32 off, type; u64 imm; int i; @@ -2113,11 +2187,11 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) continue; imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; - map = bpf_map_from_imm(prog, imm); + map = bpf_map_from_imm(prog, imm, &off, &type); if (map) { - insns[i].src_reg = BPF_PSEUDO_MAP_FD; + insns[i].src_reg = type; insns[i].imm = map->id; - insns[i + 1].imm = 0; + insns[i + 1].imm = off; continue; } } @@ -2707,6 +2781,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_MAP_GET_NEXT_KEY: err = map_get_next_key(&attr); break; + case BPF_MAP_FREEZE: + err = map_freeze(&attr); + break; case BPF_PROG_LOAD: err = bpf_prog_load(&attr, uattr); break; |