summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2019-11-18 11:42:00 +0100
committerDaniel Borkmann <daniel@iogearbox.net>2019-11-18 11:42:11 +0100
commitb97e12e594eb3d0fba35c99a83fef56ff692048b (patch)
tree50748951dae082f17a9919dce69ccd78962487bb /tools
parent2893c996d8ae021611d02906b9e6fcd0c765fba4 (diff)
parent5051b384523be92925d13694fabbc6bedf2f907b (diff)
downloadlinux-b97e12e594eb3d0fba35c99a83fef56ff692048b.tar.bz2
Merge branch 'bpf-array-mmap'
Andrii Nakryiko says: ==================== This patch set adds ability to memory-map BPF array maps (single- and multi-element). The primary use case is memory-mapping BPF array maps, created to back global data variables, created by libbpf implicitly. This allows for much better usability, along with avoiding syscalls to read or update data completely. Due to memory-mapping requirements, BPF array map that is supposed to be memory-mapped, has to be created with special BPF_F_MMAPABLE attribute, which triggers slightly different memory allocation strategy internally. See patch 1 for details. Libbpf is extended to detect kernel support for this flag, and if supported, will specify it for all global data maps automatically. Patch #1 refactors bpf_map_inc() and converts bpf_map's refcnt to atomic64_t to make refcounting never fail. Patch #2 does similar refactoring for bpf_prog_add()/bpf_prog_inc(). v5->v6: - add back uref counting (Daniel); v4->v5: - change bpf_prog's refcnt to atomic64_t (Daniel); v3->v4: - add mmap's open() callback to fix refcounting (Johannes); - switch to remap_vmalloc_pages() instead of custom fault handler (Johannes); - converted bpf_map's refcnt/usercnt into atomic64_t; - provide default bpf_map_default_vmops handling open/close properly; v2->v3: - change allocation strategy to avoid extra pointer dereference (Jakub); v1->v2: - fix map lookup code generation for BPF_F_MMAPABLE case; - prevent BPF_F_MMAPABLE flag for all but plain array map type; - centralize ref-counting in generic bpf_map_mmap(); - don't use uref counting (Alexei); - use vfree() directly; - print flags with %x (Song); - extend tests to verify bpf_map_{lookup,update}_elem() logic as well. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'tools')
-rw-r--r--tools/include/uapi/linux/bpf.h3
-rw-r--r--tools/lib/bpf/libbpf.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c220
-rw-r--r--tools/testing/selftests/bpf/progs/test_mmap.c45
5 files changed, 325 insertions, 20 deletions
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4842a134b202..dbbcf0b02970 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -348,6 +348,9 @@ enum bpf_attach_type {
/* Clone map from listener for newly accepted socket */
#define BPF_F_CLONE (1U << 9)
+/* Enable memory-mapping BPF map */
+#define BPF_F_MMAPABLE (1U << 10)
+
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7132c6bdec02..15e91a1d6c11 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -142,6 +142,8 @@ struct bpf_capabilities {
__u32 btf_func:1;
/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
__u32 btf_datasec:1;
+ /* BPF_F_MMAPABLE is supported for arrays */
+ __u32 array_mmap:1;
};
/*
@@ -857,8 +859,6 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
pr_warn("failed to alloc map name\n");
return -ENOMEM;
}
- pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n",
- map_name, map->sec_idx, map->sec_offset);
def = &map->def;
def->type = BPF_MAP_TYPE_ARRAY;
@@ -866,6 +866,12 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
def->value_size = data->d_size;
def->max_entries = 1;
def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
+ if (obj->caps.array_mmap)
+ def->map_flags |= BPF_F_MMAPABLE;
+
+ pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
+ map_name, map->sec_idx, map->sec_offset, def->map_flags);
+
if (data_buff) {
*data_buff = malloc(data->d_size);
if (!*data_buff) {
@@ -2161,6 +2167,27 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
return 0;
}
+static int bpf_object__probe_array_mmap(struct bpf_object *obj)
+{
+ struct bpf_create_map_attr attr = {
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_flags = BPF_F_MMAPABLE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+ };
+ int fd;
+
+ fd = bpf_create_map_xattr(&attr);
+ if (fd >= 0) {
+ obj->caps.array_mmap = 1;
+ close(fd);
+ return 1;
+ }
+
+ return 0;
+}
+
static int
bpf_object__probe_caps(struct bpf_object *obj)
{
@@ -2169,6 +2196,7 @@ bpf_object__probe_caps(struct bpf_object *obj)
bpf_object__probe_global_data,
bpf_object__probe_btf_func,
bpf_object__probe_btf_datasec,
+ bpf_object__probe_array_mmap,
};
int i, ret;
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index f94bd071536b..ec9e2fdd6b89 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "progs/core_reloc_types.h"
+#include <sys/mman.h>
#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
@@ -453,8 +454,15 @@ struct data {
char out[256];
};
+static size_t roundup_page(size_t sz)
+{
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ return (sz + page_size - 1) / page_size * page_size;
+}
+
void test_core_reloc(void)
{
+ const size_t mmap_sz = roundup_page(sizeof(struct data));
struct bpf_object_load_attr load_attr = {};
struct core_reloc_test_case *test_case;
const char *tp_name, *probe_name;
@@ -463,8 +471,8 @@ void test_core_reloc(void)
struct bpf_map *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- const int zero = 0;
- struct data data;
+ struct data *data;
+ void *mmap_data = NULL;
for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
test_case = &test_cases[i];
@@ -476,8 +484,7 @@ void test_core_reloc(void)
);
obj = bpf_object__open_file(test_case->bpf_obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open '%s': %ld\n",
+ if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
test_case->bpf_obj_file, PTR_ERR(obj)))
continue;
@@ -519,24 +526,22 @@ void test_core_reloc(void)
if (CHECK(!data_map, "find_data_map", "data map not found\n"))
goto cleanup;
- memset(&data, 0, sizeof(data));
- memcpy(data.in, test_case->input, test_case->input_len);
-
- err = bpf_map_update_elem(bpf_map__fd(data_map),
- &zero, &data, 0);
- if (CHECK(err, "update_data_map",
- "failed to update .data map: %d\n", err))
+ mmap_data = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, bpf_map__fd(data_map), 0);
+ if (CHECK(mmap_data == MAP_FAILED, "mmap",
+ ".bss mmap failed: %d", errno)) {
+ mmap_data = NULL;
goto cleanup;
+ }
+ data = mmap_data;
+
+ memset(mmap_data, 0, sizeof(*data));
+ memcpy(data->in, test_case->input, test_case->input_len);
/* trigger test run */
usleep(1);
- err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &data);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
- goto cleanup;
-
- equal = memcmp(data.out, test_case->output,
+ equal = memcmp(data->out, test_case->output,
test_case->output_len) == 0;
if (CHECK(!equal, "check_result",
"input/output data don't match\n")) {
@@ -548,12 +553,16 @@ void test_core_reloc(void)
}
for (j = 0; j < test_case->output_len; j++) {
printf("output byte #%d: EXP 0x%02hhx GOT 0x%02hhx\n",
- j, test_case->output[j], data.out[j]);
+ j, test_case->output[j], data->out[j]);
}
goto cleanup;
}
cleanup:
+ if (mmap_data) {
+ CHECK_FAIL(munmap(mmap_data, mmap_sz));
+ mmap_data = NULL;
+ }
if (!IS_ERR_OR_NULL(link)) {
bpf_link__destroy(link);
link = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
new file mode 100644
index 000000000000..051a6d48762c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <sys/mman.h>
+
+struct map_data {
+ __u64 val[512 * 4];
+};
+
+struct bss_data {
+ __u64 in_val;
+ __u64 out_val;
+};
+
+static size_t roundup_page(size_t sz)
+{
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ return (sz + page_size - 1) / page_size * page_size;
+}
+
+void test_mmap(void)
+{
+ const char *file = "test_mmap.o";
+ const char *probe_name = "raw_tracepoint/sys_enter";
+ const char *tp_name = "sys_enter";
+ const size_t bss_sz = roundup_page(sizeof(struct bss_data));
+ const size_t map_sz = roundup_page(sizeof(struct map_data));
+ const int zero = 0, one = 1, two = 2, far = 1500;
+ const long page_size = sysconf(_SC_PAGE_SIZE);
+ int err, duration = 0, i, data_map_fd;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_link *link = NULL;
+ struct bpf_map *data_map, *bss_map;
+ void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
+ volatile struct bss_data *bss_data;
+ volatile struct map_data *map_data;
+ __u64 val = 0;
+
+ obj = bpf_object__open_file("test_mmap.o", NULL);
+ if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
+ file, PTR_ERR(obj)))
+ return;
+ prog = bpf_object__find_program_by_title(obj, probe_name);
+ if (CHECK(!prog, "find_probe", "prog '%s' not found\n", probe_name))
+ goto cleanup;
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "failed to load prog '%s': %d\n",
+ probe_name, err))
+ goto cleanup;
+
+ bss_map = bpf_object__find_map_by_name(obj, "test_mma.bss");
+ if (CHECK(!bss_map, "find_bss_map", ".bss map not found\n"))
+ goto cleanup;
+ data_map = bpf_object__find_map_by_name(obj, "data_map");
+ if (CHECK(!data_map, "find_data_map", "data_map map not found\n"))
+ goto cleanup;
+ data_map_fd = bpf_map__fd(data_map);
+
+ bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+ bpf_map__fd(bss_map), 0);
+ if (CHECK(bss_mmaped == MAP_FAILED, "bss_mmap",
+ ".bss mmap failed: %d\n", errno)) {
+ bss_mmaped = NULL;
+ goto cleanup;
+ }
+ /* map as R/W first */
+ map_mmaped = mmap(NULL, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+ data_map_fd, 0);
+ if (CHECK(map_mmaped == MAP_FAILED, "data_mmap",
+ "data_map mmap failed: %d\n", errno)) {
+ map_mmaped = NULL;
+ goto cleanup;
+ }
+
+ bss_data = bss_mmaped;
+ map_data = map_mmaped;
+
+ CHECK_FAIL(bss_data->in_val);
+ CHECK_FAIL(bss_data->out_val);
+ CHECK_FAIL(map_data->val[0]);
+ CHECK_FAIL(map_data->val[1]);
+ CHECK_FAIL(map_data->val[2]);
+ CHECK_FAIL(map_data->val[far]);
+
+ link = bpf_program__attach_raw_tracepoint(prog, tp_name);
+ if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+
+ bss_data->in_val = 123;
+ val = 111;
+ CHECK_FAIL(bpf_map_update_elem(data_map_fd, &zero, &val, 0));
+
+ usleep(1);
+
+ CHECK_FAIL(bss_data->in_val != 123);
+ CHECK_FAIL(bss_data->out_val != 123);
+ CHECK_FAIL(map_data->val[0] != 111);
+ CHECK_FAIL(map_data->val[1] != 222);
+ CHECK_FAIL(map_data->val[2] != 123);
+ CHECK_FAIL(map_data->val[far] != 3 * 123);
+
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &zero, &val));
+ CHECK_FAIL(val != 111);
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &one, &val));
+ CHECK_FAIL(val != 222);
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &two, &val));
+ CHECK_FAIL(val != 123);
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &far, &val));
+ CHECK_FAIL(val != 3 * 123);
+
+ /* data_map freeze should fail due to R/W mmap() */
+ err = bpf_map_freeze(data_map_fd);
+ if (CHECK(!err || errno != EBUSY, "no_freeze",
+ "data_map freeze succeeded: err=%d, errno=%d\n", err, errno))
+ goto cleanup;
+
+ /* unmap R/W mapping */
+ err = munmap(map_mmaped, map_sz);
+ map_mmaped = NULL;
+ if (CHECK(err, "data_map_munmap", "data_map munmap failed: %d\n", errno))
+ goto cleanup;
+
+ /* re-map as R/O now */
+ map_mmaped = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, data_map_fd, 0);
+ if (CHECK(map_mmaped == MAP_FAILED, "data_mmap",
+ "data_map R/O mmap failed: %d\n", errno)) {
+ map_mmaped = NULL;
+ goto cleanup;
+ }
+ map_data = map_mmaped;
+
+ /* map/unmap in a loop to test ref counting */
+ for (i = 0; i < 10; i++) {
+ int flags = i % 2 ? PROT_READ : PROT_WRITE;
+ void *p;
+
+ p = mmap(NULL, map_sz, flags, MAP_SHARED, data_map_fd, 0);
+ if (CHECK_FAIL(p == MAP_FAILED))
+ goto cleanup;
+ err = munmap(p, map_sz);
+ if (CHECK_FAIL(err))
+ goto cleanup;
+ }
+
+ /* data_map freeze should now succeed due to no R/W mapping */
+ err = bpf_map_freeze(data_map_fd);
+ if (CHECK(err, "freeze", "data_map freeze failed: err=%d, errno=%d\n",
+ err, errno))
+ goto cleanup;
+
+ /* mapping as R/W now should fail */
+ tmp1 = mmap(NULL, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+ data_map_fd, 0);
+ if (CHECK(tmp1 != MAP_FAILED, "data_mmap", "mmap succeeded\n")) {
+ munmap(tmp1, map_sz);
+ goto cleanup;
+ }
+
+ bss_data->in_val = 321;
+ usleep(1);
+ CHECK_FAIL(bss_data->in_val != 321);
+ CHECK_FAIL(bss_data->out_val != 321);
+ CHECK_FAIL(map_data->val[0] != 111);
+ CHECK_FAIL(map_data->val[1] != 222);
+ CHECK_FAIL(map_data->val[2] != 321);
+ CHECK_FAIL(map_data->val[far] != 3 * 321);
+
+ /* check some more advanced mmap() manipulations */
+
+ /* map all but last page: pages 1-3 mapped */
+ tmp1 = mmap(NULL, 3 * page_size, PROT_READ, MAP_SHARED,
+ data_map_fd, 0);
+ if (CHECK(tmp1 == MAP_FAILED, "adv_mmap1", "errno %d\n", errno))
+ goto cleanup;
+
+ /* unmap second page: pages 1, 3 mapped */
+ err = munmap(tmp1 + page_size, page_size);
+ if (CHECK(err, "adv_mmap2", "errno %d\n", errno)) {
+ munmap(tmp1, map_sz);
+ goto cleanup;
+ }
+
+ /* map page 2 back */
+ tmp2 = mmap(tmp1 + page_size, page_size, PROT_READ,
+ MAP_SHARED | MAP_FIXED, data_map_fd, 0);
+ if (CHECK(tmp2 == MAP_FAILED, "adv_mmap3", "errno %d\n", errno)) {
+ munmap(tmp1, page_size);
+ munmap(tmp1 + 2*page_size, page_size);
+ goto cleanup;
+ }
+ CHECK(tmp1 + page_size != tmp2, "adv_mmap4",
+ "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
+
+ /* re-map all 4 pages */
+ tmp2 = mmap(tmp1, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
+ data_map_fd, 0);
+ if (CHECK(tmp2 == MAP_FAILED, "adv_mmap5", "errno %d\n", errno)) {
+ munmap(tmp1, 3 * page_size); /* unmap page 1 */
+ goto cleanup;
+ }
+ CHECK(tmp1 != tmp2, "adv_mmap6", "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
+
+ map_data = tmp2;
+ CHECK_FAIL(bss_data->in_val != 321);
+ CHECK_FAIL(bss_data->out_val != 321);
+ CHECK_FAIL(map_data->val[0] != 111);
+ CHECK_FAIL(map_data->val[1] != 222);
+ CHECK_FAIL(map_data->val[2] != 321);
+ CHECK_FAIL(map_data->val[far] != 3 * 321);
+
+ munmap(tmp2, 4 * page_size);
+cleanup:
+ if (bss_mmaped)
+ CHECK_FAIL(munmap(bss_mmaped, bss_sz));
+ if (map_mmaped)
+ CHECK_FAIL(munmap(map_mmaped, map_sz));
+ if (!IS_ERR_OR_NULL(link))
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
new file mode 100644
index 000000000000..0d2ec9fbcf61
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 512 * 4); /* at least 4 pages of data */
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __type(key, __u32);
+ __type(value, __u64);
+} data_map SEC(".maps");
+
+static volatile __u64 in_val;
+static volatile __u64 out_val;
+
+SEC("raw_tracepoint/sys_enter")
+int test_mmap(void *ctx)
+{
+ int zero = 0, one = 1, two = 2, far = 1500;
+ __u64 val, *p;
+
+ out_val = in_val;
+
+ /* data_map[2] = in_val; */
+ bpf_map_update_elem(&data_map, &two, (const void *)&in_val, 0);
+
+ /* data_map[1] = data_map[0] * 2; */
+ p = bpf_map_lookup_elem(&data_map, &zero);
+ if (p) {
+ val = (*p) * 2;
+ bpf_map_update_elem(&data_map, &one, &val, 0);
+ }
+
+ /* data_map[far] = in_val * 3; */
+ val = in_val * 3;
+ bpf_map_update_elem(&data_map, &far, &val, 0);
+
+ return 0;
+}
+