summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2018-08-03 00:47:33 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2018-08-03 00:47:34 +0200
commit82c018d734a7aa24a9ac8f33f610c55923fb6911 (patch)
tree46c6811129c61671aeeb37b64a7c209e95a92cd4 /tools
parentfbeb1603bf4e9baa82da8f794de42949d0fe5e25 (diff)
parent28ba068760a7e136a7fe2783bca74e3f43affb9b (diff)
downloadlinux-82c018d734a7aa24a9ac8f33f610c55923fb6911.tar.bz2
Merge branch 'bpf-cgroup-local-storage'
Roman Gushchin says: ==================== This patchset implements cgroup local storage for bpf programs. The main idea is to provide a fast accessible memory for storing various per-cgroup data, e.g. number of transmitted packets. Cgroup local storage looks as a special type of map for userspace, and is accessible using generic bpf maps API for reading and updating of the data. The (cgroup inode id, attachment type) pair is used as a map key. A user can't create new entries or destroy existing entries; it happens automatically when a user attaches/detaches a bpf program to a cgroup. From a bpf program's point of view, cgroup storage is accessible without lookup using the special get_local_storage() helper function. It takes a map fd as an argument. It always returns a valid pointer to the corresponding memory area. To implement such a lookup-free access a pointer to the cgroup storage is saved for an attachment of a bpf program to a cgroup, if required by the program. Before running the program, it's saved in a special global per-cpu variable, which is accessible from the get_local_storage() helper. This patchset implement only cgroup local storage, however the API is intentionally made extensible to support other local storage types further: e.g. thread local storage, socket local storage, etc. v7->v6: - fixed a use-after-free bug, caused by not clearing prog->aux->cgroup_storage pointer after releasing the map v6->v5: - fixed an error with returning -EINVAL instead of a pointer v5->v4: - fixed an issue in verifier (test that flags == 0 properly) - added a corresponding test - added a note about synchronization, sync docs to tools/uapi/... - switched the cgroup test to use XADD - added a check for attr->max_entries to be 0, and atter->max_flags to be sane - use bpf_uncharge_memlock() in bpf_uncharge_memlock() - rebased to bpf-next v4->v3: - fixed a leak in cgroup attachment code (discovered by Daniel) - cgroup storage map will be released if the corresponding bpf program failed to load by any reason - introduced bpf_uncharge_memlock() helper v3->v2: - fixed more build and sparse issues - rebased to bpf-next v2->v1: - fixed build issues - removed explicit rlimit calls in patch 14 - rebased to bpf-next ==================== Signed-off-by: Roman Gushchin <guro@fb.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Martin KaFai Lau <kafai@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'tools')
-rw-r--r--tools/bpf/bpftool/map.c1
-rw-r--r--tools/include/uapi/linux/bpf.h27
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c130
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c140
6 files changed, 300 insertions, 3 deletions
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 0ee3ba479d87..2dd1f8d9cc2d 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -68,6 +68,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
[BPF_MAP_TYPE_CPUMAP] = "cpumap",
[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
+ [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
};
static bool map_is_per_cpu(__u32 type)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0ebaaf7f3568..dd5758dc35d3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
__u8 data[0]; /* Arbitrary size */
};
+struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type */
+};
+
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
@@ -120,6 +125,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
+ BPF_MAP_TYPE_CGROUP_STORAGE,
};
enum bpf_prog_type {
@@ -2089,6 +2095,24 @@ union bpf_attr {
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ * Description
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the bpf program type, a local storage area
+ * can be shared between multiple instances of the bpf program,
+ * running simultaneously.
+ *
+ * A user should care about the synchronization by himself.
+ * For example, by using the BPF_STX_XADD instruction to alter
+ * the shared data.
+ * Return
+ * Pointer to the local storage area.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2171,7 +2195,8 @@ union bpf_attr {
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id), \
- FN(get_current_cgroup_id),
+ FN(get_current_cgroup_id), \
+ FN(get_local_storage),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1b28277998e2..ad241ddba350 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,7 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
- test_socket_cookie
+ test_socket_cookie test_cgroup_storage
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
@@ -66,6 +66,7 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
$(OUTPUT)/test_progs: trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
.PHONY: force
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 19a424483f6e..cb9fcfbc9307 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -135,6 +135,8 @@ static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
(void *) BPF_FUNC_rc_keydown;
static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
+static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
+ (void *) BPF_FUNC_get_local_storage;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
new file mode 100644
index 000000000000..dc83fb2d3f27
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <bpf/bpf.h>
+#include <linux/filter.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cgroup_helpers.h"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/"
+
+int main(int argc, char **argv)
+{
+ struct bpf_insn prog[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
+ BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ int error = EXIT_FAILURE;
+ int map_fd, prog_fd, cgroup_fd;
+ struct bpf_cgroup_storage_key key;
+ unsigned long long value;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
+ sizeof(value), 0, 0);
+ if (map_fd < 0) {
+ printf("Failed to create map: %s\n", strerror(errno));
+ goto out;
+ }
+
+ prog[0].imm = map_fd;
+ prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+ if (prog_fd < 0) {
+ printf("Failed to load bpf program: %s\n", bpf_log_buf);
+ goto out;
+ }
+
+ if (setup_cgroup_environment()) {
+ printf("Failed to setup cgroup environment\n");
+ goto err;
+ }
+
+ /* Create a cgroup, get fd, and join it */
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (!cgroup_fd) {
+ printf("Failed to create test cgroup\n");
+ goto err;
+ }
+
+ if (join_cgroup(TEST_CGROUP)) {
+ printf("Failed to join cgroup\n");
+ goto err;
+ }
+
+ /* Attach the bpf program */
+ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
+ printf("Failed to attach bpf program\n");
+ goto err;
+ }
+
+ if (bpf_map_get_next_key(map_fd, NULL, &key)) {
+ printf("Failed to get the first key in cgroup storage\n");
+ goto err;
+ }
+
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup cgroup storage\n");
+ goto err;
+ }
+
+ /* Every second packet should be dropped */
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+ /* Check the counter in the cgroup local storage */
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup cgroup storage\n");
+ goto err;
+ }
+
+ if (value != 3) {
+ printf("Unexpected data in the cgroup storage: %llu\n", value);
+ goto err;
+ }
+
+ /* Bump the counter in the cgroup local storage */
+ value++;
+ if (bpf_map_update_elem(map_fd, &key, &value, 0)) {
+ printf("Failed to update the data in the cgroup storage\n");
+ goto err;
+ }
+
+ /* Every second packet should be dropped */
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+ /* Check the final value of the counter in the cgroup local storage */
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup the cgroup storage\n");
+ goto err;
+ }
+
+ if (value != 7) {
+ printf("Unexpected data in the cgroup storage: %llu\n", value);
+ goto err;
+ }
+
+ error = 0;
+ printf("test_cgroup_storage:PASS\n");
+
+err:
+ cleanup_cgroup_environment();
+
+out:
+ return error;
+}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c582afba9d1f..4b5e03c25204 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,7 +50,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 7
+#define MAX_NR_MAPS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -70,6 +70,7 @@ struct bpf_test {
int fixup_prog1[MAX_FIXUPS];
int fixup_prog2[MAX_FIXUPS];
int fixup_map_in_map[MAX_FIXUPS];
+ int fixup_cgroup_storage[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
uint32_t retval;
@@ -4631,6 +4632,121 @@ static struct bpf_test tests[] = {
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "valid cgroup storage access",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "fd 1 is not pointing to valid bpf_map",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cgroup storage access 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=256 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 7),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
"multiple registers share map_lookup_elem result",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 10),
@@ -12462,6 +12578,19 @@ static int create_map_in_map(void)
return outer_map_fd;
}
+static int create_cgroup_storage(void)
+{
+ int fd;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key),
+ TEST_DATA_LEN, 0, 0);
+ if (fd < 0)
+ printf("Failed to create array '%s'!\n", strerror(errno));
+
+ return fd;
+}
+
static char bpf_vlog[UINT_MAX >> 8];
static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
@@ -12474,6 +12603,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
int *fixup_prog1 = test->fixup_prog1;
int *fixup_prog2 = test->fixup_prog2;
int *fixup_map_in_map = test->fixup_map_in_map;
+ int *fixup_cgroup_storage = test->fixup_cgroup_storage;
if (test->fill_helper)
test->fill_helper(test);
@@ -12541,6 +12671,14 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
fixup_map_in_map++;
} while (*fixup_map_in_map);
}
+
+ if (*fixup_cgroup_storage) {
+ map_fds[7] = create_cgroup_storage();
+ do {
+ prog[*fixup_cgroup_storage].imm = map_fds[7];
+ fixup_cgroup_storage++;
+ } while (*fixup_cgroup_storage);
+ }
}
static void do_test_single(struct bpf_test *test, bool unpriv,