From b7d3ed5be9bd7e0689eee0f0f36702937cd8f7c8 Mon Sep 17 00:00:00 2001
From: Teng Qin
Date: Fri, 2 Jun 2017 21:03:54 -0700
Subject: bpf: update perf event helper functions documentation

This commit updates documentation of the bpf_perf_event_output and
bpf_perf_event_read helpers to match their implementation.

Signed-off-by: Teng Qin
Signed-off-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 tools/include/uapi/linux/bpf.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'tools/include')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 94dfa9def355..e78aece03628 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -313,8 +313,11 @@ union bpf_attr {
  *     @flags: room for future extensions
  *     Return: 0 on success or negative error
  *
- * u64 bpf_perf_event_read(&map, index)
- *     Return: Number events read or error code
+ * u64 bpf_perf_event_read(map, flags)
+ *     read perf event counter value
+ *     @map: pointer to perf_event_array map
+ *     @flags: index of event in the map or bitmask flags
+ *     Return: value of perf event counter read or error code
  *
  * int bpf_redirect(ifindex, flags)
  *     redirect to another netdev
@@ -328,11 +331,11 @@ union bpf_attr {
  *     @skb: pointer to skb
  *     Return: realm if != 0
  *
- * int bpf_perf_event_output(ctx, map, index, data, size)
+ * int bpf_perf_event_output(ctx, map, flags, data, size)
  *     output perf raw sample
  *     @ctx: struct pt_regs*
  *     @map: pointer to perf_event_array map
- *     @index: index of event in the map
+ *     @flags: index of event in the map or bitmask flags
  *     @data: data on stack to be output as raw data
  *     @size: size of data
  *     Return: 0 on success or negative error
--
cgit v1.2.3
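For reference, the documented convention looks like this from the BPF side. This is a minimal sketch, assuming helper stubs and struct bpf_map_def as in samples/bpf/bpf_helpers.h; the map names and the probed function are illustrative. The low 32 bits of @flags select the array index, and BPF_F_CURRENT_CPU selects the slot of the executing CPU:

#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") counters = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(__u32),
	.max_entries = 64,	/* >= number of possible CPUs */
};

struct bpf_map_def SEC("maps") events = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(__u32),
	.max_entries = 64,
};

SEC("kprobe/sys_write")
int on_sys_write(struct pt_regs *ctx)
{
	struct {
		__u64 counter;
	} sample;

	/* counter value for this CPU's slot; an error is encoded as a
	 * negative value in the returned u64
	 */
	sample.counter = bpf_perf_event_read(&counters, BPF_F_CURRENT_CPU);

	/* emit the raw sample into the current CPU's perf ring buffer */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      &sample, sizeof(sample));
	return 0;
}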
From 95b9afd3987f91c09151158279e165276a95c597 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau
Date: Mon, 5 Jun 2017 12:15:53 -0700
Subject: bpf: Test for bpf ID

Add test to exercise the bpf_prog/map id generation,
bpf_(prog|map)_get_next_id(), bpf_(prog|map)_get_fd_by_id() and
bpf_obj_get_info_by_fd().

Signed-off-by: Martin KaFai Lau
Acked-by: Alexei Starovoitov
Acked-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 tools/include/uapi/linux/bpf.h            |  41 +++++++
 tools/lib/bpf/bpf.c                       |  68 +++++++++++
 tools/lib/bpf/bpf.h                       |   5 +
 tools/testing/selftests/bpf/Makefile      |   2 +-
 tools/testing/selftests/bpf/test_obj_id.c |  35 ++++++
 tools/testing/selftests/bpf/test_progs.c  | 191 ++++++++++++++++++++++++++++++
 6 files changed, 341 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_obj_id.c

(limited to 'tools/include')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e78aece03628..9b2c10b45733 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -82,6 +82,11 @@ enum bpf_cmd {
 	BPF_PROG_ATTACH,
 	BPF_PROG_DETACH,
 	BPF_PROG_TEST_RUN,
+	BPF_PROG_GET_NEXT_ID,
+	BPF_MAP_GET_NEXT_ID,
+	BPF_PROG_GET_FD_BY_ID,
+	BPF_MAP_GET_FD_BY_ID,
+	BPF_OBJ_GET_INFO_BY_FD,
 };
 
 enum bpf_map_type {
@@ -209,6 +214,21 @@ union bpf_attr {
 		__u32		repeat;
 		__u32		duration;
 	} test;
+
+	struct { /* anonymous struct used by BPF_*_GET_*_ID */
+		union {
+			__u32		start_id;
+			__u32		prog_id;
+			__u32		map_id;
+		};
+		__u32		next_id;
+	};
+
+	struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
+		__u32		bpf_fd;
+		__u32		info_len;
+		__aligned_u64	info;
+	} info;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -673,4 +693,25 @@ struct xdp_md {
 	__u32 data_end;
 };
 
+#define BPF_TAG_SIZE	8
+
+struct bpf_prog_info {
+	__u32 type;
+	__u32 id;
+	__u8  tag[BPF_TAG_SIZE];
+	__u32 jited_prog_len;
+	__u32 xlated_prog_len;
+	__aligned_u64 jited_prog_insns;
+	__aligned_u64 xlated_prog_insns;
+} __attribute__((aligned(8)));
+
+struct bpf_map_info {
+	__u32 type;
+	__u32 id;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 max_entries;
+	__u32 map_flags;
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 6e178987af8e..7e0405e1651d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -257,3 +257,71 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
 		*duration = attr.test.duration;
 	return ret;
 }
+
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	union bpf_attr attr;
+	int err;
+
+	bzero(&attr, sizeof(attr));
+	attr.start_id = start_id;
+
+	err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
+	if (!err)
+		*next_id = attr.next_id;
+
+	return err;
+}
+
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	union bpf_attr attr;
+	int err;
+
+	bzero(&attr, sizeof(attr));
+	attr.start_id = start_id;
+
+	err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr));
+	if (!err)
+		*next_id = attr.next_id;
+
+	return err;
+}
+
+int bpf_prog_get_fd_by_id(__u32 id)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.prog_id = id;
+
+	return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_map_get_fd_by_id(__u32 id)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.map_id = id;
+
+	return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
+{
+	union bpf_attr attr;
+	int err;
+
+	bzero(&attr, sizeof(attr));
+	bzero(info, *info_len);
+	attr.info.bpf_fd = prog_fd;
+	attr.info.info_len = *info_len;
+	attr.info.info = ptr_to_u64(info);
+
+	err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+	if (!err)
+		*info_len = attr.info.info_len;
+
+	return err;
+}
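Taken together, the wrappers above support a simple "walk all loaded programs" pattern. A minimal user-space sketch, using only the functions added in this patch; the ENOENT check mirrors the race the selftest below also handles (an id can disappear between get_next_id() and get_fd_by_id()), and the printf format is illustrative:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/bpf.h>
#include "bpf.h"

static int dump_prog_ids(void)
{
	struct bpf_prog_info info;
	__u32 id = 0, info_len;
	int fd, err;

	while (!bpf_prog_get_next_id(id, &id)) {
		fd = bpf_prog_get_fd_by_id(id);
		if (fd < 0) {
			/* id went away between the two calls */
			if (errno == ENOENT)
				continue;
			return -1;
		}

		/* kernel writes back the info_len it actually filled */
		info_len = sizeof(info);
		err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
		if (!err)
			printf("prog id %u type %u\n", info.id, info.type);
		close(fd);
	}

	return 0;
}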
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 972bd8333eb7..16de44a14b48 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -54,5 +54,10 @@ int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
 int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
 		      void *data_out, __u32 *size_out, __u32 *retval,
 		      __u32 *duration);
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
+int bpf_prog_get_fd_by_id(__u32 id);
+int bpf_map_get_fd_by_id(__u32 id);
+int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
 
 #endif
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f389b02d43a0..9f0e07ba5334 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -14,7 +14,7 @@ LDLIBS += -lcap -lelf
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align
 
-TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o
+TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o
 
 TEST_PROGS := test_kmod.sh
diff --git a/tools/testing/selftests/bpf/test_obj_id.c b/tools/testing/selftests/bpf/test_obj_id.c
new file mode 100644
index 000000000000..d8723aaf827a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_obj_id.c
@@ -0,0 +1,35 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+/* It is a dumb bpf program such that it must have no
+ * issue to be loaded since testing the verifier is
+ * not the focus here.
+ */
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") test_map_id = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 1,
+};
+
+SEC("test_prog_id")
+int test_prog_id(struct __sk_buff *skb)
+{
+	__u32 key = 0;
+	__u64 *value;
+
+	value = bpf_map_lookup_elem(&test_map_id, &key);
+
+	return TC_ACT_OK;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b59f5ed4ae40..8189bfc7e277 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -22,6 +22,8 @@ typedef __u16 __sum16;
 #include
 #include
+#include
+#include
 #include
 #include
@@ -70,6 +72,7 @@ static struct {
 		pass_cnt++;						\
 		printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
 	}								\
+	__ret;								\
 })
 
 static int bpf_prog_load(const char *file, enum bpf_prog_type type,
@@ -283,6 +286,193 @@ static void test_tcp_estats(void)
 	bpf_object__close(obj);
 }
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
+static void test_bpf_obj_id(void)
+{
+	const __u64 array_magic_value = 0xfaceb00c;
+	const __u32 array_key = 0;
+	const int nr_iters = 2;
+	const char *file = "./test_obj_id.o";
+
+	struct bpf_object *objs[nr_iters];
+	int prog_fds[nr_iters], map_fds[nr_iters];
+	/* +1 to test for the info_len returned by kernel */
+	struct bpf_prog_info prog_infos[nr_iters + 1];
+	struct bpf_map_info map_infos[nr_iters + 1];
+	char jited_insns[128], xlated_insns[128];
+	__u32 i, next_id, info_len, nr_id_found, duration = 0;
+	int err = 0;
+	__u64 array_value;
+
+	err = bpf_prog_get_fd_by_id(0);
+	CHECK(err >= 0 || errno != ENOENT,
+	      "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
+
+	err = bpf_map_get_fd_by_id(0);
+	CHECK(err >= 0 || errno != ENOENT,
+	      "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
+
+	for (i = 0; i < nr_iters; i++)
+		objs[i] = NULL;
+
+	/* Check bpf_obj_get_info_by_fd() */
+	for (i = 0; i < nr_iters; i++) {
+		err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
+				    &objs[i], &prog_fds[i]);
+		/* test_obj_id.o is a dumb prog. It should never fail
+		 * to load.
+		 */
+		assert(!err);
+
+		/* Check getting prog info */
+		info_len = sizeof(struct bpf_prog_info) * 2;
+		prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns);
+		prog_infos[i].jited_prog_len = sizeof(jited_insns);
+		prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns);
+		prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
+		err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
+					     &info_len);
+		if (CHECK(err ||
+			  prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
+			  info_len != sizeof(struct bpf_prog_info) ||
+			  !prog_infos[i].jited_prog_len ||
+			  !prog_infos[i].xlated_prog_len,
+			  "get-prog-info(fd)",
+			  "err %d errno %d i %d type %d(%d) info_len %u(%lu) jited_prog_len %u xlated_prog_len %u\n",
+			  err, errno, i,
+			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
+			  info_len, sizeof(struct bpf_prog_info),
+			  prog_infos[i].jited_prog_len,
+			  prog_infos[i].xlated_prog_len))
+			goto done;
+
+		map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
+		assert(map_fds[i] >= 0);
+		err = bpf_map_update_elem(map_fds[i], &array_key,
+					  &array_magic_value, 0);
+		assert(!err);
+
+		/* Check getting map info */
+		info_len = sizeof(struct bpf_map_info) * 2;
+		err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
+					     &info_len);
+		if (CHECK(err ||
+			  map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
+			  map_infos[i].key_size != sizeof(__u32) ||
+			  map_infos[i].value_size != sizeof(__u64) ||
+			  map_infos[i].max_entries != 1 ||
+			  map_infos[i].map_flags != 0 ||
+			  info_len != sizeof(struct bpf_map_info),
+			  "get-map-info(fd)",
+			  "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n",
+			  err, errno,
+			  map_infos[i].type, BPF_MAP_TYPE_ARRAY,
+			  info_len, sizeof(struct bpf_map_info),
+			  map_infos[i].key_size,
+			  map_infos[i].value_size,
+			  map_infos[i].max_entries,
+			  map_infos[i].map_flags))
+			goto done;
+	}
+
+	/* Check bpf_prog_get_next_id() */
+	nr_id_found = 0;
+	next_id = 0;
+	while (!bpf_prog_get_next_id(next_id, &next_id)) {
+		struct bpf_prog_info prog_info;
+		int prog_fd;
+
+		info_len = sizeof(prog_info);
+
+		prog_fd = bpf_prog_get_fd_by_id(next_id);
+		if (prog_fd < 0 && errno == ENOENT)
+			/* The bpf_prog is in the dead row */
+			continue;
+		if (CHECK(prog_fd < 0, "get-prog-fd(next_id)",
+			  "prog_fd %d next_id %d errno %d\n",
+			  prog_fd, next_id, errno))
+			break;
+
+		for (i = 0; i < nr_iters; i++)
+			if (prog_infos[i].id == next_id)
+				break;
+
+		if (i == nr_iters)
+			continue;
+
+		nr_id_found++;
+
+		err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+		CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
+		      memcmp(&prog_info, &prog_infos[i], info_len),
+		      "get-prog-info(next_id->fd)",
+		      "err %d errno %d info_len %u(%lu) memcmp %d\n",
+		      err, errno, info_len, sizeof(struct bpf_prog_info),
+		      memcmp(&prog_info, &prog_infos[i], info_len));
+
+		close(prog_fd);
+	}
+	CHECK(nr_id_found != nr_iters,
+	      "check total prog id found by get_next_id",
+	      "nr_id_found %u(%u)\n",
+	      nr_id_found, nr_iters);
+
+	/* Check bpf_map_get_next_id() */
+	nr_id_found = 0;
+	next_id = 0;
+	while (!bpf_map_get_next_id(next_id, &next_id)) {
+		struct bpf_map_info map_info;
+		int map_fd;
+
+		info_len = sizeof(map_info);
+
+		map_fd = bpf_map_get_fd_by_id(next_id);
+		if (map_fd < 0 && errno == ENOENT)
+			/* The bpf_map is in the dead row */
+			continue;
+		if (CHECK(map_fd < 0, "get-map-fd(next_id)",
+			  "map_fd %d next_id %u errno %d\n",
+			  map_fd, next_id, errno))
+			break;
+
+		for (i = 0; i < nr_iters; i++)
+			if (map_infos[i].id == next_id)
+				break;
+
+		if (i == nr_iters)
+			continue;
+
+		nr_id_found++;
+
+		err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
+		assert(!err);
+
+		err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+		CHECK(err || info_len != sizeof(struct bpf_map_info) ||
+		      memcmp(&map_info, &map_infos[i], info_len) ||
+		      array_value != array_magic_value,
+		      "check get-map-info(next_id->fd)",
+		      "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n",
+		      err, errno, info_len, sizeof(struct bpf_map_info),
+		      memcmp(&map_info, &map_infos[i], info_len),
+		      array_value, array_magic_value);
+
+		close(map_fd);
+	}
+	CHECK(nr_id_found != nr_iters,
+	      "check total map id found by get_next_id",
+	      "nr_id_found %u(%u)\n",
+	      nr_id_found, nr_iters);
+
+done:
+	for (i = 0; i < nr_iters; i++)
+		bpf_object__close(objs[i]);
+}
+
 int main(void)
 {
 	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -293,6 +483,7 @@ int main(void)
 	test_xdp();
 	test_l4lb();
 	test_tcp_estats();
+	test_bpf_obj_id();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return 0;
--
cgit v1.2.3
From ded092cd73c2c56a394b936f86897f29b2e131c0 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Sun, 11 Jun 2017 00:50:47 +0200
Subject: bpf: add bpf_set_hash helper for tc progs

Allow for tc BPF programs to set a skb->hash, apart from clearing
and triggering a recalc that we have right now. It allows for BPF
to implement a custom hashing routine for skb_get_hash().

Signed-off-by: Daniel Borkmann
Acked-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 include/uapi/linux/bpf.h       |  8 +++++++-
 net/core/filter.c              | 20 ++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  8 +++++++-
 3 files changed, 34 insertions(+), 2 deletions(-)

(limited to 'tools/include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9b2c10b45733..f94b48b168dc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -513,6 +513,11 @@ union bpf_attr {
  *     Get the owner uid of the socket stored inside sk_buff.
  *     @skb: pointer to skb
  *     Return: uid of the socket owner on success or overflowuid if failed.
+ *
+ * u32 bpf_set_hash(skb, hash)
+ *     Set full skb->hash.
+ *     @skb: pointer to skb
+ *     @hash: hash to set
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -562,7 +567,8 @@ union bpf_attr {
 	FN(xdp_adjust_head),		\
 	FN(probe_read_str),		\
 	FN(get_socket_cookie),		\
-	FN(get_socket_uid),
+	FN(get_socket_uid),		\
+	FN(set_hash),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 4867391126e4..a65a3b25e104 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1874,6 +1874,24 @@ static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
+{
+	/* Set user specified hash as L4(+), so that it gets returned
+	 * on skb_get_hash() call unless BPF prog later on triggers a
+	 * skb_clear_hash().
+	 */
+	__skb_set_sw_hash(skb, hash, true);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_set_hash_proto = {
+	.func		= bpf_set_hash,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
 	   u16, vlan_tci)
 {
@@ -2744,6 +2762,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_hash_recalc_proto;
 	case BPF_FUNC_set_hash_invalid:
 		return &bpf_set_hash_invalid_proto;
+	case BPF_FUNC_set_hash:
+		return &bpf_set_hash_proto;
 	case BPF_FUNC_perf_event_output:
 		return &bpf_skb_event_output_proto;
 	case BPF_FUNC_get_smp_processor_id:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9b2c10b45733..f94b48b168dc 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -513,6 +513,11 @@ union bpf_attr {
  *     Get the owner uid of the socket stored inside sk_buff.
  *     @skb: pointer to skb
  *     Return: uid of the socket owner on success or overflowuid if failed.
+ *
+ * u32 bpf_set_hash(skb, hash)
+ *     Set full skb->hash.
+ *     @skb: pointer to skb
+ *     @hash: hash to set
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -562,7 +567,8 @@ union bpf_attr {
 	FN(xdp_adjust_head),		\
 	FN(probe_read_str),		\
 	FN(get_socket_cookie),		\
-	FN(get_socket_uid),
+	FN(get_socket_uid),		\
+	FN(set_hash),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
--
cgit v1.2.3
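From a tc/cls_bpf program the new helper would be used roughly as follows. A sketch assuming a bpf_set_hash() stub bound to BPF_FUNC_set_hash plus the usual SEC() macro from a local bpf_helpers.h; the hash constant stands in for a custom routine computed over parsed headers:

#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"

SEC("classifier")
int set_custom_hash(struct __sk_buff *skb)
{
	/* stand-in for a custom hash over parsed packet fields */
	__u32 hash = 0x2a6e1a5f;

	/* pinned as an L4 software hash: later skb_get_hash() calls
	 * return it instead of recomputing, until skb_clear_hash()
	 */
	bpf_set_hash(skb, hash);

	return TC_ACT_OK;
}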
From 04df41e343db9ca91a278ea14606bbaaf0491f2e Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo
Date: Fri, 30 Jun 2017 20:02:55 -0700
Subject: bpf: update tools/include/uapi/linux/bpf.h

Update tools/include/uapi/linux/bpf.h to include changes related to
new bpf sock_ops program type.

Signed-off-by: Lawrence Brakmo
Signed-off-by: David S. Miller
---
 tools/include/uapi/linux/bpf.h | 66 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

(limited to 'tools/include')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f94b48b168dc..284b3661f1df 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_IN,
 	BPF_PROG_TYPE_LWT_OUT,
 	BPF_PROG_TYPE_LWT_XMIT,
+	BPF_PROG_TYPE_SOCK_OPS,
 };
 
 enum bpf_attach_type {
 	BPF_CGROUP_INET_INGRESS,
 	BPF_CGROUP_INET_EGRESS,
 	BPF_CGROUP_INET_SOCK_CREATE,
+	BPF_CGROUP_SOCK_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -518,6 +520,17 @@ union bpf_attr {
  *     Set full skb->hash.
  *     @skb: pointer to skb
  *     @hash: hash to set
+ *
+ * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
+ *     Calls setsockopt. Not all opts are available, only those with
+ *     integer optvals plus TCP_CONGESTION.
+ *     Supported levels: SOL_SOCKET and IPPROTO_TCP
+ *     @bpf_socket: pointer to bpf_socket
+ *     @level: SOL_SOCKET or IPPROTO_TCP
+ *     @optname: option name
+ *     @optval: pointer to option value
+ *     @optlen: length of optval in bytes
+ *     Return: 0 or negative error
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -568,7 +581,8 @@ union bpf_attr {
 	FN(probe_read_str),		\
 	FN(get_socket_cookie),		\
 	FN(get_socket_uid),		\
-	FN(set_hash),
+	FN(set_hash),			\
+	FN(setsockopt),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -720,4 +734,54 @@ struct bpf_map_info {
 	__u32 map_flags;
 } __attribute__((aligned(8)));
 
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+	__u32 op;
+	union {
+		__u32 reply;
+		__u32 replylong[4];
+	};
+	__u32 family;
+	__u32 remote_ip4;
+	__u32 local_ip4;
+	__u32 remote_ip6[4];
+	__u32 local_ip6[4];
+	__u32 remote_port;
+	__u32 local_port;
+};
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+	BPF_SOCK_OPS_VOID,
+	BPF_SOCK_OPS_TIMEOUT_INIT,	/* Should return SYN-RTO value to use or
+					 * -1 if default value should be used
+					 */
+	BPF_SOCK_OPS_RWND_INIT,		/* Should return initial advertised
+					 * window (in packets) or -1 if default
+					 * value should be used
+					 */
+	BPF_SOCK_OPS_TCP_CONNECT_CB,	/* Calls BPF program right before an
+					 * active connection is initialized
+					 */
+	BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB,	/* Calls BPF program when an
+						 * active connection is
+						 * established
+						 */
+	BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,	/* Calls BPF program when a
+						 * passive connection is
+						 * established
+						 */
+	BPF_SOCK_OPS_NEEDS_ECN,		/* If connection's congestion control
+					 * needs ECN
+					 */
+};
+
+#define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP	1002	/* Set sndcwnd_clamp */
+
 #endif /* _UAPI__LINUX_BPF_H__ */
--
cgit v1.2.3
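A hedged sketch of how these definitions fit together from a sock_ops program. The kernel side of the sock_ops program type lands in companion patches; the bpf_setsockopt() stub is assumed to be bound to BPF_FUNC_setsockopt in a local bpf_helpers.h, and the numeric values are arbitrary examples:

#include <uapi/linux/bpf.h>
#include <uapi/linux/in.h>
#include "bpf_helpers.h"

SEC("sockops")
int tcp_tuner(struct bpf_sock_ops *skops)
{
	int rv = -1;	/* -1: no reply value, keep kernel default */
	int iw = 10;

	switch (skops->op) {
	case BPF_SOCK_OPS_RWND_INIT:
		rv = 40;	/* initial advertised window, in packets */
		break;
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		/* TCP_BPF_IW is only reachable via bpf_setsockopt() */
		rv = bpf_setsockopt(skops, IPPROTO_TCP, TCP_BPF_IW,
				    &iw, sizeof(iw));
		break;
	}

	skops->reply = rv;
	return 1;
}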
From 2be7e212d5419a400d051c84ca9fdd083e5aacac Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Sun, 2 Jul 2017 02:13:26 +0200
Subject: bpf: add bpf_skb_adjust_room helper

This work adds a helper that can be used to adjust net room of an
skb. The helper is generic and can be further extended in future.
Main use case is for having a programmatic way to add/remove room to
v4/v6 header options along with cls_bpf on egress and ingress hook
of the data path. It reuses most of the infrastructure that we added
for the bpf_skb_change_type() helper which can be used in nat64
translations. Similarly, the helper only takes care of adjusting the
room so that related data is populated and csum adapted out of the
BPF program using it.

Signed-off-by: Daniel Borkmann
Acked-by: John Fastabend
Acked-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 include/uapi/linux/bpf.h       |  16 +++++-
 net/core/filter.c              | 126 +++++++++++++++++++++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h |  16 +++++-
 3 files changed, 151 insertions(+), 7 deletions(-)

(limited to 'tools/include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a6a91e5e96fc..e99e3e6f8b37 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -531,6 +531,14 @@ union bpf_attr {
  *     @optval: pointer to option value
  *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ *     Grow or shrink room in sk_buff.
+ *     @skb: pointer to skb
+ *     @len_diff: (signed) amount of room to grow/shrink
+ *     @mode: operation mode (enum bpf_adj_room_mode)
+ *     @flags: reserved for future use
+ *     Return: 0 on success or negative error code
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -582,7 +590,8 @@ union bpf_attr {
 	FN(get_socket_cookie),		\
 	FN(get_socket_uid),		\
 	FN(set_hash),			\
-	FN(setsockopt),
+	FN(setsockopt),			\
+	FN(skb_adjust_room),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -632,6 +641,11 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
 #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+	BPF_ADJ_ROOM_NET,
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
diff --git a/net/core/filter.c b/net/core/filter.c
index 68d8cd865c4a..29620df45b7c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2154,6 +2154,124 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		return sizeof(struct iphdr);
+	case htons(ETH_P_IPV6):
+		return sizeof(struct ipv6hdr);
+	default:
+		return ~0U;
+	}
+}
+
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
+{
+	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+	int ret;
+
+	ret = skb_cow(skb, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	if (skb_is_gso(skb)) {
+		/* Due to header grow, MSS needs to be downgraded. */
+		skb_shinfo(skb)->gso_size -= len_diff;
+		/* Header must be checked, and gso_segs recomputed. */
+		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+		skb_shinfo(skb)->gso_segs = 0;
+	}
+
+	return 0;
+}
+
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
+{
+	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+	int ret;
+
+	ret = skb_unclone(skb, GFP_ATOMIC);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	if (skb_is_gso(skb)) {
+		/* Due to header shrink, MSS can be upgraded. */
+		skb_shinfo(skb)->gso_size += len_diff;
+		/* Header must be checked, and gso_segs recomputed. */
+		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+		skb_shinfo(skb)->gso_segs = 0;
+	}
+
+	return 0;
+}
+
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+	return skb->dev->mtu + skb->dev->hard_header_len;
+}
+
+static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
+{
+	bool trans_same = skb->transport_header == skb->network_header;
+	u32 len_cur, len_diff_abs = abs(len_diff);
+	u32 len_min = bpf_skb_net_base_len(skb);
+	u32 len_max = __bpf_skb_max_len(skb);
+	__be16 proto = skb->protocol;
+	bool shrink = len_diff < 0;
+	int ret;
+
+	if (unlikely(len_diff_abs > 0xfffU))
+		return -EFAULT;
+	if (unlikely(proto != htons(ETH_P_IP) &&
+		     proto != htons(ETH_P_IPV6)))
+		return -ENOTSUPP;
+
+	len_cur = skb->len - skb_network_offset(skb);
+	if (skb_transport_header_was_set(skb) && !trans_same)
+		len_cur = skb_network_header_len(skb);
+	if ((shrink && (len_diff_abs >= len_cur ||
+			len_cur - len_diff_abs < len_min)) ||
+	    (!shrink && (skb->len + len_diff_abs > len_max &&
+			 !skb_is_gso(skb))))
+		return -ENOTSUPP;
+
+	ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
+		       bpf_skb_net_grow(skb, len_diff_abs);
+
+	bpf_compute_data_end(skb);
+	return ret;
+}
+
+BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+	   u32, mode, u64, flags)
+{
+	if (unlikely(flags))
+		return -EINVAL;
+	if (likely(mode == BPF_ADJ_ROOM_NET))
+		return bpf_skb_adjust_net(skb, len_diff);
+
+	return -ENOTSUPP;
+}
+
+static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
+	.func		= bpf_skb_adjust_room,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static u32 __bpf_skb_min_len(const struct sk_buff *skb)
 {
 	u32 min_len = skb_network_offset(skb);
@@ -2166,11 +2284,6 @@ static u32 __bpf_skb_min_len(const struct sk_buff *skb)
 	return min_len;
 }
 
-static u32 __bpf_skb_max_len(const struct sk_buff *skb)
-{
-	return skb->dev->mtu + skb->dev->hard_header_len;
-}
-
 static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
 {
 	unsigned int old_len = skb->len;
@@ -2307,6 +2420,7 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_skb_change_proto ||
 	    func == bpf_skb_change_head ||
 	    func == bpf_skb_change_tail ||
+	    func == bpf_skb_adjust_room ||
 	    func == bpf_skb_pull_data ||
 	    func == bpf_clone_redirect ||
 	    func == bpf_l3_csum_replace ||
@@ -2849,6 +2963,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_change_proto_proto;
 	case BPF_FUNC_skb_change_type:
 		return &bpf_skb_change_type_proto;
+	case BPF_FUNC_skb_adjust_room:
+		return &bpf_skb_adjust_room_proto;
 	case BPF_FUNC_skb_change_tail:
 		return &bpf_skb_change_tail_proto;
 	case BPF_FUNC_skb_get_tunnel_key:
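Usage from cls_bpf would look roughly like this. A sketch assuming a bpf_skb_adjust_room() stub bound to BPF_FUNC_skb_adjust_room; growing by 8 bytes models making space for IPv4 options, and per the commit message the program remains responsible for populating the new room and adapting the checksum (e.g. via bpf_l3_csum_replace()):

#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"

SEC("classifier")
int grow_net_room(struct __sk_buff *skb)
{
	/* open 8 bytes of room behind the base network header;
	 * only ETH_P_IP/ETH_P_IPV6 packets are accepted
	 */
	if (bpf_skb_adjust_room(skb, 8, BPF_ADJ_ROOM_NET, 0))
		return TC_ACT_SHOT;

	/* the option bytes and checksum fixups are still up to
	 * the program
	 */
	return TC_ACT_OK;
}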
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 284b3661f1df..ce2988be4f0e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -531,6 +531,14 @@ union bpf_attr {
  *     @optval: pointer to option value
  *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ *     Grow or shrink room in sk_buff.
+ *     @skb: pointer to skb
+ *     @len_diff: (signed) amount of room to grow/shrink
+ *     @mode: operation mode (enum bpf_adj_room_mode)
+ *     @flags: reserved for future use
+ *     Return: 0 on success or negative error code
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -582,7 +590,8 @@ union bpf_attr {
 	FN(get_socket_cookie),		\
 	FN(get_socket_uid),		\
 	FN(set_hash),			\
-	FN(setsockopt),
+	FN(setsockopt),			\
+	FN(skb_adjust_room),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -632,6 +641,11 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
 #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+	BPF_ADJ_ROOM_NET,
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
--
cgit v1.2.3