diff options
author | David S. Miller <davem@davemloft.net> | 2019-04-28 08:42:41 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-04-28 08:42:41 -0400 |
commit | 5f0d736e7f7db586141f974821b6ca6c1d906d5b (patch) | |
tree | 8f62c1a24fd499a55deb455c6dd42bb51cd53275 /include | |
parent | b1a79360ee862f8ada4798ad2346fa45bb41b527 (diff) | |
parent | 9076c49bdca2aa68c805f2677b2bccc4bde2bac1 (diff) | |
download | linux-5f0d736e7f7db586141f974821b6ca6c1d906d5b.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2019-04-28
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Introduce BPF socket local storage map so that BPF programs can store
private data they associate with a socket (instead of e.g. separate hash
table), from Martin.
2) Add support for bpftool to dump BTF types. This is done through a new
`bpftool btf dump` sub-command, from Andrii.
3) Enable BPF-based flow dissector for skb-less eth_get_headlen() calls which
was currently not supported since skb was used to lookup netns, from Stanislav.
4) Add an opt-in interface for tracepoints to expose a writable context
for attached BPF programs, used here for NBD sockets, from Matt.
5) BPF xadd related arm64 JIT fixes and scalability improvements, from Daniel.
6) Change the skb->protocol for bpf_skb_adjust_room() helper in order to
support tunnels such as sit. Add selftests as well, from Willem.
7) Various smaller misc fixes.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bpf.h | 4 | ||||
-rw-r--r-- | include/linux/bpf_types.h | 2 | ||||
-rw-r--r-- | include/linux/etherdevice.h | 2 | ||||
-rw-r--r-- | include/linux/skbuff.h | 36 | ||||
-rw-r--r-- | include/linux/tracepoint-defs.h | 1 | ||||
-rw-r--r-- | include/net/bpf_sk_storage.h | 13 | ||||
-rw-r--r-- | include/net/flow_dissector.h | 7 | ||||
-rw-r--r-- | include/net/sch_generic.h | 11 | ||||
-rw-r--r-- | include/net/sock.h | 5 | ||||
-rw-r--r-- | include/trace/bpf_probe.h | 27 | ||||
-rw-r--r-- | include/trace/events/bpf_test_run.h | 50 | ||||
-rw-r--r-- | include/trace/events/nbd.h | 107 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 45 |
13 files changed, 286 insertions, 24 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f15432d90728..9a21848fdb07 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -184,6 +184,7 @@ enum bpf_arg_type { ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ + ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack @@ -204,6 +205,7 @@ enum bpf_arg_type { ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ ARG_PTR_TO_INT, /* pointer to int */ ARG_PTR_TO_LONG, /* pointer to long */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ }; /* type of values returned from helper functions */ @@ -272,6 +274,7 @@ enum bpf_reg_type { PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ }; /* The information passed from prog-specific *_is_valid_access @@ -361,6 +364,7 @@ struct bpf_prog_aux { u32 used_map_cnt; u32 max_ctx_offset; u32 max_pkt_offset; + u32 max_tp_access; u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index d26991a16894..5a9975678d6f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) @@ -60,6 +61,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) #if defined(CONFIG_BPF_STREAM_PARSER) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index aa8bfd6f738c..25c0d049336f 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -33,7 +33,7 @@ struct device; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); -u32 eth_get_headlen(void *data, unsigned int max_len); +u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f42942a443b..6d58fa8a65fd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1258,11 +1258,19 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, unsigned int key_count); #ifdef CONFIG_NET +int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); #else +static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EOPNOTSUPP; +} + static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { @@ -1275,12 +1283,12 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) } #endif -struct bpf_flow_keys; -bool __skb_flow_bpf_dissect(struct bpf_prog *prog, - const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - struct bpf_flow_keys *flow_keys); -bool __skb_flow_dissect(const struct sk_buff *skb, +struct bpf_flow_dissector; +bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen); + +bool __skb_flow_dissect(const struct net *net, + const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, void *data, __be16 proto, int nhoff, int hlen, @@ -1290,8 +1298,8 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, unsigned int flags) { - return __skb_flow_dissect(skb, flow_dissector, target_container, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, flow_dissector, + target_container, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, @@ -1299,18 +1307,19 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_dissector, flow, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, &flow_keys_dissector, + flow, NULL, 0, 0, 0, flags); } static inline bool -skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb, +skb_flow_dissect_flow_keys_basic(const struct net *net, + const struct sk_buff *skb, struct flow_keys_basic *flow, void *data, __be16 proto, int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow, + return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, data, proto, nhoff, hlen, flags); } @@ -2488,7 +2497,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb) if (skb_transport_header_was_set(skb)) return; - if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) + if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); } diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 49ba9cde7e4b..b29950a19205 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -45,6 +45,7 @@ struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; u32 num_args; + u32 writable_size; } __aligned(32); #endif diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h new file mode 100644 index 000000000000..b9dcb02e756b --- /dev/null +++ b/include/net/bpf_sk_storage.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Facebook */ +#ifndef _BPF_SK_STORAGE_H +#define _BPF_SK_STORAGE_H + +struct sock; + +void bpf_sk_storage_free(struct sock *sk); + +extern const struct bpf_func_proto bpf_sk_storage_get_proto; +extern const struct bpf_func_proto bpf_sk_storage_delete_proto; + +#endif /* _BPF_SK_STORAGE_H */ diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 2b26979efb48..7c5a8d9a8d2a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec return ((char *)target_container) + flow_dissector->offset[key_id]; } +struct bpf_flow_dissector { + struct bpf_flow_keys *flow_keys; + const struct sk_buff *skb; + void *data; + void *data_end; +}; + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e8f85cd2afce..21f434f3ac9e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -364,13 +364,10 @@ struct tcf_proto { }; struct qdisc_skb_cb { - union { - struct { - unsigned int pkt_len; - u16 slave_dev_queue_mapping; - u16 tc_classid; - }; - struct bpf_flow_keys *flow_keys; + struct { + unsigned int pkt_len; + u16 slave_dev_queue_mapping; + u16 tc_classid; }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; diff --git a/include/net/sock.h b/include/net/sock.h index 784cd19d5ff7..4d208c0f9c14 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -236,6 +236,8 @@ struct sock_common { /* public: */ }; +struct bpf_sk_storage; + /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock @@ -510,6 +512,9 @@ struct sock { #endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_sk_storage __rcu *sk_bpf_storage; +#endif struct rcu_head sk_rcu; }; diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 505dae0bed80..d6e556c0a085 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \ * to make sure that if the tracepoint handling changes, the * bpf probe will fail to compile unless it too is updated. */ -#undef DEFINE_EVENT -#define DEFINE_EVENT(template, call, proto, args) \ +#define __DEFINE_EVENT(template, call, proto, args, size) \ static inline void bpf_test_probe_##call(void) \ { \ check_trace_callback_type_##call(__bpf_trace_##template); \ @@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \ .tp = &__tracepoint_##call, \ .bpf_func = (void *)__bpf_trace_##template, \ .num_args = COUNT_ARGS(args), \ + .writable_size = size, \ }; +#define FIRST(x, ...) x + +#undef DEFINE_EVENT_WRITABLE +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ +static inline void bpf_test_buffer_##call(void) \ +{ \ + /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ + * BUILD_BUG_ON_ZERO() uses a different mechanism that is not \ + * dead-code-eliminated. \ + */ \ + FIRST(proto); \ + (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ +} \ +__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0) #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef DEFINE_EVENT_WRITABLE +#undef __DEFINE_EVENT +#undef FIRST + #endif /* CONFIG_BPF_EVENTS */ diff --git a/include/trace/events/bpf_test_run.h b/include/trace/events/bpf_test_run.h new file mode 100644 index 000000000000..265447e3f71a --- /dev/null +++ b/include/trace/events/bpf_test_run.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf_test_run + +#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BPF_TEST_RUN_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + TP_STRUCT__entry( + __field(int, err) + ), + + TP_fast_assign( + __entry->err = *err; + ), + + TP_printk("bpf_test_finish with err=%d", __entry->err) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + sizeof(int) +); + +#endif + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/nbd.h b/include/trace/events/nbd.h new file mode 100644 index 000000000000..9849956f34d8 --- /dev/null +++ b/include/trace/events/nbd.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nbd + +#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NBD_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(nbd_transport_event, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle), + + TP_STRUCT__entry( + __field(struct request *, req) + __field(u64, handle) + ), + + TP_fast_assign( + __entry->req = req; + __entry->handle = handle; + ), + + TP_printk( + "nbd transport event: request %p, handle 0x%016llx", + __entry->req, + __entry->handle + ) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DECLARE_EVENT_CLASS(nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + TP_STRUCT__entry( + __field(struct nbd_request *, nbd_request) + __field(u64, dev_index) + __field(struct request *, request) + ), + + TP_fast_assign( + __entry->nbd_request = 0; + __entry->dev_index = index; + __entry->request = rq; + ), + + TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + sizeof(struct nbd_request) +); + +#endif + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eaf2d3284248..72336bac7573 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -133,6 +133,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, + BPF_MAP_TYPE_SK_STORAGE, }; /* Note that tracing related programs such as @@ -168,6 +169,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { @@ -2629,6 +2631,42 @@ union bpf_attr { * was provided. * * **-ERANGE** if resulting value was out of range. + * + * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * Description + * Get a bpf-local-storage from a sk. + * + * Logically, it could be thought of getting the value from + * a *map* with *sk* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem(map, &sk)** except this + * helper enforces the key must be a **bpf_fullsock()** + * and the map must be a BPF_MAP_TYPE_SK_STORAGE also. + * + * Underneath, the value is stored locally at *sk* instead of + * the map. The *map* is used as the bpf-local-storage **type**. + * The bpf-local-storage **type** (i.e. the *map*) is searched + * against all bpf-local-storages residing at sk. + * + * An optional *flags* (BPF_SK_STORAGE_GET_F_CREATE) can be + * used such that a new bpf-local-storage will be + * created if one does not exist. *value* can be used + * together with BPF_SK_STORAGE_GET_F_CREATE to specify + * the initial value of a bpf-local-storage. If *value* is + * NULL, the new bpf-local-storage will be zero initialized. + * Return + * A bpf-local-storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf-local-storage. + * + * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * Description + * Delete a bpf-local-storage from a sk. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf-local-storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2737,7 +2775,9 @@ union bpf_attr { FN(sysctl_get_new_value), \ FN(sysctl_set_new_value), \ FN(strtol), \ - FN(strtoul), + FN(strtoul), \ + FN(sk_storage_get), \ + FN(sk_storage_delete), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2813,6 +2853,9 @@ enum bpf_func_id { /* BPF_FUNC_sysctl_get_name flags. */ #define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +/* BPF_FUNC_sk_storage_get flags */ +#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, |