summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf.h41
-rw-r--r--include/linux/bpf_types.h6
-rw-r--r--include/linux/cgroup.h30
-rw-r--r--include/linux/filter.h51
-rw-r--r--include/linux/skbuff.h1
-rw-r--r--include/net/addrconf.h1
-rw-r--r--include/net/sock_reuseport.h19
-rw-r--r--include/net/tcp.h30
-rw-r--r--include/net/xdp.h7
-rw-r--r--include/uapi/linux/bpf.h56
10 files changed, 231 insertions, 11 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..523481a3471b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -23,7 +23,7 @@ struct bpf_prog;
struct bpf_map;
struct sock;
struct seq_file;
-struct btf;
+struct btf_type;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
@@ -48,8 +48,9 @@ struct bpf_map_ops {
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
- int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
- u32 key_type_id, u32 value_type_id);
+ int (*map_check_btf)(const struct bpf_map *map,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type);
};
struct bpf_map {
@@ -118,9 +119,13 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
{
- return map->ops->map_seq_show_elem && map->ops->map_check_btf;
+ return map->btf && map->ops->map_seq_show_elem;
}
+int map_check_no_btf(const struct bpf_map *map,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type);
+
extern const struct bpf_map_ops bpf_map_offload_ops;
/* function argument constraints */
@@ -524,6 +529,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
}
struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+int array_map_alloc_check(union bpf_attr *attr);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
@@ -769,6 +775,33 @@ static inline void __xsk_map_flush(struct bpf_map *map)
}
#endif
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+ void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags);
+#else
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
+{
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
+ void *key, void *value)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
+ void *key, void *value,
+ u64 map_flags)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_BPF_SYSCALL */
+#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */
+
/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index add08be53b6f..cd26c090e7c0 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -29,6 +29,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
#endif
+#ifdef CONFIG_INET
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
+#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
@@ -60,4 +63,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
#endif
+#ifdef CONFIG_INET
+BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
+#endif
#endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c9fdf6f57913..32c553556bbd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -554,6 +554,36 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
}
/**
+ * cgroup_ancestor - find ancestor of cgroup
+ * @cgrp: cgroup to find ancestor of
+ * @ancestor_level: level of ancestor to find starting from root
+ *
+ * Find ancestor of cgroup at specified level starting from root if it exists
+ * and return pointer to it. Return NULL if @cgrp doesn't have ancestor at
+ * @ancestor_level.
+ *
+ * This function is safe to call as long as @cgrp is accessible.
+ */
+static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
+ int ancestor_level)
+{
+ struct cgroup *ptr;
+
+ if (cgrp->level < ancestor_level)
+ return NULL;
+
+ for (ptr = cgrp;
+ ptr && ptr->level > ancestor_level;
+ ptr = cgroup_parent(ptr))
+ ;
+
+ if (ptr && ptr->level == ancestor_level)
+ return ptr;
+
+ return NULL;
+}
+
+/**
* task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
* @task: the task to be tested
* @ancestor: possible ancestor of @task's cgroup
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c73dd7396886..5d565c50bcb2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -32,6 +32,7 @@ struct seccomp_data;
struct bpf_prog_aux;
struct xdp_rxq_info;
struct xdp_buff;
+struct sock_reuseport;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -537,6 +538,20 @@ struct sk_msg_buff {
struct list_head list;
};
+struct bpf_redirect_info {
+ u32 ifindex;
+ u32 flags;
+ struct bpf_map *map;
+ struct bpf_map *map_to_flush;
+ unsigned long map_owner;
+ u32 kern_flags;
+};
+
+DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
+
+/* flags for bpf_redirect_info kern_flags */
+#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */
+
/* Compute the linear packet data range [data, data_end) which
* will be accessed by various program types (cls_bpf, act_bpf,
* lwt, ...). Subsystems allowing direct data access must (!)
@@ -738,6 +753,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_attach_bpf(u32 ufd, struct sock *sk);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
+void sk_reuseport_prog_free(struct bpf_prog *prog);
int sk_detach_filter(struct sock *sk);
int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
unsigned int len);
@@ -765,6 +781,27 @@ static inline bool bpf_dump_raw_ok(void)
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);
+static inline bool xdp_return_frame_no_direct(void)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT;
+}
+
+static inline void xdp_set_return_frame_no_direct(void)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT;
+}
+
+static inline void xdp_clear_return_frame_no_direct(void)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT;
+}
+
static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
unsigned int pktlen)
{
@@ -798,6 +835,20 @@ void bpf_warn_invalid_xdp_action(u32 act);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
+#ifdef CONFIG_INET
+struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+ struct bpf_prog *prog, struct sk_buff *skb,
+ u32 hash);
+#else
+static inline struct sock *
+bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+ struct bpf_prog *prog, struct sk_buff *skb,
+ u32 hash)
+{
+ return NULL;
+}
+#endif
+
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
extern int bpf_jit_harden;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7e237a63a70c..17a13e4785fc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1038,6 +1038,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
}
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
+void skb_headers_offset_update(struct sk_buff *skb, int off);
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
void skb_copy_header(struct sk_buff *new, const struct sk_buff *old);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 5f43f7a70fe6..6def0351bcc3 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -108,6 +108,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
+bool inet_rcv_saddr_any(const struct sock *sk);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 0054b3a9b923..8a5f70c7cdf2 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -5,25 +5,36 @@
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/types.h>
+#include <linux/spinlock.h>
#include <net/sock.h>
+extern spinlock_t reuseport_lock;
+
struct sock_reuseport {
struct rcu_head rcu;
u16 max_socks; /* length of socks */
u16 num_socks; /* elements in socks */
+ /* The last synq overflow event timestamp of this
+ * reuse->socks[] group.
+ */
+ unsigned int synq_overflow_ts;
+ /* ID stays the same even after the size of socks[] grows. */
+ unsigned int reuseport_id;
+ bool bind_inany;
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
struct sock *socks[0]; /* array of sock pointers */
};
-extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
+extern int reuseport_alloc(struct sock *sk, bool bind_inany);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
+ bool bind_inany);
extern void reuseport_detach_sock(struct sock *sk);
extern struct sock *reuseport_select_sock(struct sock *sk,
u32 hash,
struct sk_buff *skb,
int hdr_len);
-extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
- struct bpf_prog *prog);
+extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
+int reuseport_get_id(struct sock_reuseport *reuse);
#endif /* _SOCK_REUSEPORT_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d769dc20359b..d196901c9dba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -36,6 +36,7 @@
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
+#include <net/sock_reuseport.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
@@ -473,9 +474,22 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
*/
static inline void tcp_synq_overflow(const struct sock *sk)
{
- unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+ unsigned int last_overflow;
unsigned int now = jiffies;
+ if (sk->sk_reuseport) {
+ struct sock_reuseport *reuse;
+
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+ if (likely(reuse)) {
+ last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+ if (time_after32(now, last_overflow + HZ))
+ WRITE_ONCE(reuse->synq_overflow_ts, now);
+ return;
+ }
+ }
+
+ last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
if (time_after32(now, last_overflow + HZ))
tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
}
@@ -483,9 +497,21 @@ static inline void tcp_synq_overflow(const struct sock *sk)
/* syncookies: no recent synqueue overflow on this listening socket? */
static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
{
- unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+ unsigned int last_overflow;
unsigned int now = jiffies;
+ if (sk->sk_reuseport) {
+ struct sock_reuseport *reuse;
+
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+ if (likely(reuse)) {
+ last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+ return time_after32(now, last_overflow +
+ TCP_SYNCOOKIE_VALID);
+ }
+ }
+
+ last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
}
diff --git a/include/net/xdp.h b/include/net/xdp.h
index fcb033f51d8c..76b95256c266 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -84,6 +84,13 @@ struct xdp_frame {
struct net_device *dev_rx; /* used by cpumap */
};
+/* Clear kernel pointers in xdp_frame */
+static inline void xdp_scrub_frame(struct xdp_frame *frame)
+{
+ frame->data = NULL;
+ frame->dev_rx = NULL;
+}
+
/* Convert xdp_buff to xdp_frame */
static inline
struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..66917a4eba27 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -126,6 +126,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
+ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
};
enum bpf_prog_type {
@@ -150,6 +151,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_PROG_TYPE_LWT_SEG6LOCAL,
BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SK_REUSEPORT,
};
enum bpf_attach_type {
@@ -2091,6 +2093,24 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
* u64 bpf_get_current_cgroup_id(void)
* Return
* A 64-bit integer containing the current cgroup id based
@@ -2113,6 +2133,14 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map
+ * It checks the selected sk is matching the incoming
+ * request in the skb.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2224,9 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(sk_select_reuseport), \
+ FN(skb_ancestor_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2413,6 +2443,30 @@ struct sk_msg_md {
__u32 local_port; /* stored in host byte order */
};
+struct sk_reuseport_md {
+ /*
+ * Start of directly accessible data. It begins from
+ * the tcp/udp header.
+ */
+ void *data;
+ void *data_end; /* End of directly accessible data */
+ /*
+ * Total length of packet (starting from the tcp/udp header).
+ * Note that the directly accessible bytes (data_end - data)
+ * could be less than this "len". Those bytes could be
+ * indirectly read by a helper "bpf_skb_load_bytes()".
+ */
+ __u32 len;
+ /*
+ * Eth protocol in the mac header (network byte order). e.g.
+ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+ */
+ __u32 eth_protocol;
+ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+ __u32 bind_inany; /* Is sock bound to an INANY address? */
+ __u32 hash; /* A hash of the packet 4 tuples */
+};
+
#define BPF_TAG_SIZE 8
struct bpf_prog_info {