From 14fc6bd6b79c430f615500d0fe6cea4722110db8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:09 -0700 Subject: bpf: Refactor bpf_iter_reg to have separate seq_info member There is no functionality change for this patch. Struct bpf_iter_reg is used to register a bpf_iter target, which includes information for both prog_load, link_create and seq_file creation. This patch puts fields related seq_file creation into a different structure. This will be useful for map elements iterator where one iterator covers different map types and different map types may have different seq_ops, init/fini private_data function and private_data size. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184109.590030-1-yhs@fb.com --- net/ipv4/tcp_ipv4.c | 8 ++++++-- net/ipv4/udp.c | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f8913923a6c0..cb288fdcf2ca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2947,17 +2947,21 @@ static void bpf_iter_fini_tcp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static struct bpf_iter_reg tcp_reg_info = { - .target = "tcp", +static const struct bpf_iter_seq_info tcp_seq_info = { .seq_ops = &bpf_iter_tcp_seq_ops, .init_seq_private = bpf_iter_init_tcp, .fini_seq_private = bpf_iter_fini_tcp, .seq_priv_size = sizeof(struct tcp_iter_state), +}; + +static struct bpf_iter_reg tcp_reg_info = { + .target = "tcp", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__tcp, sk_common), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &tcp_seq_info, }; static void __init bpf_iter_register(void) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0fb5e4ea133f..1bc50ec2caef 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3208,17 +3208,21 @@ static void bpf_iter_fini_udp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static struct bpf_iter_reg udp_reg_info = { - .target = "udp", +static const struct bpf_iter_seq_info udp_seq_info = { .seq_ops = &bpf_iter_udp_seq_ops, .init_seq_private = bpf_iter_init_udp, .fini_seq_private = bpf_iter_fini_udp, .seq_priv_size = sizeof(struct udp_iter_state), +}; + +static struct bpf_iter_reg udp_reg_info = { + .target = "udp", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__udp, udp_sk), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &udp_seq_info, }; static void __init bpf_iter_register(void) -- cgit v1.2.3 From f9c792729581bd8b8473af163e8ab426c2c61d89 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:10 -0700 Subject: bpf: Refactor to provide aux info to bpf_iter_init_seq_priv_t This patch refactored target bpf_iter_init_seq_priv_t callback function to accept additional information. This will be needed in later patches for map element targets since a particular map should be passed to traverse elements for that particular map. In the future, other information may be passed to target as well, e.g., pid, cgroup id, etc. to customize the iterator. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184110.590156-1-yhs@fb.com --- fs/proc/proc_net.c | 2 +- include/linux/bpf.h | 7 ++++++- include/linux/proc_fs.h | 3 ++- kernel/bpf/bpf_iter.c | 2 +- kernel/bpf/task_iter.c | 2 +- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/udp.c | 4 ++-- 7 files changed, 15 insertions(+), 9 deletions(-) (limited to 'net/ipv4') diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index dba63b2429f0..ed8a6306990c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -98,7 +98,7 @@ static const struct proc_ops proc_net_seq_ops = { .proc_release = seq_release_net, }; -int bpf_iter_init_seq_net(void *priv_data) +int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux) { #ifdef CONFIG_NET_NS struct seq_net_private *p = priv_data; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 127067f71fd4..ef52717336cf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -33,11 +33,13 @@ struct btf; struct btf_type; struct exception_table_entry; struct seq_operations; +struct bpf_iter_aux_info; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; -typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); +typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, + struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); struct bpf_iter_seq_info { const struct seq_operations *seq_ops; @@ -1198,6 +1200,9 @@ int bpf_obj_get_user(const char __user *pathname, int flags); extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } +struct bpf_iter_aux_info { +}; + #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index d1eed1b43651..2df965cd0974 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -133,7 +133,8 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo void *data); extern struct pid *tgid_pidfd_to_pid(const struct file *file); -extern int bpf_iter_init_seq_net(void *priv_data); +struct bpf_iter_aux_info; +extern int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux); extern void bpf_iter_fini_seq_net(void *priv_data); #ifdef CONFIG_PROC_PID_ARCH_STATUS diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 5b2387d6aa1f..8fa94cb1b5a0 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -442,7 +442,7 @@ static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) } if (tinfo->reg_info->seq_info->init_seq_private) { - err = tinfo->reg_info->seq_info->init_seq_private(priv_data->target_private); + err = tinfo->reg_info->seq_info->init_seq_private(priv_data->target_private, NULL); if (err) goto release_seq_file; } diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 6d9cd23869bf..232df29793e9 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -293,7 +293,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v) } } -static int init_seq_pidns(void *priv_data) +static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux) { struct bpf_iter_seq_task_common *common = priv_data; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cb288fdcf2ca..5084333b5ab6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2921,7 +2921,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, struct sock_common *sk_common, uid_t uid) -static int bpf_iter_init_tcp(void *priv_data) +static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux) { struct tcp_iter_state *st = priv_data; struct tcp_seq_afinfo *afinfo; @@ -2933,7 +2933,7 @@ static int bpf_iter_init_tcp(void *priv_data) afinfo->family = AF_UNSPEC; st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data); + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) kfree(afinfo); return ret; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1bc50ec2caef..7ce31beccfc2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3181,7 +3181,7 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = { DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) -static int bpf_iter_init_udp(void *priv_data) +static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) { struct udp_iter_state *st = priv_data; struct udp_seq_afinfo *afinfo; @@ -3194,7 +3194,7 @@ static int bpf_iter_init_udp(void *priv_data) afinfo->family = AF_UNSPEC; afinfo->udp_table = &udp_table; st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data); + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) kfree(afinfo); return ret; -- cgit v1.2.3 From c64c9c282a9a7ec0515b725d5aaed68c32e403a4 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 26 Jul 2020 14:02:28 +0200 Subject: udp, bpf: Ignore connections in reuseport group after BPF sk lookup When BPF sk lookup invokes reuseport handling for the selected socket, it should ignore the fact that reuseport group can contain connected UDP sockets. With BPF sk lookup this is not relevant as we are not scoring sockets to find the best match, which might be a connected UDP socket. Fix it by unconditionally accepting the socket selected by reuseport. This fixes the following two failures reported by test_progs. # ./test_progs -t sk_lookup ... #73/14 UDP IPv4 redir and reuseport with conns:FAIL ... #73/20 UDP IPv6 redir and reuseport with conns:FAIL ... Fixes: a57066b1a019 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Reported-by: Alexei Starovoitov Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200726120228.1414348-1-jakub@cloudflare.com --- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7ce31beccfc2..e88efba07551 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -473,7 +473,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net, return sk; reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); - if (reuse_sk && !reuseport_has_conns(sk, false)) + if (reuse_sk) sk = reuse_sk; return sk; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c394e674f486..29d9691359b9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -208,7 +208,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net, return sk; reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); - if (reuse_sk && !reuseport_has_conns(sk, false)) + if (reuse_sk) sk = reuse_sk; return sk; } -- cgit v1.2.3