summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2020-06-01 15:21:03 -0700
committerAlexei Starovoitov <ast@kernel.org>2020-06-01 15:21:12 -0700
commitcf51abcded837ef209faa03a62b2ea44e45995e8 (patch)
tree1edf7a1b44dac5062702fea067e9d9e94cf02c3e /net
parentfebeb6dff7beafcaf89521f6c8ff7b0adac08d54 (diff)
parent06716e04a043aa5e010f952a823ad038054b0e5c (diff)
downloadlinux-cf51abcded837ef209faa03a62b2ea44e45995e8.tar.bz2
Merge branch 'Link-based-attach-to-netns'
Jakub Sitnicki says: ==================== One of the pieces of feedback from recent review of BPF hooks for socket lookup [0] was that new program types should use bpf_link-based attachment. This series introduces new bpf_link type for attaching to network namespace. All link operations are supported. Errors returned from ops follow cgroup example. Patch 4 description goes into error semantics. The major change in v2 is a switch away from RCU to mutex-only synchronization. Andrii pointed out that it is not needed, and it makes sense to keep locking straightforward. Also, there were a couple of bugs in update_prog and fill_info initial implementation, one picked up by kbuild. Those are now fixed. Tests have been extended to cover them. Full changelog below. Series is organized as so: Patches 1-3 prepare a space in struct net to keep state for attached BPF programs, and massage the code in flow_dissector to make it attach type agnostic, to finally move it under kernel/bpf/. Patch 4, the most important one, introduces new bpf_link link type for attaching to network namespace. Patch 5 unifies the update error (ENOLINK) between BPF cgroup and netns. Patches 6-8 make libbpf and bpftool aware of the new link type. Patches 9-12 Add and extend tests to check that link low- and high-level API for operating on links to netns works as intended. Thanks to Alexei, Andrii, Lorenz, Marek, and Stanislav for feedback. -jkbs [0] https://lore.kernel.org/bpf/20200511185218.1422406-1-jakub@cloudflare.com/ Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com> Cc: Lorenz Bauer <lmb@cloudflare.com> Cc: Marek Majkowski <marek@cloudflare.com> Cc: Stanislav Fomichev <sdf@google.com> v1 -> v2: - Switch to mutex-only synchronization. Don't rely on RCU grace period guarantee when accessing struct net from link release / update / fill_info, and when accessing bpf_link from pernet pre_exit callback. (Andrii) - Drop patch 1, no longer needed with mutex-only synchronization. - Don't leak uninitialized variable contents from fill_info callback when link is in defunct state. (kbuild) - Make fill_info treat the link as defunct (i.e. no attached netns) when struct net refcount is 0, but link has not been yet auto-detached. - Add missing BPF_LINK_TYPE define in bpf_types.h for new link type. - Fix link update_prog callback to update the prog that will run, and not just the link itself. - Return EEXIST on prog attach when link already exists, and on link create when prog is already attached directly. (Andrii) - Return EINVAL on prog detach when link is attached. (Andrii) - Fold __netns_bpf_link_attach into its only caller. (Stanislav) - Get rid of a wrapper around container_of() (Andrii) - Use rcu_dereference_protected instead of rcu_access_pointer on update-side. (Stanislav) - Make return-on-success from netns_bpf_link_create less confusing. (Andrii) - Adapt bpf_link for cgroup to return ENOLINK when updating a defunct link. (Andrii, Alexei) - Order new exported symbols in libbpf.map alphabetically (Andrii) - Keep libbpf's "failed to attach link" warning message clear as to what we failed to attach to (cgroup vs netns). (Andrii) - Extract helpers for printing link attach type. (bpftool, Andrii) - Switch flow_dissector tests to BPF skeleton and extend them to exercise link-based flow dissector attachment. (Andrii) - Harden flow dissector attachment tests with prog query checks after prog attach/detach, or link create/update/close. - Extend flow dissector tests to cover fill_info for defunct links. - Rebase onto recent bpf-next ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/core/flow_dissector.c124
1 files changed, 20 insertions, 104 deletions
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0aeb33572feb..d02df0b6d0d9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -31,8 +31,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_labels.h>
#endif
-
-static DEFINE_MUTEX(flow_dissector_mutex);
+#include <linux/bpf-netns.h>
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
@@ -70,54 +69,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
}
EXPORT_SYMBOL(skb_flow_dissector_init);
-int skb_flow_dissector_prog_query(const union bpf_attr *attr,
- union bpf_attr __user *uattr)
-{
- __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
- u32 prog_id, prog_cnt = 0, flags = 0;
- struct bpf_prog *attached;
- struct net *net;
-
- if (attr->query.query_flags)
- return -EINVAL;
-
- net = get_net_ns_by_fd(attr->query.target_fd);
- if (IS_ERR(net))
- return PTR_ERR(net);
-
- rcu_read_lock();
- attached = rcu_dereference(net->flow_dissector_prog);
- if (attached) {
- prog_cnt = 1;
- prog_id = attached->aux->id;
- }
- rcu_read_unlock();
-
- put_net(net);
-
- if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
- return -EFAULT;
- if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
- return -EFAULT;
-
- if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
- return 0;
-
- if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
- return -EFAULT;
-
- return 0;
-}
-
-int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
- struct bpf_prog *prog)
+#ifdef CONFIG_BPF_SYSCALL
+int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog)
{
+ enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
struct bpf_prog *attached;
- struct net *net;
- int ret = 0;
-
- net = current->nsproxy->net_ns;
- mutex_lock(&flow_dissector_mutex);
if (net == &init_net) {
/* BPF flow dissector in the root namespace overrides
@@ -130,70 +86,29 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
for_each_net(ns) {
if (ns == &init_net)
continue;
- if (rcu_access_pointer(ns->flow_dissector_prog)) {
- ret = -EEXIST;
- goto out;
- }
+ if (rcu_access_pointer(ns->bpf.progs[type]))
+ return -EEXIST;
}
} else {
/* Make sure root flow dissector is not attached
* when attaching to the non-root namespace.
*/
- if (rcu_access_pointer(init_net.flow_dissector_prog)) {
- ret = -EEXIST;
- goto out;
- }
+ if (rcu_access_pointer(init_net.bpf.progs[type]))
+ return -EEXIST;
}
- attached = rcu_dereference_protected(net->flow_dissector_prog,
- lockdep_is_held(&flow_dissector_mutex));
- if (attached == prog) {
+ attached = rcu_dereference_protected(net->bpf.progs[type],
+ lockdep_is_held(&netns_bpf_mutex));
+ if (attached == prog)
/* The same program cannot be attached twice */
- ret = -EINVAL;
- goto out;
- }
- rcu_assign_pointer(net->flow_dissector_prog, prog);
+ return -EINVAL;
+
+ rcu_assign_pointer(net->bpf.progs[type], prog);
if (attached)
bpf_prog_put(attached);
-out:
- mutex_unlock(&flow_dissector_mutex);
- return ret;
-}
-
-static int flow_dissector_bpf_prog_detach(struct net *net)
-{
- struct bpf_prog *attached;
-
- mutex_lock(&flow_dissector_mutex);
- attached = rcu_dereference_protected(net->flow_dissector_prog,
- lockdep_is_held(&flow_dissector_mutex));
- if (!attached) {
- mutex_unlock(&flow_dissector_mutex);
- return -ENOENT;
- }
- RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
- bpf_prog_put(attached);
- mutex_unlock(&flow_dissector_mutex);
return 0;
}
-
-int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
-{
- return flow_dissector_bpf_prog_detach(current->nsproxy->net_ns);
-}
-
-static void __net_exit flow_dissector_pernet_pre_exit(struct net *net)
-{
- /* We're not racing with attach/detach because there are no
- * references to netns left when pre_exit gets called.
- */
- if (rcu_access_pointer(net->flow_dissector_prog))
- flow_dissector_bpf_prog_detach(net);
-}
-
-static struct pernet_operations flow_dissector_pernet_ops __net_initdata = {
- .pre_exit = flow_dissector_pernet_pre_exit,
-};
+#endif /* CONFIG_BPF_SYSCALL */
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
@@ -1044,11 +959,13 @@ bool __skb_flow_dissect(const struct net *net,
WARN_ON_ONCE(!net);
if (net) {
+ enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
+
rcu_read_lock();
- attached = rcu_dereference(init_net.flow_dissector_prog);
+ attached = rcu_dereference(init_net.bpf.progs[type]);
if (!attached)
- attached = rcu_dereference(net->flow_dissector_prog);
+ attached = rcu_dereference(net->bpf.progs[type]);
if (attached) {
struct bpf_flow_keys flow_keys;
@@ -1869,7 +1786,6 @@ static int __init init_default_flow_dissectors(void)
skb_flow_dissector_init(&flow_keys_basic_dissector,
flow_keys_basic_dissector_keys,
ARRAY_SIZE(flow_keys_basic_dissector_keys));
-
- return register_pernet_subsys(&flow_dissector_pernet_ops);
+ return 0;
}
core_initcall(init_default_flow_dissectors);