From 09584b406742413ac4c8d7e030374d4daa045b69 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 2 Feb 2018 22:37:15 -0800 Subject: bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y With CONFIG_BPF_JIT_ALWAYS_ON is defined in the config file, tools/testing/selftests/bpf/test_kmod.sh failed like below: [root@localhost bpf]# ./test_kmod.sh sysctl: setting key "net.core.bpf_jit_enable": Invalid argument [ JIT enabled:0 hardened:0 ] [ 132.175681] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096 [ 132.458834] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed] [ JIT enabled:1 hardened:0 ] [ 133.456025] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096 [ 133.730935] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed] [ JIT enabled:1 hardened:1 ] [ 134.769730] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096 [ 135.050864] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed] [ JIT enabled:1 hardened:2 ] [ 136.442882] test_bpf: #297 BPF_MAXINSNS: Jump, gap, jump, ... FAIL to prog_create err=-524 len=4096 [ 136.821810] test_bpf: Summary: 348 PASSED, 1 FAILED, [340/340 JIT'ed] [root@localhost bpf]# The test_kmod.sh load/remove test_bpf.ko multiple times with different settings for sysctl net.core.bpf_jit_{enable,harden}. The failed test #297 of test_bpf.ko is designed such that JIT always fails. Commit 290af86629b2 (bpf: introduce BPF_JIT_ALWAYS_ON config) introduced the following tightening logic: ... if (!bpf_prog_is_dev_bound(fp->aux)) { fp = bpf_int_jit_compile(fp); #ifdef CONFIG_BPF_JIT_ALWAYS_ON if (!fp->jited) { *err = -ENOTSUPP; return fp; } #endif ... With this logic, Test #297 always gets return value -ENOTSUPP when CONFIG_BPF_JIT_ALWAYS_ON is defined, causing the test failure. This patch fixed the failure by marking Test #297 as expected failure when CONFIG_BPF_JIT_ALWAYS_ON is defined. Fixes: 290af86629b2 (bpf: introduce BPF_JIT_ALWAYS_ON config) Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- lib/test_bpf.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 4cd9ea9b3449..b4e22345963f 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -83,6 +83,7 @@ struct bpf_test { __u32 result; } test[MAX_SUBTESTS]; int (*fill_helper)(struct bpf_test *self); + int expected_errcode; /* used when FLAG_EXPECTED_FAIL is set in the aux */ __u8 frag_data[MAX_DATA]; int stack_depth; /* for eBPF only, since tests don't call verifier */ }; @@ -2026,7 +2027,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: div_k_0", @@ -2036,7 +2039,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: unknown insn", @@ -2047,7 +2052,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: out of range spill/fill", @@ -2057,7 +2064,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "JUMPS + HOLES", @@ -2149,6 +2158,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: LDX + RET X", @@ -2159,6 +2170,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { /* Mainly checking JIT here. */ "M[]: alt STX + LDX", @@ -2333,6 +2346,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { /* Passes checker but fails during runtime. */ "LD [SKF_AD_OFF-1]", @@ -5395,6 +5410,7 @@ static struct bpf_test tests[] = { { }, { }, .fill_helper = bpf_fill_maxinsns4, + .expected_errcode = -EINVAL, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Very long jump", @@ -5450,10 +5466,15 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: Jump, gap, jump, ...", { }, +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, +#else CLASSIC | FLAG_NO_DATA, +#endif { }, { { 0, 0xababcbac } }, .fill_helper = bpf_fill_maxinsns11, + .expected_errcode = -ENOTSUPP, }, { "BPF_MAXINSNS: ld_abs+get_processor_id", @@ -6344,7 +6365,7 @@ static struct bpf_prog *generate_filter(int which, int *err) *err = bpf_prog_create(&fp, &fprog); if (tests[which].aux & FLAG_EXPECTED_FAIL) { - if (*err == -EINVAL) { + if (*err == tests[which].expected_errcode) { pr_cont("PASS\n"); /* Verifier rejected filter as expected. */ *err = 0; -- cgit v1.2.3 From 7b4eb53d95c30bdc55c44647c6968f24227fd1ba Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 5 Feb 2018 16:25:24 -0800 Subject: tools/bpf: fix batch-mode test failure of test_xdp_redirect.sh The tests at tools/testing/selftests/bpf can run in patch mode, e.g., make -C tools/testing/selftests/bpf run_tests With the batch mode, I experimented intermittent test failure of test_xdp_redirect.sh. .... selftests: test_xdp_redirect [PASS] selftests: test_xdp_redirect.sh [PASS] RTNETLINK answers: File exists selftests: test_xdp_meta [FAILED] selftests: test_xdp_meta.sh [FAIL] .... The following illustrates what caused the failure: (1). test_xdp_redirect creates veth pairs (veth1,veth11) and (veth2,veth22), and assign veth11 and veth22 to namespace ns1 and ns2 respectively. (2). at the end of test_xdp_redirect test, ns1 and ns2 are deleted. During this process, the deletion of actual namespace resources, including deletion of veth1{1} and veth2{2}, is put into a workqueue to be processed asynchronously. (3). test_xdp_meta tries to create veth pair (veth1, veth2). The previous veth deletions in step (2) have not finished yet, and veth1 or veth2 may be still valid in the kernel, thus causing the failure. The fix is to explicitly delete the veth pair before test_xdp_redirect exits. Only one end of veth needs deletion as the kernel will delete the other end automatically. Also test_xdp_meta is also fixed in similar manner to avoid future potential issues. Fixes: 996139e801fd ("selftests: bpf: add a test for XDP redirect") Fixes: 22c8852624fc ("bpf: improve selftests and add tests for meta pointer") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_xdp_meta.sh | 1 + tools/testing/selftests/bpf/test_xdp_redirect.sh | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index 307aa856cee3..637fcf4fe4e3 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -9,6 +9,7 @@ cleanup() fi set +e + ip link del veth1 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index 344a3656dea6..c4b17e08d431 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -19,6 +19,8 @@ cleanup() fi set +e + ip link del veth1 2> /dev/null + ip link del veth2 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } -- cgit v1.2.3 From b11a632c442eef34a0afeba61fab923241f317e9 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 5 Feb 2018 10:17:43 -0800 Subject: net: add a UID to use for ULP socket assignment Create a UID field and enum that can be used to assign ULPs to sockets. This saves a set of string comparisons if the ULP id is known. For sockmap, which is added in the next patches, a ULP is used to hook into TCP sockets close state. In this case the ULP being added is done at map insert time and the ULP is known and done on the kernel side. In this case the named lookup is not needed. Because we don't want to expose psock internals to user space socket options a user visible flag is also added. For TLS this is set for BPF it will be cleared. Alos remove pr_notice, user gets an error code back and should check that rather than rely on logs. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/net/tcp.h | 6 ++++++ net/ipv4/tcp_ulp.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++----- net/tls/tls_main.c | 2 ++ 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 58278669cc55..a58292d31e12 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1983,6 +1983,10 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); #define TCP_ULP_MAX 128 #define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) +enum { + TCP_ULP_TLS, +}; + struct tcp_ulp_ops { struct list_head list; @@ -1991,7 +1995,9 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); + int uid; char name[TCP_ULP_NAME_MAX]; + bool user_visible; struct module *owner; }; int tcp_register_ulp(struct tcp_ulp_ops *type); diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 6bb9e14c710a..622caa4039e0 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -29,6 +29,18 @@ static struct tcp_ulp_ops *tcp_ulp_find(const char *name) return NULL; } +static struct tcp_ulp_ops *tcp_ulp_find_id(const int ulp) +{ + struct tcp_ulp_ops *e; + + list_for_each_entry_rcu(e, &tcp_ulp_list, list) { + if (e->uid == ulp) + return e; + } + + return NULL; +} + static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) { const struct tcp_ulp_ops *ulp = NULL; @@ -51,6 +63,18 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) return ulp; } +static const struct tcp_ulp_ops *__tcp_ulp_lookup(const int uid) +{ + const struct tcp_ulp_ops *ulp; + + rcu_read_lock(); + ulp = tcp_ulp_find_id(uid); + if (!ulp || !try_module_get(ulp->owner)) + ulp = NULL; + rcu_read_unlock(); + return ulp; +} + /* Attach new upper layer protocol to the list * of available protocols. */ @@ -59,13 +83,10 @@ int tcp_register_ulp(struct tcp_ulp_ops *ulp) int ret = 0; spin_lock(&tcp_ulp_list_lock); - if (tcp_ulp_find(ulp->name)) { - pr_notice("%s already registered or non-unique name\n", - ulp->name); + if (tcp_ulp_find(ulp->name)) ret = -EEXIST; - } else { + else list_add_tail_rcu(&ulp->list, &tcp_ulp_list); - } spin_unlock(&tcp_ulp_list_lock); return ret; @@ -124,6 +145,34 @@ int tcp_set_ulp(struct sock *sk, const char *name) if (!ulp_ops) return -ENOENT; + if (!ulp_ops->user_visible) { + module_put(ulp_ops->owner); + return -ENOENT; + } + + err = ulp_ops->init(sk); + if (err) { + module_put(ulp_ops->owner); + return err; + } + + icsk->icsk_ulp_ops = ulp_ops; + return 0; +} + +int tcp_set_ulp_id(struct sock *sk, int ulp) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_ulp_ops *ulp_ops; + int err; + + if (icsk->icsk_ulp_ops) + return -EEXIST; + + ulp_ops = __tcp_ulp_lookup(ulp); + if (!ulp_ops) + return -ENOENT; + err = ulp_ops->init(sk); if (err) { module_put(ulp_ops->owner); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 736719c8314e..b0d5fcea47e7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -484,6 +484,8 @@ out: static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .name = "tls", + .uid = TCP_ULP_TLS, + .user_visible = true, .owner = THIS_MODULE, .init = tls_init, }; -- cgit v1.2.3 From 1aa12bdf1bfb95db7e75bfecf0e39a65f4e8fbf8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 5 Feb 2018 10:17:49 -0800 Subject: bpf: sockmap, add sock close() hook to remove socks The selftests test_maps program was leaving dangling BPF sockmap programs around because not all psock elements were removed from the map. The elements in turn hold a reference on the BPF program they are attached to causing BPF programs to stay open even after test_maps has completed. The original intent was that sk_state_change() would be called when TCP socks went through TCP_CLOSE state. However, because socks may be in SOCK_DEAD state or the sock may be a listening socket the event is not always triggered. To resolve this use the ULP infrastructure and register our own proto close() handler. This fixes the above case. Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Reported-by: Prashant Bhole Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/net/tcp.h | 2 + kernel/bpf/sockmap.c | 168 +++++++++++++++++++++++++++++++-------------------- 2 files changed, 103 insertions(+), 67 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index a58292d31e12..e3fc667f9ac2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1985,6 +1985,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); enum { TCP_ULP_TLS, + TCP_ULP_BPF, }; struct tcp_ulp_ops { @@ -2003,6 +2004,7 @@ struct tcp_ulp_ops { int tcp_register_ulp(struct tcp_ulp_ops *type); void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); +int tcp_set_ulp_id(struct sock *sk, const int ulp); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 0314d1783d77..bd4a6d9c6709 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -86,9 +86,10 @@ struct smap_psock { struct work_struct tx_work; struct work_struct gc_work; + struct proto *sk_proto; + void (*save_close)(struct sock *sk, long timeout); void (*save_data_ready)(struct sock *sk); void (*save_write_space)(struct sock *sk); - void (*save_state_change)(struct sock *sk); }; static inline struct smap_psock *smap_psock_sk(const struct sock *sk) @@ -96,12 +97,102 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } +static struct proto tcp_bpf_proto; +static int bpf_tcp_init(struct sock *sk) +{ + struct smap_psock *psock; + + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + return -EINVAL; + } + + if (unlikely(psock->sk_proto)) { + rcu_read_unlock(); + return -EBUSY; + } + + psock->save_close = sk->sk_prot->close; + psock->sk_proto = sk->sk_prot; + sk->sk_prot = &tcp_bpf_proto; + rcu_read_unlock(); + return 0; +} + +static void bpf_tcp_release(struct sock *sk) +{ + struct smap_psock *psock; + + rcu_read_lock(); + psock = smap_psock_sk(sk); + + if (likely(psock)) { + sk->sk_prot = psock->sk_proto; + psock->sk_proto = NULL; + } + rcu_read_unlock(); +} + +static void smap_release_sock(struct smap_psock *psock, struct sock *sock); + +static void bpf_tcp_close(struct sock *sk, long timeout) +{ + void (*close_fun)(struct sock *sk, long timeout); + struct smap_psock_map_entry *e, *tmp; + struct smap_psock *psock; + struct sock *osk; + + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + return sk->sk_prot->close(sk, timeout); + } + + /* The psock may be destroyed anytime after exiting the RCU critial + * section so by the time we use close_fun the psock may no longer + * be valid. However, bpf_tcp_close is called with the sock lock + * held so the close hook and sk are still valid. + */ + close_fun = psock->save_close; + + write_lock_bh(&sk->sk_callback_lock); + list_for_each_entry_safe(e, tmp, &psock->maps, list) { + osk = cmpxchg(e->entry, sk, NULL); + if (osk == sk) { + list_del(&e->list); + smap_release_sock(psock, sk); + } + } + write_unlock_bh(&sk->sk_callback_lock); + rcu_read_unlock(); + close_fun(sk, timeout); +} + enum __sk_action { __SK_DROP = 0, __SK_PASS, __SK_REDIRECT, }; +static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = { + .name = "bpf_tcp", + .uid = TCP_ULP_BPF, + .user_visible = false, + .owner = NULL, + .init = bpf_tcp_init, + .release = bpf_tcp_release, +}; + +static int bpf_tcp_ulp_register(void) +{ + tcp_bpf_proto = tcp_prot; + tcp_bpf_proto.close = bpf_tcp_close; + return tcp_register_ulp(&bpf_tcp_ulp_ops); +} + static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) { struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict); @@ -166,68 +257,6 @@ static void smap_report_sk_error(struct smap_psock *psock, int err) sk->sk_error_report(sk); } -static void smap_release_sock(struct smap_psock *psock, struct sock *sock); - -/* Called with lock_sock(sk) held */ -static void smap_state_change(struct sock *sk) -{ - struct smap_psock_map_entry *e, *tmp; - struct smap_psock *psock; - struct socket_wq *wq; - struct sock *osk; - - rcu_read_lock(); - - /* Allowing transitions into an established syn_recv states allows - * for early binding sockets to a smap object before the connection - * is established. - */ - switch (sk->sk_state) { - case TCP_SYN_SENT: - case TCP_SYN_RECV: - case TCP_ESTABLISHED: - break; - case TCP_CLOSE_WAIT: - case TCP_CLOSING: - case TCP_LAST_ACK: - case TCP_FIN_WAIT1: - case TCP_FIN_WAIT2: - case TCP_LISTEN: - break; - case TCP_CLOSE: - /* Only release if the map entry is in fact the sock in - * question. There is a case where the operator deletes - * the sock from the map, but the TCP sock is closed before - * the psock is detached. Use cmpxchg to verify correct - * sock is removed. - */ - psock = smap_psock_sk(sk); - if (unlikely(!psock)) - break; - write_lock_bh(&sk->sk_callback_lock); - list_for_each_entry_safe(e, tmp, &psock->maps, list) { - osk = cmpxchg(e->entry, sk, NULL); - if (osk == sk) { - list_del(&e->list); - smap_release_sock(psock, sk); - } - } - write_unlock_bh(&sk->sk_callback_lock); - break; - default: - psock = smap_psock_sk(sk); - if (unlikely(!psock)) - break; - smap_report_sk_error(psock, EPIPE); - break; - } - - wq = rcu_dereference(sk->sk_wq); - if (skwq_has_sleeper(wq)) - wake_up_interruptible_all(&wq->wait); - rcu_read_unlock(); -} - static void smap_read_sock_strparser(struct strparser *strp, struct sk_buff *skb) { @@ -322,10 +351,8 @@ static void smap_stop_sock(struct smap_psock *psock, struct sock *sk) return; sk->sk_data_ready = psock->save_data_ready; sk->sk_write_space = psock->save_write_space; - sk->sk_state_change = psock->save_state_change; psock->save_data_ready = NULL; psock->save_write_space = NULL; - psock->save_state_change = NULL; strp_stop(&psock->strp); psock->strp_enabled = false; } @@ -350,6 +377,7 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock) if (psock->refcnt) return; + tcp_cleanup_ulp(sock); smap_stop_sock(psock, sock); clear_bit(SMAP_TX_RUNNING, &psock->state); rcu_assign_sk_user_data(sock, NULL); @@ -427,10 +455,8 @@ static void smap_start_sock(struct smap_psock *psock, struct sock *sk) return; psock->save_data_ready = sk->sk_data_ready; psock->save_write_space = sk->sk_write_space; - psock->save_state_change = sk->sk_state_change; sk->sk_data_ready = smap_data_ready; sk->sk_write_space = smap_write_space; - sk->sk_state_change = smap_state_change; psock->strp_enabled = true; } @@ -509,6 +535,10 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) if (attr->value_size > KMALLOC_MAX_SIZE) return ERR_PTR(-E2BIG); + err = bpf_tcp_ulp_register(); + if (err && err != -EEXIST) + return ERR_PTR(err); + stab = kzalloc(sizeof(*stab), GFP_USER); if (!stab) return ERR_PTR(-ENOMEM); @@ -754,6 +784,10 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops, goto out_progs; } + err = tcp_set_ulp_id(sock, TCP_ULP_BPF); + if (err) + goto out_progs; + set_bit(SMAP_TX_RUNNING, &psock->state); } -- cgit v1.2.3 From 3d9e952697de89b53227f06d4241f275eb99cfc4 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 5 Feb 2018 10:17:54 -0800 Subject: bpf: sockmap, fix leaking maps with attached but not detached progs When a program is attached to a map we increment the program refcnt to ensure that the program is not removed while it is potentially being referenced from sockmap side. However, if this same program also references the map (this is a reasonably common pattern in my programs) then the verifier will also increment the maps refcnt from the verifier. This is to ensure the map doesn't get garbage collected while the program has a reference to it. So we are left in a state where the map holds the refcnt on the program stopping it from being removed and releasing the map refcnt. And vice versa the program holds a refcnt on the map stopping it from releasing the refcnt on the prog. All this is fine as long as users detach the program while the map fd is still around. But, if the user omits this detach command we are left with a dangling map we can no longer release. To resolve this when the map fd is released decrement the program references and remove any reference from the map to the program. This fixes the issue with possibly dangling map and creates a user side API constraint. That is, the map fd must be held open for programs to be attached to a map. Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index bd4a6d9c6709..48c33417d13c 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -620,11 +620,6 @@ static void sock_map_free(struct bpf_map *map) } rcu_read_unlock(); - if (stab->bpf_verdict) - bpf_prog_put(stab->bpf_verdict); - if (stab->bpf_parse) - bpf_prog_put(stab->bpf_parse); - sock_map_remove_complete(stab); } @@ -900,6 +895,19 @@ static int sock_map_update_elem(struct bpf_map *map, return err; } +static void sock_map_release(struct bpf_map *map, struct file *map_file) +{ + struct bpf_stab *stab = container_of(map, struct bpf_stab, map); + struct bpf_prog *orig; + + orig = xchg(&stab->bpf_parse, NULL); + if (orig) + bpf_prog_put(orig); + orig = xchg(&stab->bpf_verdict, NULL); + if (orig) + bpf_prog_put(orig); +} + const struct bpf_map_ops sock_map_ops = { .map_alloc = sock_map_alloc, .map_free = sock_map_free, @@ -907,6 +915,7 @@ const struct bpf_map_ops sock_map_ops = { .map_get_next_key = sock_map_get_next_key, .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_map_delete_elem, + .map_release = sock_map_release, }; BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, -- cgit v1.2.3 From 035d808f7cfd578dc210b584beb17e365f34d39a Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Wed, 7 Feb 2018 23:45:34 +0530 Subject: selftests: bpf: test_kmod.sh: check the module path before insmod test_kmod.sh reported false failure when module not present. Check test_bpf.ko is present in the path before loading it. Two cases to be addressed here, In the development process of test_bpf.c unit testing will be done by developers by using "insmod $SRC_TREE/lib/test_bpf.ko" On the other hand testers run full tests by installing modules on device under test (DUT) and followed by modprobe to insert the modules accordingly. Signed-off-by: Naresh Kamboju Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_kmod.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index ed4774d8d6ed..35669ccd4d23 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -10,9 +10,21 @@ test_run() echo "[ JIT enabled:$1 hardened:$2 ]" dmesg -C - insmod $SRC_TREE/lib/test_bpf.ko 2> /dev/null - if [ $? -ne 0 ]; then - rc=1 + if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then + insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null + if [ $? -ne 0 ]; then + rc=1 + fi + else + # Use modprobe dry run to check for missing test_bpf module + if ! /sbin/modprobe -q -n test_bpf; then + echo "test_bpf: [SKIP]" + elif /sbin/modprobe -q test_bpf; then + echo "test_bpf: ok" + else + echo "test_bpf: [FAIL]" + rc=1 + fi fi rmmod test_bpf 2> /dev/null dmesg | grep FAIL -- cgit v1.2.3 From b7d99235473ad3a550f8eb05bd4469edadf1c8e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 7 Feb 2018 20:27:12 -0800 Subject: nfp: bpf: fix immed relocation for larger offsets Immed relocation is missing a shift which means for larger offsets the lower and higher part of the address would be ORed together. Fixes: ce4ebfd859c3 ("nfp: bpf: add helpers for updating immediate instructions") Signed-off-by: Jakub Kicinski Reviewed-by: Jiong Wang Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/nfp_asm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index 3f6952b66a49..1e597600c693 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -107,7 +107,7 @@ u16 immed_get_value(u64 instr) if (!unreg_is_imm(reg)) reg = FIELD_GET(OP_IMMED_B_SRC, instr); - return (reg & 0xff) | FIELD_GET(OP_IMMED_IMM, instr); + return (reg & 0xff) | FIELD_GET(OP_IMMED_IMM, instr) << 8; } void immed_set_value(u64 *instr, u16 immed) -- cgit v1.2.3 From 0badd331491097cc3702d05a6dd0264a434712b2 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:13 -0800 Subject: libbpf: complete list of strings for guessing program type It seems that the type guessing feature for libbpf, based on the name of the ELF section the program is located in, was inspired from samples/bpf/prog_load.c, which was not used by any sample for loading programs of certain types such as TC actions and classifiers, or LWT-related types. As a consequence, libbpf is not able to guess the type of such programs and to load them automatically if type is not provided to the `bpf_load_prog()` function. Add ELF section names associated to those eBPF program types so that they can be loaded with e.g. bpftool as well. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 71ddc481f349..c64840365433 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1816,12 +1816,17 @@ static const struct { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), + BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK), BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE), + BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), + BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), + BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), }; -- cgit v1.2.3 From 92426820f7616caebdf6ba665f749102a670b3d4 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:14 -0800 Subject: tools: bpftool: exit doc Makefile early if rst2man is not available If rst2man is not available on the system, running `make doc` from the bpftool directory fails with an error message. However, it creates empty manual pages (.8 files in this case). A subsequent call to `make doc-install` would then succeed and install those empty man pages on the system. To prevent this, raise a Makefile error and exit immediately if rst2man is not available before generating the pages from the rst documentation. Fixes: ff69c21a85a4 ("tools: bpftool: add documentation") Reported-by: Jason van Aaardt Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index c462a928e03d..a9d47c1558bb 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -23,7 +23,12 @@ DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8)) man: man8 man8: $(DOC_MAN8) +RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) + $(OUTPUT)%.8: %.rst +ifndef RST2MAN_DEP + $(error "rst2man not found, but required to generate man pages") +endif $(QUIET_GEN)rst2man $< > $@ clean: -- cgit v1.2.3 From 2148481d5d3111e5e9aa5adc1905fa3539e548c8 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:15 -0800 Subject: tools: bpftool: make syntax for program map update explicit in man page Specify in the documentation that when using bpftool to update a map of type BPF_MAP_TYPE_PROG_ARRAY, the syntax for the program used as a value should use the "id|tag|pinned" keywords convention, as used with "bpftool prog" commands. Fixes: ff69c21a85a4 ("tools: bpftool: add documentation") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 0ab32b312aec..457e868bd32f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -31,7 +31,8 @@ MAP COMMANDS | **bpftool** **map help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } -| *VALUE* := { *BYTES* | *MAP* | *PROGRAM* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *VALUE* := { *BYTES* | *MAP* | *PROG* } | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } DESCRIPTION -- cgit v1.2.3 From 55f3538c4923e9dfca132e99ebec370e8094afda Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:16 -0800 Subject: tools: bpftool: add bash completion for `bpftool prog load` Add bash completion for bpftool command `prog load`. Completion for this command is easy, as it only takes existing file paths as arguments. Fixes: 49a086c201a9 ("bpftool: implement prog load command") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/bash-completion/bpftool | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 0137866bb8f6..17d246418348 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -230,10 +230,14 @@ _bpftool() fi return 0 ;; + load) + _filedir + return 0 + ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'dump help pin show list' -- \ - "$cur" ) ) + COMPREPLY=( $( compgen -W 'dump help pin load \ + show list' -- "$cur" ) ) ;; esac ;; -- cgit v1.2.3 From a827a164b98be2acba6a2e7559643ac9a5745086 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Feb 2018 20:27:17 -0800 Subject: tools: bpftool: add bash completion for cgroup commands Add bash completion for "bpftool cgroup" command family. While at it, also fix the formatting of some keywords in the man page for cgroups. Fixes: 5ccda64d38cc ("bpftool: implement cgroup bpf operations") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 4 +- tools/bpf/bpftool/bash-completion/bpftool | 64 ++++++++++++++++++++-- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 2fe2a1bdbe3e..0e4e923235b6 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -26,8 +26,8 @@ MAP COMMANDS | **bpftool** **cgroup help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *ATTACH_TYPE* := { *ingress* | *egress* | *sock_create* | *sock_ops* | *device* } -| *ATTACH_FLAGS* := { *multi* | *override* } +| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** } +| *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 17d246418348..08719c54a614 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -52,16 +52,24 @@ _bpftool_once_attr() done } -# Takes a list of words in argument; adds them all to COMPREPLY if none of them -# is already present on the command line. Returns no value. -_bpftool_one_of_list() +# Takes a list of words as argument; if any of those words is present on the +# command line, return 0. Otherwise, return 1. +_bpftool_search_list() { local w idx for w in $*; do for (( idx=3; idx < ${#words[@]}-1; idx++ )); do - [[ $w == ${words[idx]} ]] && return 1 + [[ $w == ${words[idx]} ]] && return 0 done done + return 1 +} + +# Takes a list of words in argument; adds them all to COMPREPLY if none of them +# is already present on the command line. Returns no value. +_bpftool_one_of_list() +{ + _bpftool_search_list $* && return 1 COMPREPLY+=( $( compgen -W "$*" -- "$cur" ) ) } @@ -351,6 +359,54 @@ _bpftool() ;; esac ;; + cgroup) + case $command in + show|list) + _filedir + return 0 + ;; + attach|detach) + local ATTACH_TYPES='ingress egress sock_create sock_ops \ + device' + local ATTACH_FLAGS='multi override' + local PROG_TYPE='id pinned tag' + case $prev in + $command) + _filedir + return 0 + ;; + ingress|egress|sock_create|sock_ops|device) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ + "$cur" ) ) + return 0 + ;; + id) + _bpftool_get_prog_ids + return 0 + ;; + *) + if ! _bpftool_search_list "$ATTACH_TYPES"; then + COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \ + "$cur" ) ) + elif [[ "$command" == "attach" ]]; then + # We have an attach type on the command line, + # but it is not the previous word, or + # "id|pinned|tag" (we already checked for + # that). This should only leave the case when + # we need attach flags for "attach" commamnd. + _bpftool_one_of_list "$ATTACH_FLAGS" + fi + return 0 + ;; + esac + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help attach detach \ + show list' -- "$cur" ) ) + ;; + esac + ;; esac } && complete -F _bpftool bpftool -- cgit v1.2.3 From 8c88181ed452707f3815827225517b1964463b95 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:12 +0100 Subject: bpf: Sync kernel ABI header with tooling header for bpf_common.h I recently fixed up a lot of commits that forgot to keep the tooling headers in sync. And then I forgot to do the same thing in commit cb5f7334d479 ("bpf: add comments to BPF ld/ldx sizes"). Let correct that before people notice ;-). Lawrence did partly fix/sync this for bpf.h in commit d6d4f60c3a09 ("bpf: add selftest for tcpbpf"). Fixes: cb5f7334d479 ("bpf: add comments to BPF ld/ldx sizes") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf_common.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/bpf_common.h b/tools/include/uapi/linux/bpf_common.h index 18be90725ab0..ee97668bdadb 100644 --- a/tools/include/uapi/linux/bpf_common.h +++ b/tools/include/uapi/linux/bpf_common.h @@ -15,9 +15,10 @@ /* ld/ldx fields */ #define BPF_SIZE(code) ((code) & 0x18) -#define BPF_W 0x00 -#define BPF_H 0x08 -#define BPF_B 0x10 +#define BPF_W 0x00 /* 32-bit */ +#define BPF_H 0x08 /* 16-bit */ +#define BPF_B 0x10 /* 8-bit */ +/* eBPF BPF_DW 0x18 64-bit */ #define BPF_MODE(code) ((code) & 0xe0) #define BPF_IMM 0x00 #define BPF_ABS 0x20 -- cgit v1.2.3 From 077c066a6c5445423147496e6c9b9a2c2d2ee762 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:17 +0100 Subject: tools/libbpf: improve the pr_debug statements to contain section numbers While debugging a bpf ELF loading issue, I needed to correlate the ELF section number with the failed relocation section reference. Thus, add section numbers/index to the pr_debug. In debug mode, also print section that were skipped. This helped me identify that a section (.eh_frame) was skipped, and this was the reason the relocation section (.rel.eh_frame) could not find that section number. The section numbers corresponds to the readelf tools Section Headers [Nr]. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index c64840365433..0c794f7fc050 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -319,8 +319,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->section_name = strdup(section_name); if (!prog->section_name) { - pr_warning("failed to alloc name for prog under section %s\n", - section_name); + pr_warning("failed to alloc name for prog under section(%d) %s\n", + idx, section_name); goto errout; } @@ -763,29 +763,29 @@ static int bpf_object__elf_collect(struct bpf_object *obj) idx++; if (gelf_getshdr(scn, &sh) != &sh) { - pr_warning("failed to get section header from %s\n", - obj->path); + pr_warning("failed to get section(%d) header from %s\n", + idx, obj->path); err = -LIBBPF_ERRNO__FORMAT; goto out; } name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); if (!name) { - pr_warning("failed to get section name from %s\n", - obj->path); + pr_warning("failed to get section(%d) name from %s\n", + idx, obj->path); err = -LIBBPF_ERRNO__FORMAT; goto out; } data = elf_getdata(scn, 0); if (!data) { - pr_warning("failed to get section data from %s(%s)\n", - name, obj->path); + pr_warning("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); err = -LIBBPF_ERRNO__FORMAT; goto out; } - pr_debug("section %s, size %ld, link %d, flags %lx, type=%d\n", - name, (unsigned long)data->d_size, + pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", + idx, name, (unsigned long)data->d_size, (int)sh.sh_link, (unsigned long)sh.sh_flags, (int)sh.sh_type); @@ -840,6 +840,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.reloc[n].shdr = sh; obj->efile.reloc[n].data = data; } + } else { + pr_debug("skip section(%d) %s\n", idx, name); } if (err) goto out; @@ -1119,8 +1121,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) prog = bpf_object__find_prog_by_idx(obj, idx); if (!prog) { - pr_warning("relocation failed: no %d section\n", - idx); + pr_warning("relocation failed: no section(%d)\n", idx); return -LIBBPF_ERRNO__RELOC; } -- cgit v1.2.3 From 864db336c677c288d81524c30a656804785902f9 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:22 +0100 Subject: selftests/bpf: add test program for loading BPF ELF files V2: Moved program into selftests/bpf from tools/libbpf This program can be used on its own for testing/debugging if a BPF ELF-object file can be loaded with libbpf (from tools/lib/bpf). If something is wrong with the ELF object, the program have a --debug mode that will display the ELF sections and especially the skipped sections. This allows for quickly identifying the problematic ELF section number, which can be corrolated with the readelf tool. The program signal error via return codes, and also have a --quiet mode, which is practical for use in scripts like selftests/bpf. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_libbpf_open.c | 150 +++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_libbpf_open.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 566d6adc172a..3f48da0866ba 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -20,7 +20,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ - sample_map_ret0.o test_tcpbpf_kern.o + sample_map_ret0.o test_tcpbpf_kern.o test_libbpf_open TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c new file mode 100644 index 000000000000..8fcd1c076add --- /dev/null +++ b/tools/testing/selftests/bpf/test_libbpf_open.c @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. + */ +static const char *__doc__ = + "Libbpf test program for loading BPF ELF object files"; + +#include +#include +#include +#include +#include +#include + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h' }, + {"debug", no_argument, NULL, 'D' }, + {"quiet", no_argument, NULL, 'q' }, + {0, 0, NULL, 0 } +}; + +static void usage(char *argv[]) +{ + int i; + + printf("\nDOCUMENTATION:\n%s\n\n", __doc__); + printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]); + printf(" Listing options:\n"); + for (i = 0; long_options[i].name != 0; i++) { + printf(" --%-12s", long_options[i].name); + printf(" short-option: -%c", + long_options[i].val); + printf("\n"); + } + printf("\n"); +} + +#define DEFINE_PRINT_FN(name, enabled) \ +static int libbpf_##name(const char *fmt, ...) \ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + if (enabled) { \ + fprintf(stderr, "[" #name "] "); \ + ret = vfprintf(stderr, fmt, args); \ + } \ + va_end(args); \ + return ret; \ +} +DEFINE_PRINT_FN(warning, 1) +DEFINE_PRINT_FN(info, 1) +DEFINE_PRINT_FN(debug, 1) + +#define EXIT_FAIL_LIBBPF EXIT_FAILURE +#define EXIT_FAIL_OPTION 2 + +int test_walk_progs(struct bpf_object *obj, bool verbose) +{ + struct bpf_program *prog; + int cnt = 0; + + bpf_object__for_each_program(prog, obj) { + cnt++; + if (verbose) + printf("Prog (count:%d) section_name: %s\n", cnt, + bpf_program__title(prog, false)); + } + return 0; +} + +int test_walk_maps(struct bpf_object *obj, bool verbose) +{ + struct bpf_map *map; + int cnt = 0; + + bpf_map__for_each(map, obj) { + cnt++; + if (verbose) + printf("Map (count:%d) name: %s\n", cnt, + bpf_map__name(map)); + } + return 0; +} + +int test_open_file(char *filename, bool verbose) +{ + struct bpf_object *bpfobj = NULL; + long err; + + if (verbose) + printf("Open BPF ELF-file with libbpf: %s\n", filename); + + /* Load BPF ELF object file and check for errors */ + bpfobj = bpf_object__open(filename); + err = libbpf_get_error(bpfobj); + if (err) { + char err_buf[128]; + libbpf_strerror(err, err_buf, sizeof(err_buf)); + if (verbose) + printf("Unable to load eBPF objects in file '%s': %s\n", + filename, err_buf); + return EXIT_FAIL_LIBBPF; + } + test_walk_progs(bpfobj, verbose); + test_walk_maps(bpfobj, verbose); + + if (verbose) + printf("Close BPF ELF-file with libbpf: %s\n", + bpf_object__name(bpfobj)); + bpf_object__close(bpfobj); + + return 0; +} + +int main(int argc, char **argv) +{ + char filename[1024] = { 0 }; + bool verbose = 1; + int longindex = 0; + int opt; + + libbpf_set_print(libbpf_warning, libbpf_info, NULL); + + /* Parse commands line args */ + while ((opt = getopt_long(argc, argv, "hDq", + long_options, &longindex)) != -1) { + switch (opt) { + case 'D': + libbpf_set_print(libbpf_warning, libbpf_info, + libbpf_debug); + break; + case 'q': /* Use in scripting mode */ + verbose = 0; + break; + case 'h': + default: + usage(argv); + return EXIT_FAIL_OPTION; + } + } + if (optind >= argc) { + usage(argv); + printf("ERROR: Expected BPF_FILE argument after options\n"); + return EXIT_FAIL_OPTION; + } + snprintf(filename, sizeof(filename), "%s", argv[optind]); + + return test_open_file(filename, verbose); +} -- cgit v1.2.3 From f09b2e382e9a7053e3ae6f2fb6535efd5760cf5d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:27 +0100 Subject: selftests/bpf: add selftest that use test_libbpf_open This script test_libbpf.sh will be part of the 'make run_tests' invocation, but can also be invoked manually in this directory, and a verbose mode can be enabled via setting the environment variable $VERBOSE like: $ VERBOSE=yes ./test_libbpf.sh The script contains some tests that are commented out, as they currently fail. They are reminders about what we need to improve for the libbpf loader library. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 14 +++++++-- tools/testing/selftests/bpf/test_libbpf.sh | 49 ++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) create mode 100755 tools/testing/selftests/bpf/test_libbpf.sh diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 3f48da0866ba..5c43c187f27c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -13,6 +13,7 @@ endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf -lrt -lpthread +# Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user @@ -20,17 +21,26 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ - sample_map_ret0.o test_tcpbpf_kern.o test_libbpf_open + sample_map_ret0.o test_tcpbpf_kern.o -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ +# Order correspond to 'make run_tests' order +TEST_PROGS := test_kmod.sh \ + test_libbpf.sh \ + test_xdp_redirect.sh \ + test_xdp_meta.sh \ test_offload.py +# Compile but not part of 'make run_tests' +TEST_GEN_PROGS_EXTENDED = test_libbpf_open + include ../lib.mk BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c $(TEST_GEN_PROGS): $(BPFOBJ) +$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a + .PHONY: force # force a rebuild of BPFOBJ when its dependencies are updated diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh new file mode 100755 index 000000000000..d97dc914cd49 --- /dev/null +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +export TESTNAME=test_libbpf + +# Determine selftest success via shell exit code +exit_handler() +{ + if (( $? == 0 )); then + echo "selftests: $TESTNAME [PASS]"; + else + echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2 + echo "selftests: $TESTNAME [FAILED]"; + fi +} + +libbpf_open_file() +{ + LAST_LOADED=$1 + if [ -n "$VERBOSE" ]; then + ./test_libbpf_open $1 + else + ./test_libbpf_open --quiet $1 + fi +} + +# Exit script immediately (well catched by trap handler) if any +# program/thing exits with a non-zero status. +set -e + +# (Use 'trap -l' to list meaning of numbers) +trap exit_handler 0 2 3 6 9 + +libbpf_open_file test_l4lb.o + +# TODO: fix libbpf to load noinline functions +# [warning] libbpf: incorrect bpf_call opcode +#libbpf_open_file test_l4lb_noinline.o + +# TODO: fix test_xdp_meta.c to load with libbpf +# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version +#libbpf_open_file test_xdp_meta.o + +# TODO: fix libbpf to handle .eh_frame +# [warning] libbpf: relocation failed: no section(10) +#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o + +# Success +exit 0 -- cgit v1.2.3 From e3d91b0ca523d53158f435a3e13df7f0cb360ea2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 8 Feb 2018 12:48:32 +0100 Subject: tools/libbpf: handle issues with bpf ELF objects containing .eh_frames V3: More generic skipping of relo-section (suggested by Daniel) If clang >= 4.0.1 is missing the option '-target bpf', it will cause llc/llvm to create two ELF sections for "Exception Frames", with section names '.eh_frame' and '.rel.eh_frame'. The BPF ELF loader library libbpf fails when loading files with these sections. The other in-kernel BPF ELF loader in samples/bpf/bpf_load.c, handle this gracefully. And iproute2 loader also seems to work with these "eh" sections. The issue in libbpf is caused by bpf_object__elf_collect() skipping some sections, and later when performing relocation it will be pointing to a skipped section, as these sections cannot be found by bpf_object__find_prog_by_idx() in bpf_object__collect_reloc(). This is a general issue that also occurs for other sections, like debug sections which are also skipped and can have relo section. As suggested by Daniel. To avoid keeping state about all skipped sections, instead perform a direct qlookup in the ELF object. Lookup the section that the relo-section points to and check if it contains executable machine instructions (denoted by the sh_flags SHF_EXECINSTR). Use this check to also skip irrelevant relo-sections. Note, for samples/bpf/ the '-target bpf' parameter to clang cannot be used due to incompatibility with asm embedded headers, that some of the samples include. This is explained in more details by Yonghong Song in bpf_devel_QA. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0c794f7fc050..97073d649c1a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -742,6 +742,24 @@ bpf_object__init_maps(struct bpf_object *obj) return 0; } +static bool section_have_execinstr(struct bpf_object *obj, int idx) +{ + Elf_Scn *scn; + GElf_Shdr sh; + + scn = elf_getscn(obj->efile.elf, idx); + if (!scn) + return false; + + if (gelf_getshdr(scn, &sh) != &sh) + return false; + + if (sh.sh_flags & SHF_EXECINSTR) + return true; + + return false; +} + static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; @@ -825,6 +843,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (sh.sh_type == SHT_REL) { void *reloc = obj->efile.reloc; int nr_reloc = obj->efile.nr_reloc + 1; + int sec = sh.sh_info; /* points to other section */ + + /* Only do relo for section with exec instructions */ + if (!section_have_execinstr(obj, sec)) { + pr_debug("skip relo %s(%d) for section(%d)\n", + name, idx, sec); + continue; + } reloc = realloc(reloc, sizeof(*obj->efile.reloc) * nr_reloc); -- cgit v1.2.3