From f96afa767baffba7645f5e10998f5178948bb9aa Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Nov 2018 12:28:45 +0000 Subject: selftests/bpf: enable (uncomment) all tests in test_libbpf.sh libbpf is now able to load successfully test_l4lb_noinline.o and samples/bpf/tracex3_kern.o. For the test_l4lb_noinline, uncomment related tests from test_libbpf.c and remove the associated "TODO". For tracex3_kern.o, instead of loading a program from samples/bpf/ that might not have been compiled at this stage, try loading a program from BPF selftests. Since this test case is about loading a program compiled without the "-target bpf" flag, change the Makefile to compile one program accordingly (instead of passing the flag for compiling all programs). Regarding test_xdp_noinline.o: in its current shape the program fails to load because it provides no version section, but the loader needs one. The test was added to make sure that libbpf could load XDP programs even if they do not provide a version number in a dedicated section. But libbpf is already capable of doing that: in our case loading fails because the loader does not know that this is an XDP program (it does not need to, since it does not attach the program). So trying to load test_xdp_noinline.o does not bring much here: just delete this subtest. For the record, the error message obtained with tracex3_kern.o was fixed by commit e3d91b0ca523 ("tools/libbpf: handle issues with bpf ELF objects containing .eh_frames") I have not been abled to reproduce the "libbpf: incorrect bpf_call opcode" error for test_l4lb_noinline.o, even with the version of libbpf present at the time when test_libbpf.sh and test_libbpf_open.c were created. RFC -> v1: - Compile test_xdp without the "-target bpf" flag, and try to load it instead of ../../samples/bpf/tracex3_kern.o. - Delete test_xdp_noinline.o subtest. Cc: Jesper Dangaard Brouer Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 10 ++++++++++ tools/testing/selftests/bpf/test_libbpf.sh | 14 ++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e39dfb4e7970..ecd79b7fb107 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -135,6 +135,16 @@ endif endif endif +# Have one program compiled without "-target bpf" to test whether libbpf loads +# it successfully +$(OUTPUT)/test_xdp.o: test_xdp.c + $(CLANG) $(CLANG_FLAGS) \ + -O2 -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif + $(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh index 156d89f1edcc..2989b2e2d856 100755 --- a/tools/testing/selftests/bpf/test_libbpf.sh +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -33,17 +33,11 @@ trap exit_handler 0 2 3 6 9 libbpf_open_file test_l4lb.o -# TODO: fix libbpf to load noinline functions -# [warning] libbpf: incorrect bpf_call opcode -#libbpf_open_file test_l4lb_noinline.o +# Load a program with BPF-to-BPF calls +libbpf_open_file test_l4lb_noinline.o -# TODO: fix test_xdp_meta.c to load with libbpf -# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version -#libbpf_open_file test_xdp_meta.o - -# TODO: fix libbpf to handle .eh_frame -# [warning] libbpf: relocation failed: no section(10) -#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o +# Load a program compiled without the "-target bpf" flag +libbpf_open_file test_xdp.o # Success exit 0 -- cgit v1.2.3 From 8302b9bd31d29f29dd24dd6b1e1e5682c302c11c Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 7 Nov 2018 12:29:30 +0000 Subject: tools: bpftool: adjust rlimit RLIMIT_MEMLOCK when loading programs, maps The limit for memory locked in the kernel by a process is usually set to 64 kbytes by default. This can be an issue when creating large BPF maps and/or loading many programs. A workaround is to raise this limit for the current process before trying to create a new BPF map. Changing the hard limit requires the CAP_SYS_RESOURCE and can usually only be done by root user (for non-root users, a call to setrlimit fails (and sets errno) and the program simply goes on with its rlimit unchanged). There is no API to get the current amount of memory locked for a user, therefore we cannot raise the limit only when required. One solution, used by bcc, is to try to create the map, and on getting a EPERM error, raising the limit to infinity before giving another try. Another approach, used in iproute2, is to raise the limit in all cases, before trying to create the map. Here we do the same as in iproute2: the rlimit is raised to infinity before trying to load programs or to create maps with bpftool. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 8 ++++++++ tools/bpf/bpftool/main.h | 2 ++ tools/bpf/bpftool/map.c | 2 ++ tools/bpf/bpftool/prog.c | 2 ++ 4 files changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 25af85304ebe..1149565be4b1 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -99,6 +100,13 @@ static bool is_bpffs(char *path) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; } +void set_max_rlimit(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); +} + static int mnt_bpffs(const char *target, char *buff, size_t bufflen) { bool bind_done = false; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 28322ace2856..14857c273bf6 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -100,6 +100,8 @@ bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __noreturn; +void set_max_rlimit(void); + struct pinned_obj_table { DECLARE_HASHTABLE(table, 16); }; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 7bf38f0e152e..101b8a881225 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1140,6 +1140,8 @@ static int do_create(int argc, char **argv) return -1; } + set_max_rlimit(); + fd = bpf_create_map_xattr(&attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 5302ee282409..b9b84553bec4 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -995,6 +995,8 @@ static int do_load(int argc, char **argv) goto err_close_obj; } + set_max_rlimit(); + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); -- cgit v1.2.3 From 0a9ac2e95409169ed42ea66dd4270bc16b868e21 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:34 +0100 Subject: selftests: add GRO support to udp bench rx program And fix a couple of buglets (port option processing, clean termination on SIGINT). This is preparatory work for GRO tests. rfc v2 -> rfc v3: - use ETH_MAX_MTU Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgso_bench_rx.c | 37 ++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index 727cf67a3f75..8f48d7fb32cf 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -31,9 +31,15 @@ #include #include +#ifndef UDP_GRO +#define UDP_GRO 104 +#endif + static int cfg_port = 8000; static bool cfg_tcp; static bool cfg_verify; +static bool cfg_read_all; +static bool cfg_gro_segment; static bool interrupted; static unsigned long packets, bytes; @@ -63,6 +69,8 @@ static void do_poll(int fd) do { ret = poll(&pfd, 1, 10); + if (interrupted) + break; if (ret == -1) error(1, errno, "poll"); if (ret == 0) @@ -70,7 +78,7 @@ static void do_poll(int fd) if (pfd.revents != POLLIN) error(1, errno, "poll: 0x%x expected 0x%x\n", pfd.revents, POLLIN); - } while (!ret && !interrupted); + } while (!ret); } static int do_socket(bool do_tcp) @@ -102,6 +110,8 @@ static int do_socket(bool do_tcp) error(1, errno, "listen"); do_poll(accept_fd); + if (interrupted) + exit(0); fd = accept(accept_fd, NULL, NULL); if (fd == -1) @@ -167,10 +177,10 @@ static void do_verify_udp(const char *data, int len) /* Flush all outstanding datagrams. Verify first few bytes of each. */ static void do_flush_udp(int fd) { - static char rbuf[ETH_DATA_LEN]; + static char rbuf[ETH_MAX_MTU]; int ret, len, budget = 256; - len = cfg_verify ? sizeof(rbuf) : 0; + len = cfg_read_all ? sizeof(rbuf) : 0; while (budget--) { /* MSG_TRUNC will make return value full datagram length */ ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); @@ -178,7 +188,7 @@ static void do_flush_udp(int fd) return; if (ret == -1) error(1, errno, "recv"); - if (len) { + if (len && cfg_verify) { if (ret == 0) error(1, errno, "recv: 0 byte datagram\n"); @@ -192,23 +202,30 @@ static void do_flush_udp(int fd) static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-tv] [-p port]", filepath); + error(1, 0, "Usage: %s [-Grtv] [-p port]", filepath); } static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "ptv")) != -1) { + while ((c = getopt(argc, argv, "Gp:rtv")) != -1) { switch (c) { + case 'G': + cfg_gro_segment = true; + break; case 'p': - cfg_port = htons(strtoul(optarg, NULL, 0)); + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'r': + cfg_read_all = true; break; case 't': cfg_tcp = true; break; case 'v': cfg_verify = true; + cfg_read_all = true; break; } } @@ -227,6 +244,12 @@ static void do_recv(void) fd = do_socket(cfg_tcp); + if (cfg_gro_segment && !cfg_tcp) { + int val = 1; + if (setsockopt(fd, IPPROTO_UDP, UDP_GRO, &val, sizeof(val))) + error(1, errno, "setsockopt UDP_GRO"); + } + treport = gettimeofday_ms() + 1000; do { do_poll(fd); -- cgit v1.2.3 From bd8e1afe6436558de3a7b25eaef34b69d98e5719 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:35 +0100 Subject: selftests: add dummy xdp test helper This trivial XDP program does nothing, but will be used by the next patch to test the GRO path in a net namespace, leveraging the veth XDP implementation. It's added here, despite its 'net' usage, to avoid the duplication of the llc-related makefile boilerplate. rfc v3 -> v1: - move the helper implementation into the bpf directory, don't touch udpgso_bench_rx rfc v2 -> rfc v3: - move 'x' option handling here Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/Makefile | 3 ++- tools/testing/selftests/bpf/xdp_dummy.c | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/xdp_dummy.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e39dfb4e7970..4a54236475ae 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -37,7 +37,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ - test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o + test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \ + xdp_dummy.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ diff --git a/tools/testing/selftests/bpf/xdp_dummy.c b/tools/testing/selftests/bpf/xdp_dummy.c new file mode 100644 index 000000000000..43b0ef1001ed --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_dummy.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include +#include "bpf_helpers.h" + +SEC("xdp_dummy") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From e87f53b4fab72f95952c17f672088e082e8e79ab Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:36 +0100 Subject: selftests: add some benchmark for UDP GRO Run on top of veth pair, using a dummy XDP program to enable the GRO. rfc v3 -> v1: - use ip route to attach the xdp helper to the veth Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/udpgro_bench.sh | 93 +++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100755 tools/testing/selftests/net/udpgro_bench.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 256d82d5fa87..b1bba2f60467 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -7,6 +7,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh +TEST_PROGS += udpgro_bench.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh new file mode 100755 index 000000000000..9ef4b1ee0f76 --- /dev/null +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgro benchmarks + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +cleanup() { + local -r jobs="$(jobs -p)" + local -r ns="$(ip netns list|grep $PEER_NS)" + + [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null + [ -n "$ns" ] && ip netns del $ns 2>/dev/null +} +trap cleanup EXIT + +run_one() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + ip netns add "${PEER_NS}" + ip -netns "${PEER_NS}" link set lo up + ip link add type veth + ip link set dev veth0 up + ip addr add dev veth0 192.168.1.2/24 + ip addr add dev veth0 2001:db8::2/64 nodad + + ip link set dev veth1 netns "${PEER_NS}" + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up + + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} +} + +run_in_netns() { + local -r args=$@ + + ./in_netns.sh $0 __subprocess ${args} +} + +run_udp() { + local -r args=$@ + + echo "udp gso - over veth touching data" + run_in_netns ${args} -S rx + + echo "udp gso and gro - over veth touching data" + run_in_netns ${args} -S rx -G +} + +run_tcp() { + local -r args=$@ + + echo "tcp - over veth touching data" + run_in_netns ${args} -t rx +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 192.168.1.1" + local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + + echo "ipv4" + run_tcp "${ipv4_args}" + run_udp "${ipv4_args}" + + echo "ipv6" + run_tcp "${ipv4_args}" + run_udp "${ipv6_args}" +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. Build bpf selftest first" + exit -1 +fi + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +else + run_in_netns $@ +fi -- cgit v1.2.3 From 3327a9c46352f111697d93d6134e7bf37c6bffca Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:37 +0100 Subject: selftests: add functionals test for UDP GRO Extends the existing udp programs to allow checking for proper GRO aggregation/GSO size, and run the tests via a shell script, using a veth pair with XDP program attached to trigger the GRO code path. rfc v3 -> v1: - use ip route to attach the xdp helper to the veth rfc v2 -> rfc v3: - add missing test program options documentation - fix sporatic test failures (receiver faster than sender) Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/udpgro.sh | 148 ++++++++++++++++++++++++++ tools/testing/selftests/net/udpgro_bench.sh | 8 +- tools/testing/selftests/net/udpgso_bench.sh | 2 +- tools/testing/selftests/net/udpgso_bench_rx.c | 123 ++++++++++++++++++--- tools/testing/selftests/net/udpgso_bench_tx.c | 22 ++-- 6 files changed, 282 insertions(+), 23 deletions(-) create mode 100755 tools/testing/selftests/net/udpgro.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b1bba2f60467..eec359895feb 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -7,7 +7,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh -TEST_PROGS += udpgro_bench.sh +TEST_PROGS += udpgro_bench.sh udpgro.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh new file mode 100755 index 000000000000..e94ef8067173 --- /dev/null +++ b/tools/testing/selftests/net/udpgro.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgro functional tests. + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +cleanup() { + local -r jobs="$(jobs -p)" + local -r ns="$(ip netns list|grep $PEER_NS)" + + [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null + [ -n "$ns" ] && ip netns del $ns 2>/dev/null +} +trap cleanup EXIT + +cfg_veth() { + ip netns add "${PEER_NS}" + ip -netns "${PEER_NS}" link set lo up + ip link add type veth + ip link set dev veth0 up + ip addr add dev veth0 192.168.1.2/24 + ip addr add dev veth0 2001:db8::2/64 nodad + + ip link set dev veth1 netns "${PEER_NS}" + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy +} + +run_one() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + cfg_veth + + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \ + echo "ok" || \ + echo "failed" & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} + wait $(jobs -p) +} + +run_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess $2 rx -G -r $3 +} + +run_one_nat() { + # use 'rx' as separator between sender args and receiver args + local addr1 addr2 pid family="" ipt_cmd=ip6tables + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + if [[ ${tx_args} = *-4* ]]; then + ipt_cmd=iptables + family=-4 + addr1=192.168.1.1 + addr2=192.168.1.3/24 + else + addr1=2001:db8::1 + addr2="2001:db8::3/64 nodad" + fi + + cfg_veth + ip -netns "${PEER_NS}" addr add dev veth1 ${addr2} + + # fool the GRO engine changing the destination address ... + ip netns exec "${PEER_NS}" $ipt_cmd -t nat -I PREROUTING -d ${addr1} -j DNAT --to-destination ${addr2%/*} + + # ... so that GRO will match the UDP_GRO enabled socket, but packets + # will land on the 'plain' one + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & + pid=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${family} -b ${addr2%/*} ${rx_args} && \ + echo "ok" || \ + echo "failed"& + + sleep 0.1 + ./udpgso_bench_tx ${tx_args} + kill -INT $pid + wait $(jobs -p) +} + +run_nat_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess_nat $2 rx -r $3 +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 192.168.1.1" + local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + + echo "ipv4" + run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400" + + # explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1) + # when GRO does not take place + run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1" + + # the GSO packets are aggregated because: + # * veth schedule napi after each xmit + # * segmentation happens in BH context, veth napi poll is delayed after + # the transmission of the last segment + run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720" + run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720" + run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" + + run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + + echo "ipv6" + run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" + run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1" + run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520" + run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452" + run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520" + run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" + + run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. Build bpf selftest first" + exit -1 +fi + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +elif [[ $1 == "__subprocess_nat" ]]; then + shift + run_one_nat $@ +fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 9ef4b1ee0f76..820bc50f6b68 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -18,7 +18,9 @@ run_one() { # use 'rx' as separator between sender args and receiver args local -r all="$@" local -r tx_args=${all%rx*} - local -r rx_args=${all#*rx} + local rx_args=${all#*rx} + + [[ "${tx_args}" == *"-4"* ]] && rx_args="${rx_args} -4" ip netns add "${PEER_NS}" ip -netns "${PEER_NS}" link set lo up @@ -51,10 +53,10 @@ run_udp() { local -r args=$@ echo "udp gso - over veth touching data" - run_in_netns ${args} -S rx + run_in_netns ${args} -S 0 rx echo "udp gso and gro - over veth touching data" - run_in_netns ${args} -S rx -G + run_in_netns ${args} -S 0 rx -G } run_tcp() { diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 99e537ab5ad9..0f0628613f81 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -34,7 +34,7 @@ run_udp() { run_in_netns ${args} echo "udp gso" - run_in_netns ${args} -S + run_in_netns ${args} -S 0 } run_tcp() { diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index 8f48d7fb32cf..0c960f673324 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -40,6 +40,12 @@ static bool cfg_tcp; static bool cfg_verify; static bool cfg_read_all; static bool cfg_gro_segment; +static int cfg_family = PF_INET6; +static int cfg_alen = sizeof(struct sockaddr_in6); +static int cfg_expected_pkt_nr; +static int cfg_expected_pkt_len; +static int cfg_expected_gso_size; +static struct sockaddr_storage cfg_bind_addr; static bool interrupted; static unsigned long packets, bytes; @@ -50,6 +56,29 @@ static void sigint_handler(int signum) interrupted = true; } +static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + static unsigned long gettimeofday_ms(void) { struct timeval tv; @@ -83,10 +112,9 @@ static void do_poll(int fd) static int do_socket(bool do_tcp) { - struct sockaddr_in6 addr = {0}; int fd, val; - fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); + fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); if (fd == -1) error(1, errno, "socket"); @@ -97,10 +125,7 @@ static int do_socket(bool do_tcp) if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val))) error(1, errno, "setsockopt reuseport"); - addr.sin6_family = PF_INET6; - addr.sin6_port = htons(cfg_port); - addr.sin6_addr = in6addr_any; - if (bind(fd, (void *) &addr, sizeof(addr))) + if (bind(fd, (void *)&cfg_bind_addr, cfg_alen)) error(1, errno, "bind"); if (do_tcp) { @@ -174,52 +199,117 @@ static void do_verify_udp(const char *data, int len) } } +static int recv_msg(int fd, char *buf, int len, int *gso_size) +{ + char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cmsg; + uint16_t *gsosizeptr; + int ret; + + iov.iov_base = buf; + iov.iov_len = len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + *gso_size = -1; + ret = recvmsg(fd, &msg, MSG_TRUNC | MSG_DONTWAIT); + if (ret != -1) { + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_UDP + && cmsg->cmsg_type == UDP_GRO) { + gsosizeptr = (uint16_t *) CMSG_DATA(cmsg); + *gso_size = *gsosizeptr; + break; + } + } + } + return ret; +} + /* Flush all outstanding datagrams. Verify first few bytes of each. */ static void do_flush_udp(int fd) { static char rbuf[ETH_MAX_MTU]; - int ret, len, budget = 256; + int ret, len, gso_size, budget = 256; len = cfg_read_all ? sizeof(rbuf) : 0; while (budget--) { /* MSG_TRUNC will make return value full datagram length */ - ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); + if (!cfg_expected_gso_size) + ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); + else + ret = recv_msg(fd, rbuf, len, &gso_size); if (ret == -1 && errno == EAGAIN) - return; + break; if (ret == -1) error(1, errno, "recv"); + if (cfg_expected_pkt_len && ret != cfg_expected_pkt_len) + error(1, 0, "recv: bad packet len, got %d," + " expected %d\n", ret, cfg_expected_pkt_len); if (len && cfg_verify) { if (ret == 0) error(1, errno, "recv: 0 byte datagram\n"); do_verify_udp(rbuf, ret); } + if (cfg_expected_gso_size && cfg_expected_gso_size != gso_size) + error(1, 0, "recv: bad gso size, got %d, expected %d " + "(-1 == no gso cmsg))\n", gso_size, + cfg_expected_gso_size); packets++; bytes += ret; + if (cfg_expected_pkt_nr && packets >= cfg_expected_pkt_nr) + break; } } static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-Grtv] [-p port]", filepath); + error(1, 0, "Usage: %s [-Grtv] [-b addr] [-p port] [-l pktlen] [-n packetnr] [-S gsosize]", filepath); } static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "Gp:rtv")) != -1) { + /* bind to any by default */ + setup_sockaddr(PF_INET6, "::", &cfg_bind_addr); + while ((c = getopt(argc, argv, "4b:Gl:n:p:rS:tv")) != -1) { switch (c) { + case '4': + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr); + break; + case 'b': + setup_sockaddr(cfg_family, optarg, &cfg_bind_addr); + break; case 'G': cfg_gro_segment = true; break; + case 'l': + cfg_expected_pkt_len = strtoul(optarg, NULL, 0); + break; + case 'n': + cfg_expected_pkt_nr = strtoul(optarg, NULL, 0); + break; case 'p': cfg_port = strtoul(optarg, NULL, 0); break; case 'r': cfg_read_all = true; break; + case 'S': + cfg_expected_gso_size = strtol(optarg, NULL, 0); + break; case 't': cfg_tcp = true; break; @@ -240,7 +330,7 @@ static void parse_opts(int argc, char **argv) static void do_recv(void) { unsigned long tnow, treport; - int fd; + int fd, loop = 0; fd = do_socket(cfg_tcp); @@ -252,6 +342,11 @@ static void do_recv(void) treport = gettimeofday_ms() + 1000; do { + /* force termination after the second poll(); this cope both + * with sender slower than receiver and missing packet errors + */ + if (cfg_expected_pkt_nr && loop++) + interrupted = true; do_poll(fd); if (cfg_tcp) @@ -272,6 +367,10 @@ static void do_recv(void) } while (!interrupted); + if (cfg_expected_pkt_nr && (packets != cfg_expected_pkt_nr)) + error(1, 0, "wrong packet number! got %ld, expected %d\n", + packets, cfg_expected_pkt_nr); + if (close(fd)) error(1, errno, "close"); } diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index e821564053cf..4074538b5df5 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -52,6 +52,8 @@ static bool cfg_segment; static bool cfg_sendmmsg; static bool cfg_tcp; static bool cfg_zerocopy; +static int cfg_msg_nr; +static uint16_t cfg_gso_size; static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; @@ -205,14 +207,14 @@ static void send_udp_segment_cmsg(struct cmsghdr *cm) cm->cmsg_level = SOL_UDP; cm->cmsg_type = UDP_SEGMENT; - cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss)); + cm->cmsg_len = CMSG_LEN(sizeof(cfg_gso_size)); valp = (void *)CMSG_DATA(cm); - *valp = cfg_mss; + *valp = cfg_gso_size; } static int send_udp_segment(int fd, char *data) { - char control[CMSG_SPACE(sizeof(cfg_mss))] = {0}; + char control[CMSG_SPACE(sizeof(cfg_gso_size))] = {0}; struct msghdr msg = {0}; struct iovec iov = {0}; int ret; @@ -241,7 +243,7 @@ static int send_udp_segment(int fd, char *data) static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]", + error(1, 0, "Usage: %s [-46cmtuz] [-C cpu] [-D dst ip] [-l secs] [-m messagenr] [-p port] [-s sendsize] [-S gsosize]", filepath); } @@ -250,7 +252,7 @@ static void parse_opts(int argc, char **argv) int max_len, hdrlen; int c; - while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) { + while ((c = getopt(argc, argv, "46cC:D:l:mM:p:s:S:tuz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -279,6 +281,9 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_sendmmsg = true; break; + case 'M': + cfg_msg_nr = strtoul(optarg, NULL, 10); + break; case 'p': cfg_port = strtoul(optarg, NULL, 0); break; @@ -286,6 +291,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = strtoul(optarg, NULL, 0); break; case 'S': + cfg_gso_size = strtoul(optarg, NULL, 0); cfg_segment = true; break; case 't': @@ -317,6 +323,8 @@ static void parse_opts(int argc, char **argv) cfg_mss = ETH_DATA_LEN - hdrlen; max_len = ETH_MAX_MTU - hdrlen; + if (!cfg_gso_size) + cfg_gso_size = cfg_mss; if (cfg_payload_len > max_len) error(1, 0, "payload length %u exceeds max %u", @@ -392,10 +400,12 @@ int main(int argc, char **argv) else num_sends += send_udp(fd, buf[i]); num_msgs++; - if (cfg_zerocopy && ((num_msgs & 0xF) == 0)) flush_zerocopy(fd); + if (cfg_msg_nr && num_msgs >= cfg_msg_nr) + break; + tnow = gettimeofday_ms(); if (tnow > treport) { fprintf(stderr, -- cgit v1.2.3 From 582888792f7bc2d543d85cb610160e2162d5f132 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:17 +0100 Subject: selftests: pmtu: Introduce tests for IPv4/IPv6 over VXLAN over IPv4/IPv6 Use a router between endpoints, implemented via namespaces, set a low MTU between router and destination endpoint, exceed it and check PMTU value in route exceptions. v2: - Change all occurrences of VxLAN to VXLAN (Jiri Benc) - Introduce IPv4 tests right away, if iproute2 doesn't support the 'df' link option they will be skipped (David Ahern) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- tools/testing/selftests/net/pmtu.sh | 143 +++++++++++++++++++++++++++++++----- 1 file changed, 125 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index a369d616b390..33cba295ad45 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -26,6 +26,23 @@ # - pmtu_ipv6 # Same as pmtu_ipv4, except for locked PMTU tests, using IPv6 # +# - pmtu_ipv4_vxlan4_exception +# Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel +# over IPv4 between A and B, routed via R1. On the link between R1 and B, +# set a MTU lower than the VXLAN MTU and the MTU on the link between A and +# R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN +# from A to B and check that the PMTU exception is created with the right +# value on A +# +# - pmtu_ipv6_vxlan4_exception +# Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B +# +# - pmtu_ipv4_vxlan6_exception +# Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B +# +# - pmtu_ipv6_vxlan6_exception +# Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B +# # - pmtu_vti4_exception # Set up vti tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is not @@ -72,6 +89,10 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) tests=" pmtu_ipv4_exception ipv4: PMTU exceptions pmtu_ipv6_exception ipv6: PMTU exceptions + pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions + pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions + pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions + pmtu_ipv6_vxlan6_exception IPv6 over vxlan6: PMTU exceptions pmtu_vti6_exception vti6: PMTU exceptions pmtu_vti4_exception vti4: PMTU exceptions pmtu_vti4_default_mtu vti4: default MTU assignment @@ -95,8 +116,8 @@ ns_r2="ip netns exec ${NS_R2}" # Addresses are: # - IPv4: PREFIX4.SEGMENT.ID (/24) # - IPv6: PREFIX6:SEGMENT::ID (/64) -prefix4="192.168" -prefix6="fd00" +prefix4="10.0" +prefix6="fc00" a_r1=1 a_r2=2 b_r1=3 @@ -129,12 +150,12 @@ veth6_a_addr="fd00:1::a" veth6_b_addr="fd00:1::b" veth6_mask="64" -vti4_a_addr="192.168.2.1" -vti4_b_addr="192.168.2.2" -vti4_mask="24" -vti6_a_addr="fd00:2::a" -vti6_b_addr="fd00:2::b" -vti6_mask="64" +tunnel4_a_addr="192.168.2.1" +tunnel4_b_addr="192.168.2.2" +tunnel4_mask="24" +tunnel6_a_addr="fd00:2::a" +tunnel6_b_addr="fd00:2::b" +tunnel6_mask="64" dummy6_0_addr="fc00:1000::0" dummy6_1_addr="fc00:1001::0" @@ -202,11 +223,39 @@ setup_vti() { } setup_vti4() { - setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask} + setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} } setup_vti6() { - setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask} + setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} +} + +setup_vxlan() { + a_addr="${1}" + b_addr="${2}" + opts="${3}" + + ${ns_a} ip link add vxlan_a type vxlan id 1 local ${a_addr} remote ${b_addr} ttl 64 dstport 4789 ${opts} || return 1 + ${ns_b} ip link add vxlan_b type vxlan id 1 local ${b_addr} remote ${a_addr} ttl 64 dstport 4789 ${opts} + + ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev vxlan_a + ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev vxlan_b + + ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev vxlan_a + ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev vxlan_b + + ${ns_a} ip link set vxlan_a up + ${ns_b} ip link set vxlan_b up + + sleep 1 +} + +setup_vxlan4() { + setup_vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" +} + +setup_vxlan6() { + setup_vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" } setup_xfrm() { @@ -465,6 +514,64 @@ test_pmtu_ipv6_exception() { test_pmtu_ipvX 6 } +test_pmtu_ipvX_over_vxlanY_exception() { + family=${1} + outer_family=${2} + ll_mtu=4000 + + if [ ${outer_family} -eq 4 ]; then + setup namespaces routing vxlan4 || return 2 + # IPv4 header UDP header VXLAN header Ethernet header + exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) + else + setup namespaces routing vxlan6 || return 2 + # IPv6 header UDP header VXLAN header Ethernet header + exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) + fi + + trace "${ns_a}" vxlan_a "${ns_b}" vxlan_b \ + "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B + + if [ ${family} -eq 4 ]; then + ping=ping + dst=${tunnel4_b_addr} + else + ping=${ping6} + dst=${tunnel6_b_addr} + fi + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "${ns_a}" vxlan_a $((${ll_mtu} + 1000)) + mtu "${ns_b}" vxlan_b $((${ll_mtu} + 1000)) + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s $((${ll_mtu} + 500)) ${dst} > /dev/null + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on VXLAN interface" +} + +test_pmtu_ipv4_vxlan4_exception() { + test_pmtu_ipvX_over_vxlanY_exception 4 4 +} + +test_pmtu_ipv6_vxlan4_exception() { + test_pmtu_ipvX_over_vxlanY_exception 6 4 +} + +test_pmtu_ipv4_vxlan6_exception() { + test_pmtu_ipvX_over_vxlanY_exception 4 6 +} + +test_pmtu_ipv6_vxlan6_exception() { + test_pmtu_ipvX_over_vxlanY_exception 6 6 +} + test_pmtu_vti4_exception() { setup namespaces veth vti4 xfrm4 || return 2 trace "${ns_a}" veth_a "${ns_b}" veth_b \ @@ -484,14 +591,14 @@ test_pmtu_vti4_exception() { # Send DF packet without exceeding link layer MTU, check that no # exception is created - ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null - pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" + ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 # Now exceed link layer MTU by one byte, check that exception is created # with the right PMTU value - ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null - pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" + ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" } @@ -506,20 +613,20 @@ test_pmtu_vti6_exception() { mtu "${ns_b}" veth_b 4000 mtu "${ns_a}" vti6_a 5000 mtu "${ns_b}" vti6_b 5000 - ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null + ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${tunnel6_b_addr} > /dev/null # Check that exception was created - pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 # Decrease tunnel MTU, check for PMTU decrease in route exception mtu "${ns_a}" vti6_a 3000 - pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 # Increase tunnel MTU, check for PMTU increase in route exception mtu "${ns_a}" vti6_a 9000 - pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 return ${fail} -- cgit v1.2.3 From ce7336610ca950cda131293def57a0178d860121 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:20 +0100 Subject: selftests: pmtu: Introduce tests for IPv4/IPv6 over GENEVE over IPv4/IPv6 Use a router between endpoints, implemented via namespaces, set a low MTU between router and destination endpoint, exceed it and check PMTU value in route exceptions. v2: - Introduce IPv4 tests right away, if iproute2 doesn't support the 'df' link option they will be skipped (David Ahern) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- tools/testing/selftests/net/pmtu.sh | 117 ++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 33cba295ad45..14d20782ccb7 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -43,6 +43,22 @@ # - pmtu_ipv6_vxlan6_exception # Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B # +# - pmtu_ipv4_geneve4_exception +# Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of +# VXLAN +# +# - pmtu_ipv6_geneve4_exception +# Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of +# VXLAN +# +# - pmtu_ipv4_geneve6_exception +# Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of +# VXLAN +# +# - pmtu_ipv6_geneve6_exception +# Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of +# VXLAN +# # - pmtu_vti4_exception # Set up vti tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is not @@ -93,6 +109,10 @@ tests=" pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions pmtu_ipv6_vxlan6_exception IPv6 over vxlan6: PMTU exceptions + pmtu_ipv4_geneve4_exception IPv4 over geneve4: PMTU exceptions + pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions + pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions + pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions pmtu_vti6_exception vti6: PMTU exceptions pmtu_vti4_exception vti4: PMTU exceptions pmtu_vti4_default_mtu vti4: default MTU assignment @@ -230,32 +250,50 @@ setup_vti6() { setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} } -setup_vxlan() { - a_addr="${1}" - b_addr="${2}" - opts="${3}" +setup_vxlan_or_geneve() { + type="${1}" + a_addr="${2}" + b_addr="${3}" + opts="${4}" + + if [ "${type}" = "vxlan" ]; then + opts="${opts} ttl 64 dstport 4789" + opts_a="local ${a_addr}" + opts_b="local ${b_addr}" + else + opts_a="" + opts_b="" + fi - ${ns_a} ip link add vxlan_a type vxlan id 1 local ${a_addr} remote ${b_addr} ttl 64 dstport 4789 ${opts} || return 1 - ${ns_b} ip link add vxlan_b type vxlan id 1 local ${b_addr} remote ${a_addr} ttl 64 dstport 4789 ${opts} + ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1 + ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} - ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev vxlan_a - ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev vxlan_b + ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a + ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b - ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev vxlan_a - ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev vxlan_b + ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a + ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b - ${ns_a} ip link set vxlan_a up - ${ns_b} ip link set vxlan_b up + ${ns_a} ip link set ${type}_a up + ${ns_b} ip link set ${type}_b up sleep 1 } +setup_geneve4() { + setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" +} + setup_vxlan4() { - setup_vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" + setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" +} + +setup_geneve6() { + setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 } setup_vxlan6() { - setup_vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" + setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 } setup_xfrm() { @@ -514,22 +552,23 @@ test_pmtu_ipv6_exception() { test_pmtu_ipvX 6 } -test_pmtu_ipvX_over_vxlanY_exception() { - family=${1} - outer_family=${2} +test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { + type=${1} + family=${2} + outer_family=${3} ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing vxlan4 || return 2 - # IPv4 header UDP header VXLAN header Ethernet header - exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) + setup namespaces routing ${type}4 || return 2 + # IPv4 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing vxlan6 || return 2 - # IPv6 header UDP header VXLAN header Ethernet header - exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) + setup namespaces routing ${type}6 || return 2 + # IPv6 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi - trace "${ns_a}" vxlan_a "${ns_b}" vxlan_b \ + trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -547,29 +586,45 @@ test_pmtu_ipvX_over_vxlanY_exception() { mtu "${ns_b}" veth_B-R1 ${ll_mtu} mtu "${ns_r1}" veth_R1-B ${ll_mtu} - mtu "${ns_a}" vxlan_a $((${ll_mtu} + 1000)) - mtu "${ns_b}" vxlan_b $((${ll_mtu} + 1000)) + mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000)) + mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s $((${ll_mtu} + 500)) ${dst} > /dev/null # Check that exception was created pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" - check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on VXLAN interface" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface" } test_pmtu_ipv4_vxlan4_exception() { - test_pmtu_ipvX_over_vxlanY_exception 4 4 + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 4 } test_pmtu_ipv6_vxlan4_exception() { - test_pmtu_ipvX_over_vxlanY_exception 6 4 + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 4 +} + +test_pmtu_ipv4_geneve4_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4 +} + +test_pmtu_ipv6_geneve4_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4 } test_pmtu_ipv4_vxlan6_exception() { - test_pmtu_ipvX_over_vxlanY_exception 4 6 + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 6 } test_pmtu_ipv6_vxlan6_exception() { - test_pmtu_ipvX_over_vxlanY_exception 6 6 + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 6 +} + +test_pmtu_ipv4_geneve6_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6 +} + +test_pmtu_ipv6_geneve6_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6 } test_pmtu_vti4_exception() { -- cgit v1.2.3 From 56fd865f46b894681dd7e7f83761243add7a71a3 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:24 +0100 Subject: selftests: pmtu: Introduce FoU and GUE PMTU exceptions tests Introduce eight tests, for FoU and GUE, with IPv4 and IPv6 payload, on IPv4 and IPv6 transport, that check that PMTU exceptions are created with the right value when exceeding the MTU on a link of the path. Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- tools/testing/selftests/net/pmtu.sh | 179 ++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 14d20782ccb7..e2c94e47707c 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -59,6 +59,14 @@ # Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of # VXLAN # +# - pmtu_ipv{4,6}_fou{4,6}_exception +# Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation +# (FoU) over IPv4/IPv6, instead of VXLAN +# +# - pmtu_ipv{4,6}_fou{4,6}_exception +# Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6 +# encapsulation (GUE) over IPv4/IPv6, instead of VXLAN +# # - pmtu_vti4_exception # Set up vti tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is not @@ -113,6 +121,14 @@ tests=" pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions + pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions + pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions + pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions + pmtu_ipv6_fou6_exception IPv6 over fou6: PMTU exceptions + pmtu_ipv4_gue4_exception IPv4 over gue4: PMTU exceptions + pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions + pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions + pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions pmtu_vti6_exception vti6: PMTU exceptions pmtu_vti4_exception vti4: PMTU exceptions pmtu_vti4_default_mtu vti4: default MTU assignment @@ -200,6 +216,89 @@ nsname() { eval echo \$NS_$1 } +setup_fou_or_gue() { + outer="${1}" + inner="${2}" + encap="${3}" + + if [ "${outer}" = "4" ]; then + modprobe fou || return 2 + a_addr="${prefix4}.${a_r1}.1" + b_addr="${prefix4}.${b_r1}.1" + if [ "${inner}" = "4" ]; then + type="ipip" + ipproto="4" + else + type="sit" + ipproto="41" + fi + else + modprobe fou6 || return 2 + a_addr="${prefix6}:${a_r1}::1" + b_addr="${prefix6}:${b_r1}::1" + if [ "${inner}" = "4" ]; then + type="ip6tnl" + mode="mode ipip6" + ipproto="4 -6" + else + type="ip6tnl" + mode="mode ip6ip6" + ipproto="41 -6" + fi + fi + + ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 + ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 + + ${ns_b} ip fou add port 5556 ipproto ${ipproto} + ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 + + if [ "${inner}" = "4" ]; then + ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a + ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b + else + ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a + ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b + fi + + ${ns_a} ip link set ${encap}_a up + ${ns_b} ip link set ${encap}_b up + + sleep 1 +} + +setup_fou44() { + setup_fou_or_gue 4 4 fou +} + +setup_fou46() { + setup_fou_or_gue 4 6 fou +} + +setup_fou64() { + setup_fou_or_gue 6 4 fou +} + +setup_fou66() { + setup_fou_or_gue 6 6 fou +} + +setup_gue44() { + setup_fou_or_gue 4 4 gue +} + +setup_gue46() { + setup_fou_or_gue 4 6 gue +} + +setup_gue64() { + setup_fou_or_gue 6 4 gue +} + +setup_gue66() { + setup_fou_or_gue 6 6 gue +} + setup_namespaces() { for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do ip netns add ${n} || return 1 @@ -627,6 +726,86 @@ test_pmtu_ipv6_geneve6_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6 } +test_pmtu_ipvX_over_fouY_or_gueY() { + inner_family=${1} + outer_family=${2} + encap=${3} + ll_mtu=4000 + + setup namespaces routing ${encap}${outer_family}${inner_family} || return 2 + trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ + "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B + + if [ ${inner_family} -eq 4 ]; then + ping=ping + dst=${tunnel4_b_addr} + else + ping=${ping6} + dst=${tunnel6_b_addr} + fi + + if [ "${encap}" = "gue" ]; then + encap_overhead=4 + else + encap_overhead=0 + fi + + if [ ${outer_family} -eq 4 ]; then + # IPv4 header UDP header + exp_mtu=$((${ll_mtu} - 20 - 8 - ${encap_overhead})) + else + # IPv6 header Option 4 UDP header + exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - ${encap_overhead})) + fi + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000)) + mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000)) + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s $((${ll_mtu} + 500)) ${dst} > /dev/null + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface" +} + +test_pmtu_ipv4_fou4_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 4 4 fou +} + +test_pmtu_ipv6_fou4_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 6 4 fou +} + +test_pmtu_ipv4_fou6_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 4 6 fou +} + +test_pmtu_ipv6_fou6_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 6 6 fou +} + +test_pmtu_ipv4_gue4_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 4 4 gue +} + +test_pmtu_ipv6_gue4_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 6 4 gue +} + +test_pmtu_ipv4_gue6_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 4 6 gue +} + +test_pmtu_ipv6_gue6_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue +} + test_pmtu_vti4_exception() { setup namespaces veth vti4 xfrm4 || return 2 trace "${ns_a}" veth_a "${ns_b}" veth_b \ -- cgit v1.2.3 From 435f90a338ae42c0d7c0109a1742d1b16bc99bf6 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 7 Nov 2018 16:12:02 -0800 Subject: selftests/bpf: add a test case for sock_ops perf-event notification This patch provides a tcp_bpf based eBPF sample. The test - ncat(1) as the TCP client program to connect() to a port with the intention of triggerring SYN retransmissions: we first install an iptables DROP rule to make sure ncat SYNs are resent (instead of aborting instantly after a TCP RST) - has a bpf kernel module that sends a perf-event notification for each TCP retransmit, and also tracks the number of such notifications sent in the global_map The test passes when the number of event notifications intercepted in user-space matches the value in the global_map. Signed-off-by: Sowmini Varadhan Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/test_tcpnotify.h | 19 +++ tools/testing/selftests/bpf/test_tcpnotify_kern.c | 95 +++++++++++ tools/testing/selftests/bpf/test_tcpnotify_user.c | 186 ++++++++++++++++++++++ 4 files changed, 303 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_tcpnotify.h create mode 100644 tools/testing/selftests/bpf/test_tcpnotify_kern.c create mode 100644 tools/testing/selftests/bpf/test_tcpnotify_user.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index ecd79b7fb107..57b4712a6276 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -24,12 +24,13 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ - test_netcnt + test_netcnt test_tcpnotify_user TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + test_tcpnotify_kern.o \ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ @@ -74,6 +75,7 @@ $(OUTPUT)/test_sock_addr: cgroup_helpers.c $(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c +$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c diff --git a/tools/testing/selftests/bpf/test_tcpnotify.h b/tools/testing/selftests/bpf/test_tcpnotify.h new file mode 100644 index 000000000000..8b6cea030bfc --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _TEST_TCPBPF_H +#define _TEST_TCPBPF_H + +struct tcpnotify_globals { + __u32 total_retrans; + __u32 ncalls; +}; + +struct tcp_notifier { + __u8 type; + __u8 subtype; + __u8 source; + __u8 hash; +}; + +#define TESTPORT 12877 +#endif diff --git a/tools/testing/selftests/bpf/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/test_tcpnotify_kern.c new file mode 100644 index 000000000000..edbca203ce2d --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify_kern.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpnotify.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpnotify_globals), + .max_entries = 4, +}; + +struct bpf_map_def SEC("maps") perf_event_map = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(__u32), + .max_entries = 2, +}; + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + op = (int) skops->op; + + if (bpf_ntohl(skops->remote_port) != TESTPORT) { + skops->reply = -1; + return 0; + } + + switch (op) { + case BPF_SOCK_OPS_TIMEOUT_INIT: + case BPF_SOCK_OPS_RWND_INIT: + case BPF_SOCK_OPS_NEEDS_ECN: + case BPF_SOCK_OPS_BASE_RTT: + case BPF_SOCK_OPS_RTO_CB: + rv = 1; + break; + + case BPF_SOCK_OPS_TCP_CONNECT_CB: + case BPF_SOCK_OPS_TCP_LISTEN_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + bpf_sock_ops_cb_flags_set(skops, (BPF_SOCK_OPS_RETRANS_CB_FLAG| + BPF_SOCK_OPS_RTO_CB_FLAG)); + rv = 1; + break; + case BPF_SOCK_OPS_RETRANS_CB: { + __u32 key = 0; + struct tcpnotify_globals g, *gp; + struct tcp_notifier msg = { + .type = 0xde, + .subtype = 0xad, + .source = 0xbe, + .hash = 0xef, + }; + + rv = 1; + + /* Update results */ + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.total_retrans = skops->total_retrans; + g.ncalls++; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + bpf_perf_event_output(skops, &perf_event_map, + BPF_F_CURRENT_CPU, + &msg, sizeof(msg)); + } + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c new file mode 100644 index 000000000000..ff3c4522aed6 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +#include "test_tcpnotify.h" +#include "trace_helpers.h" + +#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L) + +pthread_t tid; +int rx_callbacks; + +static int dummyfn(void *data, int size) +{ + struct tcp_notifier *t = data; + + if (t->type != 0xde || t->subtype != 0xad || + t->source != 0xbe || t->hash != 0xef) + return 1; + rx_callbacks++; + return 0; +} + +void tcp_notifier_poller(int fd) +{ + while (1) + perf_event_poller(fd, dummyfn); +} + +static void *poller_thread(void *arg) +{ + int fd = *(int *)arg; + + tcp_notifier_poller(fd); + return arg; +} + +int verify_result(const struct tcpnotify_globals *result) +{ + return (result->ncalls > 0 && result->ncalls == rx_callbacks ? 0 : 1); +} + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +static int setup_bpf_perf_event(int mapfd) +{ + struct perf_event_attr attr = { + .sample_type = PERF_SAMPLE_RAW, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_BPF_OUTPUT, + }; + int key = 0; + int pmu_fd; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0); + if (pmu_fd < 0) + return pmu_fd; + bpf_map_update_elem(mapfd, &key, &pmu_fd, BPF_ANY); + + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + return pmu_fd; +} + +int main(int argc, char **argv) +{ + const char *file = "test_tcpnotify_kern.o"; + int prog_fd, map_fd, perf_event_fd; + struct tcpnotify_globals g = {0}; + const char *cg_path = "/foo"; + int error = EXIT_FAILURE; + struct bpf_object *obj; + int cg_fd = -1; + __u32 key = 0; + int rv; + char test_script[80]; + int pmu_fd; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + + if (setup_cgroup_environment()) + goto err; + + cg_fd = create_and_get_cgroup(cg_path); + if (!cg_fd) + goto err; + + if (join_cgroup(cg_path)) + goto err; + + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } + + rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (rv) { + printf("FAILED: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + goto err; + } + + perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map"); + if (perf_event_fd < 0) + goto err; + + map_fd = bpf_find_map(__func__, obj, "global_map"); + if (map_fd < 0) + goto err; + + pmu_fd = setup_bpf_perf_event(perf_event_fd); + if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0) + goto err; + + pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd); + + sprintf(test_script, + "/usr/sbin/iptables -A INPUT -p tcp --dport %d -j DROP", + TESTPORT); + system(test_script); + + sprintf(test_script, + "/usr/bin/nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", + TESTPORT); + system(test_script); + + sprintf(test_script, + "/usr/sbin/iptables -D INPUT -p tcp --dport %d -j DROP", + TESTPORT); + system(test_script); + + rv = bpf_map_lookup_elem(map_fd, &key, &g); + if (rv != 0) { + printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); + goto err; + } + + sleep(10); + + if (verify_result(&g)) { + printf("FAILED: Wrong stats Expected %d calls, got %d\n", + g.ncalls, rx_callbacks); + goto err; + } + + printf("PASSED!\n"); + error = 0; +err: + bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + close(cg_fd); + cleanup_cgroup_environment(); + return error; +} -- cgit v1.2.3 From bf598a8f0f771302d4ecb0ef0003c54732221597 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 8 Nov 2018 13:00:07 -0800 Subject: bpftool: Improve handling of ENOENT on map dumps bpftool output is not user friendly when dumping a map with only a few populated entries: $ bpftool map 1: devmap name tx_devmap flags 0x0 key 4B value 4B max_entries 64 memlock 4096B 2: array name tx_idxmap flags 0x0 key 4B value 4B max_entries 64 memlock 4096B $ bpftool map dump id 1 key: 00 00 00 00 value: No such file or directory key: 01 00 00 00 value: No such file or directory key: 02 00 00 00 value: No such file or directory key: 03 00 00 00 value: 03 00 00 00 Handle ENOENT by keeping the line format sane and dumping "" for the value $ bpftool map dump id 1 key: 00 00 00 00 value: key: 01 00 00 00 value: key: 02 00 00 00 value: key: 03 00 00 00 value: 03 00 00 00 ... Signed-off-by: David Ahern Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 101b8a881225..dc9a8967ab8c 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -383,7 +383,10 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, printf(single_line ? " " : "\n"); printf("value:%c", break_names ? '\n' : ' '); - fprint_hex(stdout, value, info->value_size, " "); + if (value) + fprint_hex(stdout, value, info->value_size, " "); + else + printf(""); printf("\n"); } else { @@ -398,8 +401,11 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, for (i = 0; i < n; i++) { printf("value (CPU %02d):%c", i, info->value_size > 16 ? '\n' : ' '); - fprint_hex(stdout, value + i * step, - info->value_size, " "); + if (value) + fprint_hex(stdout, value + i * step, + info->value_size, " "); + else + printf(""); printf("\n"); } } @@ -731,7 +737,11 @@ static int dump_map_elem(int fd, void *key, void *value, jsonw_string_field(json_wtr, "error", strerror(lookup_errno)); jsonw_end_object(json_wtr); } else { - print_entry_error(map_info, key, strerror(lookup_errno)); + if (errno == ENOENT) + print_entry_plain(map_info, key, NULL); + else + print_entry_error(map_info, key, + strerror(lookup_errno)); } return 0; -- cgit v1.2.3 From b69d540da7db84e836cea77fbd56a518aafa1f2f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Nov 2018 23:00:31 +0100 Subject: selftests: add xfrm policy test script add a script that adds a ipsec tunnel between two network namespaces plus following policies: .0/24 -> ipsec tunnel .240/28 -> bypass .253/32 -> ipsec tunnel Then check that .254 bypasses tunnel (match /28 exception), and .2 (match /24) and .253 (match direct policy) pass through the tunnel. Abuses iptables to check if ping did resolve an ipsec policy or not. Also adds a bunch of 'block' rules that are not supposed to match. Signed-off-by: Florian Westphal Acked-by: David S. Miller Signed-off-by: Steffen Klassert --- tools/testing/selftests/net/Makefile | 3 +- tools/testing/selftests/net/xfrm_policy.sh | 276 +++++++++++++++++++++++++++++ 2 files changed, 278 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/xfrm_policy.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index eec359895feb..2ab3e3437a92 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -4,7 +4,8 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ -TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ + rtnetlink.sh xfrm_policy.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh new file mode 100755 index 000000000000..6ca63a6e50e9 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -0,0 +1,276 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check xfrm policy resolution. Topology: +# +# 1.2 1.1 3.1 3.10 2.1 2.2 +# eth1 eth1 veth0 veth0 eth1 eth1 +# ns1 ---- ns3 ----- ns4 ---- ns2 +# +# ns3 and ns4 are connected via ipsec tunnel. +# pings from ns1 to ns2 (and vice versa) are supposed to work like this: +# ns1: ping 10.0.2.2: passes via ipsec tunnel. +# ns2: ping 10.0.1.2: passes via ipsec tunnel. + +# ns1: ping 10.0.1.253: passes via ipsec tunnel (direct policy) +# ns2: ping 10.0.2.253: passes via ipsec tunnel (direct policy) +# +# ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception) +# ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception) + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +KEY_SHA=0xdeadbeef1234567890abcdefabcdefabcdefabcd +KEY_AES=0x0123456789abcdef0123456789012345 +SPI1=0x1 +SPI2=0x2 + +do_esp() { + local ns=$1 + local me=$2 + local remote=$3 + local lnet=$4 + local rnet=$5 + local spi_out=$6 + local spi_in=$7 + + ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet + ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet + + # to encrypt packets as they go out (includes forwarded packets that need encapsulation) + ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow + # to fwd decrypted packets after esp processing: + ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow +} + +do_exception() { + local ns=$1 + local me=$2 + local remote=$3 + local encryptip=$4 + local plain=$5 + + # network $plain passes without tunnel + ip -net $ns xfrm policy add dst $plain dir out priority 10 action allow + + # direct policy for $encryptip, use tunnel, higher prio takes precedence + ip -net $ns xfrm policy add dst $encryptip dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow +} + +# policies that are not supposed to match any packets generated in this test. +do_dummies4() { + local ns=$1 + + for i in $(seq 10 16);do + # dummy policy with wildcard src/dst. + echo netns exec $ns ip xfrm policy add src 0.0.0.0/0 dst 10.$i.99.0/30 dir out action block + echo netns exec $ns ip xfrm policy add src 10.$i.99.0/30 dst 0.0.0.0/0 dir out action block + for j in $(seq 32 64);do + echo netns exec $ns ip xfrm policy add src 10.$i.1.0/30 dst 10.$i.$j.0/30 dir out action block + # silly, as it encompasses the one above too, but its allowed: + echo netns exec $ns ip xfrm policy add src 10.$i.1.0/29 dst 10.$i.$j.0/29 dir out action block + # and yet again, even more broad one. + echo netns exec $ns ip xfrm policy add src 10.$i.1.0/24 dst 10.$i.$j.0/24 dir out action block + echo netns exec $ns ip xfrm policy add src 10.$i.$j.0/24 dst 10.$i.1.0/24 dir fwd action block + done + done | ip -batch /dev/stdin +} + +do_dummies6() { + local ns=$1 + + for i in $(seq 10 16);do + for j in $(seq 32 64);do + echo netns exec $ns ip xfrm policy add src dead:$i::/64 dst dead:$i:$j::/64 dir out action block + echo netns exec $ns ip xfrm policy add src dead:$i:$j::/64 dst dead:$i::/24 dir fwd action block + done + done | ip -batch /dev/stdin +} + +check_ipt_policy_count() +{ + ns=$1 + + ip netns exec $ns iptables-save -c |grep policy | ( read c rest + ip netns exec $ns iptables -Z + if [ x"$c" = x'[0:0]' ]; then + exit 0 + elif [ x"$c" = x ]; then + echo "ERROR: No counters" + ret=1 + exit 111 + else + exit 1 + fi + ) +} + +check_xfrm() { + # 0: iptables -m policy rule count == 0 + # 1: iptables -m policy rule count != 0 + rval=$1 + ip=$2 + ret=0 + + ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null + + check_ipt_policy_count ns3 + if [ $? -ne $rval ] ; then + ret=1 + fi + check_ipt_policy_count ns4 + if [ $? -ne $rval ] ; then + ret=1 + fi + + ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null + + check_ipt_policy_count ns3 + if [ $? -ne $rval ] ; then + ret=1 + fi + check_ipt_policy_count ns4 + if [ $? -ne $rval ] ; then + ret=1 + fi + + return $ret +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +ip -Version 2>/dev/null >/dev/null +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without the ip tool" + exit $ksft_skip +fi + +# needed to check if policy lookup got valid ipsec result +iptables --version 2>/dev/null >/dev/null +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without iptables tool" + exit $ksft_skip +fi + +for i in 1 2 3 4; do + ip netns add ns$i + ip -net ns$i link set lo up +done + +DEV=veth0 +ip link add $DEV netns ns1 type veth peer name eth1 netns ns3 +ip link add $DEV netns ns2 type veth peer name eth1 netns ns4 + +ip link add $DEV netns ns3 type veth peer name veth0 netns ns4 + +DEV=veth0 +for i in 1 2; do + ip -net ns$i link set $DEV up + ip -net ns$i addr add 10.0.$i.2/24 dev $DEV + ip -net ns$i addr add dead:$i::2/64 dev $DEV + + ip -net ns$i addr add 10.0.$i.253 dev $DEV + ip -net ns$i addr add 10.0.$i.254 dev $DEV + ip -net ns$i addr add dead:$i::fd dev $DEV + ip -net ns$i addr add dead:$i::fe dev $DEV +done + +for i in 3 4; do +ip -net ns$i link set eth1 up +ip -net ns$i link set veth0 up +done + +ip -net ns1 route add default via 10.0.1.1 +ip -net ns2 route add default via 10.0.2.1 + +ip -net ns3 addr add 10.0.1.1/24 dev eth1 +ip -net ns3 addr add 10.0.3.1/24 dev veth0 +ip -net ns3 addr add 2001:1::1/64 dev eth1 +ip -net ns3 addr add 2001:3::1/64 dev veth0 + +ip -net ns3 route add default via 10.0.3.10 + +ip -net ns4 addr add 10.0.2.1/24 dev eth1 +ip -net ns4 addr add 10.0.3.10/24 dev veth0 +ip -net ns4 addr add 2001:2::1/64 dev eth1 +ip -net ns4 addr add 2001:3::10/64 dev veth0 +ip -net ns4 route add default via 10.0.3.1 + +for j in 4 6; do + for i in 3 4;do + ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null + ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null + done +done + +# abuse iptables rule counter to check if ping matches a policy +ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +if [ $? -ne 0 ];then + echo "SKIP: Could not insert iptables rule" + for i in 1 2 3 4;do ip netns del ns$i;done + exit $ksft_skip +fi + +# localip remoteip localnet remotenet +do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 +do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 + +do_dummies4 ns3 +do_dummies6 ns4 + +# ping to .254 should use ipsec, exception is not installed. +check_xfrm 1 254 +if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .254 to use ipsec tunnel" + ret=1 +else + echo "PASS: policy before exception matches" +fi + +# installs exceptions +# localip remoteip encryptdst plaindst +do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 + +do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 +do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 + +# ping to .254 should now be excluded from the tunnel +check_xfrm 0 254 +if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .254 to fail" + ret=1 +else + echo "PASS: ping to .254 bypassed ipsec tunnel" +fi + +# ping to .253 should use use ipsec due to direct policy exception. +check_xfrm 1 253 +if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .253 to use ipsec tunnel" + ret=1 +else + echo "PASS: direct policy matches" +fi + +# ping to .2 should use ipsec. +check_xfrm 1 2 +if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .2 to use ipsec tunnel" + ret=1 +else + echo "PASS: policy matches" +fi + +for i in 1 2 3 4;do ip netns del ns$i;done + +exit $ret -- cgit v1.2.3 From 108d50a976db70c59e6f2fc58d3252fd38ef3fc4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 9 Nov 2018 08:21:40 -0800 Subject: selftests/bpf: rename flow dissector section to flow_dissector Makes it compatible with the logic that derives program type from section name in libbpf_prog_type_by_name. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_flow.c | 2 +- tools/testing/selftests/bpf/test_flow_dissector.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c index 107350a7821d..b9798f558ca7 100644 --- a/tools/testing/selftests/bpf/bpf_flow.c +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -116,7 +116,7 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) return BPF_DROP; } -SEC("dissect") +SEC("flow_dissector") int _dissect(struct __sk_buff *skb) { if (!skb->vlan_present) diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index c0fb073b5eab..d23d4da66b83 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -59,7 +59,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s dissect +./flow_dissector_load -p bpf_flow.o -s flow_dissector # Setup tc qdisc add dev lo ingress -- cgit v1.2.3 From 0c19a9fbc9cdba29c7effb34fd5a97226bf934e6 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 9 Nov 2018 08:21:41 -0800 Subject: libbpf: cleanup after partial failure in bpf_object__pin bpftool will use bpf_object__pin in the next commits to pin all programs and maps from the file; in case of a partial failure, we need to get back to the clean state (undo previous program/map pins). As part of a cleanup, I've added and exported separate routines to pin all maps (bpf_object__pin_maps) and progs (bpf_object__pin_programs) of an object. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 324 +++++++++++++++++++++++++++++++++++++++++++++---- tools/lib/bpf/libbpf.h | 18 +++ 2 files changed, 319 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d6e62e90e8d4..341008f47c8a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1699,6 +1699,34 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, return 0; } +int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, + int instance) +{ + int err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (instance < 0 || instance >= prog->instances.nr) { + pr_warning("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); + return -EINVAL; + } + + err = unlink(path); + if (err != 0) + return -errno; + pr_debug("unpinned program '%s'\n", path); + + return 0; +} + static int make_dir(const char *path) { char *cp, errmsg[STRERR_BUFSIZE]; @@ -1737,6 +1765,64 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) if (err) return err; + for (i = 0; i < prog->instances.nr; i++) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) { + err = -EINVAL; + goto err_unpin; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin; + } + + err = bpf_program__pin_instance(prog, buf, i); + if (err) + goto err_unpin; + } + + return 0; + +err_unpin: + for (i = i - 1; i >= 0; i--) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin_instance(prog, buf, i); + } + + rmdir(path); + + return err; +} + +int bpf_program__unpin(struct bpf_program *prog, const char *path) +{ + int i, err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (prog->instances.nr <= 0) { + pr_warning("no instances of prog %s to pin\n", + prog->section_name); + return -EINVAL; + } + for (i = 0; i < prog->instances.nr; i++) { char buf[PATH_MAX]; int len; @@ -1747,11 +1833,15 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_program__pin_instance(prog, buf, i); + err = bpf_program__unpin_instance(prog, buf, i); if (err) return err; } + err = rmdir(path); + if (err) + return -errno; + return 0; } @@ -1776,12 +1866,33 @@ int bpf_map__pin(struct bpf_map *map, const char *path) } pr_debug("pinned map '%s'\n", path); + return 0; } -int bpf_object__pin(struct bpf_object *obj, const char *path) +int bpf_map__unpin(struct bpf_map *map, const char *path) +{ + int err; + + err = check_path(path); + if (err) + return err; + + if (map == NULL) { + pr_warning("invalid map pointer\n"); + return -EINVAL; + } + + err = unlink(path); + if (err != 0) + return -errno; + pr_debug("unpinned map '%s'\n", path); + + return 0; +} + +int bpf_object__pin_maps(struct bpf_object *obj, const char *path) { - struct bpf_program *prog; struct bpf_map *map; int err; @@ -1797,6 +1908,53 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) if (err) return err; + bpf_map__for_each(map, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) { + err = -EINVAL; + goto err_unpin_maps; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_maps; + } + + err = bpf_map__pin(map, buf); + if (err) + goto err_unpin_maps; + } + + return 0; + +err_unpin_maps: + while ((map = bpf_map__prev(map, obj))) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_map__unpin(map, buf); + } + + return err; +} + +int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) +{ + struct bpf_map *map; + int err; + + if (!obj) + return -ENOENT; + bpf_map__for_each(map, obj) { char buf[PATH_MAX]; int len; @@ -1808,11 +1966,78 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_map__pin(map, buf); + err = bpf_map__unpin(map, buf); if (err) return err; } + return 0; +} + +int bpf_object__pin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + + if (!obj->loaded) { + pr_warning("object not yet loaded; load it first\n"); + return -ENOENT; + } + + err = make_dir(path); + if (err) + return err; + + bpf_object__for_each_program(prog, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->section_name); + if (len < 0) { + err = -EINVAL; + goto err_unpin_programs; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_programs; + } + + err = bpf_program__pin(prog, buf); + if (err) + goto err_unpin_programs; + } + + return 0; + +err_unpin_programs: + while ((prog = bpf_program__prev(prog, obj))) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->section_name); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin(prog, buf); + } + + return err; +} + +int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; int len; @@ -1824,7 +2049,7 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_program__pin(prog, buf); + err = bpf_program__unpin(prog, buf); if (err) return err; } @@ -1832,6 +2057,23 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) return 0; } +int bpf_object__pin(struct bpf_object *obj, const char *path) +{ + int err; + + err = bpf_object__pin_maps(obj, path); + if (err) + return err; + + err = bpf_object__pin_programs(obj, path); + if (err) { + bpf_object__unpin_maps(obj, path); + return err; + } + + return 0; +} + void bpf_object__close(struct bpf_object *obj) { size_t i; @@ -1918,23 +2160,20 @@ void *bpf_object__priv(struct bpf_object *obj) } static struct bpf_program * -__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) +__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, int i) { - size_t idx; + ssize_t idx; if (!obj->programs) return NULL; - /* First handler */ - if (prev == NULL) - return &obj->programs[0]; - if (prev->obj != obj) { + if (p->obj != obj) { pr_warning("error: program handler doesn't match object\n"); return NULL; } - idx = (prev - obj->programs) + 1; - if (idx >= obj->nr_programs) + idx = (p - obj->programs) + i; + if (idx >= obj->nr_programs || idx < 0) return NULL; return &obj->programs[idx]; } @@ -1944,8 +2183,29 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) { struct bpf_program *prog = prev; + if (prev == NULL) + return obj->programs; + do { - prog = __bpf_program__next(prog, obj); + prog = __bpf_program__iter(prog, obj, 1); + } while (prog && bpf_program__is_function_storage(prog, obj)); + + return prog; +} + +struct bpf_program * +bpf_program__prev(struct bpf_program *next, struct bpf_object *obj) +{ + struct bpf_program *prog = next; + + if (next == NULL) { + if (!obj->nr_programs) + return NULL; + return obj->programs + obj->nr_programs - 1; + } + + do { + prog = __bpf_program__iter(prog, obj, -1); } while (prog && bpf_program__is_function_storage(prog, obj)); return prog; @@ -2272,10 +2532,10 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) map->map_ifindex = ifindex; } -struct bpf_map * -bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +static struct bpf_map * +__bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i) { - size_t idx; + ssize_t idx; struct bpf_map *s, *e; if (!obj || !obj->maps) @@ -2284,21 +2544,39 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) s = obj->maps; e = obj->maps + obj->nr_maps; - if (prev == NULL) - return s; - - if ((prev < s) || (prev >= e)) { + if ((m < s) || (m >= e)) { pr_warning("error in %s: map handler doesn't belong to object\n", __func__); return NULL; } - idx = (prev - obj->maps) + 1; - if (idx >= obj->nr_maps) + idx = (m - obj->maps) + i; + if (idx >= obj->nr_maps || idx < 0) return NULL; return &obj->maps[idx]; } +struct bpf_map * +bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +{ + if (prev == NULL) + return obj->maps; + + return __bpf_map__iter(prev, obj, 1); +} + +struct bpf_map * +bpf_map__prev(struct bpf_map *next, struct bpf_object *obj) +{ + if (next == NULL) { + if (!obj->nr_maps) + return NULL; + return obj->maps + obj->nr_maps - 1; + } + + return __bpf_map__iter(next, obj, -1); +} + struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 1f3468dad8b2..b1686a787102 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -71,6 +71,13 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, size_t obj_buf_sz, const char *name); +LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); +LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, + const char *path); LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); LIBBPF_API void bpf_object__close(struct bpf_object *object); @@ -112,6 +119,9 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, (pos) != NULL; \ (pos) = bpf_program__next((pos), (obj))) +LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, + struct bpf_object *obj); + typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); @@ -131,7 +141,11 @@ LIBBPF_API int bpf_program__fd(struct bpf_program *prog); LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance); +LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, + const char *path, + int instance); LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); +LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_insn; @@ -260,6 +274,9 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj); (pos) != NULL; \ (pos) = bpf_map__next((pos), (obj))) +LIBBPF_API struct bpf_map * +bpf_map__prev(struct bpf_map *map, struct bpf_object *obj); + LIBBPF_API int bpf_map__fd(struct bpf_map *map); LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); LIBBPF_API const char *bpf_map__name(struct bpf_map *map); @@ -274,6 +291,7 @@ LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); +LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); LIBBPF_API long libbpf_get_error(const void *ptr); -- cgit v1.2.3 From fd734c5cca62b7630703244d3613be135d646a0e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 9 Nov 2018 08:21:42 -0800 Subject: libbpf: bpf_program__pin: add special case for instances.nr == 1 When bpf_program has only one instance, don't create a subdirectory with per-instance pin files (/0). Instead, just create a single pin file for that single instance. This simplifies object pinning by not creating unnecessary subdirectories. This can potentially break existing users that depend on the case where '/0' is always created. However, I couldn't find any serious usage of bpf_program__pin inside the kernel tree and I suppose there should be none outside. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 341008f47c8a..97ce9f214002 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1761,6 +1761,11 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) return -EINVAL; } + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__pin_instance(prog, path, 0); + } + err = make_dir(path); if (err) return err; @@ -1823,6 +1828,11 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) return -EINVAL; } + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__unpin_instance(prog, path, 0); + } + for (i = 0; i < prog->instances.nr; i++) { char buf[PATH_MAX]; int len; -- cgit v1.2.3 From 33a2c75c55e24aa30ff9fed805ae8bea13c1e2a3 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 9 Nov 2018 08:21:43 -0800 Subject: libbpf: add internal pin_name pin_name is the same as section_name where '/' is replaced by '_'. bpf_object__pin_programs is converted to use pin_name to avoid the situation where section_name would require creating another subdirectory for a pin (as, for example, when calling bpf_object__pin_programs for programs in sections like "cgroup/connect6"). Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 97ce9f214002..e827542ffa3a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -124,6 +124,10 @@ struct bpf_program { char *name; int prog_ifindex; char *section_name; + /* section_name with / replaced by _; makes recursive pinning + * in bpf_object__pin_programs easier + */ + char *pin_name; struct bpf_insn *insns; size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; @@ -253,6 +257,7 @@ static void bpf_program__exit(struct bpf_program *prog) bpf_program__unload(prog); zfree(&prog->name); zfree(&prog->section_name); + zfree(&prog->pin_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -261,6 +266,17 @@ static void bpf_program__exit(struct bpf_program *prog) prog->idx = -1; } +static char *__bpf_program__pin_name(struct bpf_program *prog) +{ + char *name, *p; + + name = p = strdup(prog->section_name); + while ((p = strchr(p, '/'))) + *p = '_'; + + return name; +} + static int bpf_program__init(void *data, size_t size, char *section_name, int idx, struct bpf_program *prog) @@ -279,6 +295,13 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, goto errout; } + prog->pin_name = __bpf_program__pin_name(prog); + if (!prog->pin_name) { + pr_warning("failed to alloc pin name for prog under section(%d) %s\n", + idx, section_name); + goto errout; + } + prog->insns = malloc(size); if (!prog->insns) { pr_warning("failed to alloc insns for prog under section %s\n", @@ -2006,7 +2029,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) int len; len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->section_name); + prog->pin_name); if (len < 0) { err = -EINVAL; goto err_unpin_programs; @@ -2028,7 +2051,7 @@ err_unpin_programs: int len; len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->section_name); + prog->pin_name); if (len < 0) continue; else if (len >= PATH_MAX) @@ -2053,7 +2076,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) int len; len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->section_name); + prog->pin_name); if (len < 0) return -EINVAL; else if (len >= PATH_MAX) -- cgit v1.2.3 From 77380998d91dee8aafdbe42634776ba1ef692f1e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 9 Nov 2018 08:21:44 -0800 Subject: bpftool: add loadall command This patch adds new *loadall* command which slightly differs from the existing *load*. *load* command loads all programs from the obj file, but pins only the first programs. *loadall* pins all programs from the obj file under specified directory. The intended usecase is flow_dissector, where we want to load a bunch of progs, pin them all and after that construct a jump table. Signed-off-by: Stanislav Fomichev Acked-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 14 +++-- tools/bpf/bpftool/bash-completion/bpftool | 4 +- tools/bpf/bpftool/common.c | 31 +++++----- tools/bpf/bpftool/main.h | 1 + tools/bpf/bpftool/prog.c | 74 +++++++++++++++++------- 5 files changed, 81 insertions(+), 43 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index ac4e904b10fb..984d125c507a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -15,7 +15,8 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } + { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** + | **loadall** | **help** } MAP COMMANDS ============= @@ -24,7 +25,7 @@ MAP COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] +| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP* | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP* | **bpftool** **prog help** @@ -79,8 +80,11 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] - Load bpf program from binary *OBJ* and pin as *FILE*. + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] + Load bpf program(s) from binary *OBJ* and pin as *PATH*. + **bpftool prog load** pins only the first program from the + *OBJ* as *PATH*. **bpftool prog loadall** pins all programs + from the *OBJ* under *PATH* directory. **type** is optional, if not specified program type will be inferred from section names. By default bpftool will create new maps as declared in the ELF @@ -93,7 +97,7 @@ DESCRIPTION If **dev** *NAME* is specified program will be loaded onto given networking device (offload). - Note: *FILE* must be located in *bpffs* mount. It must not + Note: *PATH* must be located in *bpffs* mount. It must not contain a dot character ('.'), which is reserved for future extensions of *bpffs*. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 3f78e6404589..780ebafb756a 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -243,7 +243,7 @@ _bpftool() # Completion depends on object and command in use case $object in prog) - if [[ $command != "load" ]]; then + if [[ $command != "load" && $command != "loadall" ]]; then case $prev in id) _bpftool_get_prog_ids @@ -309,7 +309,7 @@ _bpftool() fi return 0 ;; - load) + load|loadall) local obj if [[ ${#words[@]} -lt 6 ]]; then diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 1149565be4b1..cb06a5b6e016 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -177,34 +177,23 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return fd; } -int do_pin_fd(int fd, const char *name) +int mount_bpffs_for_pin(const char *name) { char err_str[ERR_MAX_LEN]; char *file; char *dir; int err = 0; - err = bpf_obj_pin(fd, name); - if (!err) - goto out; - file = malloc(strlen(name) + 1); strcpy(file, name); dir = dirname(file); - if (errno != EPERM || is_bpffs(dir)) { - p_err("can't pin the object (%s): %s", name, strerror(errno)); + if (is_bpffs(dir)) + /* nothing to do if already mounted */ goto out_free; - } - /* Attempt to mount bpffs, then retry pinning. */ err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); - if (!err) { - err = bpf_obj_pin(fd, name); - if (err) - p_err("can't pin the object (%s): %s", name, - strerror(errno)); - } else { + if (err) { err_str[ERR_MAX_LEN - 1] = '\0'; p_err("can't mount BPF file system to pin the object (%s): %s", name, err_str); @@ -212,10 +201,20 @@ int do_pin_fd(int fd, const char *name) out_free: free(file); -out: return err; } +int do_pin_fd(int fd, const char *name) +{ + int err; + + err = mount_bpffs_for_pin(name); + if (err) + return err; + + return bpf_obj_pin(fd, name); +} + int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) { unsigned int id; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 14857c273bf6..61d82020af58 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -131,6 +131,7 @@ const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); int open_obj_pinned(char *path); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); +int mount_bpffs_for_pin(const char *name); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); int do_pin_fd(int fd, const char *name); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index b9b84553bec4..97d930042cd5 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -792,15 +792,16 @@ static int do_detach(int argc, char **argv) jsonw_null(json_wtr); return 0; } -static int do_load(int argc, char **argv) + +static int load_with_options(int argc, char **argv, bool first_prog_only) { enum bpf_attach_type expected_attach_type; struct bpf_object_open_attr attr = { .prog_type = BPF_PROG_TYPE_UNSPEC, }; struct map_replace *map_replace = NULL; + struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; - struct bpf_program *prog; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; @@ -918,26 +919,25 @@ static int do_load(int argc, char **argv) goto err_free_reuse_maps; } - prog = bpf_program__next(NULL, obj); - if (!prog) { - p_err("object file doesn't contain any bpf program"); - goto err_close_obj; - } + bpf_object__for_each_program(pos, obj) { + enum bpf_prog_type prog_type = attr.prog_type; - bpf_program__set_ifindex(prog, ifindex); - if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) { - const char *sec_name = bpf_program__title(prog, false); + if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) { + const char *sec_name = bpf_program__title(pos, false); - err = libbpf_prog_type_by_name(sec_name, &attr.prog_type, - &expected_attach_type); - if (err < 0) { - p_err("failed to guess program type based on section name %s\n", - sec_name); - goto err_close_obj; + err = libbpf_prog_type_by_name(sec_name, &prog_type, + &expected_attach_type); + if (err < 0) { + p_err("failed to guess program type based on section name %s\n", + sec_name); + goto err_close_obj; + } } + + bpf_program__set_ifindex(pos, ifindex); + bpf_program__set_type(pos, prog_type); + bpf_program__set_expected_attach_type(pos, expected_attach_type); } - bpf_program__set_type(prog, attr.prog_type); - bpf_program__set_expected_attach_type(prog, expected_attach_type); qsort(map_replace, old_map_fds, sizeof(*map_replace), map_replace_compar); @@ -1003,9 +1003,31 @@ static int do_load(int argc, char **argv) goto err_close_obj; } - if (do_pin_fd(bpf_program__fd(prog), pinfile)) + err = mount_bpffs_for_pin(pinfile); + if (err) goto err_close_obj; + if (first_prog_only) { + prog = bpf_program__next(NULL, obj); + if (!prog) { + p_err("object file doesn't contain any bpf program"); + goto err_close_obj; + } + + err = bpf_obj_pin(bpf_program__fd(prog), pinfile); + if (err) { + p_err("failed to pin program %s", + bpf_program__title(prog, false)); + goto err_close_obj; + } + } else { + err = bpf_object__pin_programs(obj, pinfile); + if (err) { + p_err("failed to pin all programs"); + goto err_close_obj; + } + } + if (json_output) jsonw_null(json_wtr); @@ -1025,6 +1047,16 @@ err_free_reuse_maps: return -1; } +static int do_load(int argc, char **argv) +{ + return load_with_options(argc, argv, true); +} + +static int do_loadall(int argc, char **argv) +{ + return load_with_options(argc, argv, false); +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -1037,7 +1069,8 @@ static int do_help(int argc, char **argv) " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n" " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" - " %s %s load OBJ FILE [type TYPE] [dev NAME] \\\n" + " %s %s { load | loadall } OBJ PATH \\\n" + " [type TYPE] [dev NAME] \\\n" " [map { idx IDX | name NAME } MAP]\n" " %s %s attach PROG ATTACH_TYPE MAP\n" " %s %s detach PROG ATTACH_TYPE MAP\n" @@ -1069,6 +1102,7 @@ static const struct cmd cmds[] = { { "dump", do_dump }, { "pin", do_pin }, { "load", do_load }, + { "loadall", do_loadall }, { "attach", do_attach }, { "detach", do_detach }, { 0 } -- cgit v1.2.3 From 3767a94b3253fc8c3df96913d7dec796619161c7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 9 Nov 2018 08:21:45 -0800 Subject: bpftool: add pinmaps argument to the load/loadall This new additional argument lets users pin all maps from the object at specified path. Signed-off-by: Stanislav Fomichev Acked-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 4 +++- tools/bpf/bpftool/bash-completion/bpftool | 3 ++- tools/bpf/bpftool/prog.c | 24 +++++++++++++++++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 984d125c507a..15e9172f7e55 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -80,7 +80,7 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog loadall** pins all programs @@ -96,6 +96,8 @@ DESCRIPTION use, referring to it by **id** or through a **pinned** file. If **dev** *NAME* is specified program will be loaded onto given networking device (offload). + Optional **pinmaps** argument can be provided to pin all + maps under *MAP_DIR* directory. Note: *PATH* must be located in *bpffs* mount. It must not contain a dot character ('.'), which is reserved for future diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 780ebafb756a..a05d0071f39f 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -346,7 +346,7 @@ _bpftool() _bpftool_get_map_ids return 0 ;; - pinned) + pinned|pinmaps) _filedir return 0 ;; @@ -358,6 +358,7 @@ _bpftool() COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) _bpftool_once_attr 'type' _bpftool_once_attr 'dev' + _bpftool_once_attr 'pinmaps' return 0 ;; esac diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 97d930042cd5..c2ce4220bbca 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -802,6 +802,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) struct map_replace *map_replace = NULL; struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; + const char *pinmaps = NULL; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; @@ -906,6 +907,13 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_free_reuse_maps; } NEXT_ARG(); + } else if (is_prefix(*argv, "pinmaps")) { + NEXT_ARG(); + + if (!REQ_ARGS(1)) + goto err_free_reuse_maps; + + pinmaps = GET_ARG(); } else { p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?", *argv); @@ -1028,6 +1036,14 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } } + if (pinmaps) { + err = bpf_object__pin_maps(obj, pinmaps); + if (err) { + p_err("failed to pin all maps"); + goto err_unpin; + } + } + if (json_output) jsonw_null(json_wtr); @@ -1038,6 +1054,11 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) return 0; +err_unpin: + if (first_prog_only) + unlink(pinfile); + else + bpf_object__unpin_programs(obj, pinfile); err_close_obj: bpf_object__close(obj); err_free_reuse_maps: @@ -1071,7 +1092,8 @@ static int do_help(int argc, char **argv) " %s %s pin PROG FILE\n" " %s %s { load | loadall } OBJ PATH \\\n" " [type TYPE] [dev NAME] \\\n" - " [map { idx IDX | name NAME } MAP]\n" + " [map { idx IDX | name NAME } MAP]\\\n" + " [pinmaps MAP_DIR]\n" " %s %s attach PROG ATTACH_TYPE MAP\n" " %s %s detach PROG ATTACH_TYPE MAP\n" " %s %s help\n" -- cgit v1.2.3 From 092f08927300086b6520dfa3aa4d9450266f27ae Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 9 Nov 2018 08:21:46 -0800 Subject: bpftool: support loading flow dissector This commit adds support for loading/attaching/detaching flow dissector program. When `bpftool loadall` is called with a flow_dissector prog (i.e. when the 'type flow_dissector' argument is passed), we load and pin all programs. User is responsible to construct the jump table for the tail calls. The last argument of `bpftool attach` is made optional for this use case. Example: bpftool prog load tools/testing/selftests/bpf/bpf_flow.o \ /sys/fs/bpf/flow type flow_dissector \ pinmaps /sys/fs/bpf/flow bpftool map update pinned /sys/fs/bpf/flow/jmp_table \ key 0 0 0 0 \ value pinned /sys/fs/bpf/flow/IP bpftool map update pinned /sys/fs/bpf/flow/jmp_table \ key 1 0 0 0 \ value pinned /sys/fs/bpf/flow/IPV6 bpftool map update pinned /sys/fs/bpf/flow/jmp_table \ key 2 0 0 0 \ value pinned /sys/fs/bpf/flow/IPV6OP bpftool map update pinned /sys/fs/bpf/flow/jmp_table \ key 3 0 0 0 \ value pinned /sys/fs/bpf/flow/IPV6FR bpftool map update pinned /sys/fs/bpf/flow/jmp_table \ key 4 0 0 0 \ value pinned /sys/fs/bpf/flow/MPLS bpftool map update pinned /sys/fs/bpf/flow/jmp_table \ key 5 0 0 0 \ value pinned /sys/fs/bpf/flow/VLAN bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector flow_dissector Tested by using the above lines to load the prog in the test_flow_dissector.sh selftest. Signed-off-by: Stanislav Fomichev Acked-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 26 +++++--- tools/bpf/bpftool/bash-completion/bpftool | 14 +++- tools/bpf/bpftool/prog.c | 85 +++++++++++++----------- 3 files changed, 74 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 15e9172f7e55..8db78ed82a71 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -26,8 +26,8 @@ MAP COMMANDS | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* | **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] -| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP* -| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP* +| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -40,7 +40,9 @@ MAP COMMANDS | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | | **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | } -| *ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** } +| *ATTACH_TYPE* := { +| **msg_verdict** | **skb_verdict** | **skb_parse** | **flow_dissector** +| } DESCRIPTION @@ -103,13 +105,17 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP* - Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*) - to the map *MAP*. - - **bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP* - Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*) - from the map *MAP*. + **bpftool prog attach** *PROG* *ATTACH_TYPE* [*MAP*] + Attach bpf program *PROG* (with type specified by + *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP* + parameter, with the exception of *flow_dissector* which is + attached to current networking name space. + + **bpftool prog detach** *PROG* *ATTACH_TYPE* [*MAP*] + Detach bpf program *PROG* (with type specified by + *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP* + parameter, with the exception of *flow_dissector* which is + detached from the current networking name space. **bpftool prog help** Print short help message. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index a05d0071f39f..45c2db257d2b 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -299,7 +299,8 @@ _bpftool() fi if [[ ${#words[@]} == 6 ]]; then - COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) ) + COMPREPLY=( $( compgen -W "msg_verdict skb_verdict \ + skb_parse flow_dissector" -- "$cur" ) ) return 0 fi @@ -338,7 +339,16 @@ _bpftool() case $prev in type) - COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \ + COMPREPLY=( $( compgen -W "socket kprobe \ + kretprobe classifier flow_dissector \ + action tracepoint raw_tracepoint \ + xdp perf_event cgroup/skb cgroup/sock \ + cgroup/dev lwt_in lwt_out lwt_xmit \ + lwt_seg6local sockops sk_skb sk_msg \ + lirc_mode2 cgroup/bind4 cgroup/bind6 \ + cgroup/connect4 cgroup/connect6 \ + cgroup/sendmsg4 cgroup/sendmsg6 \ + cgroup/post_bind4 cgroup/post_bind6" -- \ "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index c2ce4220bbca..5ff5544596e7 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -81,6 +81,7 @@ static const char * const attach_type_strings[] = { [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", [BPF_SK_MSG_VERDICT] = "msg_verdict", + [BPF_FLOW_DISSECTOR] = "flow_dissector", [__MAX_BPF_ATTACH_TYPE] = NULL, }; @@ -721,30 +722,49 @@ int map_replace_compar(const void *p1, const void *p2) return a->idx - b->idx; } -static int do_attach(int argc, char **argv) +static int parse_attach_detach_args(int argc, char **argv, int *progfd, + enum bpf_attach_type *attach_type, + int *mapfd) { - enum bpf_attach_type attach_type; - int err, mapfd, progfd; - - if (!REQ_ARGS(5)) { - p_err("too few parameters for map attach"); + if (!REQ_ARGS(3)) return -EINVAL; - } - progfd = prog_parse_fd(&argc, &argv); - if (progfd < 0) - return progfd; + *progfd = prog_parse_fd(&argc, &argv); + if (*progfd < 0) + return *progfd; - attach_type = parse_attach_type(*argv); - if (attach_type == __MAX_BPF_ATTACH_TYPE) { - p_err("invalid attach type"); + *attach_type = parse_attach_type(*argv); + if (*attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach/detach type"); return -EINVAL; } + + if (*attach_type == BPF_FLOW_DISSECTOR) { + *mapfd = -1; + return 0; + } + NEXT_ARG(); + if (!REQ_ARGS(2)) + return -EINVAL; + + *mapfd = map_parse_fd(&argc, &argv); + if (*mapfd < 0) + return *mapfd; + + return 0; +} - mapfd = map_parse_fd(&argc, &argv); - if (mapfd < 0) - return mapfd; +static int do_attach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int err, progfd; + int mapfd; + + err = parse_attach_detach_args(argc, argv, + &progfd, &attach_type, &mapfd); + if (err) + return err; err = bpf_prog_attach(progfd, mapfd, attach_type, 0); if (err) { @@ -760,27 +780,13 @@ static int do_attach(int argc, char **argv) static int do_detach(int argc, char **argv) { enum bpf_attach_type attach_type; - int err, mapfd, progfd; - - if (!REQ_ARGS(5)) { - p_err("too few parameters for map detach"); - return -EINVAL; - } - - progfd = prog_parse_fd(&argc, &argv); - if (progfd < 0) - return progfd; + int err, progfd; + int mapfd; - attach_type = parse_attach_type(*argv); - if (attach_type == __MAX_BPF_ATTACH_TYPE) { - p_err("invalid attach type"); - return -EINVAL; - } - NEXT_ARG(); - - mapfd = map_parse_fd(&argc, &argv); - if (mapfd < 0) - return mapfd; + err = parse_attach_detach_args(argc, argv, + &progfd, &attach_type, &mapfd); + if (err) + return err; err = bpf_prog_detach2(progfd, mapfd, attach_type); if (err) { @@ -1094,8 +1100,8 @@ static int do_help(int argc, char **argv) " [type TYPE] [dev NAME] \\\n" " [map { idx IDX | name NAME } MAP]\\\n" " [pinmaps MAP_DIR]\n" - " %s %s attach PROG ATTACH_TYPE MAP\n" - " %s %s detach PROG ATTACH_TYPE MAP\n" + " %s %s attach PROG ATTACH_TYPE [MAP]\n" + " %s %s detach PROG ATTACH_TYPE [MAP]\n" " %s %s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -1107,7 +1113,8 @@ static int do_help(int argc, char **argv) " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" " cgroup/sendmsg4 | cgroup/sendmsg6 }\n" - " ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse }\n" + " ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse |\n" + " flow_dissector }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], -- cgit v1.2.3 From 6c2afb674dbda9b736b8f09c976516e1e788860a Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Sat, 10 Nov 2018 22:15:14 -0800 Subject: selftests/bpf: Test narrow loads with off > 0 in test_verifier Test the following narrow loads in test_verifier for context __sk_buff: * off=1, size=1 - ok; * off=2, size=1 - ok; * off=3, size=1 - ok; * off=0, size=2 - ok; * off=1, size=2 - fail; * off=0, size=2 - ok; * off=3, size=2 - fail. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 48 +++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 6f61df62f690..54d16fbdef8b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2026,29 +2026,27 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check skb->hash byte load not permitted 1", + "check skb->hash byte load permitted 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 1), BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { - "check skb->hash byte load not permitted 2", + "check skb->hash byte load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 2), BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { - "check skb->hash byte load not permitted 3", + "check skb->hash byte load permitted 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -2060,8 +2058,7 @@ static struct bpf_test tests[] = { #endif BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { "check cb access: byte, wrong type", @@ -2173,7 +2170,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check skb->hash half load not permitted", + "check skb->hash half load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -2182,6 +2179,37 @@ static struct bpf_test tests[] = { #else BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check skb->hash half load not permitted, unaligned 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#endif + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check skb->hash half load not permitted, unaligned 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), #endif BPF_EXIT_INSN(), }, -- cgit v1.2.3 From e7605475f5f1af58668701b5ffe7763bdeb28527 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Sat, 10 Nov 2018 22:15:15 -0800 Subject: selftests/bpf: Test narrow loads with off > 0 for bpf_sock_addr Add more test cases for context bpf_sock_addr to test narrow loads with offset > 0 for ctx->user_ip4 field (__u32): * off=1, size=1; * off=2, size=1; * off=3, size=1; * off=2, size=2. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_sock_addr.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index aeeb76a54d63..73b7493d4120 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -574,24 +574,44 @@ static int bind4_prog_load(const struct sock_addr_test *test) /* if (sk.family == AF_INET && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24), /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, type)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1), BPF_JMP_A(1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20), /* 1st_byte_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18), + + /* 2nd_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16), + + /* 3rd_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14), + + /* 4th_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 3), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12), /* 1st_half_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10), + + /* 2nd_half_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8), /* whole_user_ip4 == expected) { */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, -- cgit v1.2.3 From 39aa6928d462d0f4fd809ff8109f98f24843b28b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 15 Nov 2018 02:51:57 +0100 Subject: xfrm: policy: fix netlink/pf_key policy lookups Colin Ian King says: Static analysis with CoverityScan found a potential issue [..] It seems that pointer pol is set to NULL and then a check to see if it is non-null is used to set pol to tmp; howeverm this check is always going to be false because pol is always NULL. Fix this and update test script to catch this. Updated script only: ./xfrm_policy.sh ; echo $? RTNETLINK answers: No such file or directory FAIL: ip -net ns3 xfrm policy get src 10.0.1.0/24 dst 10.0.2.0/24 dir out RTNETLINK answers: No such file or directory [..] PASS: policy before exception matches PASS: ping to .254 bypassed ipsec tunnel PASS: direct policy matches PASS: policy matches 1 Fixes: 6be3b0db6db ("xfrm: policy: add inexact policy search tree infrastructure") Reported-by: Colin Ian King Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 5 +++- tools/testing/selftests/net/xfrm_policy.sh | 38 +++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e8b8e722da62..ddc3335dd552 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1663,7 +1663,10 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, tmp = __xfrm_policy_bysel_ctx(cand.res[i], mark, if_id, type, dir, sel, ctx); - if (tmp && pol && tmp->pos < pol->pos) + if (!tmp) + continue; + + if (!pol || tmp->pos < pol->pos) pol = tmp; } } else { diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 6ca63a6e50e9..8db35b99457c 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -21,6 +21,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 ret=0 +policy_checks_ok=1 KEY_SHA=0xdeadbeef1234567890abcdefabcdefabcdefabcd KEY_AES=0x0123456789abcdef0123456789012345 @@ -45,6 +46,26 @@ do_esp() { ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow } +do_esp_policy_get_check() { + local ns=$1 + local lnet=$2 + local rnet=$3 + + ip -net $ns xfrm policy get src $lnet dst $rnet dir out > /dev/null + if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then + policy_checks_ok=0 + echo "FAIL: ip -net $ns xfrm policy get src $lnet dst $rnet dir out" + ret=1 + fi + + ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd > /dev/null + if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then + policy_checks_ok=0 + echo "FAIL: ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd" + ret=1 + fi +} + do_exception() { local ns=$1 local me=$2 @@ -112,31 +133,31 @@ check_xfrm() { # 1: iptables -m policy rule count != 0 rval=$1 ip=$2 - ret=0 + lret=0 ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null check_ipt_policy_count ns3 if [ $? -ne $rval ] ; then - ret=1 + lret=1 fi check_ipt_policy_count ns4 if [ $? -ne $rval ] ; then - ret=1 + lret=1 fi ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null check_ipt_policy_count ns3 if [ $? -ne $rval ] ; then - ret=1 + lret=1 fi check_ipt_policy_count ns4 if [ $? -ne $rval ] ; then - ret=1 + lret=1 fi - return $ret + return $lret } #check for needed privileges @@ -227,6 +248,11 @@ do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 do_dummies4 ns3 do_dummies6 ns4 +do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24 +do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24 +do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64 +do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64 + # ping to .254 should use ipsec, exception is not installed. check_xfrm 1 254 if [ $? -ne 0 ]; then -- cgit v1.2.3 From 36107c485f5efabe92ec4e0743724df5205511a4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Nov 2018 08:22:26 +0000 Subject: selftests: Adjust spectrum-2 two_mask_test In order for this to behave as required with delta bits, change the mask for rule with handle 103. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 3b75180f455d..6b96eb6f6e74 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -142,7 +142,7 @@ two_masks_test() tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ $tcflags dst_ip 192.0.2.2 action drop tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ - $tcflags dst_ip 192.0.0.0/16 action drop + $tcflags dst_ip 192.0.0.0/8 action drop $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q -- cgit v1.2.3 From 7dc5a0eeea185e5f3af3f97a86e76419791cdd60 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Nov 2018 08:22:27 +0000 Subject: selftests: Adjust spectrum-2 ctcam_two_atcam_masks_test In order for this to behave as required with delta bits, change the mask for rule with handle 103. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 6b96eb6f6e74..84ef95320c96 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -235,7 +235,7 @@ ctcam_two_atcam_masks_test() $tcflags dst_ip 192.0.2.2 action drop # Filter goes into A-TCAM tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ - $tcflags dst_ip 192.0.2.0/24 action drop + $tcflags dst_ip 192.0.0.0/16 action drop $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q -- cgit v1.2.3 From 3b423271b89a78757ad49725e131472e4fe1a49c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Nov 2018 08:22:36 +0000 Subject: selftests: mlxsw: spectrum-2: Add simple delta test Track the basic codepaths of delta handling, using objagg tracepoints. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/spectrum-2/tc_flower.sh | 82 +++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 84ef95320c96..00ae99fbc253 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -8,7 +8,7 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ - multiple_masks_test ctcam_edge_cases_test" + multiple_masks_test ctcam_edge_cases_test delta_simple_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -324,6 +324,86 @@ ctcam_edge_cases_test() ctcam_no_atcam_masks_test } +tp_record() +{ + local tracepoint=$1 + local cmd=$2 + + perf record -q -e $tracepoint $cmd + return $? +} + +tp_check_hits() +{ + local tracepoint=$1 + local count=$2 + + perf_output=`perf script -F trace:event,trace` + hits=`echo $perf_output | grep "$tracepoint:" | wc -l` + if [[ "$count" -ne "$hits" ]]; then + return 1 + fi + return 0 +} + +delta_simple_test() +{ + # The first filter will create eRP, the second filter will fit into + # the first eRP with delta. Remove the first rule then and check that + # the eRP stays (referenced by the second filter). + + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 1 handle 101 flower $tcflags dst_ip 192.0.0.0/24 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 1 + check_err $? "eRP was not created" + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 0 + check_err $? "eRP was incorrectly created" + tp_check_hits "objagg:objagg_obj_parent_assign" 1 + check_err $? "delta was not created" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 1 handle 101 flower" + tp_check_hits "objagg:objagg_obj_root_destroy" 0 + check_err $? "eRP was incorrectly destroyed" + tp_check_hits "objagg:objagg_obj_parent_unassign" 0 + check_err $? "delta was incorrectly destroyed" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after the first was removed" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 2 handle 102 flower" + tp_check_hits "objagg:objagg_obj_parent_unassign" 1 + check_err $? "delta was not destroyed" + tp_check_hits "objagg:objagg_obj_root_destroy" 1 + check_err $? "eRP was not destroyed" + + log_test "delta simple test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} -- cgit v1.2.3 From 5c86d2125b58949122e03f04ce940e6f5b8534ba Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 9 Nov 2018 10:18:16 -0800 Subject: selftests/bpf: Fix uninitialized duration warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Daniel Borkmann reports: test_progs.c: In function ‘main’: test_progs.c:81:3: warning: ‘duration’ may be used uninitialized in this function [-Wmaybe-uninitialized] printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\ ^~~~~~ test_progs.c:1706:8: note: ‘duration’ was declared here __u32 duration; ^~~~~~~~ Signed-off-by: Joe Stringer Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 2d3c04f45530..c1e688f61061 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1703,7 +1703,7 @@ static void test_reference_tracking() const char *file = "./test_sk_lookup_kern.o"; struct bpf_object *obj; struct bpf_program *prog; - __u32 duration; + __u32 duration = 0; int err = 0; obj = bpf_object__open(file); -- cgit v1.2.3 From a83d6e76a67424ebbbbed643f51e97934ffc2bc2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 12 Nov 2018 15:44:53 -0800 Subject: bpf: libbpf: Fix bpf_program__next() API This patch restores the behavior in commit eac7d84519a3 ("tools: libbpf: don't return '.text' as a program for multi-function programs") such that bpf_program__next() does not return pseudo programs in ".text". Fixes: 0c19a9fbc9cd ("libbpf: cleanup after partial failure in bpf_object__pin") Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e827542ffa3a..a01eb9584e52 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2193,19 +2193,25 @@ void *bpf_object__priv(struct bpf_object *obj) } static struct bpf_program * -__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, int i) +__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward) { + size_t nr_programs = obj->nr_programs; ssize_t idx; - if (!obj->programs) + if (!nr_programs) return NULL; + if (!p) + /* Iter from the beginning */ + return forward ? &obj->programs[0] : + &obj->programs[nr_programs - 1]; + if (p->obj != obj) { pr_warning("error: program handler doesn't match object\n"); return NULL; } - idx = (p - obj->programs) + i; + idx = (p - obj->programs) + (forward ? 1 : -1); if (idx >= obj->nr_programs || idx < 0) return NULL; return &obj->programs[idx]; @@ -2216,11 +2222,8 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) { struct bpf_program *prog = prev; - if (prev == NULL) - return obj->programs; - do { - prog = __bpf_program__iter(prog, obj, 1); + prog = __bpf_program__iter(prog, obj, true); } while (prog && bpf_program__is_function_storage(prog, obj)); return prog; @@ -2231,14 +2234,8 @@ bpf_program__prev(struct bpf_program *next, struct bpf_object *obj) { struct bpf_program *prog = next; - if (next == NULL) { - if (!obj->nr_programs) - return NULL; - return obj->programs + obj->nr_programs - 1; - } - do { - prog = __bpf_program__iter(prog, obj, -1); + prog = __bpf_program__iter(prog, obj, false); } while (prog && bpf_program__is_function_storage(prog, obj)); return prog; -- cgit v1.2.3 From 9108e3a023d3e4e77d94b589b07d397b0a790285 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 9 Nov 2018 10:54:02 -0800 Subject: selftest/bpf: Use bpf_sk_lookup_{tcp, udp} in test_sock_addr Use bpf_sk_lookup_tcp, bpf_sk_lookup_udp and bpf_sk_release helpers from test_sock_addr programs to make sure they're available and can lookup and release socket properly for IPv4/IPv4, TCP/UDP. Reading from a few fields of returned struct bpf_sock is also tested. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/connect4_prog.c | 43 +++++++++++++++++----- tools/testing/selftests/bpf/connect6_prog.c | 56 ++++++++++++++++++++++------- 2 files changed, 78 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c index 5a88a681d2ab..b8395f3c43e9 100644 --- a/tools/testing/selftests/bpf/connect4_prog.c +++ b/tools/testing/selftests/bpf/connect4_prog.c @@ -21,23 +21,48 @@ int _version SEC("version") = 1; SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { + struct bpf_sock_tuple tuple = {}; struct sockaddr_in sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr)); + memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport)); + + tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4); + tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0); + + if (!sk) + return 0; + + if (sk->src_ip4 != tuple.ipv4.daddr || + sk->src_port != DST_REWRITE_PORT4) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); /* Rewrite destination. */ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); ctx->user_port = bpf_htons(DST_REWRITE_PORT4); - if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { - ///* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); - sa.sin_family = AF_INET; - sa.sin_port = bpf_htons(0); - sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); + sa.sin_family = AF_INET; + sa.sin_port = bpf_htons(0); + sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - } + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; return 1; } diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c index 8ea3f7d12dee..25f5dc7b7aa0 100644 --- a/tools/testing/selftests/bpf/connect6_prog.c +++ b/tools/testing/selftests/bpf/connect6_prog.c @@ -29,7 +29,41 @@ int _version SEC("version") = 1; SEC("cgroup/connect6") int connect_v6_prog(struct bpf_sock_addr *ctx) { + struct bpf_sock_tuple tuple = {}; struct sockaddr_in6 sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv6.saddr, 0, sizeof(tuple.ipv6.saddr)); + memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport)); + + tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0); + tuple.ipv6.daddr[1] = bpf_htonl(DST_REWRITE_IP6_1); + tuple.ipv6.daddr[2] = bpf_htonl(DST_REWRITE_IP6_2); + tuple.ipv6.daddr[3] = bpf_htonl(DST_REWRITE_IP6_3); + + tuple.ipv6.dport = bpf_htons(DST_REWRITE_PORT6); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0); + + if (!sk) + return 0; + + if (sk->src_ip6[0] != tuple.ipv6.daddr[0] || + sk->src_ip6[1] != tuple.ipv6.daddr[1] || + sk->src_ip6[2] != tuple.ipv6.daddr[2] || + sk->src_ip6[3] != tuple.ipv6.daddr[3] || + sk->src_port != DST_REWRITE_PORT6) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); /* Rewrite destination. */ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0); @@ -39,21 +73,19 @@ int connect_v6_prog(struct bpf_sock_addr *ctx) ctx->user_port = bpf_htons(DST_REWRITE_PORT6); - if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { - /* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); - sa.sin6_family = AF_INET6; - sa.sin6_port = bpf_htons(0); + sa.sin6_family = AF_INET6; + sa.sin6_port = bpf_htons(0); - sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); - sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); - sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); - sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); + sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); + sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); + sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); + sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - } + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; return 1; } -- cgit v1.2.3 From 29a9c10e4110e368443f0b606d71557edee7f2cc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 12 Nov 2018 13:44:10 -0800 Subject: bpftool: make libbfd optional Make it possible to build bpftool without libbfd. libbfd and libopcodes are typically provided in dev/dbg packages (binutils-dev in debian) which we usually don't have installed on the fleet machines and we'd like a way to have bpftool version that works without installing any additional packages. This excludes support for disassembling jit-ted code and prints an error if the user tries to use these features. Tested by: cat > FEATURES_DUMP.bpftool < Acked-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Makefile | 13 +++++++++++-- tools/bpf/bpftool/jit_disasm.c | 8 +++++++- tools/bpf/bpftool/main.c | 3 --- tools/bpf/bpftool/main.h | 14 ++++++++++++++ tools/bpf/bpftool/prog.c | 3 +++ 5 files changed, 35 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index dac7eff4c7e5..1bea6b979082 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -53,7 +53,7 @@ ifneq ($(EXTRA_LDFLAGS),) LDFLAGS += $(EXTRA_LDFLAGS) endif -LIBS = -lelf -lbfd -lopcodes $(LIBBPF) +LIBS = -lelf $(LIBBPF) INSTALL ?= install RM ?= rm -f @@ -90,7 +90,16 @@ include $(wildcard $(OUTPUT)*.d) all: $(OUTPUT)bpftool -SRCS = $(wildcard *.c) +BFD_SRCS = jit_disasm.c + +SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c)) + +ifeq ($(feature-libbfd),1) +CFLAGS += -DHAVE_LIBBFD_SUPPORT +SRCS += $(BFD_SRCS) +LIBS += -lbfd -lopcodes +endif + OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index c75ffd9ce2bb..b2ed5ee1af5f 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -109,7 +109,7 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (inf) { bfdf->arch_info = inf; } else { - p_err("No libfd support for %s", arch); + p_err("No libbfd support for %s", arch); return; } } @@ -183,3 +183,9 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, bfd_close(bfdf); } + +int disasm_init(void) +{ + bfd_init(); + return 0; +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 75a3296dc0bc..5c4c1cd5a7ba 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,7 +31,6 @@ * SOFTWARE. */ -#include #include #include #include @@ -399,8 +398,6 @@ int main(int argc, char **argv) if (argc < 0) usage(); - bfd_init(); - ret = cmd_select(cmds, argc, argv, do_help); if (json_output) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 61d82020af58..10c6c16fae29 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -147,8 +147,22 @@ int prog_parse_fd(int *argc, char ***argv); int map_parse_fd(int *argc, char ***argv); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); +#ifdef HAVE_LIBBFD_SUPPORT void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, const char *arch, const char *disassembler_options); +int disasm_init(void); +#else +static inline +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options) +{ +} +static inline int disasm_init(void) +{ + p_err("No libbfd support"); + return -1; +} +#endif void print_data_json(uint8_t *data, size_t len); void print_hex_data_json(uint8_t *data, size_t len); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 5ff5544596e7..c176e1aa66fe 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -467,6 +467,9 @@ static int do_dump(int argc, char **argv) int fd; if (is_prefix(*argv, "jited")) { + if (disasm_init()) + return -1; + member_len = &info.jited_prog_len; member_ptr = &info.jited_prog_insns; } else if (is_prefix(*argv, "xlated")) { -- cgit v1.2.3 From 9c549a6b057386df478e4307902cbc84f1eee058 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Nov 2018 03:24:05 +0100 Subject: selftests: add explicit test for multiple concurrent GRO sockets This covers for proper accounting of encap needed static keys Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index e94ef8067173..aeac53a99aeb 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -91,6 +91,28 @@ run_one_nat() { wait $(jobs -p) } +run_one_2sock() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + cfg_veth + + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \ + echo "ok" || \ + echo "failed" & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} -p 12345 + sleep 0.1 + # first UDP GSO socket should be closed at this point + ./udpgso_bench_tx ${tx_args} + wait $(jobs -p) +} + run_nat_test() { local -r args=$@ @@ -98,6 +120,13 @@ run_nat_test() { ./in_netns.sh $0 __subprocess_nat $2 rx -r $3 } +run_2sock_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess_2sock $2 rx -G -r $3 +} + run_all() { local -r core_args="-l 4" local -r ipv4_args="${core_args} -4 -D 192.168.1.1" @@ -120,6 +149,7 @@ run_all() { run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" echo "ipv6" run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" @@ -130,6 +160,7 @@ run_all() { run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" } if [ ! -f ../bpf/xdp_dummy.o ]; then @@ -145,4 +176,7 @@ elif [[ $1 == "__subprocess" ]]; then elif [[ $1 == "__subprocess_nat" ]]; then shift run_one_nat $@ +elif [[ $1 == "__subprocess_2sock" ]]; then + shift + run_one_2sock $@ fi -- cgit v1.2.3 From 23499442c319412aa8e54e7a939e2eb531bdd77d Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 19 Nov 2018 14:49:01 -0800 Subject: bpf: libbpf: retry map creation without the name Since commit 88cda1c9da02 ("bpf: libbpf: Provide basic API support to specify BPF obj name"), libbpf unconditionally sets bpf_attr->name for maps. Pre v4.14 kernels don't know about map names and return an error about unexpected non-zero data. Retry sys_bpf without a map name to cover older kernels. v2 changes: * check for errno == EINVAL as suggested by Daniel Borkmann Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/lib/bpf/bpf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 03f9bcc4ef50..961e1b9fc592 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -69,6 +69,7 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; union bpf_attr attr; + int ret; memset(&attr, '\0', sizeof(attr)); @@ -86,7 +87,15 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) attr.map_ifindex = create_attr->map_ifindex; attr.inner_map_fd = create_attr->inner_map_fd; - return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + if (ret < 0 && errno == EINVAL && create_attr->name) { + /* Retry the same syscall, but without the name. + * Pre v4.14 kernels don't support map names. + */ + memset(attr.map_name, 0, sizeof(attr.map_name)); + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + } + return ret; } int bpf_create_map_node(enum bpf_map_type map_type, const char *name, -- cgit v1.2.3 From 608114e441ad3a4fa1fced4d6d00653a34765eee Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:10 +0000 Subject: tools: sync linux/bpf.h Synchronize changes to linux/bpf.h from * "bpf: allow zero-initializing hash map seed" * "bpf: move BPF_F_QUERY_EFFECTIVE after map flags" Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 852dc17ab47a..05d95290b848 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -257,9 +257,6 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) -/* flags for BPF_PROG_QUERY */ -#define BPF_F_QUERY_EFFECTIVE (1U << 0) - #define BPF_OBJ_NAME_LEN 16U /* Flags for accessing BPF object */ @@ -269,6 +266,12 @@ enum bpf_attach_type { /* Flag for stack_map, store build_id+offset instead of pointer */ #define BPF_F_STACK_BUILD_ID (1U << 5) +/* Zero-initialize hash function seed. This should only be used for testing. */ +#define BPF_F_ZERO_SEED (1U << 6) + +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, @@ -2201,6 +2204,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) * Description @@ -2233,6 +2238,8 @@ union bpf_attr { * **CONFIG_NET** configuration option. * Return * Pointer to *struct bpf_sock*, or NULL in case of failure. + * For sockets with reuseport option, *struct bpf_sock* + * return is from reuse->socks[] using hash of the packet. * * int bpf_sk_release(struct bpf_sock *sk) * Description -- cgit v1.2.3 From bf5d68c7304008ef838f1a2ca1aae8fab74d633d Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 16 Nov 2018 11:41:11 +0000 Subject: tools: add selftest for BPF_F_ZERO_SEED Check that iterating two separate hash maps produces the same order of keys if BPF_F_ZERO_SEED is used. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_maps.c | 64 ++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 4db2116e52be..9f0a5b16a246 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -258,24 +258,36 @@ static void test_hashmap_percpu(int task, void *data) close(fd); } -static void test_hashmap_walk(int task, void *data) +static int helper_fill_hashmap(int max_entries) { - int fd, i, max_entries = 1000; - long long key, value, next_key; - bool next_key_valid = true; + int i, fd, ret; + long long key, value; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), max_entries, map_flags); - if (fd < 0) { - printf("Failed to create hashmap '%s'!\n", strerror(errno)); - exit(1); - } + CHECK(fd < 0, + "failed to create hashmap", + "err: %s, flags: 0x%x\n", strerror(errno), map_flags); for (i = 0; i < max_entries; i++) { key = i; value = key; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0); + ret = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(ret != 0, + "can't update hashmap", + "err: %s\n", strerror(ret)); } + return fd; +} + +static void test_hashmap_walk(int task, void *data) +{ + int fd, i, max_entries = 1000; + long long key, value, next_key; + bool next_key_valid = true; + + fd = helper_fill_hashmap(max_entries); + for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key, &next_key) == 0; i++) { key = next_key; @@ -306,6 +318,39 @@ static void test_hashmap_walk(int task, void *data) close(fd); } +static void test_hashmap_zero_seed(void) +{ + int i, first, second, old_flags; + long long key, next_first, next_second; + + old_flags = map_flags; + map_flags |= BPF_F_ZERO_SEED; + + first = helper_fill_hashmap(3); + second = helper_fill_hashmap(3); + + for (i = 0; ; i++) { + void *key_ptr = !i ? NULL : &key; + + if (bpf_map_get_next_key(first, key_ptr, &next_first) != 0) + break; + + CHECK(bpf_map_get_next_key(second, key_ptr, &next_second) != 0, + "next_key for second map must succeed", + "key_ptr: %p", key_ptr); + CHECK(next_first != next_second, + "keys must match", + "i: %d first: %lld second: %lld\n", i, + next_first, next_second); + + key = next_first; + } + + map_flags = old_flags; + close(first); + close(second); +} + static void test_arraymap(int task, void *data) { int key, next_key, fd; @@ -1534,6 +1579,7 @@ static void run_all_tests(void) test_hashmap(0, NULL); test_hashmap_percpu(0, NULL); test_hashmap_walk(0, NULL); + test_hashmap_zero_seed(); test_arraymap(0, NULL); test_arraymap_percpu(0, NULL); -- cgit v1.2.3 From 601bc1c13916899a14e682ada3dfa3ec21b13318 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:08 +0000 Subject: selftests: forwarding: lib: Support NUM_NETIFS of 0 So far the case of NUM_NETIFS of 0 has not been interesting. However if one wishes to reuse the lib.sh routines in a setup of a separate namespace, being able to import like this is handy. Therefore replace the {1..$NUM_NETIFS} references, which cause iteration over 1 and 0, with an explicit for loop like we do in setup_wait() and tc_offload_check(), so that for NUM_NETIFS of 0 no iteration is done. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 85d253546684..bb0e9fdf893e 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -104,7 +104,7 @@ create_netif_veth() { local i - for i in $(eval echo {1..$NUM_NETIFS}); do + for ((i = 1; i <= NUM_NETIFS; ++i)); do local j=$((i+1)) ip link show dev ${NETIFS[p$i]} &> /dev/null @@ -135,7 +135,7 @@ if [[ "$NETIF_CREATE" = "yes" ]]; then create_netif fi -for i in $(eval echo {1..$NUM_NETIFS}); do +for ((i = 1; i <= NUM_NETIFS; ++i)); do ip link show dev ${NETIFS[p$i]} &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: could not find all required interfaces" -- cgit v1.2.3 From d0540d1706c3cfbb51769de02858f846d3836389 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:10 +0000 Subject: selftests: forwarding: lib: Add in_ns() In order to run a certain command inside another network namespace, it's possible to use "ip netns exec ns command". However then one can't use functions defined in lib.sh or a test suite. One option is to do "ip netns exec ns bash -c command", provided that all functions that one wishes to use (and their dependencies) are published using "export -f". That may not be practical. Therefore, introduce a helper in_ns(), which wraps a given command in a boilerplate of "ip netns exec" and "source lib.sh", thus making all library functions available. (Custom functions that a script wishes to run within a namespace still need to be exported.) Because quotes in "$@" aren't recognized in heredoc, hand-expand the array in an explicit for loop, leveraging printf %q to handle proper quoting. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index bb0e9fdf893e..93d6e9df483e 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -783,6 +783,17 @@ multipath_eval() log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" } +in_ns() +{ + local name=$1; shift + + ip netns exec $name bash <<-EOF + NUM_NETIFS=0 + source lib.sh + $(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done) + EOF +} + ############################################################################## # Tests -- cgit v1.2.3 From 58c7a2d19e90a6ec3fbe6cd0c6b1025b731bc000 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:11 +0000 Subject: selftests: forwarding: ping{6, }_test(): Add description argument Have ping_test() recognize an optional argument with a description of the test. This is handy if there are several ping test, to make it clear which is which. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 93d6e9df483e..fa734ddce2e9 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -813,7 +813,7 @@ ping_test() ping_do $1 $2 check_err $? - log_test "ping" + log_test "ping$3" } ping6_do() @@ -832,7 +832,7 @@ ping6_test() ping6_do $1 $2 check_err $? - log_test "ping6" + log_test "ping6$3" } learning_test() -- cgit v1.2.3 From d20b0f214aa74a3f6b28b8d522f273e1fd2abea3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:12 +0000 Subject: selftests: forwarding: ping{6, }_do(): Allow passing ping arguments Make the ping routine more generic by allowing passing arbitrary ping command-line arguments. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index fa734ddce2e9..e916663a1019 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -801,10 +801,11 @@ ping_do() { local if_name=$1 local dip=$2 + local args=$3 local vrf_name vrf_name=$(master_name_get $if_name) - ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null + ip vrf exec $vrf_name $PING $args $dip -c 10 -i 0.1 -w 2 &> /dev/null } ping_test() @@ -820,10 +821,11 @@ ping6_do() { local if_name=$1 local dip=$2 + local args=$3 local vrf_name vrf_name=$(master_name_get $if_name) - ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null + ip vrf exec $vrf_name $PING6 $args $dip -c 10 -i 0.1 -w 2 &> /dev/null } ping6_test() -- cgit v1.2.3 From d1038cd0f67e542f2744782f958b95c4909fb63a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:13 +0000 Subject: selftests: forwarding: lib: Add link_stats_rx_errors_get() Such a function will be useful for counting malformed packets in the ECN decap test. To that end, introduce a common handler for handling stat-fetching, and reuse it in link_stats_tx_packets_get() and link_stats_rx_errors_get(). Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e916663a1019..7af5a03bcb32 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -477,11 +477,24 @@ master_name_get() ip -j link show dev $if_name | jq -r '.[]["master"]' } +link_stats_get() +{ + local if_name=$1; shift + local dir=$1; shift + local stat=$1; shift + + ip -j -s link show dev $if_name \ + | jq '.[]["stats64"]["'$dir'"]["'$stat'"]' +} + link_stats_tx_packets_get() { - local if_name=$1 + link_stats_get $1 tx packets +} - ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' +link_stats_rx_errors_get() +{ + link_stats_get $1 rx errors } tc_rule_stats_get() -- cgit v1.2.3 From fd64d5a2e383568bb0d6b7b8c619358e68b6718c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:14 +0000 Subject: selftests: forwarding: Add a skeleton of vxlan_bridge_1d This skeleton sets up a topology with three VXLAN endpoints: one "local", possibly offloaded, and two "remote", formed using veth pairs and likely purely software bridges. The "local" endpoint is connected to host systems by a VLAN-unaware bridge. Since VXLAN tunnels must be unique per namespace, each of the "remote" endpoints is in its own namespace. H3 forms the bridge between the three domains. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1d.sh | 296 +++++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh new file mode 100755 index 000000000000..af88e1c146a7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -0,0 +1,296 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# +----|---------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1d) | | | | BR2 (802.1d) | | +# | | + vx2 (vxlan) | | | | + vx2 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx1 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx1 up + + ip link set dev vx1 master br1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + rp1_unset_addr + ip link set dev $rp1 down + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx1 nomaster + ip link set dev vx1 down + ip link del dev vx1 + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + ip link add name vx2 type vxlan id 1000 local $in_addr dstport "$VXPORT" + ip link set dev vx2 up + bridge fdb append dev vx2 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx2 master br2 + tc qdisc add dev vx2 clsact + + simple_if_init w2 $host_addr/28 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS -- cgit v1.2.3 From 5852fd07c441bc7c2e720440d8df2e2b80aea58b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:15 +0000 Subject: selftests: forwarding: vxlan_bridge_1d: Add ping test Test end-to-end reachability between local and remote endpoints. Note that because learning is disabled on the VXLAN device, the ICMP requests will end up being flooded to all remotes. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index af88e1c146a7..0e3d7abc70d3 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -65,6 +65,7 @@ export VXPORT : ${ALL_TESTS:=" + ping_ipv4 "} NUM_NETIFS=6 @@ -281,6 +282,13 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 192.0.2.2 ": local->local" + ping_test $h1 192.0.2.3 ": local->remote 1" + ping_test $h1 192.0.2.4 ": local->remote 2" +} + test_all() { echo "Running tests with UDP port $VXPORT" -- cgit v1.2.3 From edaa117efe46845e17275c51f61591df15d9e9a9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:17 +0000 Subject: selftests: forwarding: vxlan_bridge_1d: Add flood test Test that when sending traffic to an unlearned MAC address, the traffic is flooded to both remote VXLAN endpoints. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1d.sh | 105 +++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 0e3d7abc70d3..1edd5189c41c 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -66,6 +66,7 @@ export VXPORT : ${ALL_TESTS:=" ping_ipv4 + test_flood "} NUM_NETIFS=6 @@ -289,6 +290,110 @@ ping_ipv4() ping_test $h1 192.0.2.4 ": local->remote 2" } +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_sw 2>/dev/null || \ + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_hw +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx2 ns1" "vx2 ns2") + local counter + local key + + for counter in "${counters[@]}"; do + flood_counter_install $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $counter + done +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local what=$1; shift + + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" +} + test_all() { echo "Running tests with UDP port $VXPORT" -- cgit v1.2.3 From bfd1e27038ab7e9ee3f730667cd0ae0e0eb23f88 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:18 +0000 Subject: selftests: forwarding: vxlan_bridge_1d: Add unicast test Test that when sending traffic to a learned MAC address, the traffic is forwarded accurately only to the right endpoint. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1d.sh | 54 ++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 1edd5189c41c..1a3486ec1d21 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -67,6 +67,7 @@ export VXPORT : ${ALL_TESTS:=" ping_ipv4 test_flood + test_unicast "} NUM_NETIFS=6 @@ -263,6 +264,10 @@ setup_prepare() vrp2_create ns1_create ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) } cleanup() @@ -394,6 +399,55 @@ test_flood() __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" } +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx1 192.0.2.34" + "$r2_mac vx1 192.0.2.50") + local target + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add $target + done + + __test_unicast $h2_mac 192.0.2.2 0 "local MAC unicast" + __test_unicast $r1_mac 192.0.2.3 1 "remote MAC 1 unicast" + __test_unicast $r2_mac 192.0.2.4 2 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del $target + done +} + test_all() { echo "Running tests with UDP port $VXPORT" -- cgit v1.2.3 From 50a02b0825668fe96884d47914fa082c91bda504 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:19 +0000 Subject: selftests: forwarding: vxlan_bridge_1d: Reconfigure & rerun tests The ordering of the topology creation can have impact on whether a driver is successful in offloading VXLAN. Therefore add a pseudo-test that reshuffles bits of the topology, and then reruns the same suite of tests again to make sure that the new setup is supported as well. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1d.sh | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 1a3486ec1d21..a943d8da14b9 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -68,6 +68,10 @@ export VXPORT ping_ipv4 test_flood test_unicast + reapply_config + ping_ipv4 + test_flood + test_unicast "} NUM_NETIFS=6 @@ -288,6 +292,28 @@ cleanup() vrf_cleanup } +# For the first round of tests, vx1 is the first device to get attached to the +# bridge, and that at the point that the local IP is already configured. Try the +# other scenario of attaching the device to an already-offloaded bridge, and +# only then attach the local IP. +reapply_config() +{ + echo "Reapplying configuration" + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + rp1_unset_addr + ip link set dev vx1 nomaster + sleep 5 + + ip link set dev vx1 master br1 + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + sleep 1 + rp1_set_addr + sleep 5 +} + ping_ipv4() { ping_test $h1 192.0.2.2 ": local->local" -- cgit v1.2.3 From b3a7ee74ee659c789b6e6254d0839789477f5c3e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:20 +0000 Subject: selftests: forwarding: vxlan_bridge_1d: Add a TTL test This tests whether TTL of VXLAN envelope packets is properly set based on the device configuration. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1d.sh | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index a943d8da14b9..0c17b0d427ba 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -68,6 +68,7 @@ export VXPORT ping_ipv4 test_flood test_unicast + test_ttl reapply_config ping_ipv4 test_flood @@ -474,6 +475,38 @@ test_unicast() done } +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +test_ttl() +{ + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_ttl 99 action pass + vxlan_ping_test $h1 192.0.2.3 "" v1 egress 77 10 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: envelope TTL" +} + test_all() { echo "Running tests with UDP port $VXPORT" -- cgit v1.2.3 From d417ecf533fe014de667705f58ae5c579f7740b9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:21 +0000 Subject: selftests: forwarding: vxlan_bridge_1d: Add a TOS test Test that TOS is inherited from the tunneled packet into the envelope as configured at the VXLAN device. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 0c17b0d427ba..ac1070937ae7 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -69,6 +69,7 @@ export VXPORT test_flood test_unicast test_ttl + test_tos reapply_config ping_ipv4 test_flood @@ -507,6 +508,19 @@ test_ttl() log_test "VXLAN: envelope TTL" } +test_tos() +{ + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_tos 0x40 action pass + vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: envelope TOS inheritance" +} + test_all() { echo "Running tests with UDP port $VXPORT" -- cgit v1.2.3 From 1e5abfb3ff41a532f687875ada9ea2af7a1069b3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:22 +0000 Subject: selftests: forwarding: vxlan_bridge_1d: Add an ECN encap test Test that ECN bits in the VXLAN envelope are correctly deduced from the overlay packet. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1d.sh | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index ac1070937ae7..82a124cbd523 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -70,6 +70,7 @@ export VXPORT test_unicast test_ttl test_tos + test_ecn_encap reapply_config ping_ipv4 test_flood @@ -521,6 +522,31 @@ test_tos() log_test "VXLAN: envelope TOS inheritance" } +__test_ecn_encap() +{ + local q=$1; shift + local tos=$1; shift + + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_tos $tos action pass + sleep 1 + vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: ECN encap: $q->$tos" +} + +test_ecn_encap() +{ + # In accordance with INET_ECN_encapsulate() + __test_ecn_encap 0x00 0x00 + __test_ecn_encap 0x01 0x01 + __test_ecn_encap 0x02 0x02 + __test_ecn_encap 0x03 0x02 +} + test_all() { echo "Running tests with UDP port $VXPORT" -- cgit v1.2.3 From a0b61f3d8ebfe927d596c1aaca9b6e23bd185f55 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:24 +0000 Subject: selftests: forwarding: vxlan_bridge_1d: Add an ECN decap test Test that when decapsulating from VXLAN, the values of inner and outer TOS are handled appropriately. Because VXLAN driver on its own won't produce the arbitrary TOS combinations necessary to test this feature, simply open-code a single ICMP packet and have mausezahn assemble it. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1d.sh | 116 +++++++++++++++++++++ 1 file changed, 116 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 82a124cbd523..5cc6ac74eb74 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -71,6 +71,7 @@ export VXPORT test_ttl test_tos test_ecn_encap + test_ecn_decap reapply_config ping_ipv4 test_flood @@ -547,6 +548,121 @@ test_ecn_encap() __test_ecn_encap 0x03 0x02 } +vxlan_encapped_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + + $MZ $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$(mac_get w2):"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"$inner_tos:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + )"08:"$( : ICMP type + )"00:"$( : ICMP code + )"8b:f2:"$( : ICMP csum + )"1f:6a:"$( : ICMP request identifier + )"00:01:"$( : ICMP request sequence number + )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload + )"6d:74:0b:00:00:00:00:00:"$( : + )"10:11:12:13:14:15:16:17:"$( : + )"18:19:1a:1b:1c:1d:1e:1f:"$( : + )"20:21:22:23:24:25:26:27:"$( : + )"28:29:2a:2b:2c:2d:2e:2f:"$( : + )"30:31:32:33:34:35:36:37" +} +export -f vxlan_encapped_ping_do + +vxlan_encapped_ping_test() +{ + local ping_dev=$1; shift + local nh_dev=$1; shift + local ping_dip=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local stat_get=$1; shift + local expect=$1; shift + + local t0=$($stat_get) + + in_ns ns1 \ + vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \ + $ping_dip $(mac_get $h1) \ + $inner_tos $outer_tos + + local t1=$($stat_get) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} +export -f vxlan_encapped_ping_test + +__test_ecn_decap() +{ + local orig_inner_tos=$1; shift + local orig_outer_tos=$1; shift + local decapped_tos=$1; shift + + RET=0 + + tc filter add dev $h1 ingress pref 77 prot ip \ + flower ip_tos $decapped_tos action pass + sleep 1 + vxlan_encapped_ping_test v2 v1 192.0.2.17 \ + $orig_inner_tos $orig_outer_tos \ + "tc_rule_stats_get $h1 77 ingress" 10 + tc filter del dev $h1 ingress pref 77 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos" +} + +test_ecn_decap_error() +{ + local orig_inner_tos=00 + local orig_outer_tos=03 + + RET=0 + + vxlan_encapped_ping_test v2 v1 192.0.2.17 \ + $orig_inner_tos $orig_outer_tos \ + "link_stats_rx_errors_get vx1" 10 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error" +} + +test_ecn_decap() +{ + # In accordance with INET_ECN_decapsulate() + __test_ecn_decap 00 00 0x00 + __test_ecn_decap 01 01 0x01 + __test_ecn_decap 02 01 0x02 + __test_ecn_decap 01 03 0x03 + __test_ecn_decap 02 03 0x03 + test_ecn_decap_error +} + test_all() { echo "Running tests with UDP port $VXPORT" -- cgit v1.2.3 From 3485f87cb7f82434cb6526a2ab5088012378dcb6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 19 Nov 2018 16:11:25 +0000 Subject: selftests: forwarding: vxlan_bridge_1d_port_8472: New test This simple wrapper reruns the VXLAN ping test with a port number of 8472. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh new file mode 100755 index 000000000000..3bf3da69195f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 +" +source vxlan_bridge_1d.sh -- cgit v1.2.3 From 99c9b084f0bec000ef394d1bb58f33c4910752d9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 19 Nov 2018 16:11:26 +0000 Subject: selftests: mlxsw: Add a test for VxLAN configuration Test various aspects of VxLAN offloading which are specific to mlxsw, such as sanitization of invalid configurations and offload indication. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 664 +++++++++++++++++++++ 1 file changed, 664 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/vxlan.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh new file mode 100755 index 000000000000..52e78adfe081 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -0,0 +1,664 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various aspects of VxLAN offloading which are specific to mlxsw, such +# as sanitization of invalid configurations and offload indication. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="sanitization_test offload_indication_test" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +sanitization_single_dev_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? +} + +sanitization_single_dev_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 &> /dev/null + check_fail $? + + ip link set dev $swp1 nomaster + + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? +} + +sanitization_single_dev_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device - valid configuration" +} + +sanitization_single_dev_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a vlan-aware bridge" +} + +sanitization_single_dev_mcast_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a multicast enabled bridge" +} + +sanitization_single_dev_mcast_group_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + dev $swp2 group 239.0.0.1 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a multicast group" +} + +sanitization_single_dev_no_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with no local ip" +} + +sanitization_single_dev_local_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 2001:db8::1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with local ipv6 address" +} + +sanitization_single_dev_learning_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with learning enabled" +} + +sanitization_single_dev_local_interface_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with local interface" +} + +sanitization_single_dev_port_range_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + srcport 4000 5000 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp source port range" +} + +sanitization_single_dev_tos_static_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos 20 local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with static tos" +} + +sanitization_single_dev_ttl_inherit_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl inherit tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with inherit ttl" +} + +sanitization_single_dev_udp_checksum_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp checksum" +} + +sanitization_single_dev_test() +{ + # These tests make sure that we correctly sanitize VxLAN device + # configurations we do not support + sanitization_single_dev_valid_test + sanitization_single_dev_vlan_aware_test + sanitization_single_dev_mcast_enabled_test + sanitization_single_dev_mcast_group_test + sanitization_single_dev_no_local_ip_test + sanitization_single_dev_local_ipv6_test + sanitization_single_dev_learning_enabled_test + sanitization_single_dev_local_interface_test + sanitization_single_dev_port_range_test + sanitization_single_dev_tos_static_test + sanitization_single_dev_ttl_inherit_test + sanitization_single_dev_udp_checksum_test +} + +sanitization_multi_devs_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 + check_err $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? +} + +sanitization_multi_devs_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 &> /dev/null + check_fail $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev vxlan1 master br1 + check_err $? + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 &> /dev/null + check_fail $? +} + +sanitization_multi_devs_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_multi_devs_test_pass + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices - valid configuration" +} + +sanitization_multi_devs_ttl_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 40 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different ttl" +} + +sanitization_multi_devs_udp_dstport_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 5789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different udp destination port" +} + +sanitization_multi_devs_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.2 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different local ip" +} + +sanitization_multi_devs_test() +{ + # The device has a single VTEP, which means all the VxLAN devices + # we offload must share certain properties such as source IP and + # UDP destination port. These tests make sure that we forbid + # configurations that violate this limitation + sanitization_multi_devs_valid_test + sanitization_multi_devs_ttl_test + sanitization_multi_devs_udp_dstport_test + sanitization_multi_devs_local_ip_test +} + +sanitization_test() +{ + sanitization_single_dev_test + sanitization_multi_devs_test +} + +offload_indication_setup_create() +{ + # Create a simple setup with two bridges, each with a VxLAN device + # and one local port + ip link add name br0 up type bridge mcast_snooping 0 + ip link add name br1 up type bridge mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + + ip address add 198.51.100.1/32 dev lo + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 +} + +offload_indication_setup_destroy() +{ + ip link del dev vxlan1 + ip link del dev vxlan0 + + ip address del 198.51.100.1/32 dev lo + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link del dev br1 + ip link del dev br0 +} + +offload_indication_fdb_flood_test() +{ + RET=0 + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + + bridge fdb show brport vxlan0 | grep 00:00:00:00:00:00 \ + | grep -q offload + check_err $? + + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self + + log_test "vxlan flood entry offload indication" +} + +offload_indication_fdb_bridge_test() +{ + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ + dst 198.51.100.2 + + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2 + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self master +} + +offload_indication_fdb_test() +{ + offload_indication_fdb_flood_test + offload_indication_fdb_bridge_test +} + +offload_indication_decap_route_test() +{ + RET=0 + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan0 down + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan1 down + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - vxlan device down" + + RET=0 + + ip link set dev vxlan1 up + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan0 up + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - vxlan device up" + + RET=0 + + ip address delete 198.51.100.1/32 dev lo + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + ip address add 198.51.100.1/32 dev lo + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - add local route" + + RET=0 + + ip link set dev $swp1 nomaster + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev $swp2 nomaster + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - local ports enslavement" + + RET=0 + + ip link del dev br0 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - bridge device deletion" + + RET=0 + + ip link add name br0 up type bridge mcast_snooping 0 + ip link add name br1 up type bridge mcast_snooping 0 + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + ip link set dev vxlan0 master br0 + ip link set dev vxlan1 master br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev vxlan0 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev vxlan1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - vxlan device deletion" + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 +} + +offload_indication_test() +{ + offload_indication_setup_create + offload_indication_fdb_test + offload_indication_decap_route_test + offload_indication_setup_destroy +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 94d302deae25a0925dc736e444005918522be730 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 19 Nov 2018 16:11:27 +0000 Subject: selftests: mlxsw: Add a test for VxLAN flooding The device stores flood records in a singly linked list where each record stores up to three IPv4 addresses of remote VTEPs. The test verifies that packets are correctly flooded in various cases such as deletion of a record in the middle of the list. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/vxlan_flooding.sh | 309 +++++++++++++++++++++ 1 file changed, 309 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh new file mode 100755 index 000000000000..fedcb7b35af9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh @@ -0,0 +1,309 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to three IPv4 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 203.0.113.1/24| +# +----|---------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 198.51.100.1 | | +# | | remote 198.51.100.{2..13} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 198.51.100.0/24 via 192.0.2.2 | +# | | +# | + $rp1 | +# | | 192.0.2.1/24 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 192.0.2.2/24 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 203.0.113.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 203.0.113.1/24 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip address add 198.51.100.1/32 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 198.51.100.1/32 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 192.0.2.1/24 dev $rp1 + ip route add 198.51.100.0/24 via 192.0.2.2 +} + +router1_destroy() +{ + ip route del 198.51.100.0/24 via 192.0.2.2 + ip address del 192.0.2.1/24 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 192.0.2.2/24 +} + +router2_destroy() +{ + simple_if_fini $rp2 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 198.51.100.$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ip pref $i handle $i \ + flower ip_proto udp dst_ip 198.51.100.$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ip pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 12 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=12 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 8..10 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 1 1 1 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.8 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.9 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.10 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 3 3 3 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.4 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 4 4 4 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.11 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.13 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 4 5 5 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.5 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.6 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.7 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 781e775e296ce3aabe0a4a0f773dccda02267695 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 19 Nov 2018 15:29:09 -0800 Subject: tools/bpf: Sync kernel btf.h header The kernel uapi btf.h is synced to the tools directory. Signed-off-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/btf.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 972265f32871..14f66948fc95 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -40,7 +40,8 @@ struct btf_type { /* "size" is used by INT, ENUM, STRUCT and UNION. * "size" tells the size of the type it is describing. * - * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC and FUNC_PROTO. * "type" is a type_id referring to another type. */ union { @@ -64,8 +65,10 @@ struct btf_type { #define BTF_KIND_VOLATILE 9 /* Volatile */ #define BTF_KIND_CONST 10 /* Const */ #define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_MAX 11 -#define NR_BTF_KINDS 12 +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#define BTF_KIND_MAX 13 +#define NR_BTF_KINDS 14 /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -110,4 +113,13 @@ struct btf_member { __u32 offset; /* offset in bits */ }; +/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". + * The exact number of btf_param is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_param { + __u32 name_off; + __u32 type; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ -- cgit v1.2.3 From 78a2540e8945678b390a5f41eb82459bc6f0f36c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 19 Nov 2018 15:29:10 -0800 Subject: tools/bpf: Add tests for BTF_KIND_FUNC_PROTO and BTF_KIND_FUNC This patch adds unit tests for BTF_KIND_FUNC_PROTO and BTF_KIND_FUNC to test_btf. Signed-off-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 4 + tools/testing/selftests/bpf/test_btf.c | 474 ++++++++++++++++++++++++++++++++- 2 files changed, 476 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 449591aa9900..31225e64766f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -165,6 +165,10 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) case BTF_KIND_ENUM: next_type += vlen * sizeof(struct btf_enum); break; + case BTF_KIND_FUNC_PROTO: + next_type += vlen * sizeof(struct btf_param); + break; + case BTF_KIND_FUNC: case BTF_KIND_TYPEDEF: case BTF_KIND_PTR: case BTF_KIND_FWD: diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index f42b3396d622..e0eeee5c8c04 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -85,8 +85,20 @@ static int __base_pr(const char *format, ...) #define BTF_TYPEDEF_ENC(name, type) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type) -#define BTF_PTR_ENC(name, type) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) +#define BTF_PTR_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) + +#define BTF_CONST_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type) + +#define BTF_FUNC_PROTO_ENC(ret_type, nargs) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type) + +#define BTF_FUNC_PROTO_ARG_ENC(name, type) \ + (name), (type) + +#define BTF_FUNC_ENC(name, func_proto) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto) #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f @@ -1374,6 +1386,464 @@ static struct btf_raw_test raw_tests[] = { .map_create_err = true, }, +{ + .descr = "func proto (int (*)(int, unsigned int))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* int (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int, ...) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg with name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b, ... c) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0c", + .str_sec_size = sizeof("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#3", +}, + +{ + .descr = "func proto (arg after vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, ..., unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (CONST=>TYPEDEF=>PTR=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* typedef void (*func_ptr)(int, unsigned int) */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [3] */ + /* const func_ptr */ + BTF_CONST_ENC(3), /* [4] */ + BTF_PTR_ENC(6), /* [5] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [6] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_ptr", + .str_sec_size = sizeof("\0func_ptr"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + BTF_CONST_ENC(4), /* [3] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [4] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [5] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_typedef", + .str_sec_size = sizeof("\0func_typedef"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, + +{ + .descr = "func proto (btf_resolve(arg))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* void (*)(const void *) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(0, 3), + BTF_CONST_ENC(4), /* [3] */ + BTF_PTR_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Not all arg has name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0b", + .str_sec_size = sizeof("\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Bad arg name_off)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int ) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0xffffffff, 2), + BTF_END_RAW, + }, + .str_sec = "\0a", + .str_sec_size = sizeof("\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Bad arg name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int !!!) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0!!!", + .str_sec_size = sizeof("\0a\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Invalid return type)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(100, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid return type", +}, + +{ + .descr = "func proto (with func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void func_proto(int, unsigned int) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_proto", + .str_sec_size = sizeof("\0func_proto"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func proto (const void arg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(const void) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 4), + BTF_CONST_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#1", +}, + +{ + .descr = "func (void func(int a, unsigned int b))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func (No func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void (int a, unsigned int b) */ + BTF_FUNC_ENC(0, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Invalid func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void !!!(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0!!!", + .str_sec_size = sizeof("\0a\0b\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Some arg has no name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + /* void func(int a, unsigned int) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0func", + .str_sec_size = sizeof("\0a\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func (Non zero vlen)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "vlen != 0", +}, + +{ + .descr = "func (Not referring to FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0func", + .str_sec_size = sizeof("\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, + }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) -- cgit v1.2.3 From cc19435cb2ee34a3663f0be69f3a4647795b0417 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:13 -0800 Subject: tools/bpf: sync kernel uapi bpf.h header to tools directory The kernel uapi bpf.h is synced to tools directory. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 05d95290b848..c1554aa07465 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -338,6 +338,10 @@ union bpf_attr { * (context accesses, allowed helpers, etc). */ __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2638,6 +2642,10 @@ struct bpf_prog_info { __u32 nr_jited_func_lens; __aligned_u64 jited_ksyms; __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 func_info_cnt; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2949,4 +2957,9 @@ struct bpf_flow_keys { }; }; +struct bpf_func_info { + __u32 insn_offset; + __u32 type_id; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 7e0d0fb5522a388700ceff723af98c47ffa8a0a9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:14 -0800 Subject: tools/bpf: add new fields for program load in lib/bpf The new fields are added for program load in lib/bpf so application uses api bpf_load_program_xattr() is able to load program with btf and func_info data. This functionality will be used in next patch by bpf selftest test_btf. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 4 ++++ tools/lib/bpf/bpf.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 961e1b9fc592..9b5cf22c4e64 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -205,6 +205,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.log_level = 0; attr.kern_version = load_attr->kern_version; attr.prog_ifindex = load_attr->prog_ifindex; + attr.prog_btf_fd = load_attr->prog_btf_fd; + attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_cnt = load_attr->func_info_cnt; + attr.func_info = ptr_to_u64(load_attr->func_info); memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 26a51538213c..8bdfd806253a 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -74,6 +74,10 @@ struct bpf_load_program_attr { const char *license; __u32 kern_version; __u32 prog_ifindex; + __u32 prog_btf_fd; + __u32 func_info_rec_size; + const void *func_info; + __u32 func_info_cnt; }; /* Flags to direct loading requirements */ -- cgit v1.2.3 From 4798c4ba3ba94e4da37b2557dfda04f80a94e8d5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:15 -0800 Subject: tools/bpf: extends test_btf to test load/retrieve func_type info A two function bpf program is loaded with btf and func_info. After successful prog load, the bpf_get_info syscall is called to retrieve prog info to ensure the types returned from the kernel matches the types passed to the kernel from the user space. Several negative tests are also added to test loading/retriving of func_type info. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 332 ++++++++++++++++++++++++++++++++- 1 file changed, 329 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index e0eeee5c8c04..8fd3a16fea4d 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -22,9 +23,13 @@ #include "bpf_rlimit.h" #include "bpf_util.h" +#define MAX_INSNS 512 +#define MAX_SUBPROGS 16 + static uint32_t pass_cnt; static uint32_t error_cnt; static uint32_t skip_cnt; +static bool jit_enabled; #define CHECK(condition, format...) ({ \ int __ret = !!(condition); \ @@ -60,6 +65,24 @@ static int __base_pr(const char *format, ...) return err; } +static bool is_jit_enabled(void) +{ + const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; + bool enabled = false; + int sysctl_fd; + + sysctl_fd = open(jit_sysctl, 0, O_RDONLY); + if (sysctl_fd != -1) { + char tmpc; + + if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) + enabled = (tmpc != '0'); + close(sysctl_fd); + } + + return enabled; +} + #define BTF_INFO_ENC(kind, root, vlen) \ ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) @@ -115,6 +138,7 @@ static struct args { bool get_info_test; bool pprint_test; bool always_log; + bool func_type_test; } args; static char btf_log_buf[BTF_LOG_BUF_SIZE]; @@ -2947,16 +2971,310 @@ static int test_pprint(void) return err; } +static struct btf_func_type_test { + const char *descr; + const char *str_sec; + __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 str_sec_size; + struct bpf_insn insns[MAX_INSNS]; + __u32 prog_type; + __u32 func_info[MAX_SUBPROGS][2]; + __u32 func_info_rec_size; + __u32 func_info_cnt; + bool expected_prog_load_failure; +} func_type_test[] = { +{ + .descr = "func_type (main func + one sub)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, +}, + +{ + .descr = "func_type (Incorrect func_info_rec_size)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 4, + .func_info_cnt = 2, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect func_info_cnt)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 1, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect bpf_func_info.insn_offset)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {2, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, + .expected_prog_load_failure = true, +}, + +}; + +static size_t probe_prog_length(const struct bpf_insn *fp) +{ + size_t len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static int do_test_func_type(int test_num) +{ + const struct btf_func_type_test *test = &func_type_test[test_num]; + unsigned int raw_btf_size, info_len, rec_size; + int i, btf_fd = -1, prog_fd = -1, err = 0; + struct bpf_load_program_attr attr = {}; + void *raw_btf, *func_info = NULL; + struct bpf_prog_info info = {}; + struct bpf_func_info *finfo; + + fprintf(stderr, "%s......", test->descr); + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, + test->str_sec, test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + free(raw_btf); + + if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { + err = -1; + goto done; + } + + if (*btf_log_buf && args.always_log) + fprintf(stderr, "\n%s", btf_log_buf); + + attr.prog_type = test->prog_type; + attr.insns = test->insns; + attr.insns_cnt = probe_prog_length(attr.insns); + attr.license = "GPL"; + attr.prog_btf_fd = btf_fd; + attr.func_info_rec_size = test->func_info_rec_size; + attr.func_info_cnt = test->func_info_cnt; + attr.func_info = test->func_info; + + *btf_log_buf = '\0'; + prog_fd = bpf_load_program_xattr(&attr, btf_log_buf, + BTF_LOG_BUF_SIZE); + if (test->expected_prog_load_failure && prog_fd == -1) { + err = 0; + goto done; + } + if (CHECK(prog_fd == -1, "invalid prog_id errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (!jit_enabled) { + skip_cnt++; + fprintf(stderr, "SKIPPED, please enable sysctl bpf_jit_enable\n"); + err = 0; + goto done; + } + + /* get necessary lens */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.func_info_cnt != 2, + "incorrect info.func_info_cnt (1st) %d\n", + info.func_info_cnt)) { + err = -1; + goto done; + } + rec_size = info.func_info_rec_size; + if (CHECK(rec_size < 4, + "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) { + err = -1; + goto done; + } + + func_info = malloc(info.func_info_cnt * rec_size); + if (CHECK(!func_info, "out of memeory")) { + err = -1; + goto done; + } + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.func_info_cnt = 2; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.func_info_cnt != 2, + "incorrect info.func_info_cnt (2nd) %d\n", + info.func_info_cnt)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size != rec_size, + "incorrect info.func_info_rec_size (2nd) %d\n", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + finfo = func_info; + for (i = 0; i < 2; i++) { + if (CHECK(finfo->type_id != test->func_info[i][1], + "incorrect func_type %u expected %u", + finfo->type_id, test->func_info[i][1])) { + err = -1; + goto done; + } + finfo = (void *)finfo + rec_size; + } + +done: + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + if (btf_fd != -1) + close(btf_fd); + if (prog_fd != -1) + close(prog_fd); + free(func_info); + return err; +} + +static int test_func_type(void) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < ARRAY_SIZE(func_type_test); i++) + err |= count_result(do_test_func_type(i)); + + return err; +} + static void usage(const char *cmd) { - fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n", + fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] |" + " [-g test_num (1 - %zu)] |" + " [-f test_num (1 - %zu)] | [-p] | [-k] ]\n", cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), ARRAY_SIZE(file_tests)); } static int parse_args(int argc, char **argv) { - const char *optstr = "lpf:r:g:"; + const char *optstr = "lpkf:r:g:"; int opt; while ((opt = getopt(argc, argv, optstr)) != -1) { @@ -2979,6 +3297,9 @@ static int parse_args(int argc, char **argv) case 'p': args.pprint_test = true; break; + case 'k': + args.func_type_test = true; + break; case 'h': usage(argv[0]); exit(0); @@ -3032,6 +3353,8 @@ int main(int argc, char **argv) if (args.always_log) libbpf_set_print(__base_pr, __base_pr, __base_pr); + jit_enabled = is_jit_enabled(); + if (args.raw_test) err |= test_raw(); @@ -3044,8 +3367,11 @@ int main(int argc, char **argv) if (args.pprint_test) err |= test_pprint(); + if (args.func_type_test) + err |= test_func_type(); + if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test) + args.pprint_test || args.func_type_test) goto done; err |= test_raw(); -- cgit v1.2.3 From 2993e0515bb44e157c17c9ba7309ba46366b6add Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:16 -0800 Subject: tools/bpf: add support to read .BTF.ext sections The .BTF section is already available to encode types. These types can be used for map pretty print. The whole .BTF will be passed to the kernel as well for which kernel can verify and return to the user space for pretty print etc. The llvm patch at https://reviews.llvm.org/D53736 will generate .BTF section and one more section .BTF.ext. The .BTF.ext section encodes function type information and line information. Note that this patch set only supports function type info. The functionality is implemented in libbpf. The .BTF section can be directly loaded into the kernel, and the .BTF.ext section cannot. The loader may need to do some relocation and merging, similar to merging multiple code sections, before loading into the kernel. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 46 ++++++++- tools/lib/bpf/btf.c | 274 +++++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/btf.h | 50 +++++++++ tools/lib/bpf/libbpf.c | 87 +++++++++++++--- 4 files changed, 442 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9b5cf22c4e64..836447bb4f14 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -186,6 +186,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz) { union bpf_attr attr; + void *finfo = NULL; __u32 name_len; int fd; @@ -216,12 +217,55 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, if (fd >= 0 || !log_buf || !log_buf_sz) return fd; + /* After bpf_prog_load, the kernel may modify certain attributes + * to give user space a hint how to deal with loading failure. + * Check to see whether we can make some changes and load again. + */ + if (errno == E2BIG && attr.func_info_cnt && + attr.func_info_rec_size < load_attr->func_info_rec_size) { + __u32 actual_rec_size = load_attr->func_info_rec_size; + __u32 expected_rec_size = attr.func_info_rec_size; + __u32 finfo_cnt = load_attr->func_info_cnt; + __u64 finfo_len = actual_rec_size * finfo_cnt; + const void *orecord; + void *nrecord; + int i; + + finfo = malloc(finfo_len); + if (!finfo) + /* further try with log buffer won't help */ + return fd; + + /* zero out bytes kernel does not understand */ + orecord = load_attr->func_info; + nrecord = finfo; + for (i = 0; i < load_attr->func_info_cnt; i++) { + memcpy(nrecord, orecord, expected_rec_size); + memset(nrecord + expected_rec_size, 0, + actual_rec_size - expected_rec_size); + orecord += actual_rec_size; + nrecord += actual_rec_size; + } + + /* try with corrected func info records */ + attr.func_info = ptr_to_u64(finfo); + attr.func_info_rec_size = load_attr->func_info_rec_size; + + fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + + if (fd >= 0 || !log_buf || !log_buf_sz) + goto done; + } + /* Try again with log */ attr.log_buf = ptr_to_u64(log_buf); attr.log_size = log_buf_sz; attr.log_level = 1; log_buf[0] = 0; - return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +done: + free(finfo); + return fd; } int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 31225e64766f..fe87cb48a6a9 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -37,6 +37,18 @@ struct btf { int fd; }; +struct btf_ext { + void *func_info; + __u32 func_info_rec_size; + __u32 func_info_len; +}; + +/* The minimum bpf_func_info checked by the loader */ +struct bpf_func_info_min { + __u32 insn_offset; + __u32 type_id; +}; + static int btf_add_type(struct btf *btf, struct btf_type *t) { if (btf->types_size - btf->nr_types < 2) { @@ -397,3 +409,265 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) else return NULL; } + +static int btf_ext_validate_func_info(const void *finfo, __u32 size, + btf_print_fn_t err_log) +{ + int sec_hdrlen = sizeof(struct btf_sec_func_info); + __u32 size_left, num_records, record_size; + const struct btf_sec_func_info *sinfo; + __u64 total_record_size; + + /* At least a func_info record size */ + if (size < sizeof(__u32)) { + elog("BTF.ext func_info record size not found"); + return -EINVAL; + } + + /* The record size needs to meet below minimum standard */ + record_size = *(__u32 *)finfo; + if (record_size < sizeof(struct bpf_func_info_min) || + record_size % sizeof(__u32)) { + elog("BTF.ext func_info invalid record size"); + return -EINVAL; + } + + sinfo = finfo + sizeof(__u32); + size_left = size - sizeof(__u32); + + /* If no func_info records, return failure now so .BTF.ext + * won't be used. + */ + if (!size_left) { + elog("BTF.ext no func info records"); + return -EINVAL; + } + + while (size_left) { + if (size_left < sec_hdrlen) { + elog("BTF.ext func_info header not found"); + return -EINVAL; + } + + num_records = sinfo->num_func_info; + if (num_records == 0) { + elog("incorrect BTF.ext num_func_info"); + return -EINVAL; + } + + total_record_size = sec_hdrlen + + (__u64)num_records * record_size; + if (size_left < total_record_size) { + elog("incorrect BTF.ext num_func_info"); + return -EINVAL; + } + + size_left -= total_record_size; + sinfo = (void *)sinfo + total_record_size; + } + + return 0; +} + +static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + __u32 meta_left, last_func_info_pos; + void *finfo; + + if (data_size < offsetof(struct btf_ext_header, func_info_off) || + data_size < hdr->hdr_len) { + elog("BTF.ext header not found"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + elog("Invalid BTF.ext magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + elog("Unsupported BTF.ext version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + elog("Unsupported BTF.ext flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + meta_left = data_size - hdr->hdr_len; + if (!meta_left) { + elog("BTF.ext has no data\n"); + return -EINVAL; + } + + if (meta_left < hdr->func_info_off) { + elog("Invalid BTF.ext func_info section offset:%u\n", + hdr->func_info_off); + return -EINVAL; + } + + if (hdr->func_info_off & 0x03) { + elog("BTF.ext func_info section is not aligned to 4 bytes\n"); + return -EINVAL; + } + + last_func_info_pos = hdr->hdr_len + hdr->func_info_off + + hdr->func_info_len; + if (last_func_info_pos > data_size) { + elog("Invalid BTF.ext func_info section size:%u\n", + hdr->func_info_len); + return -EINVAL; + } + + finfo = data + hdr->hdr_len + hdr->func_info_off; + return btf_ext_validate_func_info(finfo, hdr->func_info_len, + err_log); +} + +void btf_ext__free(struct btf_ext *btf_ext) +{ + if (!btf_ext) + return; + + free(btf_ext->func_info); + free(btf_ext); +} + +struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr; + struct btf_ext *btf_ext; + void *org_fdata, *fdata; + __u32 hdrlen, size_u32; + int err; + + err = btf_ext_parse_hdr(data, size, err_log); + if (err) + return ERR_PTR(err); + + btf_ext = calloc(1, sizeof(struct btf_ext)); + if (!btf_ext) + return ERR_PTR(-ENOMEM); + + hdr = (const struct btf_ext_header *)data; + hdrlen = hdr->hdr_len; + size_u32 = sizeof(__u32); + fdata = malloc(hdr->func_info_len - size_u32); + if (!fdata) { + free(btf_ext); + return ERR_PTR(-ENOMEM); + } + + /* remember record size and copy rest of func_info data */ + org_fdata = data + hdrlen + hdr->func_info_off; + btf_ext->func_info_rec_size = *(__u32 *)org_fdata; + memcpy(fdata, org_fdata + size_u32, hdr->func_info_len - size_u32); + btf_ext->func_info = fdata; + btf_ext->func_info_len = hdr->func_info_len - size_u32; + + return btf_ext; +} + +int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, void **func_info, + __u32 *func_info_rec_size, __u32 *func_info_len) +{ + __u32 sec_hdrlen = sizeof(struct btf_sec_func_info); + __u32 i, record_size, records_len; + struct btf_sec_func_info *sinfo; + const char *info_sec_name; + __s64 remain_len; + void *data; + + record_size = btf_ext->func_info_rec_size; + sinfo = btf_ext->func_info; + remain_len = btf_ext->func_info_len; + + while (remain_len > 0) { + records_len = sinfo->num_func_info * record_size; + info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); + if (strcmp(info_sec_name, sec_name)) { + remain_len -= sec_hdrlen + records_len; + sinfo = (void *)sinfo + sec_hdrlen + records_len; + continue; + } + + data = malloc(records_len); + if (!data) + return -ENOMEM; + + memcpy(data, sinfo->data, records_len); + + /* adjust the insn_offset, the data in .BTF.ext is + * the actual byte offset, and the kernel expects + * the offset in term of bpf_insn. + * + * adjust the insn offset only, the rest data will + * be passed to kernel. + */ + for (i = 0; i < sinfo->num_func_info; i++) { + struct bpf_func_info_min *record; + + record = data + i * record_size; + record->insn_offset /= sizeof(struct bpf_insn); + } + + *func_info = data; + *func_info_len = records_len; + *func_info_rec_size = record_size; + return 0; + } + + return -EINVAL; +} + +int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *func_info_len) +{ + __u32 sec_hdrlen = sizeof(struct btf_sec_func_info); + __u32 i, record_size, existing_flen, records_len; + struct btf_sec_func_info *sinfo; + const char *info_sec_name; + __u64 remain_len; + void *data; + + record_size = btf_ext->func_info_rec_size; + sinfo = btf_ext->func_info; + remain_len = btf_ext->func_info_len; + while (remain_len > 0) { + records_len = sinfo->num_func_info * record_size; + info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); + if (strcmp(info_sec_name, sec_name)) { + remain_len -= sec_hdrlen + records_len; + sinfo = (void *)sinfo + sec_hdrlen + records_len; + continue; + } + + existing_flen = *func_info_len; + data = realloc(*func_info, existing_flen + records_len); + if (!data) + return -ENOMEM; + + memcpy(data + existing_flen, sinfo->data, records_len); + /* adjust insn_offset only, the rest data will be passed + * to the kernel. + */ + for (i = 0; i < sinfo->num_func_info; i++) { + struct bpf_func_info_min *record; + + record = data + existing_flen + i * record_size; + record->insn_offset = + record->insn_offset / sizeof(struct bpf_insn) + + insns_cnt; + } + *func_info = data; + *func_info_len = existing_flen + records_len; + return 0; + } + + return -EINVAL; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b77e7080f7e7..578171e8cb26 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -11,10 +11,51 @@ #endif #define BTF_ELF_SEC ".BTF" +#define BTF_EXT_ELF_SEC ".BTF.ext" struct btf; +struct btf_ext; struct btf_type; +/* + * The .BTF.ext ELF section layout defined as + * struct btf_ext_header + * func_info subsection + * + * The func_info subsection layout: + * record size for struct bpf_func_info in the func_info subsection + * struct btf_sec_func_info for section #1 + * a list of bpf_func_info records for section #1 + * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h + * but may not be identical + * struct btf_sec_func_info for section #2 + * a list of bpf_func_info records for section #2 + * ...... + * + * Note that the bpf_func_info record size in .BTF.ext may not + * be the same as the one defined in include/uapi/linux/bpf.h. + * The loader should ensure that record_size meets minimum + * requirement and pass the record as is to the kernel. The + * kernel will handle the func_info properly based on its contents. + */ +struct btf_ext_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 func_info_off; + __u32 func_info_len; +}; + +struct btf_sec_func_info { + __u32 sec_name_off; + __u32 num_func_info; + /* Followed by num_func_info number of bpf func_info records */ + __u8 data[0]; +}; + typedef int (*btf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); @@ -29,4 +70,13 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +void btf_ext__free(struct btf_ext *btf_ext); +int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, void **func_info, + __u32 *func_info_rec_size, __u32 *func_info_len); +int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, void **func_info, + __u32 *func_info_len); + #endif /* __LIBBPF_BTF_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a01eb9584e52..cb6565d79603 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -156,6 +156,10 @@ struct bpf_program { bpf_program_clear_priv_t clear_priv; enum bpf_attach_type expected_attach_type; + int btf_fd; + void *func_info; + __u32 func_info_rec_size; + __u32 func_info_len; }; struct bpf_map { @@ -212,6 +216,7 @@ struct bpf_object { struct list_head list; struct btf *btf; + struct btf_ext *btf_ext; void *priv; bpf_object_clear_priv_t clear_priv; @@ -241,6 +246,9 @@ void bpf_program__unload(struct bpf_program *prog) prog->instances.nr = -1; zfree(&prog->instances.fds); + + zclose(prog->btf_fd); + zfree(&prog->func_info); } static void bpf_program__exit(struct bpf_program *prog) @@ -315,6 +323,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->instances.fds = NULL; prog->instances.nr = -1; prog->type = BPF_PROG_TYPE_KPROBE; + prog->btf_fd = -1; return 0; errout: @@ -807,6 +816,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) BTF_ELF_SEC, PTR_ERR(obj->btf)); obj->btf = NULL; } + } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { + obj->btf_ext = btf_ext__new(data->d_buf, data->d_size, + __pr_debug); + if (IS_ERR(obj->btf_ext)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, + PTR_ERR(obj->btf_ext)); + obj->btf_ext = NULL; + } } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -1190,6 +1208,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct bpf_insn *insn, *new_insn; struct bpf_program *text; size_t new_cnt; + int err; if (relo->type != RELO_CALL) return -LIBBPF_ERRNO__RELOC; @@ -1212,6 +1231,20 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, pr_warning("oom in prog realloc\n"); return -ENOMEM; } + + if (obj->btf && obj->btf_ext) { + err = btf_ext__reloc(obj->btf, obj->btf_ext, + text->section_name, + prog->insns_cnt, + &prog->func_info, + &prog->func_info_len); + if (err) { + pr_warning("error in btf_ext__reloc for sec %s\n", + text->section_name); + return err; + } + } + memcpy(new_insn + prog->insns_cnt, text->insns, text->insns_cnt * sizeof(*insn)); prog->insns = new_insn; @@ -1231,7 +1264,24 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) { int i, err; - if (!prog || !prog->reloc_desc) + if (!prog) + return 0; + + if (obj->btf && obj->btf_ext) { + err = btf_ext__reloc_init(obj->btf, obj->btf_ext, + prog->section_name, + &prog->func_info, + &prog->func_info_rec_size, + &prog->func_info_len); + if (err) { + pr_warning("err in btf_ext__reloc_init for sec %s\n", + prog->section_name); + return err; + } + prog->btf_fd = btf__fd(obj->btf); + } + + if (!prog->reloc_desc) return 0; for (i = 0; i < prog->nr_reloc; i++) { @@ -1319,9 +1369,9 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } static int -load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, - const char *name, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd, int prog_ifindex) +load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, + char *license, __u32 kern_version, int *pfd, + __u32 func_info_cnt) { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; @@ -1329,14 +1379,18 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, int ret; memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = type; - load_attr.expected_attach_type = expected_attach_type; - load_attr.name = name; + load_attr.prog_type = prog->type; + load_attr.expected_attach_type = prog->expected_attach_type; + load_attr.name = prog->name; load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; load_attr.kern_version = kern_version; - load_attr.prog_ifindex = prog_ifindex; + load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.prog_btf_fd = prog->btf_fd; + load_attr.func_info = prog->func_info; + load_attr.func_info_rec_size = prog->func_info_rec_size; + load_attr.func_info_cnt = func_info_cnt; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; @@ -1394,8 +1448,14 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version) { + __u32 func_info_cnt; int err = 0, fd, i; + if (prog->func_info_len == 0) + func_info_cnt = 0; + else + func_info_cnt = prog->func_info_len / prog->func_info_rec_size; + if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { pr_warning("Internal error: can't load program '%s'\n", @@ -1417,10 +1477,9 @@ bpf_program__load(struct bpf_program *prog, pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", prog->section_name, prog->instances.nr); } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, prog->insns, prog->insns_cnt, + err = load_program(prog, prog->insns, prog->insns_cnt, license, kern_version, &fd, - prog->prog_ifindex); + func_info_cnt); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1448,11 +1507,10 @@ bpf_program__load(struct bpf_program *prog, continue; } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, result.new_insn_ptr, + err = load_program(prog, result.new_insn_ptr, result.new_insn_cnt, license, kern_version, &fd, - prog->prog_ifindex); + func_info_cnt); if (err) { pr_warning("Loading the %dth instance of program '%s' failed\n", @@ -2120,6 +2178,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_object__elf_finish(obj); bpf_object__unload(obj); btf__free(obj->btf); + btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) { zfree(&obj->maps[i].name); -- cgit v1.2.3 From 9ce6ae22c8e878aee7a96836b2ed9fd9a8173e41 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:17 -0800 Subject: tools/bpf: do not use pahole if clang/llvm can generate BTF sections Add additional checks in tools/testing/selftests/bpf and samples/bpf such that if clang/llvm compiler can generate BTF sections, do not use pahole. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- samples/bpf/Makefile | 8 ++++++++ tools/testing/selftests/bpf/Makefile | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'tools') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index be0a961450bc..35444f4a846b 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -208,12 +208,20 @@ endif BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') +BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ + $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ + readelf -S ./llvm_btf_verify.o | grep BTF; \ + /bin/rm -f ./llvm_btf_verify.o) +ifneq ($(BTF_LLVM_PROBE),) + EXTRA_CFLAGS += -g +else ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),) EXTRA_CFLAGS += -g LLC_FLAGS += -mattr=dwarfris DWARF2BTF = y endif +endif # Trick to allow make to be run from this directory all: diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 57b4712a6276..1dde03ea1484 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -126,7 +126,14 @@ $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') +BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ + $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ + readelf -S ./llvm_btf_verify.o | grep BTF; \ + /bin/rm -f ./llvm_btf_verify.o) +ifneq ($(BTF_LLVM_PROBE),) + CLANG_FLAGS += -g +else ifneq ($(BTF_LLC_PROBE),) ifneq ($(BTF_PAHOLE_PROBE),) ifneq ($(BTF_OBJCOPY_PROBE),) @@ -136,6 +143,7 @@ ifneq ($(BTF_OBJCOPY_PROBE),) endif endif endif +endif # Have one program compiled without "-target bpf" to test whether libbpf loads # it successfully -- cgit v1.2.3 From d7f5b5e051554c91bab995b67101af4625af591a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:18 -0800 Subject: tools/bpf: refactor to implement btf_get_from_id() in lib/bpf The function get_btf() is implemented in tools/bpf/bpftool/map.c to get a btf structure given a map_info. This patch refactored this function to be function btf_get_from_id() in tools/lib/bpf so that it can be used later. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/map.c | 68 ++---------------------------------------------- tools/lib/bpf/btf.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/btf.h | 1 + 3 files changed, 72 insertions(+), 66 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index dc9a8967ab8c..a1ae2a3e9fef 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -215,70 +215,6 @@ err_end_obj: return ret; } -static int get_btf(struct bpf_map_info *map_info, struct btf **btf) -{ - struct bpf_btf_info btf_info = { 0 }; - __u32 len = sizeof(btf_info); - __u32 last_size; - int btf_fd; - void *ptr; - int err; - - err = 0; - *btf = NULL; - btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id); - if (btf_fd < 0) - return 0; - - /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so - * let's start with a sane default - 4KiB here - and resize it only if - * bpf_obj_get_info_by_fd() needs a bigger buffer. - */ - btf_info.btf_size = 4096; - last_size = btf_info.btf_size; - ptr = malloc(last_size); - if (!ptr) { - err = -ENOMEM; - goto exit_free; - } - - bzero(ptr, last_size); - btf_info.btf = ptr_to_u64(ptr); - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); - - if (!err && btf_info.btf_size > last_size) { - void *temp_ptr; - - last_size = btf_info.btf_size; - temp_ptr = realloc(ptr, last_size); - if (!temp_ptr) { - err = -ENOMEM; - goto exit_free; - } - ptr = temp_ptr; - bzero(ptr, last_size); - btf_info.btf = ptr_to_u64(ptr); - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); - } - - if (err || btf_info.btf_size > last_size) { - err = errno; - goto exit_free; - } - - *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); - if (IS_ERR(*btf)) { - err = PTR_ERR(*btf); - *btf = NULL; - } - -exit_free: - close(btf_fd); - free(ptr); - - return err; -} - static json_writer_t *get_btf_writer(void) { json_writer_t *jw = jsonw_new(stdout); @@ -775,7 +711,7 @@ static int do_dump(int argc, char **argv) prev_key = NULL; - err = get_btf(&info, &btf); + err = btf_get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; @@ -919,7 +855,7 @@ static int do_lookup(int argc, char **argv) } /* here means bpf_map_lookup_elem() succeeded */ - err = get_btf(&info, &btf); + err = btf_get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index fe87cb48a6a9..13ddc4bd24ee 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -49,6 +49,11 @@ struct bpf_func_info_min { __u32 type_id; }; +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + static int btf_add_type(struct btf *btf, struct btf_type *t) { if (btf->types_size - btf->nr_types < 2) { @@ -410,6 +415,70 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) return NULL; } +int btf_get_from_id(__u32 id, struct btf **btf) +{ + struct bpf_btf_info btf_info = { 0 }; + __u32 len = sizeof(btf_info); + __u32 last_size; + int btf_fd; + void *ptr; + int err; + + err = 0; + *btf = NULL; + btf_fd = bpf_btf_get_fd_by_id(id); + if (btf_fd < 0) + return 0; + + /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so + * let's start with a sane default - 4KiB here - and resize it only if + * bpf_obj_get_info_by_fd() needs a bigger buffer. + */ + btf_info.btf_size = 4096; + last_size = btf_info.btf_size; + ptr = malloc(last_size); + if (!ptr) { + err = -ENOMEM; + goto exit_free; + } + + bzero(ptr, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + + if (!err && btf_info.btf_size > last_size) { + void *temp_ptr; + + last_size = btf_info.btf_size; + temp_ptr = realloc(ptr, last_size); + if (!temp_ptr) { + err = -ENOMEM; + goto exit_free; + } + ptr = temp_ptr; + bzero(ptr, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + } + + if (err || btf_info.btf_size > last_size) { + err = errno; + goto exit_free; + } + + *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); + if (IS_ERR(*btf)) { + err = PTR_ERR(*btf); + *btf = NULL; + } + +exit_free: + close(btf_fd); + free(ptr); + + return err; +} + static int btf_ext_validate_func_info(const void *finfo, __u32 size, btf_print_fn_t err_log) { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 578171e8cb26..386b2ffc32a3 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -69,6 +69,7 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_API int btf_get_from_id(__u32 id, struct btf **btf); struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); void btf_ext__free(struct btf_ext *btf_ext); -- cgit v1.2.3 From 999d82cbc04416cc7f2b5cb6daab947c16f0fd3a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:20 -0800 Subject: tools/bpf: enhance test_btf file testing to test func info Change the bpf programs test_btf_haskv.c and test_btf_nokv.c to have two sections, and enhance test_btf.c test_file feature to test btf func_info returned by the kernel. Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 117 ++++++++++++++++++++++++--- tools/testing/selftests/bpf/test_btf_haskv.c | 16 +++- tools/testing/selftests/bpf/test_btf_nokv.c | 16 +++- 3 files changed, 136 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 8fd3a16fea4d..7b1b160d6e67 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -2434,13 +2434,13 @@ static struct btf_file_test file_tests[] = { }, }; -static int file_has_btf_elf(const char *fn) +static int file_has_btf_elf(const char *fn, bool *has_btf_ext) { Elf_Scn *scn = NULL; GElf_Ehdr ehdr; + int ret = 0; int elf_fd; Elf *elf; - int ret; if (CHECK(elf_version(EV_CURRENT) == EV_NONE, "elf_version(EV_CURRENT) == EV_NONE")) @@ -2472,14 +2472,12 @@ static int file_has_btf_elf(const char *fn) } sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); - if (!strcmp(sh_name, BTF_ELF_SEC)) { + if (!strcmp(sh_name, BTF_ELF_SEC)) ret = 1; - goto done; - } + if (!strcmp(sh_name, BTF_EXT_ELF_SEC)) + *has_btf_ext = true; } - ret = 0; - done: close(elf_fd); elf_end(elf); @@ -2489,15 +2487,24 @@ done: static int do_test_file(unsigned int test_num) { const struct btf_file_test *test = &file_tests[test_num - 1]; + const char *expected_fnames[] = {"_dummy_tracepoint", + "test_long_fname_1", + "test_long_fname_2"}; + struct bpf_prog_info info = {}; struct bpf_object *obj = NULL; + struct bpf_func_info *finfo; struct bpf_program *prog; + __u32 info_len, rec_size; + bool has_btf_ext = false; + struct btf *btf = NULL; + void *func_info = NULL; struct bpf_map *map; - int err; + int i, err, prog_fd; fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, test->file); - err = file_has_btf_elf(test->file); + err = file_has_btf_elf(test->file, &has_btf_ext); if (err == -1) return err; @@ -2525,6 +2532,7 @@ static int do_test_file(unsigned int test_num) err = bpf_object__load(obj); if (CHECK(err < 0, "bpf_object__load: %d", err)) goto done; + prog_fd = bpf_program__fd(prog); map = bpf_object__find_map_by_name(obj, "btf_map"); if (CHECK(!map, "btf_map not found")) { @@ -2539,9 +2547,100 @@ static int do_test_file(unsigned int test_num) test->btf_kv_notfound)) goto done; + if (!jit_enabled || !has_btf_ext) + goto skip_jit; + + /* get necessary program info */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.func_info_cnt != 3, + "incorrect info.func_info_cnt (1st) %d", + info.func_info_cnt)) { + err = -1; + goto done; + } + rec_size = info.func_info_rec_size; + if (CHECK(rec_size < 4, + "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) { + err = -1; + goto done; + } + + func_info = malloc(info.func_info_cnt * rec_size); + if (CHECK(!func_info, "out of memeory")) { + err = -1; + goto done; + } + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.func_info_cnt = 3; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.func_info_cnt != 3, + "incorrect info.func_info_cnt (2nd) %d", + info.func_info_cnt)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size != rec_size, + "incorrect info.func_info_rec_size (2nd) %d", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + err = btf_get_from_id(info.btf_id, &btf); + if (CHECK(err, "cannot get btf from kernel, err: %d", err)) + goto done; + + /* check three functions */ + finfo = func_info; + for (i = 0; i < 3; i++) { + const struct btf_type *t; + const char *fname; + + t = btf__type_by_id(btf, finfo->type_id); + if (CHECK(!t, "btf__type_by_id failure: id %u", + finfo->type_id)) { + err = -1; + goto done; + } + + fname = btf__name_by_offset(btf, t->name_off); + err = strcmp(fname, expected_fnames[i]); + /* for the second and third functions in .text section, + * the compiler may order them either way. + */ + if (i && err) + err = strcmp(fname, expected_fnames[3 - i]); + if (CHECK(err, "incorrect fname %s", fname ? : "")) { + err = -1; + goto done; + } + + finfo = (void *)finfo + rec_size; + } + +skip_jit: fprintf(stderr, "OK"); done: + free(func_info); bpf_object__close(obj); return err; } diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c index b21b876f475d..e5c79fe0ffdb 100644 --- a/tools/testing/selftests/bpf/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/test_btf_haskv.c @@ -24,8 +24,8 @@ struct dummy_tracepoint_args { struct sock *sock; }; -SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -42,4 +42,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) return 0; } +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c index 0ed8e088eebf..434188c37774 100644 --- a/tools/testing/selftests/bpf/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/test_btf_nokv.c @@ -22,8 +22,8 @@ struct dummy_tracepoint_args { struct sock *sock; }; -SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -40,4 +40,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) return 0; } +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 254471e57a86b8dc1a2cc19848e99f5d7c0558f4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 19 Nov 2018 15:29:21 -0800 Subject: tools/bpf: bpftool: add support for func types This patch added support to print function signature if btf func_info is available. Note that ksym now uses function name instead of prog_name as prog_name has a limit of 16 bytes including ending '\0'. The following is a sample output for selftests test_btf with file test_btf_haskv.o for translated insns and jited insns respectively. $ bpftool prog dump xlated id 1 int _dummy_tracepoint(struct dummy_tracepoint_args * arg): 0: (85) call pc+2#bpf_prog_2dcecc18072623fc_test_long_fname_1 1: (b7) r0 = 0 2: (95) exit int test_long_fname_1(struct dummy_tracepoint_args * arg): 3: (85) call pc+1#bpf_prog_89d64e4abf0f0126_test_long_fname_2 4: (95) exit int test_long_fname_2(struct dummy_tracepoint_args * arg): 5: (b7) r2 = 0 6: (63) *(u32 *)(r10 -4) = r2 7: (79) r1 = *(u64 *)(r1 +8) ... 22: (07) r1 += 1 23: (63) *(u32 *)(r0 +4) = r1 24: (95) exit $ bpftool prog dump jited id 1 int _dummy_tracepoint(struct dummy_tracepoint_args * arg): bpf_prog_b07ccb89267cf242__dummy_tracepoint: 0: push %rbp 1: mov %rsp,%rbp ...... 3c: add $0x28,%rbp 40: leaveq 41: retq int test_long_fname_1(struct dummy_tracepoint_args * arg): bpf_prog_2dcecc18072623fc_test_long_fname_1: 0: push %rbp 1: mov %rsp,%rbp ...... 3a: add $0x28,%rbp 3e: leaveq 3f: retq int test_long_fname_2(struct dummy_tracepoint_args * arg): bpf_prog_89d64e4abf0f0126_test_long_fname_2: 0: push %rbp 1: mov %rsp,%rbp ...... 80: add $0x28,%rbp 84: leaveq 85: retq Signed-off-by: Yonghong Song Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/btf_dumper.c | 136 ++++++++++++++++++++++++++++++++++++++ tools/bpf/bpftool/main.h | 2 + tools/bpf/bpftool/prog.c | 56 ++++++++++++++++ tools/bpf/bpftool/xlated_dumper.c | 33 +++++++++ tools/bpf/bpftool/xlated_dumper.h | 3 + 5 files changed, 230 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 55bc512a1831..c3fd3a7cb787 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -249,3 +249,139 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, { return btf_dumper_do_type(d, type_id, 0, data); } + +#define BTF_PRINT_ARG(...) \ + do { \ + pos += snprintf(func_sig + pos, size - pos, \ + __VA_ARGS__); \ + if (pos >= size) \ + return -1; \ + } while (0) +#define BTF_PRINT_TYPE(type) \ + do { \ + pos = __btf_dumper_type_only(btf, type, func_sig, \ + pos, size); \ + if (pos == -1) \ + return -1; \ + } while (0) + +static int btf_dump_func(const struct btf *btf, char *func_sig, + const struct btf_type *func_proto, + const struct btf_type *func, int pos, int size); + +static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, + char *func_sig, int pos, int size) +{ + const struct btf_type *proto_type; + const struct btf_array *array; + const struct btf_type *t; + + if (!type_id) { + BTF_PRINT_ARG("void "); + return pos; + } + + t = btf__type_by_id(btf, type_id); + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_STRUCT: + BTF_PRINT_ARG("struct %s ", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_UNION: + BTF_PRINT_ARG("union %s ", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_ENUM: + BTF_PRINT_ARG("enum %s ", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_ARRAY: + array = (struct btf_array *)(t + 1); + BTF_PRINT_TYPE(array->type); + BTF_PRINT_ARG("[%d]", array->nelems); + break; + case BTF_KIND_PTR: + BTF_PRINT_TYPE(t->type); + BTF_PRINT_ARG("* "); + break; + case BTF_KIND_UNKN: + case BTF_KIND_FWD: + case BTF_KIND_TYPEDEF: + return -1; + case BTF_KIND_VOLATILE: + BTF_PRINT_ARG("volatile "); + BTF_PRINT_TYPE(t->type); + break; + case BTF_KIND_CONST: + BTF_PRINT_ARG("const "); + BTF_PRINT_TYPE(t->type); + break; + case BTF_KIND_RESTRICT: + BTF_PRINT_ARG("restrict "); + BTF_PRINT_TYPE(t->type); + break; + case BTF_KIND_FUNC_PROTO: + pos = btf_dump_func(btf, func_sig, t, NULL, pos, size); + if (pos == -1) + return -1; + break; + case BTF_KIND_FUNC: + proto_type = btf__type_by_id(btf, t->type); + pos = btf_dump_func(btf, func_sig, proto_type, t, pos, size); + if (pos == -1) + return -1; + break; + default: + return -1; + } + + return pos; +} + +static int btf_dump_func(const struct btf *btf, char *func_sig, + const struct btf_type *func_proto, + const struct btf_type *func, int pos, int size) +{ + int i, vlen; + + BTF_PRINT_TYPE(func_proto->type); + if (func) + BTF_PRINT_ARG("%s(", btf__name_by_offset(btf, func->name_off)); + else + BTF_PRINT_ARG("("); + vlen = BTF_INFO_VLEN(func_proto->info); + for (i = 0; i < vlen; i++) { + struct btf_param *arg = &((struct btf_param *)(func_proto + 1))[i]; + + if (i) + BTF_PRINT_ARG(", "); + if (arg->type) { + BTF_PRINT_TYPE(arg->type); + BTF_PRINT_ARG("%s", + btf__name_by_offset(btf, arg->name_off)); + } else { + BTF_PRINT_ARG("..."); + } + } + BTF_PRINT_ARG(")"); + + return pos; +} + +void btf_dumper_type_only(const struct btf *btf, __u32 type_id, char *func_sig, + int size) +{ + int err; + + func_sig[0] = '\0'; + if (!btf) + return; + + err = __btf_dumper_type_only(btf, type_id, func_sig, 0, size); + if (err < 0) + func_sig[0] = '\0'; +} diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 10c6c16fae29..3e8979567cf1 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -187,6 +187,8 @@ struct btf_dumper { */ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, const void *data); +void btf_dumper_type_only(const struct btf *btf, __u32 func_type_id, + char *func_only, int size); struct nlattr; struct ifinfomsg; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index c176e1aa66fe..37b1daf19da6 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -47,6 +47,7 @@ #include #include +#include #include #include "cfg.h" @@ -451,14 +452,19 @@ static int do_dump(int argc, char **argv) struct bpf_prog_info info = {}; unsigned int *func_lens = NULL; const char *disasm_opt = NULL; + unsigned int finfo_rec_size; unsigned int nr_func_ksyms; unsigned int nr_func_lens; struct dump_data dd = {}; __u32 len = sizeof(info); + struct btf *btf = NULL; + void *func_info = NULL; + unsigned int finfo_cnt; unsigned int buf_size; char *filepath = NULL; bool opcodes = false; bool visual = false; + char func_sig[1024]; unsigned char *buf; __u32 *member_len; __u64 *member_ptr; @@ -551,6 +557,17 @@ static int do_dump(int argc, char **argv) } } + finfo_cnt = info.func_info_cnt; + finfo_rec_size = info.func_info_rec_size; + if (finfo_cnt && finfo_rec_size) { + func_info = malloc(finfo_cnt * finfo_rec_size); + if (!func_info) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + memset(&info, 0, sizeof(info)); *member_ptr = ptr_to_u64(buf); @@ -559,6 +576,9 @@ static int do_dump(int argc, char **argv) info.nr_jited_ksyms = nr_func_ksyms; info.jited_func_lens = ptr_to_u64(func_lens); info.nr_jited_func_lens = nr_func_lens; + info.func_info_cnt = finfo_cnt; + info.func_info_rec_size = finfo_rec_size; + info.func_info = ptr_to_u64(func_info); err = bpf_obj_get_info_by_fd(fd, &info, &len); close(fd); @@ -582,6 +602,18 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (info.func_info_cnt != finfo_cnt) { + p_err("incorrect func_info_cnt %d vs. expected %d", + info.func_info_cnt, finfo_cnt); + goto err_free; + } + + if (info.func_info_rec_size != finfo_rec_size) { + p_err("incorrect func_info_rec_size %d vs. expected %d", + info.func_info_rec_size, finfo_rec_size); + goto err_free; + } + if ((member_len == &info.jited_prog_len && info.jited_prog_insns == 0) || (member_len == &info.xlated_prog_len && @@ -590,6 +622,11 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (info.btf_id && btf_get_from_id(info.btf_id, &btf)) { + p_err("failed to get btf"); + goto err_free; + } + if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { @@ -622,6 +659,7 @@ static int do_dump(int argc, char **argv) if (info.nr_jited_func_lens && info.jited_func_lens) { struct kernel_sym *sym = NULL; + struct bpf_func_info *record; char sym_name[SYM_MAX_NAME]; unsigned char *img = buf; __u64 *ksyms = NULL; @@ -648,12 +686,25 @@ static int do_dump(int argc, char **argv) strcpy(sym_name, "unknown"); } + if (func_info) { + record = func_info + i * finfo_rec_size; + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + } + if (json_output) { jsonw_start_object(json_wtr); + if (func_info && func_sig[0] != '\0') { + jsonw_name(json_wtr, "proto"); + jsonw_string(json_wtr, func_sig); + } jsonw_name(json_wtr, "name"); jsonw_string(json_wtr, sym_name); jsonw_name(json_wtr, "insns"); } else { + if (func_info && func_sig[0] != '\0') + printf("%s:\n", func_sig); printf("%s:\n", sym_name); } @@ -682,6 +733,9 @@ static int do_dump(int argc, char **argv) kernel_syms_load(&dd); dd.nr_jited_ksyms = info.nr_jited_ksyms; dd.jited_ksyms = (__u64 *) info.jited_ksyms; + dd.btf = btf; + dd.func_info = func_info; + dd.finfo_rec_size = finfo_rec_size; if (json_output) dump_xlated_json(&dd, buf, *member_len, opcodes); @@ -693,12 +747,14 @@ static int do_dump(int argc, char **argv) free(buf); free(func_ksyms); free(func_lens); + free(func_info); return 0; err_free: free(buf); free(func_ksyms); free(func_lens); + free(func_info); return -1; } diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 3284759df98a..e06ac0286a75 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -242,11 +242,15 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, .cb_imm = print_imm, .private_data = dd, }; + struct bpf_func_info *record; struct bpf_insn *insn = buf; + struct btf *btf = dd->btf; bool double_insn = false; + char func_sig[1024]; unsigned int i; jsonw_start_array(json_wtr); + record = dd->func_info; for (i = 0; i < len / sizeof(*insn); i++) { if (double_insn) { double_insn = false; @@ -255,6 +259,20 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); jsonw_start_object(json_wtr); + + if (btf && record) { + if (record->insn_offset == i) { + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + if (func_sig[0] != '\0') { + jsonw_name(json_wtr, "proto"); + jsonw_string(json_wtr, func_sig); + } + record = (void *)record + dd->finfo_rec_size; + } + } + jsonw_name(json_wtr, "disasm"); print_bpf_insn(&cbs, insn + i, true); @@ -297,16 +315,31 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, .cb_imm = print_imm, .private_data = dd, }; + struct bpf_func_info *record; struct bpf_insn *insn = buf; + struct btf *btf = dd->btf; bool double_insn = false; + char func_sig[1024]; unsigned int i; + record = dd->func_info; for (i = 0; i < len / sizeof(*insn); i++) { if (double_insn) { double_insn = false; continue; } + if (btf && record) { + if (record->insn_offset == i) { + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + if (func_sig[0] != '\0') + printf("%s:\n", func_sig); + record = (void *)record + dd->finfo_rec_size; + } + } + double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); printf("% 4d: ", i); diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index 33d86e2b369b..aec31723e1e5 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -51,6 +51,9 @@ struct dump_data { __u32 sym_count; __u64 *jited_ksyms; __u32 nr_jited_ksyms; + struct btf *btf; + void *func_info; + __u32 finfo_rec_size; char scratch_buff[SYM_MAX_NAME + 8]; }; -- cgit v1.2.3 From 462c124c590fe633564192dbfa26e99af788a67c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 21 Nov 2018 11:22:42 -0800 Subject: bpf: fix a libbpf loader issue Commit 2993e0515bb4 ("tools/bpf: add support to read .BTF.ext sections") added support to read .BTF.ext sections from an object file, create and pass prog_btf_fd and func_info to the kernel. The program btf_fd (prog->btf_fd) is initialized to be -1 to please zclose so we do not need special handling dur prog close. Passing -1 to the kernel, however, will cause loading error. Passing btf_fd 0 to the kernel if prog->btf_fd is invalid fixed the problem. Fixes: 2993e0515bb4 ("tools/bpf: add support to read .BTF.ext sections") Reported-by: Andrey Ignatov Reported-by: Emre Cantimur Tested-by: Andrey Ignatov Signed-off-by: Yonghong Song Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cb6565d79603..f022ac82e882 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1387,7 +1387,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.prog_btf_fd = prog->btf_fd; + load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = func_info_cnt; -- cgit v1.2.3 From 8c4905b995c649ac71e21611abc2fcefc904b56a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 21 Nov 2018 09:29:44 -0800 Subject: libbpf: make sure bpf headers are c++ include-able Wrap headers in extern "C", to turn off C++ mangling. This simplifies including libbpf in c++ and linking against it. v2 changes: * do the same for btf.h v3 changes: * test_libbpf.cpp to test for possible future c++ breakages Signed-off-by: Stanislav Fomichev Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Makefile | 15 ++++++++++++--- tools/lib/bpf/bpf.h | 9 +++++++++ tools/lib/bpf/btf.h | 8 ++++++++ tools/lib/bpf/libbpf.h | 9 +++++++++ tools/lib/bpf/test_libbpf.cpp | 18 ++++++++++++++++++ 5 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 tools/lib/bpf/test_libbpf.cpp (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 425b480bda75..1b4a683a00fc 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -66,7 +66,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray +FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx FEATURE_DISPLAY = libelf bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -148,6 +148,12 @@ LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) CMD_TARGETS = $(LIB_FILE) +CXX_TEST_TARGET = $(OUTPUT)test_libbpf + +ifeq ($(feature-cxx), 1) + CMD_TARGETS += $(CXX_TEST_TARGET) +endif + TARGETS = $(CMD_TARGETS) all: fixdep all_cmd @@ -175,6 +181,9 @@ $(OUTPUT)libbpf.so: $(BPF_IN) $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ +$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a + $(QUIET_LINK)$(CXX) $^ -lelf -o $@ + define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ @@ -201,8 +210,8 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \ - $(RM) LIBBPF-CFLAGS + $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ + *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 8bdfd806253a..09e8bbe111d4 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -27,6 +27,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif @@ -132,4 +136,9 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* __LIBBPF_BPF_H */ diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 386b2ffc32a3..701ad2b6c41f 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -6,6 +6,10 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif @@ -80,4 +84,8 @@ int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **func_info, __u32 *func_info_len); +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* __LIBBPF_BTF_H */ diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index b1686a787102..74e57e041705 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -16,6 +16,10 @@ #include // for size_t #include +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif @@ -335,4 +339,9 @@ int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* __LIBBPF_LIBBPF_H */ diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp new file mode 100644 index 000000000000..abf3fc25c9fa --- /dev/null +++ b/tools/lib/bpf/test_libbpf.cpp @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#include "libbpf.h" +#include "bpf.h" +#include "btf.h" + +/* do nothing, just make sure we can link successfully */ + +int main(int argc, char *argv[]) +{ + /* libbpf.h */ + libbpf_set_print(NULL, NULL, NULL); + + /* bpf.h */ + bpf_prog_get_fd_by_id(0); + + /* btf.h */ + btf__new(NULL, 0, NULL); +} -- cgit v1.2.3 From 47eff61777c7b2db58805f974994713c8acbe9a6 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 20 Nov 2018 17:11:19 -0800 Subject: bpf, libbpf: introduce bpf_object__probe_caps to test BPF capabilities It currently only checks whether kernel supports map/prog names. This capability check will be used in the next two commits to skip setting prog/map names. Suggested-by: Daniel Borkmann Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f022ac82e882..dffdd68b5e6b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +115,11 @@ void libbpf_set_print(libbpf_print_fn_t warn, # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ #endif +struct bpf_capabilities { + /* v4.14: kernel support for program & map names. */ + __u32 name:1; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -160,6 +166,8 @@ struct bpf_program { void *func_info; __u32 func_info_rec_size; __u32 func_info_len; + + struct bpf_capabilities *caps; }; struct bpf_map { @@ -221,6 +229,8 @@ struct bpf_object { void *priv; bpf_object_clear_priv_t clear_priv; + struct bpf_capabilities caps; + char path[]; }; #define obj_elf_valid(o) ((o)->efile.elf) @@ -342,6 +352,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, if (err) return err; + prog.caps = &obj->caps; progs = obj->programs; nr_progs = obj->nr_programs; @@ -1135,6 +1146,52 @@ err_free_new_name: return -errno; } +static int +bpf_object__probe_name(struct bpf_object *obj) +{ + struct bpf_load_program_attr attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret; + + /* make sure basic loading works */ + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret < 0) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", + __func__, cp, errno); + return -errno; + } + close(ret); + + /* now try the same program, but with the name */ + + attr.name = "test"; + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret >= 0) { + obj->caps.name = 1; + close(ret); + } + + return 0; +} + +static int +bpf_object__probe_caps(struct bpf_object *obj) +{ + return bpf_object__probe_name(obj); +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -1708,6 +1765,7 @@ int bpf_object__load(struct bpf_object *obj) obj->loaded = true; + CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__create_maps(obj), err, out); CHECK_ERR(bpf_object__relocate(obj), err, out); CHECK_ERR(bpf_object__load_progs(obj), err, out); -- cgit v1.2.3 From 94cb310cfaa16582cd49ebbeea5925e8f49324a1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 20 Nov 2018 17:11:20 -0800 Subject: bpf: libbpf: remove map name retry from bpf_create_map_xattr Instead, check for a newly created caps.name bpf_object capability. If kernel doesn't support names, don't specify the attribute. See commit 23499442c319 ("bpf: libbpf: retry map creation without the name") for rationale. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/lib/bpf/bpf.c | 11 +---------- tools/lib/bpf/libbpf.c | 3 ++- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 836447bb4f14..ce1822194590 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -69,7 +69,6 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; union bpf_attr attr; - int ret; memset(&attr, '\0', sizeof(attr)); @@ -87,15 +86,7 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) attr.map_ifindex = create_attr->map_ifindex; attr.inner_map_fd = create_attr->inner_map_fd; - ret = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); - if (ret < 0 && errno == EINVAL && create_attr->name) { - /* Retry the same syscall, but without the name. - * Pre v4.14 kernels don't support map names. - */ - memset(attr.map_name, 0, sizeof(attr.map_name)); - return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); - } - return ret; + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } int bpf_create_map_node(enum bpf_map_type map_type, const char *name, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index dffdd68b5e6b..f28e64dd8b5a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1211,7 +1211,8 @@ bpf_object__create_maps(struct bpf_object *obj) continue; } - create_attr.name = map->name; + if (obj->caps.name) + create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; create_attr.map_type = def->type; create_attr.map_flags = def->map_flags; -- cgit v1.2.3 From 5b32a23e1d879b78ee72144311314eccf7581bf4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 20 Nov 2018 17:11:21 -0800 Subject: bpf: libbpf: don't specify prog name if kernel doesn't support it Use recently added capability check. See commit 23499442c319 ("bpf: libbpf: retry map creation without the name") for rationale. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f28e64dd8b5a..edbae2b1b046 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1439,7 +1439,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); load_attr.prog_type = prog->type; load_attr.expected_attach_type = prog->expected_attach_type; - load_attr.name = prog->name; + if (prog->caps->name) + load_attr.name = prog->name; load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; -- cgit v1.2.3 From addb9fc90f13898e7779da54f471792e3dfb0d55 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 20 Nov 2018 20:55:56 -0800 Subject: bpf: adding support for map in map in libbpf idea is pretty simple. for specified map (pointed by struct bpf_map) we would provide descriptor of already loaded map, which is going to be used as a prototype for inner map. proposed workflow: 1) open bpf's object (bpf_object__open) 2) create bpf's map which is going to be used as a prototype 3) find (by name) map-in-map which you want to load and update w/ descriptor of inner map w/ a new helper from this patch 4) load bpf program w/ bpf_object__load Signed-off-by: Nikita V. Shirokov Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 40 ++++++++++++++++++++++++++++++++++------ tools/lib/bpf/libbpf.h | 2 ++ 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index edbae2b1b046..0f14f7c074c2 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -175,6 +175,7 @@ struct bpf_map { char *name; size_t offset; int map_ifindex; + int inner_map_fd; struct bpf_map_def def; __u32 btf_key_type_id; __u32 btf_value_type_id; @@ -605,6 +606,14 @@ static int compare_bpf_map(const void *_a, const void *_b) return a->offset - b->offset; } +static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) +{ + if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + type == BPF_MAP_TYPE_HASH_OF_MAPS) + return true; + return false; +} + static int bpf_object__init_maps(struct bpf_object *obj, int flags) { @@ -668,13 +677,15 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) } obj->nr_maps = nr_maps; - /* - * fill all fd with -1 so won't close incorrect - * fd (fd=0 is stdin) when failure (zclose won't close - * negative fd)). - */ - for (i = 0; i < nr_maps; i++) + for (i = 0; i < nr_maps; i++) { + /* + * fill all fd with -1 so won't close incorrect + * fd (fd=0 is stdin) when failure (zclose won't close + * negative fd)). + */ obj->maps[i].fd = -1; + obj->maps[i].inner_map_fd = -1; + } /* * Fill obj->maps using data in "maps" section. @@ -1222,6 +1233,9 @@ bpf_object__create_maps(struct bpf_object *obj) create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; + if (bpf_map_type__is_map_in_map(def->type) && + map->inner_map_fd >= 0) + create_attr.inner_map_fd = map->inner_map_fd; if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) { create_attr.btf_fd = btf__fd(obj->btf); @@ -2681,6 +2695,20 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) map->map_ifindex = ifindex; } +int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) +{ + if (!bpf_map_type__is_map_in_map(map->def.type)) { + pr_warning("error: unsupported map type\n"); + return -EINVAL; + } + if (map->inner_map_fd != -1) { + pr_warning("error: inner_map_fd already specified\n"); + return -EINVAL; + } + map->inner_map_fd = fd; + return 0; +} + static struct bpf_map * __bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 74e57e041705..f30c3d07bb7d 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -297,6 +297,8 @@ LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); +LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); + LIBBPF_API long libbpf_get_error(const void *ptr); struct bpf_prog_load_attr { -- cgit v1.2.3 From b1957c92eba5acad7d8a0a6f968561e08714e978 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 20 Nov 2018 20:55:57 -0800 Subject: bpf: adding tests for map_in_map helpber in libbpf adding test/example of bpf_map__set_inner_map_fd usage Signed-off-by: Nikita V. Shirokov Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/test_map_in_map.c | 49 +++++++++++++++ tools/testing/selftests/bpf/test_maps.c | 90 +++++++++++++++++++++++++++ 3 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_map_in_map.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 1dde03ea1484..43157bd89165 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -38,7 +38,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ - test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o + test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \ + test_map_in_map.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ diff --git a/tools/testing/selftests/bpf/test_map_in_map.c b/tools/testing/selftests/bpf/test_map_in_map.c new file mode 100644 index 000000000000..ce923e67e08e --- /dev/null +++ b/tools/testing/selftests/bpf/test_map_in_map.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Facebook */ +#include +#include +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") mim_array = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +struct bpf_map_def SEC("maps") mim_hash = { + .type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +SEC("xdp_mimtest") +int xdp_mimtest0(struct xdp_md *ctx) +{ + int value = 123; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&mim_array, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + map = bpf_map_lookup_elem(&mim_hash, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + return XDP_PASS; +} + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 9f0a5b16a246..9c79ee017df3 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1125,6 +1125,94 @@ out_sockmap: exit(1); } +#define MAPINMAP_PROG "./test_map_in_map.o" +static void test_map_in_map(void) +{ + struct bpf_program *prog; + struct bpf_object *obj; + struct bpf_map *map; + int mim_fd, fd, err; + int pos = 0; + + obj = bpf_object__open(MAPINMAP_PROG); + + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int), + 2, 0); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); + } + + map = bpf_object__find_map_by_name(obj, "mim_array"); + if (IS_ERR(map)) { + printf("Failed to load array of maps from test prog\n"); + goto out_map_in_map; + } + err = bpf_map__set_inner_map_fd(map, fd); + if (err) { + printf("Failed to set inner_map_fd for array of maps\n"); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim_hash"); + if (IS_ERR(map)) { + printf("Failed to load hash of maps from test prog\n"); + goto out_map_in_map; + } + err = bpf_map__set_inner_map_fd(map, fd); + if (err) { + printf("Failed to set inner_map_fd for hash of maps\n"); + goto out_map_in_map; + } + + bpf_object__for_each_program(prog, obj) { + bpf_program__set_xdp(prog); + } + bpf_object__load(obj); + + map = bpf_object__find_map_by_name(obj, "mim_array"); + if (IS_ERR(map)) { + printf("Failed to load array of maps from test prog\n"); + goto out_map_in_map; + } + mim_fd = bpf_map__fd(map); + if (mim_fd < 0) { + printf("Failed to get descriptor for array of maps\n"); + goto out_map_in_map; + } + + err = bpf_map_update_elem(mim_fd, &pos, &fd, 0); + if (err) { + printf("Failed to update array of maps\n"); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim_hash"); + if (IS_ERR(map)) { + printf("Failed to load hash of maps from test prog\n"); + goto out_map_in_map; + } + mim_fd = bpf_map__fd(map); + if (mim_fd < 0) { + printf("Failed to get descriptor for hash of maps\n"); + goto out_map_in_map; + } + + err = bpf_map_update_elem(mim_fd, &pos, &fd, 0); + if (err) { + printf("Failed to update hash of maps\n"); + goto out_map_in_map; + } + + close(fd); + bpf_object__close(obj); + return; + +out_map_in_map: + close(fd); + exit(1); +} + #define MAP_SIZE (32 * 1024) static void test_map_large(void) @@ -1600,6 +1688,8 @@ static void run_all_tests(void) test_queuemap(0, NULL); test_stackmap(0, NULL); + + test_map_in_map(); } int main(void) -- cgit v1.2.3 From dbd4485a69e15b850107eea516cd64387dff1f2e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Nov 2018 08:02:51 +0000 Subject: selftests: mlxsw: Consider VxLAN learning enabled as valid The test currently expects that a configuration which includes a VxLAN device with learning enabled to fail. Previous patches enabled VxLAN learning in mlxsw, so change the test accordingly. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 52e78adfe081..ffa79b738f2a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -167,7 +167,7 @@ sanitization_single_dev_learning_enabled_test() ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \ ttl 20 tos inherit local 198.51.100.1 dstport 4789 - sanitization_single_dev_test_fail + sanitization_single_dev_test_pass ip link del dev vxlan0 ip link del dev br0 -- cgit v1.2.3 From c39c56a8c8a06330eacc739fda7d175dd1781db5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Nov 2018 08:02:52 +0000 Subject: selftests: forwarding: vxlan_bridge_1d: Add learning test Add a test which checks that the VxLAN driver can learn FDB entries and that these entries are correctly deleted and aged-out. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1d.sh | 108 +++++++++++++++++++++ 1 file changed, 108 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 5cc6ac74eb74..56cef3b1c194 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -76,6 +76,7 @@ export VXPORT ping_ipv4 test_flood test_unicast + test_learning "} NUM_NETIFS=6 @@ -663,6 +664,113 @@ test_ecn_decap() test_ecn_decap_error } +test_learning() +{ + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + + # Enable learning on the VxLAN device and set ageing time to 10 seconds + ip link set dev br1 type bridge ageing_time 1000 + ip link set dev vx1 type vxlan ageing 10 + ip link set dev vx1 type vxlan learning + reapply_config + + # Check that flooding works + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: flood before learning" + + # Send a packet with source mac set to $mac from host w2 and check that + # a corresponding entry is created in VxLAN device vx1 + RET=0 + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_err $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + log_test "VXLAN: show learned FDB entry" + + # Repeat first test and check that packets only reach host w2 in ns1 + RET=0 + + vxlan_flood_test $mac $dst 0 10 0 + + log_test "VXLAN: learned FDB entry" + + # Delete the learned FDB entry from the VxLAN and bridge devices and + # check that packets are flooded + RET=0 + + bridge fdb del dev vx1 $mac master self + sleep 1 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: deletion of learned FDB entry" + + # Re-learn the first FDB entry and check that it is correctly aged-out + RET=0 + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_err $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + vxlan_flood_test $mac $dst 0 10 0 + + sleep 20 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_fail $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_fail $? + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: Ageing of learned FDB entry" + + # Toggle learning on the bridge port and check that the bridge's FDB + # is populated only when it should + RET=0 + + ip link set dev vx1 type bridge_slave learning off + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_fail $? + + ip link set dev vx1 type bridge_slave learning on + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + log_test "VXLAN: learning toggling on bridge port" + + # Restore previous settings + ip link set dev vx1 type vxlan nolearning + ip link set dev vx1 type vxlan ageing 300 + ip link set dev br1 type bridge ageing_time 30000 + reapply_config +} + test_all() { echo "Running tests with UDP port $VXPORT" -- cgit v1.2.3 From ab85b0143428739cb0f2533182d5824375b6b66b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Nov 2018 10:13:45 +0000 Subject: tools/bpf: fix spelling mistake "memeory" -> "memory" The CHECK message contains a spelling mistake, fix it. Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_btf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 7b1b160d6e67..bcbda7037840 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -2573,7 +2573,7 @@ static int do_test_file(unsigned int test_num) } func_info = malloc(info.func_info_cnt * rec_size); - if (CHECK(!func_info, "out of memeory")) { + if (CHECK(!func_info, "out of memory")) { err = -1; goto done; } @@ -3299,7 +3299,7 @@ static int do_test_func_type(int test_num) } func_info = malloc(info.func_info_cnt * rec_size); - if (CHECK(!func_info, "out of memeory")) { + if (CHECK(!func_info, "out of memory")) { err = -1; goto done; } -- cgit v1.2.3 From 1d0795ea9ca37be0fa0823776e0367b8fc398c91 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 21 Nov 2018 14:31:15 +0100 Subject: selftests: explicitly require kernel features needed by udpgro tests commit 3327a9c46352f1 ("selftests: add functionals test for UDP GRO") make use of ipv6 NAT, but such a feature is not currently implied by selftests. Since the 'ip[6]tables' commands may actually create nft rules, depending on the specific user-space version, let's pull both NF and NFT nat modules plus the needed deps. Reported-by: Naresh Kamboju Fixes: 3327a9c46352f1 ("selftests: add functionals test for UDP GRO") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/config | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index cd3a2f1545b5..5821bdd98d20 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -14,3 +14,17 @@ CONFIG_IPV6_VTI=y CONFIG_DUMMY=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_NAT_IPV6=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP_NF_NAT=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV4=m -- cgit v1.2.3 From f11216b24219ab26d8d159fbfa12dff886b16e32 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Thu, 22 Nov 2018 14:39:16 -0500 Subject: bpf: add skb->tstamp r/w access from tc clsact and cg skb progs This could be used to rate limit egress traffic in concert with a qdisc which supports Earliest Departure Time, such as FQ. Write access from cg skb progs only with CAP_SYS_ADMIN, since the value will be used by downstream qdiscs. It might make sense to relax this. Changes v1 -> v2: - allow access from cg skb, write only with CAP_SYS_ADMIN Signed-off-by: Vlad Dumitrescu Acked-by: Eric Dumazet Acked-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 29 +++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + tools/testing/selftests/bpf/test_verifier.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c1554aa07465..23e2031a43d4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2468,6 +2468,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; + __u64 tstamp; }; struct bpf_tunnel_key { diff --git a/net/core/filter.c b/net/core/filter.c index f6ca38a7d433..65dc13aeca7c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (size != sizeof(struct bpf_flow_keys *)) return false; break; + case bpf_ctx_range(struct __sk_buff, tstamp): + if (size != sizeof(__u64)) + return false; + break; default: /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { @@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -5638,6 +5643,10 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; + case bpf_ctx_range(struct __sk_buff, tstamp): + if (!capable(CAP_SYS_ADMIN)) + return false; + break; default: return false; } @@ -5665,6 +5674,7 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -5874,6 +5884,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): + case bpf_ctx_range(struct __sk_buff, tstamp): break; default: return false; @@ -6093,6 +6104,7 @@ static bool sk_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -6179,6 +6191,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -6488,6 +6501,22 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, off); break; + + case offsetof(struct __sk_buff, tstamp): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_DW, + si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, + tstamp, 8, + target_size)); + else + *insn++ = BPF_LDX_MEM(BPF_DW, + si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, + tstamp, 8, + target_size)); } return insn - insn_buf; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c1554aa07465..23e2031a43d4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2468,6 +2468,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; + __u64 tstamp; }; struct bpf_tunnel_key { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 54d16fbdef8b..537a8f91af02 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2446,6 +2446,10 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, tc_index)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), BPF_EXIT_INSN(), }, .errstr_unpriv = "", @@ -5297,6 +5301,31 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R2 leaks addr into helper function", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, + { + "write tstamp from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=152 size=8", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "read tstamp from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, { "multiple registers share map_lookup_elem result", .insns = { -- cgit v1.2.3 From 47ae7e3d0be539c6af83b7b349ece31a18001aa6 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Fri, 23 Nov 2018 12:58:12 -0800 Subject: libbpf: make bpf_object__open default to UNSPEC currently by default libbpf's bpf_object__open requires bpf's program to specify version in a code because of two things: 1) default prog type is set to KPROBE 2) KPROBE requires (in kernel/bpf/syscall.c) version to be specified in this patch i'm changing default prog type to UNSPEC and also changing requirments for version's section to be present in object file. now it would reflect what we have today in kernel (only KPROBE prog type requires for version to be explicitly set). v1 -> v2: - RFC tag has been dropped Signed-off-by: Nikita V. Shirokov Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0f14f7c074c2..ed4212a4c5f9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -333,7 +333,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->idx = idx; prog->instances.fds = NULL; prog->instances.nr = -1; - prog->type = BPF_PROG_TYPE_KPROBE; + prog->type = BPF_PROG_TYPE_UNSPEC; prog->btf_fd = -1; return 0; @@ -1649,12 +1649,12 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: - return false; case BPF_PROG_TYPE_UNSPEC: - case BPF_PROG_TYPE_KPROBE: case BPF_PROG_TYPE_TRACEPOINT: - case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + return false; + case BPF_PROG_TYPE_KPROBE: default: return true; } -- cgit v1.2.3 From 197c2dac74e4ae3158ad10f848c5402236d6b176 Mon Sep 17 00:00:00 2001 From: David Calavera Date: Thu, 22 Nov 2018 12:59:45 -0800 Subject: bpf: Add BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK to bpftool-map I noticed that these two new BPF Maps are not defined in bpftool. This patch defines those two maps and adds their names to the bpftool-map documentation. Signed-off-by: David Calavera Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 3 ++- tools/bpf/bpftool/map.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index f55a2daed59b..9e827e342d9e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -42,7 +42,8 @@ MAP COMMANDS | | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash** | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash** -| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** } +| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** +| | **queue** | **stack** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a1ae2a3e9fef..b0ebbed7d1a6 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -74,6 +74,8 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", + [BPF_MAP_TYPE_QUEUE] = "queue", + [BPF_MAP_TYPE_STACK] = "stack", }; static bool map_is_per_cpu(__u32 type) -- cgit v1.2.3 From 358be656406dce296fafeac7a913ba33e9a66971 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 24 Nov 2018 21:09:26 -0500 Subject: selftests/net: add txring_overwrite Packet sockets with PACKET_TX_RING send skbs with user data in frags. Before commit 5cd8d46ea156 ("packet: copy user buffers before orphan or clone") ring slots could be released prematurely, possibly allowing a process to overwrite data still in flight. This test opens two packet sockets, one to send and one to read. The sender has a tx ring of one slot. It sends two packets with different payload, then reads both and verifies their payload. Before the above commit, both receive calls return the same data as the send calls use the same buffer. From the commit, the clone needed for looping onto a packet socket triggers an skb_copy_ubufs to create a private copy. The separate sends each arrive correctly. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/run_afpackettests | 10 ++ tools/testing/selftests/net/txring_overwrite.c | 179 +++++++++++++++++++++++++ 4 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/txring_overwrite.c (limited to 'tools') diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 8cf22b3c2563..7f57b916e6b2 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -14,4 +14,5 @@ udpgso_bench_rx udpgso_bench_tx tcp_inq tls +txring_overwrite ip_defrag diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index eec359895feb..7aebbcaa91bf 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -11,7 +11,7 @@ TEST_PROGS += udpgro_bench.sh udpgro.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy -TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd +TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index bea079edc278..2dc95fda7ef7 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -25,3 +25,13 @@ if [ $? -ne 0 ]; then else echo "[PASS]" fi + +echo "--------------------" +echo "running txring_overwrite test" +echo "--------------------" +./in_netns.sh ./txring_overwrite +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi diff --git a/tools/testing/selftests/net/txring_overwrite.c b/tools/testing/selftests/net/txring_overwrite.c new file mode 100644 index 000000000000..fd8b1c663c39 --- /dev/null +++ b/tools/testing/selftests/net/txring_overwrite.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Verify that consecutive sends over packet tx_ring are mirrored + * with their original content intact. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const int eth_off = TPACKET_HDRLEN - sizeof(struct sockaddr_ll); +const int cfg_frame_size = 1000; + +static void build_packet(void *buffer, size_t blen, char payload_char) +{ + struct udphdr *udph; + struct ethhdr *eth; + struct iphdr *iph; + size_t off = 0; + + memset(buffer, 0, blen); + + eth = buffer; + eth->h_proto = htons(ETH_P_IP); + + off += sizeof(*eth); + iph = buffer + off; + iph->ttl = 8; + iph->ihl = 5; + iph->version = 4; + iph->saddr = htonl(INADDR_LOOPBACK); + iph->daddr = htonl(INADDR_LOOPBACK + 1); + iph->protocol = IPPROTO_UDP; + iph->tot_len = htons(blen - off); + iph->check = 0; + + off += sizeof(*iph); + udph = buffer + off; + udph->dest = htons(8000); + udph->source = htons(8001); + udph->len = htons(blen - off); + udph->check = 0; + + off += sizeof(*udph); + memset(buffer + off, payload_char, blen - off); +} + +static int setup_rx(void) +{ + int fdr; + + fdr = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP)); + if (fdr == -1) + error(1, errno, "socket r"); + + return fdr; +} + +static int setup_tx(char **ring) +{ + struct sockaddr_ll laddr = {}; + struct tpacket_req req = {}; + int fdt; + + fdt = socket(PF_PACKET, SOCK_RAW, 0); + if (fdt == -1) + error(1, errno, "socket t"); + + laddr.sll_family = AF_PACKET; + laddr.sll_protocol = htons(0); + laddr.sll_ifindex = if_nametoindex("lo"); + if (!laddr.sll_ifindex) + error(1, errno, "if_nametoindex"); + + if (bind(fdt, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind fdt"); + + req.tp_block_size = getpagesize(); + req.tp_block_nr = 1; + req.tp_frame_size = getpagesize(); + req.tp_frame_nr = 1; + + if (setsockopt(fdt, SOL_PACKET, PACKET_TX_RING, + (void *)&req, sizeof(req))) + error(1, errno, "setsockopt ring"); + + *ring = mmap(0, req.tp_block_size * req.tp_block_nr, + PROT_READ | PROT_WRITE, MAP_SHARED, fdt, 0); + if (!*ring) + error(1, errno, "mmap"); + + return fdt; +} + +static void send_pkt(int fdt, void *slot, char payload_char) +{ + struct tpacket_hdr *header = slot; + int ret; + + while (header->tp_status != TP_STATUS_AVAILABLE) + usleep(1000); + + build_packet(slot + eth_off, cfg_frame_size, payload_char); + + header->tp_len = cfg_frame_size; + header->tp_status = TP_STATUS_SEND_REQUEST; + + ret = sendto(fdt, NULL, 0, 0, NULL, 0); + if (ret == -1) + error(1, errno, "kick tx"); +} + +static int read_verify_pkt(int fdr, char payload_char) +{ + char buf[100]; + int ret; + + ret = read(fdr, buf, sizeof(buf)); + if (ret != sizeof(buf)) + error(1, errno, "read"); + + if (buf[60] != payload_char) { + printf("wrong pattern: 0x%x != 0x%x\n", buf[60], payload_char); + return 1; + } + + printf("read: %c (0x%x)\n", buf[60], buf[60]); + return 0; +} + +int main(int argc, char **argv) +{ + const char payload_patterns[] = "ab"; + char *ring; + int fdr, fdt, ret = 0; + + fdr = setup_rx(); + fdt = setup_tx(&ring); + + send_pkt(fdt, ring, payload_patterns[0]); + send_pkt(fdt, ring, payload_patterns[1]); + + ret |= read_verify_pkt(fdr, payload_patterns[0]); + ret |= read_verify_pkt(fdr, payload_patterns[1]); + + if (close(fdt)) + error(1, errno, "close t"); + if (close(fdr)) + error(1, errno, "close r"); + + return ret; +} -- cgit v1.2.3 From ffac28f95a98a87db0850801cd98771a08bb1dec Mon Sep 17 00:00:00 2001 From: David Calavera Date: Fri, 23 Nov 2018 15:58:39 -0800 Subject: bpf: align map type names formatting. Make the formatting for map_type_name array consistent. Signed-off-by: David Calavera Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index b0ebbed7d1a6..cbd3080e72c7 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -52,30 +52,30 @@ #include "main.h" static const char * const map_type_name[] = { - [BPF_MAP_TYPE_UNSPEC] = "unspec", - [BPF_MAP_TYPE_HASH] = "hash", - [BPF_MAP_TYPE_ARRAY] = "array", - [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", - [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", - [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", - [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", - [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", - [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", - [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", - [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", - [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", - [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", - [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", - [BPF_MAP_TYPE_DEVMAP] = "devmap", - [BPF_MAP_TYPE_SOCKMAP] = "sockmap", - [BPF_MAP_TYPE_CPUMAP] = "cpumap", - [BPF_MAP_TYPE_XSKMAP] = "xskmap", - [BPF_MAP_TYPE_SOCKHASH] = "sockhash", - [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", - [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", + [BPF_MAP_TYPE_UNSPEC] = "unspec", + [BPF_MAP_TYPE_HASH] = "hash", + [BPF_MAP_TYPE_ARRAY] = "array", + [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", + [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", + [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", + [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", + [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", + [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", + [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", + [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", + [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", + [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", + [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", + [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", + [BPF_MAP_TYPE_XSKMAP] = "xskmap", + [BPF_MAP_TYPE_SOCKHASH] = "sockhash", + [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", + [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", - [BPF_MAP_TYPE_QUEUE] = "queue", - [BPF_MAP_TYPE_STACK] = "stack", + [BPF_MAP_TYPE_QUEUE] = "queue", + [BPF_MAP_TYPE_STACK] = "stack", }; static bool map_is_per_cpu(__u32 type) -- cgit v1.2.3 From 812dd689da2a9af1208449588e4529c89a9d661d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 24 Nov 2018 23:20:45 -0800 Subject: tools/bpf: change selftest test_btf for both jit and non-jit The selftest test_btf is changed to test both jit and non-jit. The test result should be the same regardless of whether jit is enabled or not. Signed-off-by: Yonghong Song Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index bcbda7037840..b4c8725b3004 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -29,7 +29,6 @@ static uint32_t pass_cnt; static uint32_t error_cnt; static uint32_t skip_cnt; -static bool jit_enabled; #define CHECK(condition, format...) ({ \ int __ret = !!(condition); \ @@ -65,24 +64,6 @@ static int __base_pr(const char *format, ...) return err; } -static bool is_jit_enabled(void) -{ - const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; - bool enabled = false; - int sysctl_fd; - - sysctl_fd = open(jit_sysctl, 0, O_RDONLY); - if (sysctl_fd != -1) { - char tmpc; - - if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) - enabled = (tmpc != '0'); - close(sysctl_fd); - } - - return enabled; -} - #define BTF_INFO_ENC(kind, root, vlen) \ ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) @@ -2547,8 +2528,8 @@ static int do_test_file(unsigned int test_num) test->btf_kv_notfound)) goto done; - if (!jit_enabled || !has_btf_ext) - goto skip_jit; + if (!has_btf_ext) + goto skip; /* get necessary program info */ info_len = sizeof(struct bpf_prog_info); @@ -2636,7 +2617,7 @@ static int do_test_file(unsigned int test_num) finfo = (void *)finfo + rec_size; } -skip_jit: +skip: fprintf(stderr, "OK"); done: @@ -3270,12 +3251,6 @@ static int do_test_func_type(int test_num) err = -1; goto done; } - if (!jit_enabled) { - skip_cnt++; - fprintf(stderr, "SKIPPED, please enable sysctl bpf_jit_enable\n"); - err = 0; - goto done; - } /* get necessary lens */ info_len = sizeof(struct bpf_prog_info); @@ -3452,8 +3427,6 @@ int main(int argc, char **argv) if (args.always_log) libbpf_set_print(__base_pr, __base_pr, __base_pr); - jit_enabled = is_jit_enabled(); - if (args.raw_test) err |= test_raw(); -- cgit v1.2.3 From 1d2f44ca342445f7da56555ca7a43d19aac8996a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 23 Nov 2018 16:44:32 -0800 Subject: libbpf: Name changing for btf_get_from_id s/btf_get_from_id/btf__get_from_id/ to restore the API naming convention. Signed-off-by: Martin KaFai Lau Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/map.c | 4 ++-- tools/bpf/bpftool/prog.c | 2 +- tools/lib/bpf/btf.c | 2 +- tools/lib/bpf/btf.h | 2 +- tools/testing/selftests/bpf/test_btf.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index cbd3080e72c7..3850f8d65703 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -713,7 +713,7 @@ static int do_dump(int argc, char **argv) prev_key = NULL; - err = btf_get_from_id(info.btf_id, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; @@ -857,7 +857,7 @@ static int do_lookup(int argc, char **argv) } /* here means bpf_map_lookup_elem() succeeded */ - err = btf_get_from_id(info.btf_id, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 37b1daf19da6..521a1073d1b4 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -622,7 +622,7 @@ static int do_dump(int argc, char **argv) goto err_free; } - if (info.btf_id && btf_get_from_id(info.btf_id, &btf)) { + if (info.btf_id && btf__get_from_id(info.btf_id, &btf)) { p_err("failed to get btf"); goto err_free; } diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 13ddc4bd24ee..eadcf8dfd295 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -415,7 +415,7 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) return NULL; } -int btf_get_from_id(__u32 id, struct btf **btf) +int btf__get_from_id(__u32 id, struct btf **btf) { struct bpf_btf_info btf_info = { 0 }; __u32 len = sizeof(btf_info); diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 701ad2b6c41f..5336b2f37293 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -73,7 +73,7 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_API int btf_get_from_id(__u32 id, struct btf **btf); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); void btf_ext__free(struct btf_ext *btf_ext); diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index b4c8725b3004..bae7308b7ec5 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -2585,7 +2585,7 @@ static int do_test_file(unsigned int test_num) goto done; } - err = btf_get_from_id(info.btf_id, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (CHECK(err, "cannot get btf from kernel, err: %d", err)) goto done; -- cgit v1.2.3 From 16192a771d8c4d844529cbc920c75d170d16fb14 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 23 Nov 2018 16:44:33 -0800 Subject: libbpf: Add version script for DSO More and more projects use libbpf and one day it'll likely be packaged and distributed as DSO and that requires ABI versioning so that both compatible and incompatible changes to ABI can be introduced in a safe way in the future without breaking executables dynamically linked with a previous version of the library. Usual way to do ABI versioning is version script for the linker. Add such a script for libbpf. All global symbols currently exported via LIBBPF_API macro are added to the version script libbpf.map. The version name LIBBPF_0.0.1 is constructed from the name of the library + version specified by $(LIBBPF_VERSION) in Makefile. Version script does not duplicate the work done by LIBBPF_API macro, it rather complements it. The macro is used at compile time and can be used by compiler to do optimization that can't be done at link time, it is purely about global symbol visibility. The version script, in turn, is used at link time and takes care of ABI versioning. Both techniques are described in details in [1]. Whenever ABI is changed in the future, version script should be changed appropriately. [1] https://www.akkadia.org/drepper/dsohowto.pdf Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 4 +- tools/lib/bpf/libbpf.map | 121 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 tools/lib/bpf/libbpf.map (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 1b4a683a00fc..22c5ffe22825 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -145,6 +145,7 @@ include $(srctree)/tools/build/Makefile.include BPF_IN := $(OUTPUT)libbpf-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +VERSION_SCRIPT := libbpf.map CMD_TARGETS = $(LIB_FILE) @@ -176,7 +177,8 @@ $(BPF_IN): force elfdep bpfdep $(Q)$(MAKE) $(build)=libbpf $(OUTPUT)libbpf.so: $(BPF_IN) - $(QUIET_LINK)$(CC) --shared $^ -o $@ + $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \ + $^ -o $@ $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map new file mode 100644 index 000000000000..4fb29f6d7a80 --- /dev/null +++ b/tools/lib/bpf/libbpf.map @@ -0,0 +1,121 @@ +LIBBPF_0.0.1 { + global: + bpf_btf_get_fd_by_id; + bpf_create_map; + bpf_create_map_in_map; + bpf_create_map_in_map_node; + bpf_create_map_name; + bpf_create_map_node; + bpf_create_map_xattr; + bpf_load_btf; + bpf_load_program; + bpf_load_program_xattr; + bpf_map__btf_key_type_id; + bpf_map__btf_value_type_id; + bpf_map__def; + bpf_map__fd; + bpf_map__is_offload_neutral; + bpf_map__name; + bpf_map__next; + bpf_map__pin; + bpf_map__prev; + bpf_map__priv; + bpf_map__reuse_fd; + bpf_map__set_ifindex; + bpf_map__set_inner_map_fd; + bpf_map__set_priv; + bpf_map__unpin; + bpf_map_delete_elem; + bpf_map_get_fd_by_id; + bpf_map_get_next_id; + bpf_map_get_next_key; + bpf_map_lookup_and_delete_elem; + bpf_map_lookup_elem; + bpf_map_update_elem; + bpf_obj_get; + bpf_obj_get_info_by_fd; + bpf_obj_pin; + bpf_object__btf_fd; + bpf_object__close; + bpf_object__find_map_by_name; + bpf_object__find_map_by_offset; + bpf_object__find_program_by_title; + bpf_object__kversion; + bpf_object__load; + bpf_object__name; + bpf_object__next; + bpf_object__open; + bpf_object__open_buffer; + bpf_object__open_xattr; + bpf_object__pin; + bpf_object__pin_maps; + bpf_object__pin_programs; + bpf_object__priv; + bpf_object__set_priv; + bpf_object__unload; + bpf_object__unpin_maps; + bpf_object__unpin_programs; + bpf_perf_event_read_simple; + bpf_prog_attach; + bpf_prog_detach; + bpf_prog_detach2; + bpf_prog_get_fd_by_id; + bpf_prog_get_next_id; + bpf_prog_load; + bpf_prog_load_xattr; + bpf_prog_query; + bpf_prog_test_run; + bpf_program__fd; + bpf_program__is_kprobe; + bpf_program__is_perf_event; + bpf_program__is_raw_tracepoint; + bpf_program__is_sched_act; + bpf_program__is_sched_cls; + bpf_program__is_socket_filter; + bpf_program__is_tracepoint; + bpf_program__is_xdp; + bpf_program__load; + bpf_program__next; + bpf_program__nth_fd; + bpf_program__pin; + bpf_program__pin_instance; + bpf_program__prev; + bpf_program__priv; + bpf_program__set_expected_attach_type; + bpf_program__set_ifindex; + bpf_program__set_kprobe; + bpf_program__set_perf_event; + bpf_program__set_prep; + bpf_program__set_priv; + bpf_program__set_raw_tracepoint; + bpf_program__set_sched_act; + bpf_program__set_sched_cls; + bpf_program__set_socket_filter; + bpf_program__set_tracepoint; + bpf_program__set_type; + bpf_program__set_xdp; + bpf_program__title; + bpf_program__unload; + bpf_program__unpin; + bpf_program__unpin_instance; + bpf_raw_tracepoint_open; + bpf_set_link_xdp_fd; + bpf_task_fd_query; + bpf_verify_program; + btf__fd; + btf__find_by_name; + btf__free; + btf__get_from_id; + btf__name_by_offset; + btf__new; + btf__resolve_size; + btf__resolve_type; + btf__type_by_id; + libbpf_attach_type_by_name; + libbpf_get_error; + libbpf_prog_type_by_name; + libbpf_set_print; + libbpf_strerror; + local: + *; +}; -- cgit v1.2.3 From 306b267cb3c4c2a3d12138041fed10478804da82 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 23 Nov 2018 16:44:34 -0800 Subject: libbpf: Verify versioned symbols Since ABI versioning info is kept separately from the code it's easy to forget to update it while adding a new API. Add simple verification that all global symbols exported with LIBBPF_API are versioned in libbpf.map version script. The idea is to check that number of global symbols in libbpf-in.o, that is the input to the linker, matches with number of unique versioned symbols in libbpf.so, that is the output of the linker. If these numbers don't match, it may mean some symbol was not versioned and make will fail. "Unique" means that if a symbol is present in more than one version of ABI due to ABI changes, it'll be counted once. Another option to calculate number of global symbols in the "input" could be to count number of LIBBPF_ABI entries in C headers but it seems to be fragile. Example of output when a symbol is missing in version script: ... LD libbpf-in.o LINK libbpf.a LINK libbpf.so Warning: Num of global symbols in libbpf-in.o (115) does NOT match with num of versioned symbols in libbpf.so (114). Please make sure all LIBBPF_API symbols are versioned in libbpf.map. make: *** [check_abi] Error 1 Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 22c5ffe22825..34d9c3619c96 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -147,6 +147,11 @@ BPF_IN := $(OUTPUT)libbpf-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) VERSION_SCRIPT := libbpf.map +GLOBAL_SYM_COUNT = $(shell readelf -s $(BPF_IN) | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') +VERSIONED_SYM_COUNT = $(shell readelf -s $(OUTPUT)libbpf.so | \ + grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) + CMD_TARGETS = $(LIB_FILE) CXX_TEST_TARGET = $(OUTPUT)test_libbpf @@ -159,7 +164,7 @@ TARGETS = $(CMD_TARGETS) all: fixdep all_cmd -all_cmd: $(CMD_TARGETS) +all_cmd: $(CMD_TARGETS) check $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ @@ -186,6 +191,18 @@ $(OUTPUT)libbpf.a: $(BPF_IN) $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a $(QUIET_LINK)$(CXX) $^ -lelf -o $@ +check: check_abi + +check_abi: $(OUTPUT)libbpf.so + @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ + echo "Warning: Num of global symbols in $(BPF_IN)" \ + "($(GLOBAL_SYM_COUNT)) does NOT match with num of" \ + "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ + "Please make sure all LIBBPF_API symbols are" \ + "versioned in $(VERSION_SCRIPT)." >&2; \ + exit 1; \ + fi + define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ -- cgit v1.2.3 From 76d1b894c5155dbc44d24f6df2e0da3726d54bbd Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 23 Nov 2018 16:44:35 -0800 Subject: libbpf: Document API and ABI conventions Document API and ABI for libbpf: naming convention, symbol visibility, ABI versioning. This is just a starting point. Documentation can be significantly extended in the future to cover more topics. ABI versioning section touches only a few basic points with a link to more comprehensive documentation from Ulrich Drepper. This section can be extended in the future when there is better understanding what works well and what not so well in libbpf development process and production usage. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/README.rst | 139 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 tools/lib/bpf/README.rst (limited to 'tools') diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst new file mode 100644 index 000000000000..2ced9e061c4b --- /dev/null +++ b/tools/lib/bpf/README.rst @@ -0,0 +1,139 @@ +.. SPDX-License-Identifier: GPL-2.0 + +libbpf API naming convention +============================ + +libbpf API provides access to a few logically separated groups of +functions and types. Every group has its own naming convention +described here. It's recommended to follow these conventions whenever a +new function or type is added to keep libbpf API clean and consistent. + +All types and functions provided by libbpf API should have one of the +following prefixes: ``bpf_``, ``btf_``, ``libbpf_``. + +System call wrappers +-------------------- + +System call wrappers are simple wrappers for commands supported by +sys_bpf system call. These wrappers should go to ``bpf.h`` header file +and map one-on-one to corresponding commands. + +For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM`` +command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc. + +Objects +------- + +Another class of types and functions provided by libbpf API is "objects" +and functions to work with them. Objects are high-level abstractions +such as BPF program or BPF map. They're represented by corresponding +structures such as ``struct bpf_object``, ``struct bpf_program``, +``struct bpf_map``, etc. + +Structures are forward declared and access to their fields should be +provided via corresponding getters and setters rather than directly. + +These objects are associated with corresponding parts of ELF object that +contains compiled BPF programs. + +For example ``struct bpf_object`` represents ELF object itself created +from an ELF file or from a buffer, ``struct bpf_program`` represents a +program in ELF object and ``struct bpf_map`` is a map. + +Functions that work with an object have names built from object name, +double underscore and part that describes function purpose. + +For example ``bpf_object__open`` consists of the name of corresponding +object, ``bpf_object``, double underscore and ``open`` that defines the +purpose of the function to open ELF file and create ``bpf_object`` from +it. + +Another example: ``bpf_program__load`` is named for corresponding +object, ``bpf_program``, that is separated from other part of the name +by double underscore. + +All objects and corresponding functions other than BTF related should go +to ``libbpf.h``. BTF types and functions should go to ``btf.h``. + +Auxiliary functions +------------------- + +Auxiliary functions and types that don't fit well in any of categories +described above should have ``libbpf_`` prefix, e.g. +``libbpf_get_error`` or ``libbpf_prog_type_by_name``. + +libbpf ABI +========== + +libbpf can be both linked statically or used as DSO. To avoid possible +conflicts with other libraries an application is linked with, all +non-static libbpf symbols should have one of the prefixes mentioned in +API documentation above. See API naming convention to choose the right +name for a new symbol. + +Symbol visibility +----------------- + +libbpf follow the model when all global symbols have visibility "hidden" +by default and to make a symbol visible it has to be explicitly +attributed with ``LIBBPF_API`` macro. For example: + +.. code-block:: c + + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); + +This prevents from accidentally exporting a symbol, that is not supposed +to be a part of ABI what, in turn, improves both libbpf developer- and +user-experiences. + +ABI versionning +--------------- + +To make future ABI extensions possible libbpf ABI is versioned. +Versioning is implemented by ``libbpf.map`` version script that is +passed to linker. + +Version name is ``LIBBPF_`` prefix + three-component numeric version, +starting from ``0.0.1``. + +Every time ABI is being changed, e.g. because a new symbol is added or +semantic of existing symbol is changed, ABI version should be bumped. + +For example, if current state of ``libbpf.map`` is: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + +, and a new symbol ``bpf_func_c`` is being introduced, then +``libbpf.map`` should be changed like this: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + LIBBPF_0.0.2 { + global: + bpf_func_c; + } LIBBPF_0.0.1; + +, where new version ``LIBBPF_0.0.2`` depends on the previous +``LIBBPF_0.0.1``. + +Format of version script and ways to handle ABI changes, including +incompatible ones, described in details in [1]. + +Links +===== + +[1] https://www.akkadia.org/drepper/dsohowto.pdf + (Chapter 3. Maintaining APIs and ABIs). -- cgit v1.2.3 From d913a2273a8988cd810a031bfa08249ca7dd5f7c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 26 Nov 2018 14:16:18 -0800 Subject: bpf: add msg_pop_data helper to tools Add the necessary header definitions to tools for new msg_pop_data_helper. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 16 +++++++++++++++- tools/testing/selftests/bpf/bpf_helpers.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 23e2031a43d4..597afdbc1ab9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2268,6 +2268,19 @@ union bpf_attr { * * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of msg + * payload and/or *pop* value being to large. + * + * Return + * 0 on success, or a negative erro in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2360,7 +2373,8 @@ union bpf_attr { FN(map_push_elem), \ FN(map_pop_elem), \ FN(map_peek_elem), \ - FN(msg_push_data), + FN(msg_push_data), \ + FN(msg_pop_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 686e57ce40f4..7b69519a09b1 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -113,6 +113,8 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_push_data; +static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) = + (void *) BPF_FUNC_msg_pop_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = -- cgit v1.2.3 From 1ade9abadfca0250c8256bb222a1cd5cc5fa8bbc Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 26 Nov 2018 14:16:19 -0800 Subject: bpf: test_sockmap, add options for msg_pop_data() helper Similar to msg_pull_data and msg_push_data add a set of options to have msg_pop_data() exercised. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_sockmap.c | 127 +++++++++++++++++++++++- tools/testing/selftests/bpf/test_sockmap_kern.h | 70 ++++++++++--- 2 files changed, 180 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 622ade0a0957..e85a771f607b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -79,6 +79,8 @@ int txmsg_start; int txmsg_end; int txmsg_start_push; int txmsg_end_push; +int txmsg_start_pop; +int txmsg_pop; int txmsg_ingress; int txmsg_skb; int ktls; @@ -104,6 +106,8 @@ static const struct option long_options[] = { {"txmsg_end", required_argument, NULL, 'e'}, {"txmsg_start_push", required_argument, NULL, 'p'}, {"txmsg_end_push", required_argument, NULL, 'q'}, + {"txmsg_start_pop", required_argument, NULL, 'w'}, + {"txmsg_pop", required_argument, NULL, 'x'}, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, {"txmsg_skb", no_argument, &txmsg_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, @@ -473,13 +477,27 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->end); } else { int slct, recvp = 0, recv, max_fd = fd; + float total_bytes, txmsg_pop_total; int fd_flags = O_NONBLOCK; struct timeval timeout; - float total_bytes; fd_set w; fcntl(fd, fd_flags); + /* Account for pop bytes noting each iteration of apply will + * call msg_pop_data helper so we need to account for this + * by calculating the number of apply iterations. Note user + * of the tool can create cases where no data is sent by + * manipulating pop/push/pull/etc. For example txmsg_apply 1 + * with txmsg_pop 1 will try to apply 1B at a time but each + * iteration will then pop 1B so no data will ever be sent. + * This is really only useful for testing edge cases in code + * paths. + */ total_bytes = (float)iov_count * (float)iov_length * (float)cnt; + txmsg_pop_total = txmsg_pop; + if (txmsg_apply) + txmsg_pop_total *= (total_bytes / txmsg_apply); + total_bytes -= txmsg_pop_total; err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) perror("recv start time: "); @@ -488,7 +506,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, timeout.tv_sec = 0; timeout.tv_usec = 300000; } else { - timeout.tv_sec = 1; + timeout.tv_sec = 3; timeout.tv_usec = 0; } @@ -503,7 +521,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, goto out_errno; } else if (!slct) { if (opt->verbose) - fprintf(stderr, "unexpected timeout\n"); + fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total); errno = -EIO; clock_gettime(CLOCK_MONOTONIC, &s->end); goto out_errno; @@ -619,7 +637,7 @@ static int sendmsg_test(struct sockmap_options *opt) iov_count = 1; err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false, opt); - if (err && opt->verbose) + if (opt->verbose) fprintf(stderr, "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); @@ -931,6 +949,39 @@ run: } } + if (txmsg_start_pop) { + i = 4; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop): %d (%s)\n", + txmsg_start_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 4; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + } + + if (txmsg_pop) { + i = 5; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_pop): %d (%s)\n", + txmsg_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 5; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + + } + if (txmsg_ingress) { int in = BPF_F_INGRESS; @@ -1082,6 +1133,11 @@ static void test_options(char *options) snprintf(tstr, OPTSTRING, "end %d,", txmsg_end); strncat(options, tstr, OPTSTRING); } + if (txmsg_start_pop) { + snprintf(tstr, OPTSTRING, "pop (%d,%d),", + txmsg_start_pop, txmsg_start_pop + txmsg_pop); + strncat(options, tstr, OPTSTRING); + } if (txmsg_ingress) strncat(options, "ingress,", OPTSTRING); if (txmsg_skb) @@ -1264,6 +1320,7 @@ static int test_mixed(int cgrp) txmsg_apply = txmsg_cork = 0; txmsg_start = txmsg_end = 0; txmsg_start_push = txmsg_end_push = 0; + txmsg_start_pop = txmsg_pop = 0; /* Test small and large iov_count values with pass/redir/apply/cork */ txmsg_pass = 1; @@ -1383,6 +1440,19 @@ static int test_start_end(int cgrp) txmsg_end = 2; txmsg_start_push = 1; txmsg_end_push = 2; + txmsg_start_pop = 1; + txmsg_pop = 1; + err = test_txmsg(cgrp); + if (err) + goto out; + + /* Cut a byte of pushed data but leave reamining in place */ + txmsg_start = 1; + txmsg_end = 2; + txmsg_start_push = 1; + txmsg_end_push = 3; + txmsg_start_pop = 1; + txmsg_pop = 1; err = test_txmsg(cgrp); if (err) goto out; @@ -1393,6 +1463,9 @@ static int test_start_end(int cgrp) opt.iov_length = 100; txmsg_cork = 1600; + txmsg_start_pop = 0; + txmsg_pop = 0; + for (i = 99; i <= 1600; i += 500) { txmsg_start = 0; txmsg_end = i; @@ -1403,6 +1476,17 @@ static int test_start_end(int cgrp) goto out; } + /* Test pop data in middle of cork */ + for (i = 99; i <= 1600; i += 500) { + txmsg_start_pop = 10; + txmsg_pop = i; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + txmsg_start_pop = 0; + txmsg_pop = 0; + /* Test start/end with cork but pull data in middle */ for (i = 199; i <= 1600; i += 500) { txmsg_start = 100; @@ -1423,6 +1507,15 @@ static int test_start_end(int cgrp) if (err) goto out; + /* Test pop with cork pulling last sg entry */ + txmsg_start_pop = 1500; + txmsg_pop = 1600; + err = test_exec(cgrp, &opt); + if (err) + goto out; + txmsg_start_pop = 0; + txmsg_pop = 0; + /* Test start/end pull of single byte in last page */ txmsg_start = 1111; txmsg_end = 1112; @@ -1432,6 +1525,13 @@ static int test_start_end(int cgrp) if (err) goto out; + /* Test pop of single byte in last page */ + txmsg_start_pop = 1111; + txmsg_pop = 1112; + err = test_exec(cgrp, &opt); + if (err) + goto out; + /* Test start/end with end < start */ txmsg_start = 1111; txmsg_end = 0; @@ -1456,7 +1556,20 @@ static int test_start_end(int cgrp) txmsg_start_push = 1601; txmsg_end_push = 1600; err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test pop with start > data */ + txmsg_start_pop = 1601; + txmsg_pop = 1; + err = test_exec(cgrp, &opt); + if (err) + goto out; + /* Test pop with pop range > data */ + txmsg_start_pop = 1599; + txmsg_pop = 10; + err = test_exec(cgrp, &opt); out: txmsg_start = 0; txmsg_end = 0; @@ -1641,6 +1754,12 @@ int main(int argc, char **argv) case 'q': txmsg_end_push = atoi(optarg); break; + case 'w': + txmsg_start_pop = atoi(optarg); + break; + case 'x': + txmsg_pop = atoi(optarg); + break; case 'a': txmsg_apply = atoi(optarg); break; diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index 14b8bbac004f..e7639f66a941 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -74,7 +74,7 @@ struct bpf_map_def SEC("maps") sock_bytes = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), - .max_entries = 4 + .max_entries = 6 }; struct bpf_map_def SEC("maps") sock_redir_flags = { @@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1, two = 2, three = 3; - int *start, *end, *start_push, *end_push; + int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -198,15 +198,19 @@ int bpf_prog4(struct sk_msg_md *msg) end_push = bpf_map_lookup_elem(&sock_bytes, &three); if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); return SK_PASS; } SEC("sk_msg2") int bpf_prog5(struct sk_msg_md *msg) { - int zero = 0, one = 1, two = 2, three = 3; - int *start, *end, *start_push, *end_push; - int *bytes, len1, len2 = 0, len3; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; + int *bytes, len1, len2 = 0, len3, len4; int err1 = -1, err2 = -1; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -247,6 +251,20 @@ int bpf_prog5(struct sk_msg_md *msg) bpf_printk("sk_msg2: length push_update %i->%i\n", len2 ? len2 : len1, len3); } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg2: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg2: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", len1, err1, err2); @@ -256,8 +274,8 @@ int bpf_prog5(struct sk_msg_md *msg) SEC("sk_msg3") int bpf_prog6(struct sk_msg_md *msg) { - int *bytes, *start, *end, *start_push, *end_push, *f; - int zero = 0, one = 1, two = 2, three = 3, key = 0; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; __u64 flags = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -277,6 +295,11 @@ int bpf_prog6(struct sk_msg_md *msg) if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -292,8 +315,9 @@ int bpf_prog6(struct sk_msg_md *msg) SEC("sk_msg4") int bpf_prog7(struct sk_msg_md *msg) { - int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3; - int *bytes, *start, *end, *start_push, *end_push, *f; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int len1, len2 = 0, len3, len4; int err1 = 0, err2 = 0, key = 0; __u64 flags = 0; @@ -335,6 +359,22 @@ int bpf_prog7(struct sk_msg_md *msg) len2 ? len2 : len1, len3); } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg4: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg4: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg4: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } + + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -389,8 +429,8 @@ int bpf_prog9(struct sk_msg_md *msg) SEC("sk_msg7") int bpf_prog10(struct sk_msg_md *msg) { - int *bytes, *start, *end, *start_push, *end_push; - int zero = 0, one = 1, two = 2, three = 3; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -406,7 +446,11 @@ int bpf_prog10(struct sk_msg_md *msg) end_push = bpf_map_lookup_elem(&sock_bytes, &three); if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); - + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + bpf_printk("return sk drop\n"); return SK_DROP; } -- cgit v1.2.3 From 295daee4ba108a025b9ac66231b0700e18ff5315 Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Tue, 27 Nov 2018 20:54:44 +0530 Subject: selftests/bpf: add config fragment CONFIG_FTRACE_SYSCALLS CONFIG_FTRACE_SYSCALLS=y is required for get_cgroup_id_user test case this test reads a file from debug trace path /sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id Signed-off-by: Naresh Kamboju Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 7f90d3645af8..37f947ec44ed 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -22,3 +22,4 @@ CONFIG_NET_CLS_FLOWER=m CONFIG_LWTUNNEL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_XDP_SOCKETS=y +CONFIG_FTRACE_SYSCALLS=y -- cgit v1.2.3 From 1ad93ab10e24856ea1ae9f0fd3352992afa67e1b Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 28 Nov 2018 12:56:10 -0800 Subject: bpf: Fix various lib and testsuite build failures on 32-bit. Cannot cast a u64 to a pointer on 32-bit without an intervening (long) cast otherwise GCC warns. Signed-off-by: David S. Miller Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 2 +- tools/testing/selftests/bpf/test_progs.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index eadcf8dfd295..c2d641f3e16e 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -466,7 +466,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } - *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL); if (IS_ERR(*btf)) { err = PTR_ERR(*btf); *btf = NULL; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c1e688f61061..1c57abbe0945 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -524,7 +524,7 @@ static void test_bpf_obj_id(void) load_time < now - 60 || load_time > now + 60 || prog_infos[i].created_by_uid != my_uid || prog_infos[i].nr_map_ids != 1 || - *(int *)prog_infos[i].map_ids != map_infos[i].id || + *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", @@ -539,7 +539,7 @@ static void test_bpf_obj_id(void) load_time, now, prog_infos[i].created_by_uid, my_uid, prog_infos[i].nr_map_ids, 1, - *(int *)prog_infos[i].map_ids, map_infos[i].id, + *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, prog_infos[i].name, expected_prog_name)) goto done; } @@ -585,7 +585,7 @@ static void test_bpf_obj_id(void) bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); - saved_map_id = *(int *)(prog_infos[i].map_ids); + saved_map_id = *(int *)((long)prog_infos[i].map_ids); prog_info.map_ids = prog_infos[i].map_ids; prog_info.nr_map_ids = 2; err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); @@ -593,12 +593,12 @@ static void test_bpf_obj_id(void) prog_infos[i].xlated_prog_insns = 0; CHECK(err || info_len != sizeof(struct bpf_prog_info) || memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)prog_info.map_ids != saved_map_id, + *(int *)(long)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)prog_info.map_ids, saved_map_id); + *(int *)(long)prog_info.map_ids, saved_map_id); close(prog_fd); } CHECK(nr_id_found != nr_iters, -- cgit v1.2.3 From b42699547fc9fb1057795bccc21a6445743a7fde Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 29 Nov 2018 15:31:45 -0800 Subject: tools/bpf: make libbpf _GNU_SOURCE friendly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During porting libbpf to bcc, I got some warnings like below: ... [ 2%] Building C object src/cc/CMakeFiles/bpf-shared.dir/libbpf/src/libbpf.c.o /home/yhs/work/bcc2/src/cc/libbpf/src/libbpf.c:12:0: warning: "_GNU_SOURCE" redefined [enabled by default] #define _GNU_SOURCE ... [ 3%] Building C object src/cc/CMakeFiles/bpf-shared.dir/libbpf/src/libbpf_errno.c.o /home/yhs/work/bcc2/src/cc/libbpf/src/libbpf_errno.c: In function ‘libbpf_strerror’: /home/yhs/work/bcc2/src/cc/libbpf/src/libbpf_errno.c:45:7: warning: assignment makes integer from pointer without a cast [enabled by default] ret = strerror_r(err, buf, size); ... bcc is built with _GNU_SOURCE defined and this caused the above warning. This patch intends to make libpf _GNU_SOURCE friendly by . define _GNU_SOURCE in libbpf.c unless it is not defined . undefine _GNU_SOURCE as non-gnu version of strerror_r is expected. Signed-off-by: Yonghong Song Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 2 ++ tools/lib/bpf/libbpf_errno.c | 1 + 2 files changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ed4212a4c5f9..59b748ebd15f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9,7 +9,9 @@ * Copyright (C) 2017 Nicira, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index d83b17f8435c..4343e40588c6 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 Nicira, Inc. */ +#undef _GNU_SOURCE #include #include -- cgit v1.2.3 From 327e5dab6dd8559ba28739ec8873350d7b5bb080 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 30 Nov 2018 16:25:44 +0000 Subject: tools: bpftool: use "/proc/self/" i.o. crafting links with getpid() The getpid() function is called in a couple of places in bpftool to craft links of the shape "/proc//...". Instead, it is possible to use the "/proc/self/" shortcut, which makes things a bit easier, in particular in jit_disasm.c. Do the replacement, and remove the includes of from the relevant files, now we do not use getpid() anymore. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/common.c | 5 ++--- tools/bpf/bpftool/jit_disasm.c | 11 +---------- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 4e217d57118e..4349b6683ca8 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include @@ -276,7 +275,7 @@ int get_fd_type(int fd) char buf[512]; ssize_t n; - snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd); + snprintf(path, sizeof(path), "/proc/self/fd/%d", fd); n = readlink(path, buf, sizeof(buf)); if (n < 0) { @@ -304,7 +303,7 @@ char *get_fdinfo(int fd, const char *key) ssize_t n; FILE *fdi; - snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", getpid(), fd); + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd); fdi = fopen(path, "r"); if (!fdi) { diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index b2ed5ee1af5f..545a92471c33 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -28,20 +27,12 @@ static void get_exec_path(char *tpath, size_t size) { + const char *path = "/proc/self/exe"; ssize_t len; - char *path; - - snprintf(tpath, size, "/proc/%d/exe", (int) getpid()); - tpath[size - 1] = 0; - - path = strdup(tpath); - assert(path); len = readlink(path, tpath, size - 1); assert(len > 0); tpath[len] = 0; - - free(path); } static int oper_count; -- cgit v1.2.3 From cad4977344b35ea116ec5fefe91a76b1dfa113f5 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 30 Nov 2018 16:25:45 +0000 Subject: tools: bpftool: fix bash completion for bpftool prog (attach|detach) Fix bash completion for "bpftool prog (attach|detach) PROG TYPE MAP" so that the list of indices proposed for MAP are map indices, and not PROG indices. Also use variables for map and prog reference types ("id", "pinned", and "tag" for programs). Fixes: b7d3826c2ed6 ("bpf: bpftool, add support for attaching programs to maps") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/bash-completion/bpftool | 73 +++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 45c2db257d2b..b7e6c4f25ad1 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -243,16 +243,20 @@ _bpftool() # Completion depends on object and command in use case $object in prog) - if [[ $command != "load" && $command != "loadall" ]]; then - case $prev in - id) - _bpftool_get_prog_ids - return 0 - ;; - esac - fi + # Complete id, only for subcommands that use prog (but no map) ids + case $command in + show|list|dump|pin) + case $prev in + id) + _bpftool_get_prog_ids + return 0 + ;; + esac + ;; + esac local PROG_TYPE='id pinned tag' + local MAP_TYPE='id pinned' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -293,22 +297,43 @@ _bpftool() return 0 ;; attach|detach) - if [[ ${#words[@]} == 7 ]]; then - COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) - return 0 - fi - - if [[ ${#words[@]} == 6 ]]; then - COMPREPLY=( $( compgen -W "msg_verdict skb_verdict \ - skb_parse flow_dissector" -- "$cur" ) ) - return 0 - fi - - if [[ $prev == "$command" ]]; then - COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) - return 0 - fi - return 0 + case $cword in + 3) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + 4) + case $prev in + id) + _bpftool_get_prog_ids + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + 5) + COMPREPLY=( $( compgen -W 'msg_verdict skb_verdict \ + skb_parse flow_dissector' -- "$cur" ) ) + return 0 + ;; + 6) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + 7) + case $prev in + id) + _bpftool_get_map_ids + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + esac ;; load|loadall) local obj -- cgit v1.2.3 From 4f0fc6f571d29837bc09aa8c51591f330f17bbf0 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 30 Nov 2018 16:25:46 +0000 Subject: tools: bpftool: fix bash completion for new map types (queue and stack) Commit 197c2dac74e4 ("bpf: Add BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK to bpftool-map") added support for queue and stack eBPF map types in bpftool map handling. Let's update the bash completion accordingly. Fixes: 197c2dac74e4 ("bpf: Add BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK to bpftool-map") Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/bash-completion/bpftool | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index b7e6c4f25ad1..9a60080f085f 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -436,7 +436,7 @@ _bpftool() lru_percpu_hash lpm_trie array_of_maps \ hash_of_maps devmap sockmap cpumap xskmap \ sockhash cgroup_storage reuseport_sockarray \ - percpu_cgroup_storage' -- \ + percpu_cgroup_storage queue stack' -- \ "$cur" ) ) return 0 ;; -- cgit v1.2.3 From 73f0b9db53836a68c40691fad1467ed44a3123e8 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 30 Nov 2018 16:25:47 +0000 Subject: tools: bpftool: mark offloaded programs more explicitly in plain output In bpftool (plain) output for "bpftool prog show" or "bpftool map show", an offloaded BPF object is simply denoted with "dev ifname", which is not really explicit. Change it with something that clearly shows the program is offloaded. While at it also add an additional space, as done between other information fields. Example output, before: # bpftool prog show 1337: xdp tag a04f5eef06a7f555 dev foo loaded_at 2018-10-19T16:40:36+0100 uid 0 xlated 16B not jited memlock 4096B After: # bpftool prog show 1337: xdp tag a04f5eef06a7f555 offloaded_to foo loaded_at 2018-10-19T16:40:36+0100 uid 0 xlated 16B not jited memlock 4096B Suggested-by: Daniel Borkmann Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 4349b6683ca8..172d3761d9ab 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -604,7 +604,7 @@ void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) if (!ifindex) return; - printf(" dev "); + printf(" offloaded_to "); if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) printf("%s", name); else -- cgit v1.2.3 From 99a44bef587082fbbaddf6aaa1e823ebbb629339 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 30 Nov 2018 16:25:48 +0000 Subject: tools: bpftool: add owner_prog_type and owner_jited to bpftool output For prog array maps, the type of the owner program, and the JIT-ed state of that program, are available from the file descriptor information under /proc. Add them to "bpftool map show" output. Example output: # bpftool map show 158225: prog_array name jmp_table flags 0x0 key 4B value 4B max_entries 8 memlock 4096B owner_prog_type flow_dissector owner jited # bpftool --json --pretty map show [{ "id": 1337, "type": "prog_array", "name": "jmp_table", "flags": 0, "bytes_key": 4, "bytes_value": 4, "max_entries": 8, "bytes_memlock": 4096, "owner_prog_type": "flow_dissector", "owner_jited": true } ] As we move the table used for associating names to program types, complete it with the missing types (lwt_seg6local and sk_reuseport). Also add missing types to the help message for "bpftool prog" (sk_reuseport and flow_dissector). Suggested-by: Daniel Borkmann Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/main.h | 26 +++++++++++++++++++++++++ tools/bpf/bpftool/map.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++-- tools/bpf/bpftool/prog.c | 25 +----------------------- 3 files changed, 75 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 7431669fae0a..2761981669c8 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -78,6 +78,32 @@ #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE }" +static const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", +}; + enum bpf_obj_type { BPF_OBJ_UNKNOWN, BPF_OBJ_PROG, diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 3850f8d65703..8469ea6cf1c8 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -487,7 +487,6 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) char *memlock; memlock = get_fdinfo(fd, "memlock"); - close(fd); jsonw_start_object(json_wtr); @@ -514,6 +513,30 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_int_field(json_wtr, "bytes_memlock", atoi(memlock)); free(memlock); + if (info->type == BPF_MAP_TYPE_PROG_ARRAY) { + char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); + char *owner_jited = get_fdinfo(fd, "owner_jited"); + + if (owner_prog_type) { + unsigned int prog_type = atoi(owner_prog_type); + + if (prog_type < ARRAY_SIZE(prog_type_name)) + jsonw_string_field(json_wtr, "owner_prog_type", + prog_type_name[prog_type]); + else + jsonw_uint_field(json_wtr, "owner_prog_type", + prog_type); + } + if (atoi(owner_jited)) + jsonw_bool_field(json_wtr, "owner_jited", true); + else + jsonw_bool_field(json_wtr, "owner_jited", false); + + free(owner_prog_type); + free(owner_jited); + } + close(fd); + if (!hash_empty(map_table.table)) { struct pinned_obj *obj; @@ -536,7 +559,6 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) char *memlock; memlock = get_fdinfo(fd, "memlock"); - close(fd); printf("%u: ", info->id); if (info->type < ARRAY_SIZE(map_type_name)) @@ -557,6 +579,30 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf(" memlock %sB", memlock); free(memlock); + if (info->type == BPF_MAP_TYPE_PROG_ARRAY) { + char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); + char *owner_jited = get_fdinfo(fd, "owner_jited"); + + printf("\n\t"); + if (owner_prog_type) { + unsigned int prog_type = atoi(owner_prog_type); + + if (prog_type < ARRAY_SIZE(prog_type_name)) + printf("owner_prog_type %s ", + prog_type_name[prog_type]); + else + printf("owner_prog_type %d ", prog_type); + } + if (atoi(owner_jited)) + printf("owner jited"); + else + printf("owner not jited"); + + free(owner_prog_type); + free(owner_jited); + } + close(fd); + printf("\n"); if (!hash_empty(map_table.table)) { struct pinned_obj *obj; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 4e04699a2350..56db61c5a91f 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -54,30 +54,6 @@ #include "main.h" #include "xlated_dumper.h" -static const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", - [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", - [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", - [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", - [BPF_PROG_TYPE_XDP] = "xdp", - [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", - [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", - [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", - [BPF_PROG_TYPE_LWT_IN] = "lwt_in", - [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", - [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", - [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", - [BPF_PROG_TYPE_SK_SKB] = "sk_skb", - [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", - [BPF_PROG_TYPE_SK_MSG] = "sk_msg", - [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", - [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", - [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", -}; - static const char * const attach_type_strings[] = { [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", @@ -1172,6 +1148,7 @@ static int do_help(int argc, char **argv) " tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n" " cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n" " lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n" + " sk_reuseport | flow_dissector |\n" " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" " cgroup/sendmsg4 | cgroup/sendmsg6 }\n" -- cgit v1.2.3 From bbe210615dc1fa7ec88516adb7dfccb47bccba21 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 28 Nov 2018 20:07:05 +0000 Subject: selftests: mlxsw: Consider VLAN-aware bridges as valid Previous patches add the ability to work with VLAN-aware bridges and VxLAN devices, so make sure such configuration no longer fails. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index ffa79b738f2a..d00d4665c807 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -81,7 +81,7 @@ sanitization_single_dev_vlan_aware_test() ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ ttl 20 tos inherit local 198.51.100.1 dstport 4789 - sanitization_single_dev_test_fail + sanitization_single_dev_test_pass ip link del dev vxlan0 ip link del dev br0 -- cgit v1.2.3 From f07232375d7f9384ec971d55343bf44ea1881fc4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 28 Nov 2018 20:07:06 +0000 Subject: selftests: mlxsw: Add a test for VxLAN configuration with a VLAN-aware bridge Extend the existing VLAN-unaware tests with their VLAN-aware counterparts. This includes sanitization of invalid configuration and offload indication on the local route performing decapsulation and the FDB entries perform encapsulation. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 204 ++++++++++++++++++++- 1 file changed, 203 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index d00d4665c807..90b4998a3b70 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -6,7 +6,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding -ALL_TESTS="sanitization_test offload_indication_test" +ALL_TESTS="sanitization_test offload_indication_test \ + sanitization_vlan_aware_test offload_indication_vlan_aware_test" NUM_NETIFS=2 source $lib_dir/lib.sh @@ -654,6 +655,207 @@ offload_indication_test() offload_indication_setup_destroy } +sanitization_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + # Test that when each VNI is mapped to a different VLAN we can enslave + # a port to the bridge + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 + check_err $? + + log_test "vlan-aware - enslavement to vlan-aware bridge" + + # Try to map both VNIs to the same VLAN and make sure configuration + # fails + RET=0 + + bridge vlan add vid 10 dev vxlan20 pvid untagged &> /dev/null + check_fail $? + + log_test "vlan-aware - two vnis mapped to the same vlan" + + # Test that enslavement of a port to a bridge fails when two VNIs + # are mapped to the same VLAN + RET=0 + + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vxlan20 pvid untagged + bridge vlan add vid 10 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? + + log_test "vlan-aware - failed enslavement to vlan-aware bridge" + + ip link del dev vxlan20 + ip link del dev vxlan10 + ip link del dev br0 +} + +offload_indication_vlan_aware_setup_create() +{ + # Create a simple setup with two VxLAN devices and a single VLAN-aware + # bridge + ip link add name br0 up type bridge mcast_snooping 0 vlan_filtering 1 \ + vlan_default_pvid 0 + + ip link set dev $swp1 master br0 + + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip address add 198.51.100.1/32 dev lo + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged +} + +offload_indication_vlan_aware_setup_destroy() +{ + bridge vlan del vid 20 dev vxlan20 + bridge vlan del vid 10 dev vxlan10 + + ip link del dev vxlan20 + ip link del dev vxlan10 + + ip address del 198.51.100.1/32 dev lo + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + + ip link set dev $swp1 nomaster + + ip link del dev br0 +} + +offload_indication_vlan_aware_fdb_test() +{ + RET=0 + + log_info "vxlan entry offload indication - vlan-aware" + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ + dst 198.51.100.2 vlan 10 + + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self master vlan 10 +} + +offload_indication_vlan_aware_decap_route_test() +{ + RET=0 + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + # Toggle PVID flag on one VxLAN device and make sure route is still + # marked as offloaded + bridge vlan add vid 10 dev vxlan10 untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + # Toggle PVID flag on second VxLAN device and make sure route is no + # longer marked as offloaded + bridge vlan add vid 20 dev vxlan20 untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + # Toggle PVID flag back and make sure route is marked as offloaded + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - vni map/unmap" +} + +offload_indication_vlan_aware_test() +{ + offload_indication_vlan_aware_setup_create + offload_indication_vlan_aware_fdb_test + offload_indication_vlan_aware_decap_route_test + offload_indication_vlan_aware_setup_destroy +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From b5166d7a92398e3bd7fc44f8705c07fdfbfc0c17 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 28 Nov 2018 20:07:08 +0000 Subject: selftests: forwarding: Add VxLAN test with a VLAN-aware bridge The test is very similar to its VLAN-unaware counterpart (vxlan_bridge_1d.sh), but instead of using multiple VLAN-unaware bridges, a single VLAN-aware bridge is used with multiple VLANs. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1q.sh | 790 +++++++++++++++++++++ .../net/forwarding/vxlan_bridge_1q_port_8472.sh | 10 + 2 files changed, 800 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh create mode 100755 tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh new file mode 100755 index 000000000000..bac2e568d22c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -0,0 +1,790 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | | +# | | local 192.0.2.17 local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT id 2000 dstport $VXPORT | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1q) | | | | BR2 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 | | | | | vid 10 | | +# | | | vid 20 | | | | | vid 20 | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + test_flood + test_unicast + reapply_config + ping_ipv4 + test_flood + test_unicast + test_learning + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 + vlan_create $h1 20 v$h1 198.51.100.1/24 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx10 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1=$1; shift + local host_addr2=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + bridge vlan add vid 10 dev w1 + bridge vlan add vid 20 dev w1 + + ip link add name vx10 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx10 up + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx10 master br2 + tc qdisc add dev vx10 clsact + + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx20 up + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx20 master br2 + tc qdisc add dev vx20 clsact + + bridge vlan add vid 20 dev vx20 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1/28 + vlan_create w2 20 vw2 $host_addr2/24 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3 \ + 198.51.100.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4 \ + 198.51.100.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx10 and vx20 were the first devices to get +# attached to the bridge, and that at the point that the local IP is already +# configured. Try the other scenario of attaching these devices to a bridge +# that already has local ports members, and only then assign the local IP. +reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx20 nomaster + ip link set dev vx10 nomaster + + rp1_unset_addr + sleep 5 + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + + rp1_set_addr + sleep 5 +} + +ping_ipv4() +{ + ping_test $h1.10 192.0.2.2 ": local->local vid 10" + ping_test $h1.20 198.51.100.2 ": local->local vid 20" + ping_test $h1.10 192.0.2.3 ": local->remote 1 vid 10" + ping_test $h1.10 192.0.2.4 ": local->remote 2 vid 10" + ping_test $h1.20 198.51.100.3 ": local->remote 1 vid 20" + ping_test $h1.20 198.51.100.4 ": local->remote 2 vid 20" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_sw 2>/dev/null || \ + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_hw +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2") + local counter + local key + + # Packets reach the local host tagged whereas they reach the VxLAN + # devices untagged. In order to be able to use the same filter for + # all counters, make sure the packets also reach the local host + # untagged + bridge vlan add vid $vid dev $swp2 untagged + for counter in "${counters[@]}"; do + flood_counter_install $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $counter + done + bridge vlan add vid $vid dev $swp2 +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local what=$1; shift + local -a expects=("${@}") + + RET=0 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 192.0.2.100 10 "flood vlan 10" \ + 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \ + 10 0 10 0 10 +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local vid=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local vid=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx10 192.0.2.34" + "$r2_mac vx10 192.0.2.50") + local target + + log_info "unicast vlan 10" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 10 $target + done + + __test_unicast $h2_mac 192.0.2.2 0 10 "local MAC unicast" + __test_unicast $r1_mac 192.0.2.3 1 10 "remote MAC 1 unicast" + __test_unicast $r2_mac 192.0.2.4 3 10 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 10 $target + done + + log_info "unicast vlan 20" + + targets=("$h2_mac $h2" "$r1_mac vx20 192.0.2.34" \ + "$r2_mac vx20 192.0.2.50") + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 20 $target + done + + __test_unicast $h2_mac 198.51.100.2 0 20 "local MAC unicast" + __test_unicast $r1_mac 198.51.100.3 2 20 "remote MAC 1 unicast" + __test_unicast $r2_mac 198.51.100.4 4 20 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 20 $target + done +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +__test_learning() +{ + local -a expects=(0 0 0 0 0) + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local idx1=$1; shift + local idx2=$1; shift + local vx=vx$vid + + # Check that flooding works + RET=0 + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before learning" + + # Send a packet with source mac set to $mac from host w2 and check that + # a corresponding entry is created in the VxLAN device + RET=0 + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_err $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + log_test "VXLAN: show learned FDB entry" + + # Repeat first test and check that packets only reach host w2 in ns1 + RET=0 + + expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: learned FDB entry" + + # Delete the learned FDB entry from the VxLAN and bridge devices and + # check that packets are flooded + RET=0 + + bridge fdb del dev $vx $mac master self vlan $vid + sleep 1 + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: deletion of learned FDB entry" + + # Re-learn the first FDB entry and check that it is correctly aged-out + RET=0 + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_err $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + sleep 20 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_fail $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_fail $? + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: Ageing of learned FDB entry" + + # Toggle learning on the bridge port and check that the bridge's FDB + # is populated only when it should + RET=0 + + ip link set dev $vx type bridge_slave learning off + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_fail $? + + ip link set dev $vx type bridge_slave learning on + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + log_test "VXLAN: learning toggling on bridge port" +} + +test_learning() +{ + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + local vid=10 + + # Enable learning on the VxLAN devices and set ageing time to 10 seconds + ip link set dev br1 type bridge ageing_time 1000 + ip link set dev vx10 type vxlan ageing 10 + ip link set dev vx10 type vxlan learning + ip link set dev vx20 type vxlan ageing 10 + ip link set dev vx20 type vxlan learning + reapply_config + + log_info "learning vlan 10" + + __test_learning $mac $dst $vid 1 3 + + log_info "learning vlan 20" + + mac=ca:fe:be:ef:13:37 + dst=198.51.100.100 + vid=20 + + __test_learning $mac $dst $vid 2 4 + + # Restore previous settings + ip link set dev vx20 type vxlan nolearning + ip link set dev vx20 type vxlan ageing 300 + ip link set dev vx10 type vxlan nolearning + ip link set dev vx10 type vxlan ageing 300 + ip link set dev br1 type bridge ageing_time 30000 + reapply_config +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh new file mode 100755 index 000000000000..b1b2d1a3164f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 +" +source vxlan_bridge_1q.sh -- cgit v1.2.3 From 88945f460603ad8909b556c67a9229bb23188d41 Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 28 Nov 2018 22:33:53 -0800 Subject: bpf: Fix verifier log string check for bad alignment. The message got changed a lot time ago. This was responsible for 36 test case failures on sparc64. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 17021d2b6bfe..5eace1f606fb 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -14287,7 +14287,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, reject_from_alignment = fd_prog < 0 && (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && - strstr(bpf_vlog, "Unknown alignment."); + strstr(bpf_vlog, "misaligned"); #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS if (reject_from_alignment) { printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", -- cgit v1.2.3 From e9ee9efc0d176512cdce9d27ff8549d7ffa2bfcd Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 30 Nov 2018 21:08:14 -0800 Subject: bpf: Add BPF_F_ANY_ALIGNMENT. Often we want to write tests cases that check things like bad context offset accesses. And one way to do this is to use an odd offset on, for example, a 32-bit load. This unfortunately triggers the alignment checks first on platforms that do not set CONFIG_EFFICIENT_UNALIGNED_ACCESS. So the test case see the alignment failure rather than what it was testing for. It is often not completely possible to respect the original intention of the test, or even test the same exact thing, while solving the alignment issue. Another option could have been to check the alignment after the context and other validations are performed by the verifier, but that is a non-trivial change to the verifier. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 14 ++++++++++++++ kernel/bpf/syscall.c | 7 ++++++- kernel/bpf/verifier.c | 2 ++ tools/include/uapi/linux/bpf.h | 14 ++++++++++++++ tools/lib/bpf/bpf.c | 8 ++++---- tools/lib/bpf/bpf.h | 2 +- tools/testing/selftests/bpf/test_align.c | 4 ++-- tools/testing/selftests/bpf/test_verifier.c | 3 ++- 8 files changed, 45 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 597afdbc1ab9..8050caea7495 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -232,6 +232,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 85cbeec06e50..f9554d9a14e1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1452,9 +1452,14 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (CHECK_ATTR(BPF_PROG_LOAD)) return -EINVAL; - if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT) + if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT)) return -EINVAL; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && + (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* copy eBPF program license from user space */ if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9584438fa2cc..71988337ac14 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6505,6 +6505,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; + if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) + env->strict_alignment = false; ret = replace_map_fd_with_map_ptr(env); if (ret < 0) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 597afdbc1ab9..8050caea7495 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -232,6 +232,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index ce1822194590..c19226cccf39 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -279,9 +279,9 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, } int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, int log_level) + size_t insns_cnt, __u32 prog_flags, const char *license, + __u32 kern_version, char *log_buf, size_t log_buf_sz, + int log_level) { union bpf_attr attr; @@ -295,7 +295,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_level = log_level; log_buf[0] = 0; attr.kern_version = kern_version; - attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; + attr.prog_flags = prog_flags; return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 09e8bbe111d4..60392b70587c 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -98,7 +98,7 @@ LIBBPF_API int bpf_load_program(enum bpf_prog_type type, char *log_buf, size_t log_buf_sz); LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, + size_t insns_cnt, __u32 prog_flags, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz, int log_level); diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 5f377ec53f2f..3c789d03b629 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -620,8 +620,8 @@ static int do_test_single(struct bpf_align_test *test) prog_len = probe_filter_length(prog); fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, 1, "GPL", 0, - bpf_vlog, sizeof(bpf_vlog), 2); + prog, prog_len, BPF_F_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2); if (fd_prog < 0 && test->result != REJECT) { printf("Failed to load program.\n"); printf("%s", bpf_vlog); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 5eace1f606fb..78e779c35869 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -14275,7 +14275,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, prog_len = probe_filter_length(prog); fd_prog = bpf_verify_program(prog_type, prog, prog_len, - test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + test->flags & F_LOAD_WITH_STRICT_ALIGNMENT ? + BPF_F_STRICT_ALIGNMENT : 0, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? -- cgit v1.2.3 From c7665702d3208b77b8e00f0699b6b88241b04360 Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 30 Nov 2018 21:08:26 -0800 Subject: bpf: Adjust F_NEEDS_EFFICIENT_UNALIGNED_ACCESS handling in test_verifier.c Make it set the flag argument to bpf_verify_program() which will relax the alignment restrictions. Now all such test cases will go properly through the verifier even on inefficient unaligned access architectures. On inefficient unaligned access architectures do not try to run such programs, instead mark the test case as passing but annotate the result similarly to how it is done now in the presence of this flag. So, we get complete full coverage for all REJECT test cases, and at least verifier level coverage for ACCEPT test cases. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 42 ++++++++++++++++------------- 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 78e779c35869..1d1775faaf14 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -14257,13 +14257,14 @@ out: static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { - int fd_prog, expected_ret, reject_from_alignment; + int fd_prog, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; int map_fds[MAX_NR_MAPS]; const char *expected_err; uint32_t expected_val; uint32_t retval; + __u32 pflags; int i, err; for (i = 0; i < MAX_NR_MAPS; i++) @@ -14274,9 +14275,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, do_test_fixup(test, prog_type, prog, map_fds); prog_len = probe_filter_length(prog); - fd_prog = bpf_verify_program(prog_type, prog, prog_len, - test->flags & F_LOAD_WITH_STRICT_ALIGNMENT ? - BPF_F_STRICT_ALIGNMENT : 0, + pflags = 0; + if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) + pflags |= BPF_F_STRICT_ALIGNMENT; + if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) + pflags |= BPF_F_ANY_ALIGNMENT; + fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? @@ -14286,28 +14290,27 @@ static void do_test_single(struct bpf_test *test, bool unpriv, expected_val = unpriv && test->retval_unpriv ? test->retval_unpriv : test->retval; - reject_from_alignment = fd_prog < 0 && - (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && - strstr(bpf_vlog, "misaligned"); -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (reject_from_alignment) { - printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", - strerror(errno)); - goto fail_log; - } -#endif + alignment_prevented_execution = 0; + if (expected_ret == ACCEPT) { - if (fd_prog < 0 && !reject_from_alignment) { + if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); goto fail_log; } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (fd_prog >= 0 && + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) { + alignment_prevented_execution = 1; + goto test_ok; + } +#endif } else { if (fd_prog >= 0) { printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { + if (!strstr(bpf_vlog, expected_err)) { printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n", expected_err, bpf_vlog); goto fail_log; @@ -14335,9 +14338,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto fail_log; } } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +test_ok: +#endif (*passes)++; - printf("OK%s\n", reject_from_alignment ? - " (NOTE: reject due to unknown alignment)" : ""); + printf("OK%s\n", alignment_prevented_execution ? + " (NOTE: not executed due to unknown alignment)" : ""); close_fds: close(fd_prog); for (i = 0; i < MAX_NR_MAPS; i++) -- cgit v1.2.3 From 2acc5fd5b8c25df0de7f3c8b8e385f5c6f8202ec Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 30 Nov 2018 21:08:39 -0800 Subject: bpf: Make more use of 'any' alignment in test_verifier.c Use F_NEEDS_EFFICIENT_UNALIGNED_ACCESS in more tests where the expected result is REJECT. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 1d1775faaf14..6bfd3242ea43 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1823,6 +1823,7 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet read for SK_MSG", @@ -2215,6 +2216,8 @@ static struct bpf_test tests[] = { }, .errstr = "invalid bpf_context access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check cb access: half, wrong type", @@ -3281,6 +3284,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R0 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs corruption 2", @@ -3311,6 +3315,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R3 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs + data", @@ -3810,6 +3815,7 @@ static struct bpf_test tests[] = { .errstr = "R2 invalid mem access 'inv'", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test16 (arith on data_end)", @@ -3993,6 +3999,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test24 (x += pkt_ptr, 5)", @@ -5149,6 +5156,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "invalid access to map value, value_size=64 off=-2 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid cgroup storage access 5", @@ -5265,6 +5273,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "invalid access to map value, value_size=64 off=-2 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid per-cpu cgroup storage access 5", @@ -7206,6 +7215,7 @@ static struct bpf_test tests[] = { .errstr = "invalid mem access 'inv'", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value illegal alu op, 5", @@ -7228,6 +7238,7 @@ static struct bpf_test tests[] = { .fixup_map_hash_48b = { 3 }, .errstr = "R0 invalid mem access 'inv'", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value is preserved across register spilling", @@ -9720,6 +9731,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', good access", @@ -9758,6 +9770,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', bad access 2", @@ -9776,6 +9789,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, good access", @@ -9814,6 +9828,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, bad access 2", @@ -9832,6 +9847,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', good access", @@ -9886,6 +9902,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, good access", @@ -9922,6 +9939,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, bad access 2", @@ -9997,6 +10015,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, good access", @@ -10053,6 +10072,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', good access", @@ -10089,6 +10109,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', bad access 2", @@ -10162,6 +10183,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', good access", @@ -10200,6 +10222,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', bad access 2", @@ -10218,6 +10241,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, good access", @@ -10256,6 +10280,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, bad access 2", @@ -10274,6 +10299,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', good access", @@ -10328,6 +10354,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, good access", @@ -10364,6 +10391,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, bad access 2", @@ -10439,6 +10467,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, good access", @@ -10495,6 +10524,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', good access", @@ -10531,6 +10561,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', bad access 2", @@ -10635,6 +10666,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 8", @@ -10648,6 +10680,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 9", @@ -11122,6 +11155,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R6 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls with args", @@ -11987,6 +12021,7 @@ static struct bpf_test tests[] = { .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", @@ -12130,6 +12165,7 @@ static struct bpf_test tests[] = { .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value_ptr_or_null via arg. test1", @@ -12301,6 +12337,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = POINTER_VALUE, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 2", @@ -12332,6 +12369,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 3", @@ -12434,6 +12472,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "same insn cannot be used with different", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 6", @@ -12469,6 +12508,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 7", @@ -12503,6 +12543,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 8", @@ -12584,6 +12625,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: caller stack init to zero or map_value_or_null", @@ -12949,6 +12991,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "BPF_XADD stores into R2 pkt is not allowed", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "xadd/w check whether src/dst got mangled, 1", @@ -13435,6 +13478,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "Unreleased reference", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking: alloc, check, free in both subbranches", -- cgit v1.2.3 From 0a68632488aa0129ed530af9ae9e8573f5650812 Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 30 Nov 2018 21:08:50 -0800 Subject: bpf: Apply F_NEEDS_EFFICIENT_UNALIGNED_ACCESS to more ACCEPT test cases. If a testcase has alignment problems but is expected to be ACCEPT, verify it using F_NEEDS_EFFICIENT_UNALIGNED_ACCESS too. Maybe in the future if we add some architecture specific code to elide the unaligned memory access warnings during the test, we can execute these as well. Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 6bfd3242ea43..c3b038f26ece 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3918,6 +3918,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test21 (x += pkt_ptr, 2)", @@ -3943,6 +3944,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test22 (x += pkt_ptr, 3)", @@ -3973,6 +3975,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test23 (x += pkt_ptr, 4)", @@ -4025,6 +4028,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test25 (marking on <, good access)", @@ -7732,6 +7736,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = 0 /* csum_diff of 64-byte packet */, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -9694,6 +9699,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' > pkt_end, bad access 1", @@ -9865,6 +9871,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', bad access 1", @@ -9977,6 +9984,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end >= pkt_data', bad access 1", @@ -10034,6 +10042,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, bad access 1", @@ -10146,6 +10155,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' > pkt_data, bad access 1", @@ -10317,6 +10327,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', bad access 1", @@ -10429,6 +10440,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data >= pkt_meta', bad access 1", @@ -10486,6 +10498,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, bad access 1", @@ -12405,6 +12418,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 4", @@ -12439,6 +12453,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 5", @@ -12584,6 +12599,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 9", @@ -13507,6 +13523,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking in call: free reference in subprog", -- cgit v1.2.3 From de94b651eef38b44adc20057ab5eb6d4ca672169 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Sun, 2 Dec 2018 13:02:15 -0800 Subject: libbpf: Fix license in README.rst The whole libbpf is licensed as (LGPL-2.1 OR BSD-2-Clause). I missed it while adding README.rst. Fix it and use same license as all other files in libbpf do. Since I'm the only author of README.rst so far, no others' permissions should be needed. Fixes: 76d1b894c515 ("libbpf: Document API and ABI conventions") Signed-off-by: Andrey Ignatov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/lib/bpf/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst index 2ced9e061c4b..056f38310722 100644 --- a/tools/lib/bpf/README.rst +++ b/tools/lib/bpf/README.rst @@ -1,4 +1,4 @@ -.. SPDX-License-Identifier: GPL-2.0 +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) libbpf API naming convention ============================ -- cgit v1.2.3 From e3da08d057002f9d0831949d51666c3e15dc6b29 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Sun, 2 Dec 2018 20:18:19 -0500 Subject: bpf: allow BPF read access to qdisc pkt_len The pkt_len field in qdisc_skb_cb stores the skb length as it will appear on the wire after segmentation. For byte accounting, this value is more accurate than skb->len. It is computed on entry to the TC layer, so only valid there. Allow read access to this field from BPF tc classifier and action programs. The implementation is analogous to tc_classid, aside from restricting to read access. To distinguish it from skb->len and self-describe export as wire_len. Changes v1->v2 - Rename pkt_len to wire_len Signed-off-by: Petar Penkov Signed-off-by: Vlad Dumitrescu Signed-off-by: Willem de Bruijn Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 16 +++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + tools/testing/selftests/bpf/test_verifier.c | 32 +++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8050caea7495..0183b8e70a9e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2497,6 +2497,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; __u64 tstamp; + __u32 wire_len; }; struct bpf_tunnel_key { diff --git a/net/core/filter.c b/net/core/filter.c index bd0df75dc7b6..3d54af4c363d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5773,6 +5773,7 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): return false; } @@ -5797,6 +5798,7 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range(struct __sk_buff, wire_len): return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): @@ -5843,6 +5845,7 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): return false; } @@ -6273,6 +6276,7 @@ static bool sk_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): return false; } @@ -6360,6 +6364,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): return false; } @@ -6685,6 +6690,17 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, bpf_target_off(struct sk_buff, tstamp, 8, target_size)); + break; + + case offsetof(struct __sk_buff, wire_len): + BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4); + + off = si->off; + off -= offsetof(struct __sk_buff, wire_len); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct qdisc_skb_cb, pkt_len); + *target_size = 4; + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); } return insn - insn_buf; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8050caea7495..0183b8e70a9e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2497,6 +2497,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; __u64 tstamp; + __u32 wire_len; }; struct bpf_tunnel_key { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c3b038f26ece..b4b4a3f93639 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -14033,6 +14033,38 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "check wire_len is not readable by sockets", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check wire_len is readable by tc classifier", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "check wire_len is not writable by tc classifier", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_context access", + .errstr_unpriv = "R1 leaks addr", + .result = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 03f1c26b1c56936fa29675b5556c3bb08185585a Mon Sep 17 00:00:00 2001 From: Alexis Bauvin Date: Mon, 3 Dec 2018 10:54:41 +0100 Subject: test/net: Add script for VXLAN underlay in a VRF This script tests the support of a VXLAN underlay in a non-default VRF. It does so by simulating two hypervisors and two VMs, an extended L2 between the VMs with the hypervisors as VTEPs with the underlay in a VRF, and finally by pinging the two VMs. It also tests that moving the underlay from a VRF to another works when down/up the VXLAN interface. Signed-off-by: Alexis Bauvin Reviewed-by: Amine Kherbouche Reviewed-by: David Ahern Tested-by: Amine Kherbouche Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 2 +- .../testing/selftests/net/test_vxlan_under_vrf.sh | 129 +++++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/test_vxlan_under_vrf.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7aebbcaa91bf..ee2e27b1cd0d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -7,7 +7,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh -TEST_PROGS += udpgro_bench.sh udpgro.sh +TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh new file mode 100755 index 000000000000..09f9ed92cbe4 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking VXLAN underlay in a non-default VRF. +# +# It simulates two hypervisors running a VM each using four network namespaces: +# two for the HVs, two for the VMs. +# A small VXLAN tunnel is made between the two hypervisors to have the two vms +# in the same virtual L2: +# +# +-------------------+ +-------------------+ +# | | | | +# | vm-1 netns | | vm-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth-hv | | | | veth-hv | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +------------------------------------+ +# | . | | . | +# | +----------+ | | +----------+ | +# | | veth-tap | | | | veth-tap | | +# | +----+-----+ | | +----+-----+ | +# | | | | | | +# | +--+--+ +--------------+ | | +--------------+ +--+--+ | +# | | br0 | | vrf-underlay | | | | vrf-underlay | | br0 | | +# | +--+--+ +-------+------+ | | +------+-------+ +--+--+ | +# | | | | | | | | +# | +---+----+ +-------+-------+ | | +-------+-------+ +---+----+ | +# | | vxlan0 |....| veth0 |.|...|.| veth0 |....| vxlan0 | | +# | +--------+ | 172.16.0.1/24 | | | | 172.16.0.2/24 | +--------+ | +# | +---------------+ | | +---------------+ | +# | | | | +# | hv-1 netns | | hv-2 netns | +# | | | | +# +-----------------------------------+ +------------------------------------+ +# +# This tests both the connectivity between vm-1 and vm-2, and that the underlay +# can be moved in and out of the vrf by unsetting and setting veth0's master. + +set -e + +cleanup() { + ip link del veth-hv-1 2>/dev/null || true + ip link del veth-tap 2>/dev/null || true + + for ns in hv-1 hv-2 vm-1 vm-2; do + ip netns del $ns || true + done +} + +# Clean start +cleanup &> /dev/null + +[[ $1 == "clean" ]] && exit 0 + +trap cleanup EXIT + +# Setup "Hypervisors" simulated with netns +ip link add veth-hv-1 type veth peer name veth-hv-2 +setup-hv-networking() { + hv=$1 + + ip netns add hv-$hv + ip link set veth-hv-$hv netns hv-$hv + ip -netns hv-$hv link set veth-hv-$hv name veth0 + + ip -netns hv-$hv link add vrf-underlay type vrf table 1 + ip -netns hv-$hv link set vrf-underlay up + ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0 + ip -netns hv-$hv link set veth0 up + + ip -netns hv-$hv link add br0 type bridge + ip -netns hv-$hv link set br0 up + + ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789 + ip -netns hv-$hv link set vxlan0 master br0 + ip -netns hv-$hv link set vxlan0 up +} +setup-hv-networking 1 +setup-hv-networking 2 + +# Check connectivity between HVs by pinging hv-2 from hv-1 +echo -n "Checking HV connectivity " +ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" + +# Setups a "VM" simulated by a netns an a veth pair +setup-vm() { + id=$1 + + ip netns add vm-$id + ip link add veth-tap type veth peer name veth-hv + + ip link set veth-tap netns hv-$id + ip -netns hv-$id link set veth-tap master br0 + ip -netns hv-$id link set veth-tap up + + ip link set veth-hv netns vm-$id + ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv + ip -netns vm-$id link set veth-hv up +} +setup-vm 1 +setup-vm 2 + +# Setup VTEP routes to make ARP work +bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent +bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent + +echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) " +ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" + +# Move the underlay to a non-default VRF +ip -netns hv-1 link set veth0 vrf vrf-underlay +ip -netns hv-1 link set veth0 down +ip -netns hv-1 link set veth0 up +ip -netns hv-2 link set veth0 vrf vrf-underlay +ip -netns hv-2 link set veth0 down +ip -netns hv-2 link set veth0 up + +echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " +ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" -- cgit v1.2.3 From db63e489c7aa03bab86960d6bcb6bf1fad8eea4e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Nov 2018 15:32:41 -0500 Subject: selftests: extend zerocopy tests to udp Both msg_zerocopy and udpgso_bench have udp zerocopy variants. Exercise these as part of the standard kselftest run. With udp, msg_zerocopy has no control channel. Ensure that the receiver exits after the sender by accounting for the initial delay in starting them (in msg_zerocopy.sh). Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/msg_zerocopy.c | 3 ++- tools/testing/selftests/net/msg_zerocopy.sh | 2 ++ tools/testing/selftests/net/udpgso_bench.sh | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 406cc70c571d..4b02933cab8a 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -651,12 +651,13 @@ static void do_flush_datagram(int fd, int type) static void do_rx(int domain, int type, int protocol) { + const int cfg_receiver_wait_ms = 400; uint64_t tstop; int fd; fd = do_setup_rx(domain, type, protocol); - tstop = gettimeofday_ms() + cfg_runtime_ms; + tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms; do { if (type == SOCK_STREAM) do_flush_tcp(fd); diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index c43c6debda06..825ffec85cea 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -25,6 +25,8 @@ readonly path_sysctl_mem="net.core.optmem_max" if [[ "$#" -eq "0" ]]; then $0 4 tcp -t 1 $0 6 tcp -t 1 + $0 4 udp -t 1 + $0 6 udp -t 1 echo "OK. All tests passed" exit 0 fi diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 0f0628613f81..5670a9ffd8eb 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -35,6 +35,9 @@ run_udp() { echo "udp gso" run_in_netns ${args} -S 0 + + echo "udp gso zerocopy" + run_in_netns ${args} -S 0 -z } run_tcp() { -- cgit v1.2.3 From 2587a974f184ff1cd8103de25b6354a3a4da7132 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 3 Dec 2018 11:31:24 +0000 Subject: tools: sync uapi/linux/bpf.h Pull changes from "bpf: respect size hint to BPF_PROG_TEST_RUN if present". Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0183b8e70a9e..64262890feb2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -374,8 +374,11 @@ union bpf_attr { struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ __u32 prog_fd; __u32 retval; - __u32 data_size_in; - __u32 data_size_out; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ __aligned_u64 data_in; __aligned_u64 data_out; __u32 repeat; -- cgit v1.2.3 From 64a975913b311e5192a76cd10721d4565c9addd1 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 3 Dec 2018 11:31:25 +0000 Subject: libbpf: add bpf_prog_test_run_xattr Add a new function, which encourages safe usage of the test interface. bpf_prog_test_run continues to work as before, but should be considered unsafe. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 23 +++++++++++++++++++++++ tools/lib/bpf/bpf.h | 19 +++++++++++++++++++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 43 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index c19226cccf39..5c3be06bf0dd 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -463,6 +463,29 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, return ret; } +int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) +{ + union bpf_attr attr; + int ret; + + if (!test_attr->data_out && test_attr->data_size_out > 0) + return -EINVAL; + + bzero(&attr, sizeof(attr)); + attr.test.prog_fd = test_attr->prog_fd; + attr.test.data_in = ptr_to_u64(test_attr->data_in); + attr.test.data_out = ptr_to_u64(test_attr->data_out); + attr.test.data_size_in = test_attr->data_size_in; + attr.test.data_size_out = test_attr->data_size_out; + attr.test.repeat = test_attr->repeat; + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + test_attr->data_size_out = attr.test.data_size_out; + test_attr->retval = attr.test.retval; + test_attr->duration = attr.test.duration; + return ret; +} + int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 60392b70587c..098e6f793b76 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -118,6 +118,25 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); + +struct bpf_prog_test_run_attr { + int prog_fd; + int repeat; + const void *data_in; + __u32 data_size_in; + void *data_out; /* optional */ + __u32 data_size_out; /* in: max length of data_out + * out: length of data_out */ + __u32 retval; /* out: return code of the BPF program */ + __u32 duration; /* out: average per repetition in ns */ +}; + +LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); + +/* + * bpf_prog_test_run does not check that data_out is large enough. Consider + * using bpf_prog_test_run_xattr instead. + */ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 4fb29f6d7a80..8deff22d61bb 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -65,6 +65,7 @@ LIBBPF_0.0.1 { bpf_prog_load_xattr; bpf_prog_query; bpf_prog_test_run; + bpf_prog_test_run_xattr; bpf_program__fd; bpf_program__is_kprobe; bpf_program__is_perf_event; -- cgit v1.2.3 From df47fc376df8338fdc9b27a5c9162a08f6e6157c Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 3 Dec 2018 11:31:26 +0000 Subject: selftests: add a test for bpf_prog_test_run_xattr Make sure that bpf_prog_test_run_xattr returns the correct length and that the kernel respects the output size hint. Also check that errno indicates ENOSPC if there is a short output buffer given. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 55 +++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 1c57abbe0945..26f1fdf3e2bf 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -70,7 +70,7 @@ static struct { .tcp.urg_ptr = 123, }; -#define CHECK(condition, tag, format...) ({ \ +#define _CHECK(condition, tag, duration, format...) ({ \ int __ret = !!(condition); \ if (__ret) { \ error_cnt++; \ @@ -83,6 +83,11 @@ static struct { __ret; \ }) +#define CHECK(condition, tag, format...) \ + _CHECK(condition, tag, duration, format) +#define CHECK_ATTR(condition, tag, format...) \ + _CHECK(condition, tag, tattr.duration, format) + static int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { @@ -124,6 +129,53 @@ static void test_pkt_access(void) bpf_object__close(obj); } +static void test_prog_run_xattr(void) +{ + const char *file = "./test_pkt_access.o"; + struct bpf_object *obj; + char buf[10]; + int err; + struct bpf_prog_test_run_attr tattr = { + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = 5, + }; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, + &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; + + memset(buf, 0, sizeof(buf)); + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run", + "err %d errno %d retval %d\n", err, errno, tattr.retval); + + CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out", + "incorrect output size, want %lu have %u\n", + sizeof(pkt_v4), tattr.data_size_out); + + CHECK_ATTR(buf[5] != 0, "overflow", + "BPF_PROG_TEST_RUN ignored size hint\n"); + + tattr.data_out = NULL; + tattr.data_size_out = 0; + errno = 0; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err || errno || tattr.retval, "run_no_output", + "err %d errno %d retval %d\n", err, errno, tattr.retval); + + tattr.data_size_out = 1; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err); + + bpf_object__close(obj); +} + static void test_xdp(void) { struct vip key4 = {.protocol = 6, .family = AF_INET}; @@ -1837,6 +1889,7 @@ int main(void) jit_enabled = is_jit_enabled(); test_pkt_access(); + test_prog_run_xattr(); test_xdp(); test_xdp_adjust_tail(); test_l4lb_all(); -- cgit v1.2.3 From b6f153d3e5a5bfbda17b997bd9e258143aa11809 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 4 Dec 2018 08:15:14 +0000 Subject: selftests: mlxsw: Add one-armed router test Construct a "one-armed router" topology and test that packets are forwarded by the ASIC and that a copy of the packet is sent to the kernel, which does not forward the packet again. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/one_armed_router.sh | 259 +++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh new file mode 100755 index 000000000000..f02d83e94576 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh @@ -0,0 +1,259 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2 +# should be forwarded by the ASIC, but also trapped so that ICMP redirect +# packets could be potentially generated. +# +# 1. https://en.wikipedia.org/wiki/One-armed_router +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | 192.0.2.2/24 | | +# | | 2001:db8:1::2/64 | | +# | | 198.51.100.2/24 | | +# | | 2001:db8:2::2/64 | | +# | | | | +# | | + $swp2 | | +# | +--|--------------------------------------------------------------------+ | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +switch_create() +{ + ip link add name br0 type bridge mcast_snooping 0 + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + + __addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64 +} + +switch_destroy() +{ + __addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + ip link del dev br0 +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.1 ": h1->h2" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": h1->h2" +} + +fwd_mark_ipv4() +{ + # Transmit packets from H1 to H2 and make sure they are trapped at + # swp1 due to loopback error, but only forwarded by the ASIC through + # swp2 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \ + skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv4 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in hardware" + + tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower +} + +fwd_mark_ipv6() +{ + tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \ + skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \ + -B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv6 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in hardware" + + tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + sysctl_set net.ipv4.conf.all.accept_redirects 0 + sysctl_set net.ipv6.conf.all.accept_redirects 0 + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + sysctl_restore net.ipv6.conf.all.accept_redirects + sysctl_restore net.ipv4.conf.all.accept_redirects + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 30da46b5dc3a9a14db11706d841440e28b12bb53 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 5 Dec 2018 10:28:24 +0000 Subject: tools: bpftool: add a command to dump the trace pipe BPF programs can use the bpf_trace_printk() helper to print debug information into the trace pipe. Add a subcommand "bpftool prog tracelog" to simply dump this pipe to the console. This is for a good part copied from iproute2, where the feature is available with "tc exec bpf dbg". Changes include dumping pipe content to stdout instead of stderr and adding JSON support (content is dumped as an array of strings, one per line read from the pipe). This version is dual-licensed, with Daniel's permission. Cc: Daniel Borkmann Suggested-by: Daniel Borkmann Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 13 +- tools/bpf/bpftool/bash-completion/bpftool | 5 +- tools/bpf/bpftool/main.h | 1 + tools/bpf/bpftool/prog.c | 4 +- tools/bpf/bpftool/tracelog.c | 157 +++++++++++++++++++++++ 5 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 tools/bpf/bpftool/tracelog.c (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index ab36e920e552..5524b6dccd85 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -26,8 +26,9 @@ MAP COMMANDS | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* | **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] -| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] -| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog tracelog** | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -117,6 +118,14 @@ DESCRIPTION parameter, with the exception of *flow_dissector* which is detached from the current networking name space. + **bpftool prog tracelog** + Dump the trace pipe of the system to the console (stdout). + Hit to stop printing. BPF programs can write to this + trace pipe at runtime with the **bpf_trace_printk()** helper. + This should be used only for debugging purposes. For + streaming data from BPF programs to user space, one can use + perf events (see also **bpftool-map**\ (8)). + **bpftool prog help** Print short help message. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 9a60080f085f..44c189ba072a 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -398,10 +398,13 @@ _bpftool() ;; esac ;; + tracelog) + return 0 + ;; *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'dump help pin attach detach load \ - show list' -- "$cur" ) ) + show list tracelog' -- "$cur" ) ) ;; esac ;; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 2761981669c8..0be0dd8f467f 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -167,6 +167,7 @@ int do_event_pipe(int argc, char **argv); int do_cgroup(int argc, char **arg); int do_perf(int argc, char **arg); int do_net(int argc, char **arg); +int do_tracelog(int argc, char **arg); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 56db61c5a91f..54c8dbf05c9c 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1140,6 +1140,7 @@ static int do_help(int argc, char **argv) " [pinmaps MAP_DIR]\n" " %s %s attach PROG ATTACH_TYPE [MAP]\n" " %s %s detach PROG ATTACH_TYPE [MAP]\n" + " %s %s tracelog\n" " %s %s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -1158,7 +1159,7 @@ static int do_help(int argc, char **argv) "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -1173,6 +1174,7 @@ static const struct cmd cmds[] = { { "loadall", do_loadall }, { "attach", do_attach }, { "detach", do_detach }, + { "tracelog", do_tracelog }, { 0 } }; diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c new file mode 100644 index 000000000000..1fa8e513f590 --- /dev/null +++ b/tools/bpf/bpftool/tracelog.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (c) 2015-2017 Daniel Borkmann */ +/* Copyright (c) 2018 Netronome Systems, Inc. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "main.h" + +#ifndef TRACEFS_MAGIC +# define TRACEFS_MAGIC 0x74726163 +#endif + +#define _textify(x) #x +#define textify(x) _textify(x) + +FILE *trace_pipe_fd; +char *buff; + +static int validate_tracefs_mnt(const char *mnt, unsigned long magic) +{ + struct statfs st_fs; + + if (statfs(mnt, &st_fs) < 0) + return -ENOENT; + if ((unsigned long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +static bool +find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt) +{ + size_t src_len; + + if (validate_tracefs_mnt(mntpt, magic)) + return false; + + src_len = strlen(mntpt); + if (src_len + 1 >= PATH_MAX) { + p_err("tracefs mount point name too long"); + return false; + } + + strcpy(mnt, mntpt); + return true; +} + +static bool find_tracefs_pipe(char *mnt) +{ + static const char * const known_mnts[] = { + "/sys/kernel/debug/tracing", + "/sys/kernel/tracing", + "/tracing", + "/trace", + }; + const char *pipe_name = "/trace_pipe"; + const char *fstype = "tracefs"; + char type[100], format[32]; + const char * const *ptr; + bool found = false; + FILE *fp; + + for (ptr = known_mnts; ptr < known_mnts + ARRAY_SIZE(known_mnts); ptr++) + if (find_tracefs_mnt_single(TRACEFS_MAGIC, mnt, *ptr)) + goto exit_found; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return false; + + /* Allow room for NULL terminating byte and pipe file name */ + snprintf(format, sizeof(format), "%%*s %%%zds %%99s %%*s %%*d %%*d\\n", + PATH_MAX - strlen(pipe_name) - 1); + while (fscanf(fp, format, mnt, type) == 2) + if (strcmp(type, fstype) == 0) { + found = true; + break; + } + fclose(fp); + + /* The string from fscanf() might be truncated, check mnt is valid */ + if (!found || validate_tracefs_mnt(mnt, TRACEFS_MAGIC)) + return false; + +exit_found: + strcat(mnt, pipe_name); + return true; +} + +static void exit_tracelog(int signum) +{ + fclose(trace_pipe_fd); + free(buff); + + if (json_output) { + jsonw_end_array(json_wtr); + jsonw_destroy(&json_wtr); + } + + exit(0); +} + +int do_tracelog(int argc, char **argv) +{ + const struct sigaction act = { + .sa_handler = exit_tracelog + }; + char trace_pipe[PATH_MAX]; + bool found_trace_pipe; + size_t buff_len = 0; + + if (json_output) + jsonw_start_array(json_wtr); + + found_trace_pipe = find_tracefs_pipe(trace_pipe); + if (!found_trace_pipe) { + p_err("could not find trace pipe, tracefs not mounted?"); + return -1; + } + + trace_pipe_fd = fopen(trace_pipe, "r"); + if (!trace_pipe_fd) { + p_err("could not open trace pipe: %s", strerror(errno)); + return -1; + } + + sigaction(SIGHUP, &act, NULL); + sigaction(SIGINT, &act, NULL); + sigaction(SIGTERM, &act, NULL); + while (1) { + ssize_t ret; + + ret = getline(&buff, &buff_len, trace_pipe_fd); + if (ret <= 0) { + p_err("failed to read content from trace pipe: %s", + strerror(errno)); + break; + } + if (json_output) + jsonw_string(json_wtr, buff); + else + printf("%s", buff); + } + + fclose(trace_pipe_fd); + free(buff); + return -1; +} -- cgit v1.2.3 From 555249df711b146a2898949ac80503be29fe7340 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 5 Dec 2018 17:35:45 -0800 Subject: bpf: tools: Sync uapi bpf.h for the name changes in bpf_func_info This patch sync the name changes in bpf_func_info to the tools/. Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 64262890feb2..16263e8827fc 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2991,7 +2991,7 @@ struct bpf_flow_keys { }; struct bpf_func_info { - __u32 insn_offset; + __u32 insn_off; __u32 type_id; }; -- cgit v1.2.3 From 84ecc1f98ca7ce28ede9bc5cc70a557fdfa09caa Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 5 Dec 2018 17:35:47 -0800 Subject: bpf: Expect !info.func_info and insn_off name changes in test_btf/libbpf/bpftool Similar to info.jited_*, info.func_info could be 0 if bpf_dump_raw_ok() == false. This patch makes changes to test_btf and bpftool to expect info.func_info could be 0. This patch also makes the needed changes for s/insn_offset/insn_off/. Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/prog.c | 7 +++++++ tools/bpf/bpftool/xlated_dumper.c | 4 ++-- tools/lib/bpf/btf.c | 12 ++++++------ tools/testing/selftests/bpf/test_btf.c | 8 +++++++- 4 files changed, 22 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 54c8dbf05c9c..a9a51123454c 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -589,6 +589,13 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (func_info && !info.func_info) { + /* kernel.kptr_restrict is set. No func_info available. */ + free(func_info); + func_info = NULL; + finfo_cnt = 0; + } + if ((member_len == &info.jited_prog_len && info.jited_prog_insns == 0) || (member_len == &info.xlated_prog_len && diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index e06ac0286a75..131ecd175533 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -261,7 +261,7 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, jsonw_start_object(json_wtr); if (btf && record) { - if (record->insn_offset == i) { + if (record->insn_off == i) { btf_dumper_type_only(btf, record->type_id, func_sig, sizeof(func_sig)); @@ -330,7 +330,7 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, } if (btf && record) { - if (record->insn_offset == i) { + if (record->insn_off == i) { btf_dumper_type_only(btf, record->type_id, func_sig, sizeof(func_sig)); diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index c2d641f3e16e..85d6446cf832 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -45,7 +45,7 @@ struct btf_ext { /* The minimum bpf_func_info checked by the loader */ struct bpf_func_info_min { - __u32 insn_offset; + __u32 insn_off; __u32 type_id; }; @@ -670,7 +670,7 @@ int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, memcpy(data, sinfo->data, records_len); - /* adjust the insn_offset, the data in .BTF.ext is + /* adjust the insn_off, the data in .BTF.ext is * the actual byte offset, and the kernel expects * the offset in term of bpf_insn. * @@ -681,7 +681,7 @@ int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, struct bpf_func_info_min *record; record = data + i * record_size; - record->insn_offset /= sizeof(struct bpf_insn); + record->insn_off /= sizeof(struct bpf_insn); } *func_info = data; @@ -722,15 +722,15 @@ int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, return -ENOMEM; memcpy(data + existing_flen, sinfo->data, records_len); - /* adjust insn_offset only, the rest data will be passed + /* adjust insn_off only, the rest data will be passed * to the kernel. */ for (i = 0; i < sinfo->num_func_info; i++) { struct bpf_func_info_min *record; record = data + existing_flen + i * record_size; - record->insn_offset = - record->insn_offset / sizeof(struct bpf_insn) + + record->insn_off = + record->insn_off / sizeof(struct bpf_insn) + insns_cnt; } *func_info = data; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index bae7308b7ec5..ff0952ea757a 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -3156,7 +3156,7 @@ static struct btf_func_type_test { }, { - .descr = "func_type (Incorrect bpf_func_info.insn_offset)", + .descr = "func_type (Incorrect bpf_func_info.insn_off)", .raw_types = { BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ @@ -3303,6 +3303,12 @@ static int do_test_func_type(int test_num) goto done; } + if (CHECK(!info.func_info, + "info.func_info == 0. kernel.kptr_restrict is set?")) { + err = -1; + goto done; + } + finfo = func_info; for (i = 0; i < 2; i++) { if (CHECK(finfo->type_id != test->func_info[i][1], -- cgit v1.2.3 From 1ba1daed90e235f297f60443128a3acfc05aac7b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 6 Dec 2018 17:05:48 +0000 Subject: selftests: mlxsw: Add a new test extack.sh Add a testsuite dedicated to testing extack propagation and related functionality. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/extack.sh | 84 ++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/extack.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh new file mode 100755 index 000000000000..101a5508bdfd --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test operations that we expect to report extended ack. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + netdev_pre_up_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +netdev_pre_up_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + ip link add name br2 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name vx2 up type vxlan id 2000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx2 master br2 + check_err $? + + ip link set dev $swp2 master br2 + check_err $? + + # Unsupported configuration: mlxsw demands that all offloaded VXLAN + # devices have the same TTL. + ip link set dev vx2 down + ip link set dev vx2 type vxlan ttl 200 + + ip link set dev vx2 up &>/dev/null + check_fail $? + + ip link set dev vx2 up 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - NETDEV_PRE_UP" + + ip link del dev vx2 + ip link del dev br2 + + ip link del dev vx1 + ip link del dev br1 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 0efe9ed98d15146becded107965b4e2f6b3aa805 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Dec 2018 19:55:18 +0000 Subject: selftests: mlxsw: vxlan: Test FDB un/marking on VXLAN join/leave When a VXLAN device is attached to an offloaded bridge, or when a front-panel port is attached to a bridge that already has a VXLAN device, mlxsw should offload the existing offloadable FDB entries. Similarly when VXLAN device is downed, the FDB entries are unoffloaded, and the marks thus need to be cleared. Similarly when a front-panel port device is attached to a bridge with a VXLAN device, or when VLAN flags are tweaked on a VXLAN port attached to a VLAN-aware bridge. Test that the replaying / clearing logic works by observing transitions in presence of offload marks under different scenarios. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 177 +++++++++++++++++++++ 1 file changed, 177 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 90b4998a3b70..ea11535f5a6e 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -647,12 +647,159 @@ offload_indication_decap_route_test() noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 } +check_fdb_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + check_err $? + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_err $? +} + +check_vxlan_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q self + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + check_fail $? + + bridge fdb show dev vxlan0 | grep $zmac | grep -q self + check_err $? + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_fail $? +} + +check_bridge_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q master + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + check_fail $? +} + +__offload_indication_join_vxlan_first() +{ + local vid=$1; shift + + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + + ip link set dev vxlan0 master br0 + bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2 + + RET=0 + check_vxlan_fdb_not_offloaded + ip link set dev $swp1 master br0 + sleep .1 + check_fdb_offloaded + log_test "offload indication - attach vxlan first" + + RET=0 + ip link set dev vxlan0 down + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - set vxlan down" + + RET=0 + ip link set dev vxlan0 up + sleep .1 + check_fdb_offloaded + log_test "offload indication - set vxlan up" + + if [[ ! -z $vid ]]; then + RET=0 + bridge vlan del dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - delete VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - add tagged VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid pvid untagged + sleep .1 + check_fdb_offloaded + log_test "offload indication - add pvid/untagged VLAN" + fi + + RET=0 + ip link set dev $swp1 nomaster + check_vxlan_fdb_not_offloaded + log_test "offload indication - detach port" +} + +offload_indication_join_vxlan_first() +{ + ip link add dev br0 up type bridge mcast_snooping 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_first + + ip link del dev vxlan0 + ip link del dev br0 +} + +__offload_indication_join_vxlan_last() +{ + local zmac=00:00:00:00:00:00 + + RET=0 + + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + + ip link set dev $swp1 master br0 + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_fail $? + + ip link set dev vxlan0 master br0 + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_err $? + + log_test "offload indication - attach vxlan last" +} + +offload_indication_join_vxlan_last() +{ + ip link add dev br0 up type bridge mcast_snooping 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + offload_indication_test() { offload_indication_setup_create offload_indication_fdb_test offload_indication_decap_route_test offload_indication_setup_destroy + + log_info "offload indication - replay & cleanup" + offload_indication_join_vxlan_first + offload_indication_join_vxlan_last } sanitization_vlan_aware_test() @@ -848,12 +995,42 @@ offload_indication_vlan_aware_decap_route_test() log_test "vxlan decap route - vni map/unmap" } +offload_indication_vlan_aware_join_vxlan_first() +{ + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_first 1 + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_vlan_aware_join_vxlan_last() +{ + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + offload_indication_vlan_aware_test() { offload_indication_vlan_aware_setup_create offload_indication_vlan_aware_fdb_test offload_indication_vlan_aware_decap_route_test offload_indication_vlan_aware_setup_destroy + + log_info "offload indication - replay & cleanup - vlan aware" + offload_indication_vlan_aware_join_vxlan_first + offload_indication_vlan_aware_join_vxlan_last } trap cleanup EXIT -- cgit v1.2.3 From 55939b262a61d35d3ccde8a388976b946d654dcf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 7 Dec 2018 19:55:20 +0000 Subject: selftests: forwarding: Add PVID test case for VXLAN with VLAN-aware bridges When using VLAN-aware bridges with VXLAN, the VLAN that is mapped to the VNI of the VXLAN device is that which is configured as "pvid untagged" on the corresponding bridge port. When these flags are toggled or when the VLAN is deleted entirely, remote hosts should not be able to receive packets from the VTEP. Add a test case for above mentioned scenarios. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/vxlan_bridge_1q.sh | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index bac2e568d22c..a5789721ba92 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -95,6 +95,7 @@ export VXPORT test_flood test_unicast test_learning + test_pvid "} NUM_NETIFS=6 @@ -610,6 +611,75 @@ test_unicast() done } +test_pvid() +{ + local -a expects=(0 0 0 0 0) + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + local vid=10 + + # Check that flooding works + RET=0 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before pvid off" + + # Toggle PVID off and test that flood to remote hosts does not work + RET=0 + + bridge vlan add vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid off" + + # Toggle PVID on and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid on" + + # Add a new VLAN and test that it does not affect flooding + RET=0 + + bridge vlan add vid 30 dev vx10 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + bridge vlan del vid 30 dev vx10 + + log_test "VXLAN: flood after vlan add" + + # Remove currently mapped VLAN and test that flood to remote hosts does + # not work + RET=0 + + bridge vlan del vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan delete" + + # Re-add the VLAN and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan re-add" +} + vxlan_ping_test() { local ping_dev=$1; shift -- cgit v1.2.3 From c099f3f413f354ecdd518e4be6a232592e91d5b9 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Wed, 5 Dec 2018 13:52:36 -0500 Subject: selftests: bpf: update testcases for BPF_ALU | BPF_ARSH "arsh32 on imm" and "arsh32 on reg" now are accepted. Also added two new testcases to make sure arsh32 won't be treated as arsh64 during interpretation or JIT code-gen for which case the high bits will be moved into low halve that the testcases could catch them. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b4b4a3f93639..36ce58b4933e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -721,8 +721,18 @@ static struct bpf_test tests[] = { BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "unknown opcode c4", + .result = ACCEPT, + .retval = 0, + }, + { + "arsh32 on imm 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0x1122334485667788), + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = -16069393, }, { "arsh32 on reg", @@ -732,8 +742,19 @@ static struct bpf_test tests[] = { BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "unknown opcode cc", + .result = ACCEPT, + .retval = 0, + }, + { + "arsh32 on reg 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0xffff55667788), + BPF_MOV64_IMM(BPF_REG_1, 15), + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 43724, }, { "arsh64 on imm", -- cgit v1.2.3 From d37e56df23f9e5916f95b60388c3f179c53c7102 Mon Sep 17 00:00:00 2001 From: Lucas Bates Date: Thu, 6 Dec 2018 17:42:24 -0500 Subject: tc-testing: Add command timeout feature to tdc Using an attribute set in the tdc_config.py file, limit the amount of time tdc will wait for an executed command to complete and prevent the script from hanging entirely. This timeout will be applied to all executed commands. Signed-off-by: Lucas Bates Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.py | 16 +++++++++++----- tools/testing/selftests/tc-testing/tdc_config.py | 2 ++ 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 7607ba3e3cbe..b862ee5c44b5 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -131,12 +131,16 @@ def exec_cmd(args, pm, stage, command): stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ENVIR) - (rawout, serr) = proc.communicate() - if proc.returncode != 0 and len(serr) > 0: - foutput = serr.decode("utf-8", errors="ignore") - else: - foutput = rawout.decode("utf-8", errors="ignore") + try: + (rawout, serr) = proc.communicate(timeout=NAMES['TIMEOUT']) + if proc.returncode != 0 and len(serr) > 0: + foutput = serr.decode("utf-8", errors="ignore") + else: + foutput = rawout.decode("utf-8", errors="ignore") + except subprocess.TimeoutExpired: + foutput = "Command \"{}\" timed out\n".format(command) + proc.returncode = 255 proc.stdout.close() proc.stderr.close() @@ -438,6 +442,8 @@ def check_default_settings(args, remaining, pm): NAMES['TC'] = args.path if args.device != None: NAMES['DEV2'] = args.device + if 'TIMEOUT' not in NAMES: + NAMES['TIMEOUT'] = None if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index d651bc1501bd..6d91e48c2625 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -15,6 +15,8 @@ NAMES = { 'DEV1': 'v0p1', 'DEV2': '', 'BATCH_FILE': './batch.txt', + # Length of time in seconds to wait before terminating a command + 'TIMEOUT': 12, # Name of the namespace to use 'NS': 'tcut', # Directory containing eBPF test programs -- cgit v1.2.3 From dfe465d33e7fce145746d836ddb31f0e27f28e28 Mon Sep 17 00:00:00 2001 From: Lucas Bates Date: Thu, 6 Dec 2018 17:42:25 -0500 Subject: tc-testing: Add new TdcResults module This module includes new classes for tdc to use in keeping track of test case results, instead of generating and tracking a lengthy string. The new module can be extended to support multiple formal test result formats to be friendlier to automation. Signed-off-by: Lucas Bates Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/TdcResults.py | 132 +++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 tools/testing/selftests/tc-testing/TdcResults.py (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/TdcResults.py b/tools/testing/selftests/tc-testing/TdcResults.py new file mode 100644 index 000000000000..1e4d95fdf8d0 --- /dev/null +++ b/tools/testing/selftests/tc-testing/TdcResults.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +from enum import Enum + +class ResultState(Enum): + noresult = -1 + skip = 0 + success = 1 + fail = 2 + +class TestResult: + def __init__(self, test_id="", test_name=""): + self.test_id = test_id + self.test_name = test_name + self.result = ResultState.noresult + self.failmsg = "" + self.errormsg = "" + self.steps = [] + + def set_result(self, result): + if (isinstance(result, ResultState)): + self.result = result + return True + else: + raise TypeError('Unknown result type, must be type ResultState') + + def get_result(self): + return self.result + + def set_errormsg(self, errormsg): + self.errormsg = errormsg + return True + + def append_errormsg(self, errormsg): + self.errormsg = '{}\n{}'.format(self.errormsg, errormsg) + + def get_errormsg(self): + return self.errormsg + + def set_failmsg(self, failmsg): + self.failmsg = failmsg + return True + + def append_failmsg(self, failmsg): + self.failmsg = '{}\n{}'.format(self.failmsg, failmsg) + + def get_failmsg(self): + return self.failmsg + + def add_steps(self, newstep): + if type(newstep) == list: + self.steps.extend(newstep) + elif type(newstep) == str: + self.steps.append(step) + else: + raise TypeError('TdcResults.add_steps() requires a list or str') + + def get_executed_steps(self): + return self.steps + +class TestSuiteReport(): + _testsuite = [] + + def add_resultdata(self, result_data): + if isinstance(result_data, TestResult): + self._testsuite.append(result_data) + return True + + def count_tests(self): + return len(self._testsuite) + + def count_failures(self): + return sum(1 for t in self._testsuite if t.result == ResultState.fail) + + def count_skips(self): + return sum(1 for t in self._testsuite if t.result == ResultState.skip) + + def find_result(self, test_id): + return next((tr for tr in self._testsuite if tr.test_id == test_id), None) + + def update_result(self, result_data): + orig = self.find_result(result_data.test_id) + if orig != None: + idx = self._testsuite.index(orig) + self._testsuite[idx] = result_data + else: + self.add_resultdata(result_data) + + def format_tap(self): + ftap = "" + ftap += '1..{}\n'.format(self.count_tests()) + index = 1 + for t in self._testsuite: + if t.result == ResultState.fail: + ftap += 'not ' + ftap += 'ok {} {} - {}'.format(str(index), t.test_id, t.test_name) + if t.result == ResultState.skip or t.result == ResultState.noresult: + ftap += ' # skipped - {}\n'.format(t.errormsg) + elif t.result == ResultState.fail: + if len(t.steps) > 0: + ftap += '\tCommands executed in this test case:' + for step in t.steps: + ftap += '\n\t\t{}'.format(step) + ftap += '\n\t{}'.format(t.failmsg) + ftap += '\n' + index += 1 + return ftap + + def format_xunit(self): + from xml.sax.saxutils import escape + xunit = "\n" + xunit += '\t\n'.format(self.count_tests(), self.count_skips()) + for t in self._testsuite: + xunit += '\t\t 0: + xunit += 'Commands executed in this test case:\n' + for step in t.steps: + xunit += '\t{}\n'.format(escape(step)) + xunit += 'FAILURE: {}\n'.format(escape(t.failmsg)) + xunit += '\t\t\t\n' + if t.errormsg: + xunit += '\t\t\t\n{}\n'.format(escape(t.errormsg)) + xunit += '\t\t\t\n' + if t.result == ResultState.skip: + xunit += '\t\t\t\n' + xunit += '\t\t\n' + xunit += '\t\n' + xunit += '\n' + return xunit -- cgit v1.2.3 From 915c158deaf97be49e71250f43e1539542217962 Mon Sep 17 00:00:00 2001 From: Lucas Bates Date: Thu, 6 Dec 2018 17:42:26 -0500 Subject: tc-testing: Implement the TdcResults module in tdc In tdc and the valgrind plugin, begin using the TdcResults module to track executed test cases. Signed-off-by: Lucas Bates Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/TdcPlugin.py | 3 +- .../tc-testing/plugin-lib/valgrindPlugin.py | 22 +++- tools/testing/selftests/tc-testing/tdc.py | 117 ++++++++++++--------- 3 files changed, 91 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py index 3ee9a6dacb52..1d9e279331eb 100644 --- a/tools/testing/selftests/tc-testing/TdcPlugin.py +++ b/tools/testing/selftests/tc-testing/TdcPlugin.py @@ -18,11 +18,12 @@ class TdcPlugin: if self.args.verbose > 1: print(' -- {}.post_suite'.format(self.sub_class)) - def pre_case(self, test_ordinal, testid): + def pre_case(self, test_ordinal, testid, test_name): '''run commands before test_runner does one test''' if self.args.verbose > 1: print(' -- {}.pre_case'.format(self.sub_class)) self.args.testid = testid + self.args.test_name = test_name self.args.test_ordinal = test_ordinal def post_case(self): diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py index 477a7bd7d7fb..e00c798de0bb 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py @@ -11,6 +11,7 @@ from string import Template import subprocess import time from TdcPlugin import TdcPlugin +from TdcResults import * from tdc_config import * @@ -21,6 +22,7 @@ class SubPlugin(TdcPlugin): def __init__(self): self.sub_class = 'valgrind/SubPlugin' self.tap = '' + self._tsr = TestSuiteReport() super().__init__() def pre_suite(self, testcount, testidlist): @@ -34,10 +36,14 @@ class SubPlugin(TdcPlugin): def post_suite(self, index): '''run commands after test_runner goes into a test loop''' super().post_suite(index) - self._add_to_tap('\n|---\n') if self.args.verbose > 1: print('{}.post_suite'.format(self.sub_class)) - print('{}'.format(self.tap)) + #print('{}'.format(self.tap)) + for xx in range(index - 1, self.testcount): + res = TestResult('{}-mem'.format(self.testidlist[xx]), 'Test skipped') + res.set_result(ResultState.skip) + res.set_errormsg('Skipped because of prior setup/teardown failure') + self._add_results(res) if self.args.verbose < 4: subprocess.check_output('rm -f vgnd-*.log', shell=True) @@ -128,8 +134,17 @@ class SubPlugin(TdcPlugin): nle_num = int(nle_mo.group(1)) mem_results = '' + res = TestResult('{}-mem'.format(self.args.testid), + '{} memory leak check'.format(self.args.test_name)) if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0): mem_results += 'not ' + res.set_result(ResultState.fail) + res.set_failmsg('Memory leak detected') + res.append_failmsg(content) + else: + res.set_result(ResultState.success) + + self._add_results(res) mem_results += 'ok {} - {}-mem # {}\n'.format( self.args.test_ordinal, self.args.testid, 'memory leak check') @@ -138,5 +153,8 @@ class SubPlugin(TdcPlugin): print('{}'.format(content)) self._add_to_tap(content) + def _add_results(self, res): + self._tsr.add_resultdata(res) + def _add_to_tap(self, more_tap_output): self.tap += more_tap_output diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index b862ee5c44b5..e6e4ce80a726 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -23,6 +23,7 @@ from tdc_config import * from tdc_helper import * import TdcPlugin +from TdcResults import * class PluginMgrTestFail(Exception): @@ -60,10 +61,10 @@ class PluginMgr: for pgn_inst in reversed(self.plugin_instances): pgn_inst.post_suite(index) - def call_pre_case(self, test_ordinal, testid): + def call_pre_case(self, test_ordinal, testid, test_name): for pgn_inst in self.plugin_instances: try: - pgn_inst.pre_case(test_ordinal, testid) + pgn_inst.pre_case(test_ordinal, testid, test_name) except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) @@ -102,7 +103,6 @@ class PluginMgr: self.argparser = argparse.ArgumentParser( description='Linux TC unit tests') - def replace_keywords(cmd): """ For a given executable command, substitute any known @@ -187,6 +187,7 @@ def run_one_test(pm, args, index, tidx): result = True tresult = "" tap = "" + res = TestResult(tidx['id'], tidx['name']) if args.verbose > 0: print("\t====================\n=====> ", end="") print("Test " + tidx["id"] + ": " + tidx["name"]) @@ -194,7 +195,7 @@ def run_one_test(pm, args, index, tidx): # populate NAMES with TESTID for this test NAMES['TESTID'] = tidx['id'] - pm.call_pre_case(index, tidx['id']) + pm.call_pre_case(index, tidx['id'], tidx['name']) prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"]) if (args.verbose > 0): @@ -209,10 +210,11 @@ def run_one_test(pm, args, index, tidx): pm.call_post_execute() if (exit_code is None or exit_code != int(tidx["expExitCode"])): - result = False print("exit: {!r}".format(exit_code)) print("exit: {}".format(int(tidx["expExitCode"]))) #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"]))) + res.set_result(ResultState.fail) + res.set_failmsg('Command exited with {}, expected {}\n{}'.format(exit_code, tidx["expExitCode"], procout)) print(procout) else: if args.verbose > 0: @@ -223,20 +225,15 @@ def run_one_test(pm, args, index, tidx): if procout: match_index = re.findall(match_pattern, procout) if len(match_index) != int(tidx["matchCount"]): - result = False + res.set_result(ResultState.fail) + res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + else: + res.set_result(ResultState.success) elif int(tidx["matchCount"]) != 0: - result = False - - if not result: - tresult += 'not ' - tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name']) - tap += tresult - - if result == False: - if procout: - tap += procout + res.set_result(ResultState.fail) + res.set_failmsg('No output generated by verify command.') else: - tap += 'No output!\n' + res.set_result(ResultState.success) prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout) pm.call_post_case() @@ -245,7 +242,7 @@ def run_one_test(pm, args, index, tidx): # remove TESTID from NAMES del(NAMES['TESTID']) - return tap + return res def test_runner(pm, args, filtered_tests): """ @@ -265,25 +262,15 @@ def test_runner(pm, args, filtered_tests): emergency_exit = False emergency_exit_message = '' - if args.notap: - if args.verbose: - tap = 'notap requested: omitting test plan\n' - else: - tap = str(index) + ".." + str(tcount) + "\n" + tsr = TestSuiteReport() + try: pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist]) except Exception as ee: ex_type, ex, ex_tb = sys.exc_info() print('Exception {} {} (caught in pre_suite).'. format(ex_type, ex)) - # when the extra print statements are uncommented, - # the traceback does not appear between them - # (it appears way earlier in the tdc.py output) - # so don't bother ... - # print('--------------------(') - # print('traceback') traceback.print_tb(ex_tb) - # print('--------------------)') emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex) emergency_exit = True stage = 'pre-SUITE' @@ -299,15 +286,26 @@ def test_runner(pm, args, filtered_tests): if args.verbose > 1: print('Not executing test {} {} because DEV2 not defined'. format(tidx['id'], tidx['name'])) + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + res.set_errormsg('Not executed because DEV2 is not defined') + tsr.add_resultdata(res) continue try: badtest = tidx # in case it goes bad - tap += run_one_test(pm, args, index, tidx) + res = run_one_test(pm, args, index, tidx) + tsr.add_resultdata(res) except PluginMgrTestFail as pmtf: ex_type, ex, ex_tb = sys.exc_info() stage = pmtf.stage message = pmtf.message output = pmtf.output + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + res.set_errormsg(pmtf.message) + res.set_failmsg(pmtf.output) + tsr.add_resultdata(res) + index += 1 print(message) print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'. format(ex_type, ex, index, tidx['id'], tidx['name'], stage)) @@ -326,16 +324,16 @@ def test_runner(pm, args, filtered_tests): # if we failed in setup or teardown, # fill in the remaining tests with ok-skipped count = index - if not args.notap: - tap += 'about to flush the tap output if tests need to be skipped\n' - if tcount + 1 != index: - for tidx in testlist[index - 1:]: - msg = 'skipped - previous {} failed'.format(stage) - tap += 'ok {} - {} # {} {} {}\n'.format( - count, tidx['id'], msg, index, badtest.get('id', '--Unknown--')) - count += 1 - tap += 'done flushing skipped test tap output\n' + if tcount + 1 != count: + for tidx in testlist[count - 1:]: + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + msg = 'skipped - previous {} failed {} {}'.format(stage, + index, badtest.get('id', '--Unknown--')) + res.set_errormsg(msg) + tsr.add_resultdata(res) + count += 1 if args.pause: print('Want to pause\nPress enter to continue ...') @@ -344,7 +342,7 @@ def test_runner(pm, args, filtered_tests): pm.call_post_suite(index) - return tap + return tsr def has_blank_ids(idlist): """ @@ -384,6 +382,10 @@ def set_args(parser): """ Set the command line arguments for tdc. """ + parser.add_argument( + '--outfile', type=str, + help='Path to the file in which results should be saved. ' + + 'Default target is the current directory.') parser.add_argument( '-p', '--path', type=str, help='The full path to the tc executable to use') @@ -420,8 +422,9 @@ def set_args(parser): '-v', '--verbose', action='count', default=0, help='Show the commands that are being run') parser.add_argument( - '-N', '--notap', action='store_true', - help='Suppress tap results for command under test') + '--format', default='tap', const='tap', nargs='?', + choices=['none', 'xunit', 'tap'], + help='Specify the format for test results. (Default: TAP)') parser.add_argument('-d', '--device', help='Execute the test case in flower category') parser.add_argument( @@ -638,12 +641,30 @@ def set_operation_mode(pm, args): if len(alltests): catresults = test_runner(pm, args, alltests) + if args.format == 'none': + print('Test results output suppression requested\n') + else: + print('\nAll test results: \n') + if args.format == 'xunit': + suffix = 'xml' + res = catresults.format_xunit() + elif args.format == 'tap': + suffix = 'tap' + res = catresults.format_tap() + print(res) + print('\n\n') + if not args.outfile: + fname = 'test-results.{}'.format(suffix) + else: + fname = args.outfile + with open(fname, 'w') as fh: + fh.write(res) + fh.close() + if os.getenv('SUDO_UID') is not None: + os.chown(fname, uid=int(os.getenv('SUDO_UID')), + gid=int(os.getenv('SUDO_GID'))) else: - catresults = 'No tests found\n' - if args.notap: - print('Tap output suppression requested\n') - else: - print('All test results: \n\n{}'.format(catresults)) + print('No tests found\n') def main(): """ -- cgit v1.2.3 From 8d189159ac1e8dc6d491840b3b2a9ede762269bc Mon Sep 17 00:00:00 2001 From: Lucas Bates Date: Thu, 6 Dec 2018 17:42:27 -0500 Subject: tc-testing: gitignore, ignore generated test results Ignore any .tap or .xml test result files generated by tdc. Additionally, ignore plugin symlinks. Signed-off-by: Lucas Bates Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/.gitignore | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index 7a60b85e148f..c5cc160948b3 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1,2 +1,5 @@ __pycache__/ *.pyc +plugins/ +*.xml +*.tap -- cgit v1.2.3 From ee491d8dbe496c510fa06a786460638d78428147 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 Dec 2018 16:42:26 -0800 Subject: bpf: tools: Sync uapi bpf.h Sync uapi bpf.h to tools/include/uapi/linux for the new bpf_line_info. Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 16263e8827fc..7973c28b24a0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -356,6 +356,9 @@ union bpf_attr { __u32 func_info_rec_size; /* userspace bpf_func_info size */ __aligned_u64 func_info; /* func info */ __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2679,6 +2682,12 @@ struct bpf_prog_info { __u32 func_info_rec_size; __aligned_u64 func_info; __u32 func_info_cnt; + __u32 line_info_cnt; + __aligned_u64 line_info; + __aligned_u64 jited_line_info; + __u32 jited_line_info_cnt; + __u32 line_info_rec_size; + __u32 jited_line_info_rec_size; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2995,4 +3004,14 @@ struct bpf_func_info { __u32 type_id; }; +#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +struct bpf_line_info { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 05687352c600930058ca4c68048302502e07a572 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 Dec 2018 16:42:27 -0800 Subject: bpf: Refactor and bug fix in test_func_type in test_btf.c 1) bpf_load_program_xattr() is absorbing the EBIG error which makes testing this case impossible. It is replaced with a direct syscall(__NR_bpf, BPF_PROG_LOAD,...). 2) The test_func_type() is renamed to test_info_raw() to prepare for the new line_info test in the next patch. 3) The bpf_obj_get_info_by_fd() testing for func_info is refactored to test_get_finfo(). A new test_get_linfo() will be added in the next patch for testing line_info purpose. 4) The test->func_info_cnt is checked instead of a static value "2". 5) Remove unnecessary "\n" in error message. 6) Adding back info_raw_test_num to the cmd arg such that a specific test case can be tested, like all other existing tests. 7) Fix a bug in handling expected_prog_load_failure. A test could pass even if prog_fd != -1 while expected_prog_load_failure is true. 8) The min rec_size check should be < 8 instead of < 4. Fixes: 4798c4ba3ba9 ("tools/bpf: extends test_btf to test load/retrieve func_type info") Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 211 +++++++++++++++++++-------------- 1 file changed, 125 insertions(+), 86 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index ff0952ea757a..8d5777c89620 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -114,12 +115,13 @@ static struct args { unsigned int raw_test_num; unsigned int file_test_num; unsigned int get_info_test_num; + unsigned int info_raw_test_num; bool raw_test; bool file_test; bool get_info_test; bool pprint_test; bool always_log; - bool func_type_test; + bool info_raw_test; } args; static char btf_log_buf[BTF_LOG_BUF_SIZE]; @@ -3051,7 +3053,7 @@ static int test_pprint(void) return err; } -static struct btf_func_type_test { +static struct prog_info_raw_test { const char *descr; const char *str_sec; __u32 raw_types[MAX_NR_RAW_TYPES]; @@ -3062,7 +3064,7 @@ static struct btf_func_type_test { __u32 func_info_rec_size; __u32 func_info_cnt; bool expected_prog_load_failure; -} func_type_test[] = { +} info_raw_tests[] = { { .descr = "func_type (main func + one sub)", .raw_types = { @@ -3198,90 +3200,44 @@ static size_t probe_prog_length(const struct bpf_insn *fp) return len + 1; } -static int do_test_func_type(int test_num) +static int test_get_finfo(const struct prog_info_raw_test *test, + int prog_fd) { - const struct btf_func_type_test *test = &func_type_test[test_num]; - unsigned int raw_btf_size, info_len, rec_size; - int i, btf_fd = -1, prog_fd = -1, err = 0; - struct bpf_load_program_attr attr = {}; - void *raw_btf, *func_info = NULL; struct bpf_prog_info info = {}; struct bpf_func_info *finfo; - - fprintf(stderr, "%s......", test->descr); - raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, - test->str_sec, test->str_sec_size, - &raw_btf_size); - - if (!raw_btf) - return -1; - - *btf_log_buf = '\0'; - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); - free(raw_btf); - - if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { - err = -1; - goto done; - } - - if (*btf_log_buf && args.always_log) - fprintf(stderr, "\n%s", btf_log_buf); - - attr.prog_type = test->prog_type; - attr.insns = test->insns; - attr.insns_cnt = probe_prog_length(attr.insns); - attr.license = "GPL"; - attr.prog_btf_fd = btf_fd; - attr.func_info_rec_size = test->func_info_rec_size; - attr.func_info_cnt = test->func_info_cnt; - attr.func_info = test->func_info; - - *btf_log_buf = '\0'; - prog_fd = bpf_load_program_xattr(&attr, btf_log_buf, - BTF_LOG_BUF_SIZE); - if (test->expected_prog_load_failure && prog_fd == -1) { - err = 0; - goto done; - } - if (CHECK(prog_fd == -1, "invalid prog_id errno:%d", errno)) { - fprintf(stderr, "%s\n", btf_log_buf); - err = -1; - goto done; - } + __u32 info_len, rec_size, i; + void *func_info = NULL; + int err; /* get necessary lens */ info_len = sizeof(struct bpf_prog_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); - err = -1; - goto done; + return -1; } - if (CHECK(info.func_info_cnt != 2, - "incorrect info.func_info_cnt (1st) %d\n", + if (CHECK(info.func_info_cnt != test->func_info_cnt, + "incorrect info.func_info_cnt (1st) %d", info.func_info_cnt)) { - err = -1; - goto done; + return -1; } + rec_size = info.func_info_rec_size; - if (CHECK(rec_size < 4, - "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) { - err = -1; - goto done; + if (CHECK(rec_size < 8, + "incorrect info.func_info_rec_size (1st) %d", rec_size)) { + return -1; } + if (!info.func_info_cnt) + return 0; + func_info = malloc(info.func_info_cnt * rec_size); - if (CHECK(!func_info, "out of memory")) { - err = -1; - goto done; - } + if (CHECK(!func_info, "out of memory")) + return -1; /* reset info to only retrieve func_info related data */ memset(&info, 0, sizeof(info)); - info.func_info_cnt = 2; + info.func_info_cnt = test->func_info_cnt; info.func_info_rec_size = rec_size; info.func_info = ptr_to_u64(func_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); @@ -3290,14 +3246,14 @@ static int do_test_func_type(int test_num) err = -1; goto done; } - if (CHECK(info.func_info_cnt != 2, - "incorrect info.func_info_cnt (2nd) %d\n", + if (CHECK(info.func_info_cnt != test->func_info_cnt, + "incorrect info.func_info_cnt (2nd) %d", info.func_info_cnt)) { err = -1; goto done; } - if (CHECK(info.func_info_rec_size != rec_size, - "incorrect info.func_info_rec_size (2nd) %d\n", + if (CHECK(info.func_info_rec_size < 8, + "incorrect info.func_info_rec_size (2nd) %d", info.func_info_rec_size)) { err = -1; goto done; @@ -3310,7 +3266,7 @@ static int do_test_func_type(int test_num) } finfo = func_info; - for (i = 0; i < 2; i++) { + for (i = 0; i < test->func_info_cnt; i++) { if (CHECK(finfo->type_id != test->func_info[i][1], "incorrect func_type %u expected %u", finfo->type_id, test->func_info[i][1])) { @@ -3320,7 +3276,75 @@ static int do_test_func_type(int test_num) finfo = (void *)finfo + rec_size; } + err = 0; + +done: + free(func_info); + return err; +} + +static int do_test_info_raw(unsigned int test_num) +{ + const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; + int btf_fd = -1, prog_fd = -1, err = 0; + unsigned int raw_btf_size; + union bpf_attr attr = {}; + void *raw_btf; + + fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr); + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, + test->str_sec, test->str_sec_size, + &raw_btf_size); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + free(raw_btf); + + if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { + err = -1; + goto done; + } + + if (*btf_log_buf && args.always_log) + fprintf(stderr, "\n%s", btf_log_buf); + *btf_log_buf = '\0'; + + attr.prog_type = test->prog_type; + attr.insns = ptr_to_u64(test->insns); + attr.insn_cnt = probe_prog_length(test->insns); + attr.license = ptr_to_u64("GPL"); + attr.prog_btf_fd = btf_fd; + attr.func_info_rec_size = test->func_info_rec_size; + attr.func_info_cnt = test->func_info_cnt; + attr.func_info = ptr_to_u64(test->func_info); + attr.log_buf = ptr_to_u64(btf_log_buf); + attr.log_size = BTF_LOG_BUF_SIZE; + attr.log_level = 1; + + prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + err = ((prog_fd == -1) != test->expected_prog_load_failure); + if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d", + prog_fd, test->expected_prog_load_failure, errno)) { + err = -1; + goto done; + } + + if (prog_fd == -1) + goto done; + + err = test_get_finfo(test, prog_fd); + if (err) + goto done; + done: + if (!err) + fprintf(stderr, "OK"); + if (*btf_log_buf && (err || args.always_log)) fprintf(stderr, "\n%s", btf_log_buf); @@ -3328,33 +3352,38 @@ done: close(btf_fd); if (prog_fd != -1) close(prog_fd); - free(func_info); + return err; } -static int test_func_type(void) +static int test_info_raw(void) { unsigned int i; int err = 0; - for (i = 0; i < ARRAY_SIZE(func_type_test); i++) - err |= count_result(do_test_func_type(i)); + if (args.info_raw_test_num) + return count_result(do_test_info_raw(args.info_raw_test_num)); + + for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) + err |= count_result(do_test_info_raw(i)); return err; } static void usage(const char *cmd) { - fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] |" - " [-g test_num (1 - %zu)] |" - " [-f test_num (1 - %zu)] | [-p] | [-k] ]\n", + fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" + "\t[-g btf_get_info_test_num (1 - %zu)] |\n" + "\t[-f btf_file_test_num (1 - %zu)] |\n" + "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" + "\t[-p (pretty print test)]]\n", cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests)); + ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests)); } static int parse_args(int argc, char **argv) { - const char *optstr = "lpkf:r:g:"; + const char *optstr = "lpk:f:r:g:"; int opt; while ((opt = getopt(argc, argv, optstr)) != -1) { @@ -3378,7 +3407,8 @@ static int parse_args(int argc, char **argv) args.pprint_test = true; break; case 'k': - args.func_type_test = true; + args.info_raw_test_num = atoi(optarg); + args.info_raw_test = true; break; case 'h': usage(argv[0]); @@ -3413,6 +3443,14 @@ static int parse_args(int argc, char **argv) return -1; } + if (args.info_raw_test_num && + (args.info_raw_test_num < 1 || + args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) { + fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n", + ARRAY_SIZE(info_raw_tests)); + return -1; + } + return 0; } @@ -3445,16 +3483,17 @@ int main(int argc, char **argv) if (args.pprint_test) err |= test_pprint(); - if (args.func_type_test) - err |= test_func_type(); + if (args.info_raw_test) + err |= test_info_raw(); if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test || args.func_type_test) + args.pprint_test || args.info_raw_test) goto done; err |= test_raw(); err |= test_get_info(); err |= test_file(); + err |= test_info_raw(); done: print_summary(); -- cgit v1.2.3 From 4d6304c763551cdb420201b1b754cc625d56bdd6 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 Dec 2018 16:42:28 -0800 Subject: bpf: Add unit tests for bpf_line_info Add unit tests for bpf_line_info for both BPF_PROG_LOAD and BPF_OBJ_GET_INFO_BY_FD. jit enabled: [root@arch-fb-vm1 bpf]# ./test_btf -k 0 BTF prog info raw test[5] (line_info (No subprog)): OK BTF prog info raw test[6] (line_info (No subprog. insn_off >= prog->len)): OK BTF prog info raw test[7] (line_info (No subprog. zero tailing line_info): OK BTF prog info raw test[8] (line_info (No subprog. nonzero tailing line_info)): OK BTF prog info raw test[9] (line_info (subprog)): OK BTF prog info raw test[10] (line_info (subprog + func_info)): OK BTF prog info raw test[11] (line_info (subprog. missing 1st func line info)): OK BTF prog info raw test[12] (line_info (subprog. missing 2nd func line info)): OK BTF prog info raw test[13] (line_info (subprog. unordered insn offset)): OK jit disabled: BTF prog info raw test[5] (line_info (No subprog)): not jited. skipping jited_line_info check. OK BTF prog info raw test[6] (line_info (No subprog. insn_off >= prog->len)): OK BTF prog info raw test[7] (line_info (No subprog. zero tailing line_info): not jited. skipping jited_line_info check. OK BTF prog info raw test[8] (line_info (No subprog. nonzero tailing line_info)): OK BTF prog info raw test[9] (line_info (subprog)): not jited. skipping jited_line_info check. OK BTF prog info raw test[10] (line_info (subprog + func_info)): not jited. skipping jited_line_info check. OK BTF prog info raw test[11] (line_info (subprog. missing 1st func line info)): OK BTF prog info raw test[12] (line_info (subprog. missing 2nd func line info)): OK BTF prog info raw test[13] (line_info (subprog. unordered insn offset)): OK Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 597 ++++++++++++++++++++++++++++++++- 1 file changed, 580 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 8d5777c89620..7707273736ac 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -108,7 +108,7 @@ static int __base_pr(const char *format, ...) #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f -#define MAX_NR_RAW_TYPES 1024 +#define MAX_NR_RAW_U32 1024 #define BTF_LOG_BUF_SIZE 65535 static struct args { @@ -137,7 +137,7 @@ struct btf_raw_test { const char *str_sec; const char *map_name; const char *err_str; - __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 raw_types[MAX_NR_RAW_U32]; __u32 str_sec_size; enum bpf_map_type map_type; __u32 key_size; @@ -156,6 +156,9 @@ struct btf_raw_test { int str_len_delta; }; +#define BTF_STR_SEC(str) \ + .str_sec = str, .str_sec_size = sizeof(str) + static struct btf_raw_test raw_tests[] = { /* enum E { * E0, @@ -1858,11 +1861,11 @@ static const char *get_next_str(const char *start, const char *end) return start < end - 1 ? start + 1 : NULL; } -static int get_type_sec_size(const __u32 *raw_types) +static int get_raw_sec_size(const __u32 *raw_types) { int i; - for (i = MAX_NR_RAW_TYPES - 1; + for (i = MAX_NR_RAW_U32 - 1; i >= 0 && raw_types[i] != BTF_END_RAW; i--) ; @@ -1874,7 +1877,8 @@ static void *btf_raw_create(const struct btf_header *hdr, const __u32 *raw_types, const char *str, unsigned int str_sec_size, - unsigned int *btf_size) + unsigned int *btf_size, + const char **ret_next_str) { const char *next_str = str, *end_str = str + str_sec_size; unsigned int size_needed, offset; @@ -1883,7 +1887,7 @@ static void *btf_raw_create(const struct btf_header *hdr, uint32_t *ret_types; void *raw_btf; - type_sec_size = get_type_sec_size(raw_types); + type_sec_size = get_raw_sec_size(raw_types); if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) return NULL; @@ -1922,6 +1926,8 @@ static void *btf_raw_create(const struct btf_header *hdr, ret_hdr->str_len = str_sec_size; *btf_size = size_needed; + if (ret_next_str) + *ret_next_str = next_str; return raw_btf; } @@ -1941,7 +1947,7 @@ static int do_test_raw(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -2018,7 +2024,7 @@ static int test_raw(void) struct btf_get_info_test { const char *descr; const char *str_sec; - __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 raw_types[MAX_NR_RAW_U32]; __u32 str_sec_size; int btf_size_delta; int (*special_test)(unsigned int test_num); @@ -2098,7 +2104,7 @@ static int test_big_btf_info(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -2182,7 +2188,7 @@ static int test_btf_id(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -2320,7 +2326,7 @@ static int do_test_get_info(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -2849,7 +2855,7 @@ static int do_test_pprint(void) fprintf(stderr, "%s......", test->descr); raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -3053,16 +3059,23 @@ static int test_pprint(void) return err; } +#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \ + (insn_off), (file_off), (line_off), ((line_num) << 10 | ((line_col) & 0x3ff)) + static struct prog_info_raw_test { const char *descr; const char *str_sec; - __u32 raw_types[MAX_NR_RAW_TYPES]; + const char *err_str; + __u32 raw_types[MAX_NR_RAW_U32]; __u32 str_sec_size; struct bpf_insn insns[MAX_INSNS]; __u32 prog_type; __u32 func_info[MAX_SUBPROGS][2]; __u32 func_info_rec_size; __u32 func_info_cnt; + __u32 line_info[MAX_NR_RAW_U32]; + __u32 line_info_rec_size; + __u32 nr_jited_ksyms; bool expected_prog_load_failure; } info_raw_tests[] = { { @@ -3093,6 +3106,7 @@ static struct prog_info_raw_test { .func_info = { {0, 5}, {3, 6} }, .func_info_rec_size = 8, .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, }, { @@ -3123,6 +3137,7 @@ static struct prog_info_raw_test { .func_info = { {0, 5}, {3, 6} }, .func_info_rec_size = 4, .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, .expected_prog_load_failure = true, }, @@ -3154,6 +3169,7 @@ static struct prog_info_raw_test { .func_info = { {0, 5}, {3, 6} }, .func_info_rec_size = 8, .func_info_cnt = 1, + .line_info = { BTF_END_RAW }, .expected_prog_load_failure = true, }, @@ -3185,6 +3201,278 @@ static struct prog_info_raw_test { .func_info = { {0, 5}, {2, 6} }, .func_info_rec_size = 8, .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. insn_off >= prog->len)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BPF_LINE_INFO_ENC(4, 0, 0, 5, 6), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .err_str = "line_info[4].insn_off", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog. zero tailing line_info", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 0, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. nonzero tailing line_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 1, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, + .err_str = "nonzero tailing record in line_info", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog + func_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {5, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog. missing 1st func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#0", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. missing 2nd func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#1", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. unordered insn offset)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "Invalid line_info[2].insn_off", .expected_prog_load_failure = true, }, @@ -3200,6 +3488,46 @@ static size_t probe_prog_length(const struct bpf_insn *fp) return len + 1; } +static __u32 *patch_name_tbd(const __u32 *raw_u32, + const char *str, __u32 str_off, + unsigned int str_sec_size, + unsigned int *ret_size) +{ + int i, raw_u32_size = get_raw_sec_size(raw_u32); + const char *end_str = str + str_sec_size; + const char *next_str = str + str_off; + __u32 *new_u32 = NULL; + + if (raw_u32_size == -1) + return ERR_PTR(-EINVAL); + + if (!raw_u32_size) { + *ret_size = 0; + return NULL; + } + + new_u32 = malloc(raw_u32_size); + if (!new_u32) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < raw_u32_size / sizeof(raw_u32[0]); i++) { + if (raw_u32[i] == NAME_TBD) { + next_str = get_next_str(next_str, end_str); + if (CHECK(!next_str, "Error in getting next_str\n")) { + free(new_u32); + return ERR_PTR(-EINVAL); + } + new_u32[i] = next_str - str; + next_str += strlen(next_str); + } else { + new_u32[i] = raw_u32[i]; + } + } + + *ret_size = raw_u32_size; + return new_u32; +} + static int test_get_finfo(const struct prog_info_raw_test *test, int prog_fd) { @@ -3283,18 +3611,229 @@ done: return err; } +static int test_get_linfo(const struct prog_info_raw_test *test, + const void *patched_linfo, + __u32 cnt, int prog_fd) +{ + __u32 i, info_len, nr_jited_ksyms, nr_jited_func_lens; + __u64 *jited_linfo = NULL, *jited_ksyms = NULL; + __u32 rec_size, jited_rec_size, jited_cnt; + struct bpf_line_info *linfo = NULL; + __u32 cur_func_len, ksyms_found; + struct bpf_prog_info info = {}; + __u32 *jited_func_lens = NULL; + __u64 cur_func_ksyms; + int err; + + jited_cnt = cnt; + rec_size = sizeof(*linfo); + jited_rec_size = sizeof(*jited_linfo); + if (test->nr_jited_ksyms) + nr_jited_ksyms = test->nr_jited_ksyms; + else + nr_jited_ksyms = test->func_info_cnt; + nr_jited_func_lens = nr_jited_ksyms; + + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "err:%d errno:%d", err, errno)) { + err = -1; + goto done; + } + + if (!info.jited_prog_len) { + /* prog is not jited */ + jited_cnt = 0; + nr_jited_ksyms = 1; + nr_jited_func_lens = 1; + } + + if (CHECK(info.line_info_cnt != cnt || + info.jited_line_info_cnt != jited_cnt || + info.nr_jited_ksyms != nr_jited_ksyms || + info.nr_jited_func_lens != nr_jited_func_lens || + (!info.line_info_cnt && info.jited_line_info_cnt), + "info: line_info_cnt:%u(expected:%u) jited_line_info_cnt:%u(expected:%u) nr_jited_ksyms:%u(expected:%u) nr_jited_func_lens:%u(expected:%u)", + info.line_info_cnt, cnt, + info.jited_line_info_cnt, jited_cnt, + info.nr_jited_ksyms, nr_jited_ksyms, + info.nr_jited_func_lens, nr_jited_func_lens)) { + err = -1; + goto done; + } + + if (CHECK(info.line_info_rec_size < 16 || + info.jited_line_info_rec_size < 8, + "info: line_info_rec_size:%u(userspace expected:%u) jited_line_info_rec_size:%u(userspace expected:%u)", + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size)) { + err = -1; + goto done; + } + + if (!cnt) + return 0; + + rec_size = info.line_info_rec_size; + jited_rec_size = info.jited_line_info_rec_size; + + memset(&info, 0, sizeof(info)); + + linfo = calloc(cnt, rec_size); + if (CHECK(!linfo, "!linfo")) { + err = -1; + goto done; + } + info.line_info_cnt = cnt; + info.line_info_rec_size = rec_size; + info.line_info = ptr_to_u64(linfo); + + if (jited_cnt) { + jited_linfo = calloc(jited_cnt, jited_rec_size); + jited_ksyms = calloc(nr_jited_ksyms, sizeof(*jited_ksyms)); + jited_func_lens = calloc(nr_jited_func_lens, + sizeof(*jited_func_lens)); + if (CHECK(!jited_linfo || !jited_ksyms || !jited_func_lens, + "jited_linfo:%p jited_ksyms:%p jited_func_lens:%p", + jited_linfo, jited_ksyms, jited_func_lens)) { + err = -1; + goto done; + } + + info.jited_line_info_cnt = jited_cnt; + info.jited_line_info_rec_size = jited_rec_size; + info.jited_line_info = ptr_to_u64(jited_linfo); + info.nr_jited_ksyms = nr_jited_ksyms; + info.jited_ksyms = ptr_to_u64(jited_ksyms); + info.nr_jited_func_lens = nr_jited_func_lens; + info.jited_func_lens = ptr_to_u64(jited_func_lens); + } + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + /* + * Only recheck the info.*line_info* fields. + * Other fields are not the concern of this test. + */ + if (CHECK(err == -1 || + !info.line_info || + info.line_info_cnt != cnt || + (jited_cnt && !info.jited_line_info) || + info.jited_line_info_cnt != jited_cnt || + info.line_info_rec_size != rec_size || + info.jited_line_info_rec_size != jited_rec_size, + "err:%d errno:%d info: line_info_cnt:%u(expected:%u) jited_line_info_cnt:%u(expected:%u) line_info_rec_size:%u(expected:%u) jited_linfo_rec_size:%u(expected:%u) line_info:%p jited_line_info:%p", + err, errno, + info.line_info_cnt, cnt, + info.jited_line_info_cnt, jited_cnt, + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size, + (void *)(long)info.line_info, + (void *)(long)info.jited_line_info)) { + err = -1; + goto done; + } + + CHECK(linfo[0].insn_off, "linfo[0].insn_off:%u", + linfo[0].insn_off); + for (i = 1; i < cnt; i++) { + const struct bpf_line_info *expected_linfo; + + expected_linfo = patched_linfo + (i * test->line_info_rec_size); + if (CHECK(linfo[i].insn_off <= linfo[i - 1].insn_off, + "linfo[%u].insn_off:%u <= linfo[%u].insn_off:%u", + i, linfo[i].insn_off, + i - 1, linfo[i - 1].insn_off)) { + err = -1; + goto done; + } + if (CHECK(linfo[i].file_name_off != expected_linfo->file_name_off || + linfo[i].line_off != expected_linfo->line_off || + linfo[i].line_col != expected_linfo->line_col, + "linfo[%u] (%u, %u, %u) != (%u, %u, %u)", i, + linfo[i].file_name_off, + linfo[i].line_off, + linfo[i].line_col, + expected_linfo->file_name_off, + expected_linfo->line_off, + expected_linfo->line_col)) { + err = -1; + goto done; + } + } + + if (!jited_cnt) { + fprintf(stderr, "not jited. skipping jited_line_info check. "); + err = 0; + goto done; + } + + if (CHECK(jited_linfo[0] != jited_ksyms[0], + "jited_linfo[0]:%lx != jited_ksyms[0]:%lx", + (long)(jited_linfo[0]), (long)(jited_ksyms[0]))) { + err = -1; + goto done; + } + + ksyms_found = 1; + cur_func_len = jited_func_lens[0]; + cur_func_ksyms = jited_ksyms[0]; + for (i = 1; i < jited_cnt; i++) { + if (ksyms_found < nr_jited_ksyms && + jited_linfo[i] == jited_ksyms[ksyms_found]) { + cur_func_ksyms = jited_ksyms[ksyms_found]; + cur_func_len = jited_ksyms[ksyms_found]; + ksyms_found++; + continue; + } + + if (CHECK(jited_linfo[i] <= jited_linfo[i - 1], + "jited_linfo[%u]:%lx <= jited_linfo[%u]:%lx", + i, (long)jited_linfo[i], + i - 1, (long)(jited_linfo[i - 1]))) { + err = -1; + goto done; + } + + if (CHECK(jited_linfo[i] - cur_func_ksyms > cur_func_len, + "jited_linfo[%u]:%lx - %lx > %u", + i, (long)jited_linfo[i], (long)cur_func_ksyms, + cur_func_len)) { + err = -1; + goto done; + } + } + + if (CHECK(ksyms_found != nr_jited_ksyms, + "ksyms_found:%u != nr_jited_ksyms:%u", + ksyms_found, nr_jited_ksyms)) { + err = -1; + goto done; + } + + err = 0; + +done: + free(linfo); + free(jited_linfo); + free(jited_ksyms); + free(jited_func_lens); + return err; +} + static int do_test_info_raw(unsigned int test_num) { const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; + unsigned int raw_btf_size, linfo_str_off, linfo_size; int btf_fd = -1, prog_fd = -1, err = 0; - unsigned int raw_btf_size; + void *raw_btf, *patched_linfo = NULL; + const char *ret_next_str; union bpf_attr attr = {}; - void *raw_btf; fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr); raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, &ret_next_str); if (!raw_btf) return -1; @@ -3314,6 +3853,16 @@ static int do_test_info_raw(unsigned int test_num) fprintf(stderr, "\n%s", btf_log_buf); *btf_log_buf = '\0'; + linfo_str_off = ret_next_str - test->str_sec; + patched_linfo = patch_name_tbd(test->line_info, + test->str_sec, linfo_str_off, + test->str_sec_size, &linfo_size); + if (IS_ERR(patched_linfo)) { + fprintf(stderr, "error in creating raw bpf_line_info"); + err = -1; + goto done; + } + attr.prog_type = test->prog_type; attr.insns = ptr_to_u64(test->insns); attr.insn_cnt = probe_prog_length(test->insns); @@ -3325,11 +3874,18 @@ static int do_test_info_raw(unsigned int test_num) attr.log_buf = ptr_to_u64(btf_log_buf); attr.log_size = BTF_LOG_BUF_SIZE; attr.log_level = 1; + if (linfo_size) { + attr.line_info_rec_size = test->line_info_rec_size; + attr.line_info = ptr_to_u64(patched_linfo); + attr.line_info_cnt = linfo_size / attr.line_info_rec_size; + } prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); err = ((prog_fd == -1) != test->expected_prog_load_failure); if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d", - prog_fd, test->expected_prog_load_failure, errno)) { + prog_fd, test->expected_prog_load_failure, errno) || + CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), + "expected err_str:%s", test->err_str)) { err = -1; goto done; } @@ -3341,6 +3897,10 @@ static int do_test_info_raw(unsigned int test_num) if (err) goto done; + err = test_get_linfo(test, patched_linfo, attr.line_info_cnt, prog_fd); + if (err) + goto done; + done: if (!err) fprintf(stderr, "OK"); @@ -3353,6 +3913,9 @@ done: if (prog_fd != -1) close(prog_fd); + if (!IS_ERR(patched_linfo)) + free(patched_linfo); + return err; } -- cgit v1.2.3 From f0187f0b17fad7439f510eff4d65606c9ea1190f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 Dec 2018 16:42:29 -0800 Subject: bpf: libbpf: Refactor and bug fix on the bpf_func_info loading logic This patch refactor and fix a bug in the libbpf's bpf_func_info loading logic. The bug fix and refactoring are targeting the same commit 2993e0515bb4 ("tools/bpf: add support to read .BTF.ext sections") which is in the bpf-next branch. 1) In bpf_load_program_xattr(), it should retry when errno == E2BIG regardless of log_buf and log_buf_sz. This patch fixes it. 2) btf_ext__reloc_init() and btf_ext__reloc() are essentially the same except btf_ext__reloc_init() always has insns_cnt == 0. Hence, btf_ext__reloc_init() is removed. btf_ext__reloc() is also renamed to btf_ext__reloc_func_info() to get ready for the line_info support in the next patch. 3) Consolidate func_info section logic from "btf_ext_parse_hdr()", "btf_ext_validate_func_info()" and "btf_ext__new()" to a new function "btf_ext_copy_func_info()" such that similar logic can be reused by the later libbpf's line_info patch. 4) The next line_info patch will store line_info_cnt instead of line_info_len in the bpf_program because the kernel is taking line_info_cnt also. It will save a few "len" to "cnt" conversions and will also save some function args. Hence, this patch also makes bpf_program to store func_info_cnt instead of func_info_len. 5) btf_ext depends on btf. e.g. the func_info's type_id in ".BTF.ext" is not useful when ".BTF" is absent. This patch only init the obj->btf_ext pointer after it has successfully init the obj->btf pointer. This can avoid always checking "obj->btf && obj->btf_ext" together for accessing ".BTF.ext". Checking "obj->btf_ext" alone will do. 6) Move "struct btf_sec_func_info" from btf.h to btf.c. There is no external usage outside btf.c. Fixes: 2993e0515bb4 ("tools/bpf: add support to read .BTF.ext sections") Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 7 +- tools/lib/bpf/btf.c | 191 +++++++++++++++++++------------------------------ tools/lib/bpf/btf.h | 17 ++--- tools/lib/bpf/libbpf.c | 139 +++++++++++++++++++++++------------ 4 files changed, 177 insertions(+), 177 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5c3be06bf0dd..9fbbc0ed5952 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -205,7 +205,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, min(name_len, BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); - if (fd >= 0 || !log_buf || !log_buf_sz) + if (fd >= 0) return fd; /* After bpf_prog_load, the kernel may modify certain attributes @@ -244,10 +244,13 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); - if (fd >= 0 || !log_buf || !log_buf_sz) + if (fd >= 0) goto done; } + if (!log_buf || !log_buf_sz) + goto done; + /* Try again with log */ attr.log_buf = ptr_to_u64(log_buf); attr.log_size = log_buf_sz; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 85d6446cf832..aa4fa02b13fc 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -43,6 +43,13 @@ struct btf_ext { __u32 func_info_len; }; +struct btf_sec_func_info { + __u32 sec_name_off; + __u32 num_func_info; + /* Followed by num_func_info number of bpf func_info records */ + __u8 data[0]; +}; + /* The minimum bpf_func_info checked by the loader */ struct bpf_func_info_min { __u32 insn_off; @@ -479,41 +486,66 @@ exit_free: return err; } -static int btf_ext_validate_func_info(const void *finfo, __u32 size, - btf_print_fn_t err_log) +static int btf_ext_copy_func_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + btf_print_fn_t err_log) { - int sec_hdrlen = sizeof(struct btf_sec_func_info); - __u32 size_left, num_records, record_size; + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; const struct btf_sec_func_info *sinfo; - __u64 total_record_size; + __u32 info_left, record_size; + /* The start of the info sec (including the __u32 record_size). */ + const void *info; + + /* data and data_size do not include btf_ext_header from now on */ + data = data + hdr->hdr_len; + data_size -= hdr->hdr_len; + + if (hdr->func_info_off & 0x03) { + elog("BTF.ext func_info section is not aligned to 4 bytes\n"); + return -EINVAL; + } + + if (data_size < hdr->func_info_off || + hdr->func_info_len > data_size - hdr->func_info_off) { + elog("func_info section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + hdr->func_info_off, hdr->func_info_len); + return -EINVAL; + } + + info = data + hdr->func_info_off; + info_left = hdr->func_info_len; /* At least a func_info record size */ - if (size < sizeof(__u32)) { + if (info_left < sizeof(__u32)) { elog("BTF.ext func_info record size not found"); return -EINVAL; } - /* The record size needs to meet below minimum standard */ - record_size = *(__u32 *)finfo; + /* The record size needs to meet the minimum standard */ + record_size = *(__u32 *)info; if (record_size < sizeof(struct bpf_func_info_min) || - record_size % sizeof(__u32)) { + record_size & 0x03) { elog("BTF.ext func_info invalid record size"); return -EINVAL; } - sinfo = finfo + sizeof(__u32); - size_left = size - sizeof(__u32); + sinfo = info + sizeof(__u32); + info_left -= sizeof(__u32); /* If no func_info records, return failure now so .BTF.ext * won't be used. */ - if (!size_left) { + if (!info_left) { elog("BTF.ext no func info records"); return -EINVAL; } - while (size_left) { - if (size_left < sec_hdrlen) { + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_sec_func_info); + __u64 total_record_size; + __u32 num_records; + + if (info_left < sec_hdrlen) { elog("BTF.ext func_info header not found"); return -EINVAL; } @@ -526,15 +558,23 @@ static int btf_ext_validate_func_info(const void *finfo, __u32 size, total_record_size = sec_hdrlen + (__u64)num_records * record_size; - if (size_left < total_record_size) { + if (info_left < total_record_size) { elog("incorrect BTF.ext num_func_info"); return -EINVAL; } - size_left -= total_record_size; + info_left -= total_record_size; sinfo = (void *)sinfo + total_record_size; } + btf_ext->func_info_len = hdr->func_info_len - sizeof(__u32); + btf_ext->func_info_rec_size = record_size; + btf_ext->func_info = malloc(btf_ext->func_info_len); + if (!btf_ext->func_info) + return -ENOMEM; + memcpy(btf_ext->func_info, info + sizeof(__u32), + btf_ext->func_info_len); + return 0; } @@ -542,8 +582,6 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, btf_print_fn_t err_log) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - __u32 meta_left, last_func_info_pos; - void *finfo; if (data_size < offsetof(struct btf_ext_header, func_info_off) || data_size < hdr->hdr_len) { @@ -566,34 +604,12 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, return -ENOTSUP; } - meta_left = data_size - hdr->hdr_len; - if (!meta_left) { + if (data_size == hdr->hdr_len) { elog("BTF.ext has no data\n"); return -EINVAL; } - if (meta_left < hdr->func_info_off) { - elog("Invalid BTF.ext func_info section offset:%u\n", - hdr->func_info_off); - return -EINVAL; - } - - if (hdr->func_info_off & 0x03) { - elog("BTF.ext func_info section is not aligned to 4 bytes\n"); - return -EINVAL; - } - - last_func_info_pos = hdr->hdr_len + hdr->func_info_off + - hdr->func_info_len; - if (last_func_info_pos > data_size) { - elog("Invalid BTF.ext func_info section size:%u\n", - hdr->func_info_len); - return -EINVAL; - } - - finfo = data + hdr->hdr_len + hdr->func_info_off; - return btf_ext_validate_func_info(finfo, hdr->func_info_len, - err_log); + return 0; } void btf_ext__free(struct btf_ext *btf_ext) @@ -607,10 +623,7 @@ void btf_ext__free(struct btf_ext *btf_ext) struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) { - const struct btf_ext_header *hdr; struct btf_ext *btf_ext; - void *org_fdata, *fdata; - __u32 hdrlen, size_u32; int err; err = btf_ext_parse_hdr(data, size, err_log); @@ -621,81 +634,18 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) if (!btf_ext) return ERR_PTR(-ENOMEM); - hdr = (const struct btf_ext_header *)data; - hdrlen = hdr->hdr_len; - size_u32 = sizeof(__u32); - fdata = malloc(hdr->func_info_len - size_u32); - if (!fdata) { - free(btf_ext); - return ERR_PTR(-ENOMEM); + err = btf_ext_copy_func_info(btf_ext, data, size, err_log); + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); } - /* remember record size and copy rest of func_info data */ - org_fdata = data + hdrlen + hdr->func_info_off; - btf_ext->func_info_rec_size = *(__u32 *)org_fdata; - memcpy(fdata, org_fdata + size_u32, hdr->func_info_len - size_u32); - btf_ext->func_info = fdata; - btf_ext->func_info_len = hdr->func_info_len - size_u32; - return btf_ext; } -int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, - const char *sec_name, void **func_info, - __u32 *func_info_rec_size, __u32 *func_info_len) -{ - __u32 sec_hdrlen = sizeof(struct btf_sec_func_info); - __u32 i, record_size, records_len; - struct btf_sec_func_info *sinfo; - const char *info_sec_name; - __s64 remain_len; - void *data; - - record_size = btf_ext->func_info_rec_size; - sinfo = btf_ext->func_info; - remain_len = btf_ext->func_info_len; - - while (remain_len > 0) { - records_len = sinfo->num_func_info * record_size; - info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); - if (strcmp(info_sec_name, sec_name)) { - remain_len -= sec_hdrlen + records_len; - sinfo = (void *)sinfo + sec_hdrlen + records_len; - continue; - } - - data = malloc(records_len); - if (!data) - return -ENOMEM; - - memcpy(data, sinfo->data, records_len); - - /* adjust the insn_off, the data in .BTF.ext is - * the actual byte offset, and the kernel expects - * the offset in term of bpf_insn. - * - * adjust the insn offset only, the rest data will - * be passed to kernel. - */ - for (i = 0; i < sinfo->num_func_info; i++) { - struct bpf_func_info_min *record; - - record = data + i * record_size; - record->insn_off /= sizeof(struct bpf_insn); - } - - *func_info = data; - *func_info_len = records_len; - *func_info_rec_size = record_size; - return 0; - } - - return -EINVAL; -} - -int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *func_info_len) +int btf_ext__reloc_func_info(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt) { __u32 sec_hdrlen = sizeof(struct btf_sec_func_info); __u32 i, record_size, existing_flen, records_len; @@ -716,7 +666,7 @@ int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, continue; } - existing_flen = *func_info_len; + existing_flen = (*cnt) * record_size; data = realloc(*func_info, existing_flen + records_len); if (!data) return -ENOMEM; @@ -734,9 +684,14 @@ int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, insns_cnt; } *func_info = data; - *func_info_len = existing_flen + records_len; + *cnt += sinfo->num_func_info; return 0; } - return -EINVAL; + return -ENOENT; +} + +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->func_info_rec_size; } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 5336b2f37293..936177a538cd 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -53,13 +53,6 @@ struct btf_ext_header { __u32 func_info_len; }; -struct btf_sec_func_info { - __u32 sec_name_off; - __u32 num_func_info; - /* Followed by num_func_info number of bpf func_info records */ - __u8 data[0]; -}; - typedef int (*btf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); @@ -77,12 +70,10 @@ LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); void btf_ext__free(struct btf_ext *btf_ext); -int btf_ext__reloc_init(struct btf *btf, struct btf_ext *btf_ext, - const char *sec_name, void **func_info, - __u32 *func_info_rec_size, __u32 *func_info_len); -int btf_ext__reloc(struct btf *btf, struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, void **func_info, - __u32 *func_info_len); +int btf_ext__reloc_func_info(struct btf *btf, struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *func_info_len); +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 59b748ebd15f..4ea3368bf803 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -167,7 +167,7 @@ struct bpf_program { int btf_fd; void *func_info; __u32 func_info_rec_size; - __u32 func_info_len; + __u32 func_info_cnt; struct bpf_capabilities *caps; }; @@ -779,6 +779,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; + Elf_Data *btf_ext_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; @@ -841,14 +842,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->btf = NULL; } } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { - obj->btf_ext = btf_ext__new(data->d_buf, data->d_size, - __pr_debug); - if (IS_ERR(obj->btf_ext)) { - pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", - BTF_EXT_ELF_SEC, - PTR_ERR(obj->btf_ext)); - obj->btf_ext = NULL; - } + btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -910,6 +904,22 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } + if (btf_ext_data) { + if (!obj->btf) { + pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", + BTF_EXT_ELF_SEC, BTF_ELF_SEC); + } else { + obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, + btf_ext_data->d_size, + __pr_debug); + if (IS_ERR(obj->btf_ext)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, + PTR_ERR(obj->btf_ext)); + obj->btf_ext = NULL; + } + } + } if (obj->efile.maps_shndx >= 0) { err = bpf_object__init_maps(obj, flags); if (err) @@ -1275,6 +1285,69 @@ bpf_object__create_maps(struct bpf_object *obj) return 0; } +static int +check_btf_ext_reloc_err(struct bpf_program *prog, int err, + void *btf_prog_info, const char *info_name) +{ + if (err != -ENOENT) { + pr_warning("Error in loading %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */ + + if (btf_prog_info) { + /* + * Some info has already been found but has problem + * in the last btf_ext reloc. Must have to error + * out. + */ + pr_warning("Error in relocating %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* + * Have problem loading the very first info. Ignore + * the rest. + */ + pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", + info_name, prog->section_name, info_name); + return 0; +} + +static int +bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, + const char *section_name, __u32 insn_offset) +{ + int err; + + if (!insn_offset || prog->func_info) { + /* + * !insn_offset => main program + * + * For sub prog, the main program's func_info has to + * be loaded first (i.e. prog->func_info != NULL) + */ + err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->func_info, + &prog->func_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->func_info, + "bpf_func_info"); + + prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); + } + + if (!insn_offset) + prog->btf_fd = btf__fd(obj->btf); + + return 0; +} + static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) @@ -1306,17 +1379,12 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, return -ENOMEM; } - if (obj->btf && obj->btf_ext) { - err = btf_ext__reloc(obj->btf, obj->btf_ext, - text->section_name, - prog->insns_cnt, - &prog->func_info, - &prog->func_info_len); - if (err) { - pr_warning("error in btf_ext__reloc for sec %s\n", - text->section_name); + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + text->section_name, + prog->insns_cnt); + if (err) return err; - } } memcpy(new_insn + prog->insns_cnt, text->insns, @@ -1341,18 +1409,11 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) if (!prog) return 0; - if (obj->btf && obj->btf_ext) { - err = btf_ext__reloc_init(obj->btf, obj->btf_ext, - prog->section_name, - &prog->func_info, - &prog->func_info_rec_size, - &prog->func_info_len); - if (err) { - pr_warning("err in btf_ext__reloc_init for sec %s\n", - prog->section_name); + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + prog->section_name, 0); + if (err) return err; - } - prog->btf_fd = btf__fd(obj->btf); } if (!prog->reloc_desc) @@ -1444,8 +1505,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) static int load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd, - __u32 func_info_cnt) + char *license, __u32 kern_version, int *pfd) { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; @@ -1465,8 +1525,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; - load_attr.func_info_cnt = func_info_cnt; - + load_attr.func_info_cnt = prog->func_info_cnt; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; @@ -1523,14 +1582,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version) { - __u32 func_info_cnt; int err = 0, fd, i; - if (prog->func_info_len == 0) - func_info_cnt = 0; - else - func_info_cnt = prog->func_info_len / prog->func_info_rec_size; - if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { pr_warning("Internal error: can't load program '%s'\n", @@ -1553,8 +1606,7 @@ bpf_program__load(struct bpf_program *prog, prog->section_name, prog->instances.nr); } err = load_program(prog, prog->insns, prog->insns_cnt, - license, kern_version, &fd, - func_info_cnt); + license, kern_version, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1584,8 +1636,7 @@ bpf_program__load(struct bpf_program *prog, err = load_program(prog, result.new_insn_ptr, result.new_insn_cnt, - license, kern_version, &fd, - func_info_cnt); + license, kern_version, &fd); if (err) { pr_warning("Loading the %dth instance of program '%s' failed\n", -- cgit v1.2.3 From 3d65014146c69bbc4d2947f60dbd722d352cdc46 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 Dec 2018 16:42:31 -0800 Subject: bpf: libbpf: Add btf_line_info support to libbpf This patch adds bpf_line_info support to libbpf: 1) Parsing the line_info sec from ".BTF.ext" 2) Relocating the line_info. If the main prog *_info relocation fails, it will ignore the remaining subprog line_info and continue. If the subprog *_info relocation fails, it will bail out. 3) BPF_PROG_LOAD a prog with line_info Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 86 +++++++++++++------- tools/lib/bpf/bpf.h | 3 + tools/lib/bpf/btf.c | 209 +++++++++++++++++++++++++++++++++++-------------- tools/lib/bpf/btf.h | 10 ++- tools/lib/bpf/libbpf.c | 20 +++++ 5 files changed, 239 insertions(+), 89 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9fbbc0ed5952..3caaa3428774 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -173,11 +173,36 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, -1); } +static void * +alloc_zero_tailing_info(const void *orecord, __u32 cnt, + __u32 actual_rec_size, __u32 expected_rec_size) +{ + __u64 info_len = actual_rec_size * cnt; + void *info, *nrecord; + int i; + + info = malloc(info_len); + if (!info) + return NULL; + + /* zero out bytes kernel does not understand */ + nrecord = info; + for (i = 0; i < cnt; i++) { + memcpy(nrecord, orecord, expected_rec_size); + memset(nrecord + expected_rec_size, 0, + actual_rec_size - expected_rec_size); + orecord += actual_rec_size; + nrecord += actual_rec_size; + } + + return info; +} + int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz) { + void *finfo = NULL, *linfo = NULL; union bpf_attr attr; - void *finfo = NULL; __u32 name_len; int fd; @@ -201,6 +226,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.func_info_rec_size = load_attr->func_info_rec_size; attr.func_info_cnt = load_attr->func_info_cnt; attr.func_info = ptr_to_u64(load_attr->func_info); + attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_cnt = load_attr->line_info_cnt; + attr.line_info = ptr_to_u64(load_attr->line_info); memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); @@ -212,36 +240,35 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, * to give user space a hint how to deal with loading failure. * Check to see whether we can make some changes and load again. */ - if (errno == E2BIG && attr.func_info_cnt && - attr.func_info_rec_size < load_attr->func_info_rec_size) { - __u32 actual_rec_size = load_attr->func_info_rec_size; - __u32 expected_rec_size = attr.func_info_rec_size; - __u32 finfo_cnt = load_attr->func_info_cnt; - __u64 finfo_len = actual_rec_size * finfo_cnt; - const void *orecord; - void *nrecord; - int i; - - finfo = malloc(finfo_len); - if (!finfo) - /* further try with log buffer won't help */ - return fd; - - /* zero out bytes kernel does not understand */ - orecord = load_attr->func_info; - nrecord = finfo; - for (i = 0; i < load_attr->func_info_cnt; i++) { - memcpy(nrecord, orecord, expected_rec_size); - memset(nrecord + expected_rec_size, 0, - actual_rec_size - expected_rec_size); - orecord += actual_rec_size; - nrecord += actual_rec_size; + while (errno == E2BIG && (!finfo || !linfo)) { + if (!finfo && attr.func_info_cnt && + attr.func_info_rec_size < load_attr->func_info_rec_size) { + /* try with corrected func info records */ + finfo = alloc_zero_tailing_info(load_attr->func_info, + load_attr->func_info_cnt, + load_attr->func_info_rec_size, + attr.func_info_rec_size); + if (!finfo) + goto done; + + attr.func_info = ptr_to_u64(finfo); + attr.func_info_rec_size = load_attr->func_info_rec_size; + } else if (!linfo && attr.line_info_cnt && + attr.line_info_rec_size < + load_attr->line_info_rec_size) { + linfo = alloc_zero_tailing_info(load_attr->line_info, + load_attr->line_info_cnt, + load_attr->line_info_rec_size, + attr.line_info_rec_size); + if (!linfo) + goto done; + + attr.line_info = ptr_to_u64(linfo); + attr.line_info_rec_size = load_attr->line_info_rec_size; + } else { + break; } - /* try with corrected func info records */ - attr.func_info = ptr_to_u64(finfo); - attr.func_info_rec_size = load_attr->func_info_rec_size; - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); if (fd >= 0) @@ -259,6 +286,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); done: free(finfo); + free(linfo); return fd; } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 098e6f793b76..8f09de482839 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -82,6 +82,9 @@ struct bpf_load_program_attr { __u32 func_info_rec_size; const void *func_info; __u32 func_info_cnt; + __u32 line_info_rec_size; + const void *line_info; + __u32 line_info_cnt; }; /* Flags to direct loading requirements */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index aa4fa02b13fc..d682d3b8f7b9 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -37,16 +37,26 @@ struct btf { int fd; }; +struct btf_ext_info { + /* + * info points to a deep copy of the individual info section + * (e.g. func_info and line_info) from the .BTF.ext. + * It does not include the __u32 rec_size. + */ + void *info; + __u32 rec_size; + __u32 len; +}; + struct btf_ext { - void *func_info; - __u32 func_info_rec_size; - __u32 func_info_len; + struct btf_ext_info func_info; + struct btf_ext_info line_info; }; -struct btf_sec_func_info { +struct btf_ext_info_sec { __u32 sec_name_off; - __u32 num_func_info; - /* Followed by num_func_info number of bpf func_info records */ + __u32 num_info; + /* Followed by num_info * record_size number of bytes */ __u8 data[0]; }; @@ -56,6 +66,14 @@ struct bpf_func_info_min { __u32 type_id; }; +/* The minimum bpf_line_info checked by the loader */ +struct bpf_line_info_min { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -486,12 +504,22 @@ exit_free: return err; } -static int btf_ext_copy_func_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - btf_print_fn_t err_log) +struct btf_ext_sec_copy_param { + __u32 off; + __u32 len; + __u32 min_rec_size; + struct btf_ext_info *ext_info; + const char *desc; +}; + +static int btf_ext_copy_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + struct btf_ext_sec_copy_param *ext_sec, + btf_print_fn_t err_log) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - const struct btf_sec_func_info *sinfo; + const struct btf_ext_info_sec *sinfo; + struct btf_ext_info *ext_info; __u32 info_left, record_size; /* The start of the info sec (including the __u32 record_size). */ const void *info; @@ -500,66 +528,69 @@ static int btf_ext_copy_func_info(struct btf_ext *btf_ext, data = data + hdr->hdr_len; data_size -= hdr->hdr_len; - if (hdr->func_info_off & 0x03) { - elog("BTF.ext func_info section is not aligned to 4 bytes\n"); + if (ext_sec->off & 0x03) { + elog(".BTF.ext %s section is not aligned to 4 bytes\n", + ext_sec->desc); return -EINVAL; } - if (data_size < hdr->func_info_off || - hdr->func_info_len > data_size - hdr->func_info_off) { - elog("func_info section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", - hdr->func_info_off, hdr->func_info_len); + if (data_size < ext_sec->off || + ext_sec->len > data_size - ext_sec->off) { + elog("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + ext_sec->desc, ext_sec->off, ext_sec->len); return -EINVAL; } - info = data + hdr->func_info_off; - info_left = hdr->func_info_len; + info = data + ext_sec->off; + info_left = ext_sec->len; - /* At least a func_info record size */ + /* At least a record size */ if (info_left < sizeof(__u32)) { - elog("BTF.ext func_info record size not found"); + elog(".BTF.ext %s record size not found\n", ext_sec->desc); return -EINVAL; } /* The record size needs to meet the minimum standard */ record_size = *(__u32 *)info; - if (record_size < sizeof(struct bpf_func_info_min) || + if (record_size < ext_sec->min_rec_size || record_size & 0x03) { - elog("BTF.ext func_info invalid record size"); + elog("%s section in .BTF.ext has invalid record size %u\n", + ext_sec->desc, record_size); return -EINVAL; } sinfo = info + sizeof(__u32); info_left -= sizeof(__u32); - /* If no func_info records, return failure now so .BTF.ext - * won't be used. - */ + /* If no records, return failure now so .BTF.ext won't be used. */ if (!info_left) { - elog("BTF.ext no func info records"); + elog("%s section in .BTF.ext has no records", ext_sec->desc); return -EINVAL; } while (info_left) { - unsigned int sec_hdrlen = sizeof(struct btf_sec_func_info); + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); __u64 total_record_size; __u32 num_records; if (info_left < sec_hdrlen) { - elog("BTF.ext func_info header not found"); + elog("%s section header is not found in .BTF.ext\n", + ext_sec->desc); return -EINVAL; } - num_records = sinfo->num_func_info; + num_records = sinfo->num_info; if (num_records == 0) { - elog("incorrect BTF.ext num_func_info"); + elog("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); return -EINVAL; } total_record_size = sec_hdrlen + (__u64)num_records * record_size; if (info_left < total_record_size) { - elog("incorrect BTF.ext num_func_info"); + elog("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); return -EINVAL; } @@ -567,17 +598,49 @@ static int btf_ext_copy_func_info(struct btf_ext *btf_ext, sinfo = (void *)sinfo + total_record_size; } - btf_ext->func_info_len = hdr->func_info_len - sizeof(__u32); - btf_ext->func_info_rec_size = record_size; - btf_ext->func_info = malloc(btf_ext->func_info_len); - if (!btf_ext->func_info) + ext_info = ext_sec->ext_info; + ext_info->len = ext_sec->len - sizeof(__u32); + ext_info->rec_size = record_size; + ext_info->info = malloc(ext_info->len); + if (!ext_info->info) return -ENOMEM; - memcpy(btf_ext->func_info, info + sizeof(__u32), - btf_ext->func_info_len); + memcpy(ext_info->info, info + sizeof(__u32), ext_info->len); return 0; } +static int btf_ext_copy_func_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + struct btf_ext_sec_copy_param param = { + .off = hdr->func_info_off, + .len = hdr->func_info_len, + .min_rec_size = sizeof(struct bpf_func_info_min), + .ext_info = &btf_ext->func_info, + .desc = "func_info" + }; + + return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); +} + +static int btf_ext_copy_line_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + struct btf_ext_sec_copy_param param = { + .off = hdr->line_info_off, + .len = hdr->line_info_len, + .min_rec_size = sizeof(struct bpf_line_info_min), + .ext_info = &btf_ext->line_info, + .desc = "line_info", + }; + + return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); +} + static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, btf_print_fn_t err_log) { @@ -617,7 +680,8 @@ void btf_ext__free(struct btf_ext *btf_ext) if (!btf_ext) return; - free(btf_ext->func_info); + free(btf_ext->func_info.info); + free(btf_ext->line_info.info); free(btf_ext); } @@ -640,25 +704,32 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) return ERR_PTR(err); } + err = btf_ext_copy_line_info(btf_ext, data, size, err_log); + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); + } + return btf_ext; } -int btf_ext__reloc_func_info(struct btf *btf, struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *cnt) +static int btf_ext_reloc_info(const struct btf *btf, + const struct btf_ext_info *ext_info, + const char *sec_name, __u32 insns_cnt, + void **info, __u32 *cnt) { - __u32 sec_hdrlen = sizeof(struct btf_sec_func_info); - __u32 i, record_size, existing_flen, records_len; - struct btf_sec_func_info *sinfo; + __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, record_size, existing_len, records_len; + struct btf_ext_info_sec *sinfo; const char *info_sec_name; __u64 remain_len; void *data; - record_size = btf_ext->func_info_rec_size; - sinfo = btf_ext->func_info; - remain_len = btf_ext->func_info_len; + record_size = ext_info->rec_size; + sinfo = ext_info->info; + remain_len = ext_info->len; while (remain_len > 0) { - records_len = sinfo->num_func_info * record_size; + records_len = sinfo->num_info * record_size; info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); if (strcmp(info_sec_name, sec_name)) { remain_len -= sec_hdrlen + records_len; @@ -666,32 +737,52 @@ int btf_ext__reloc_func_info(struct btf *btf, struct btf_ext *btf_ext, continue; } - existing_flen = (*cnt) * record_size; - data = realloc(*func_info, existing_flen + records_len); + existing_len = (*cnt) * record_size; + data = realloc(*info, existing_len + records_len); if (!data) return -ENOMEM; - memcpy(data + existing_flen, sinfo->data, records_len); + memcpy(data + existing_len, sinfo->data, records_len); /* adjust insn_off only, the rest data will be passed * to the kernel. */ - for (i = 0; i < sinfo->num_func_info; i++) { - struct bpf_func_info_min *record; + for (i = 0; i < sinfo->num_info; i++) { + __u32 *insn_off; - record = data + existing_flen + i * record_size; - record->insn_off = - record->insn_off / sizeof(struct bpf_insn) + + insn_off = data + existing_len + (i * record_size); + *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt; } - *func_info = data; - *cnt += sinfo->num_func_info; + *info = data; + *cnt += sinfo->num_info; return 0; } return -ENOENT; } +int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name, + insns_cnt, func_info, cnt); +} + +int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name, + insns_cnt, line_info, cnt); +} + __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) { - return btf_ext->func_info_rec_size; + return btf_ext->func_info.rec_size; +} + +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->line_info.rec_size; } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 936177a538cd..b0610dcdae6b 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -51,6 +51,8 @@ struct btf_ext_header { /* All offsets are in bytes relative to the end of this header */ __u32 func_info_off; __u32 func_info_len; + __u32 line_info_off; + __u32 line_info_len; }; typedef int (*btf_print_fn_t)(const char *, ...) @@ -70,10 +72,16 @@ LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); void btf_ext__free(struct btf_ext *btf_ext); -int btf_ext__reloc_func_info(struct btf *btf, struct btf_ext *btf_ext, +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **func_info, __u32 *func_info_len); +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4ea3368bf803..e2bc75ee1614 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -170,6 +170,10 @@ struct bpf_program { __u32 func_info_cnt; struct bpf_capabilities *caps; + + void *line_info; + __u32 line_info_rec_size; + __u32 line_info_cnt; }; struct bpf_map { @@ -1342,6 +1346,19 @@ bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); } + if (!insn_offset || prog->line_info) { + err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->line_info, + &prog->line_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->line_info, + "bpf_line_info"); + + prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); + } + if (!insn_offset) prog->btf_fd = btf__fd(obj->btf); @@ -1526,6 +1543,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; + load_attr.line_info = prog->line_info; + load_attr.line_info_rec_size = prog->line_info_rec_size; + load_attr.line_info_cnt = prog->line_info_cnt; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; -- cgit v1.2.3 From b053b439b72ad152257ecc3f71cfb4c619b0137e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 Dec 2018 16:42:32 -0800 Subject: bpf: libbpf: bpftool: Print bpf_line_info during prog dump This patch adds print bpf_line_info function in 'prog dump jitted' and 'prog dump xlated': [root@arch-fb-vm1 bpf]# ~/devshare/fb-kernel/linux/tools/bpf/bpftool/bpftool prog dump jited pinned /sys/fs/bpf/test_btf_haskv [...] int test_long_fname_2(struct dummy_tracepoint_args * arg): bpf_prog_44a040bf25481309_test_long_fname_2: ; static int test_long_fname_2(struct dummy_tracepoint_args *arg) 0: push %rbp 1: mov %rsp,%rbp 4: sub $0x30,%rsp b: sub $0x28,%rbp f: mov %rbx,0x0(%rbp) 13: mov %r13,0x8(%rbp) 17: mov %r14,0x10(%rbp) 1b: mov %r15,0x18(%rbp) 1f: xor %eax,%eax 21: mov %rax,0x20(%rbp) 25: xor %esi,%esi ; int key = 0; 27: mov %esi,-0x4(%rbp) ; if (!arg->sock) 2a: mov 0x8(%rdi),%rdi ; if (!arg->sock) 2e: cmp $0x0,%rdi 32: je 0x0000000000000070 34: mov %rbp,%rsi ; counts = bpf_map_lookup_elem(&btf_map, &key); 37: add $0xfffffffffffffffc,%rsi 3b: movabs $0xffff8881139d7480,%rdi 45: add $0x110,%rdi 4c: mov 0x0(%rsi),%eax 4f: cmp $0x4,%rax 53: jae 0x000000000000005e 55: shl $0x3,%rax 59: add %rdi,%rax 5c: jmp 0x0000000000000060 5e: xor %eax,%eax ; if (!counts) 60: cmp $0x0,%rax 64: je 0x0000000000000070 ; counts->v6++; 66: mov 0x4(%rax),%edi 69: add $0x1,%rdi 6d: mov %edi,0x4(%rax) 70: mov 0x0(%rbp),%rbx 74: mov 0x8(%rbp),%r13 78: mov 0x10(%rbp),%r14 7c: mov 0x18(%rbp),%r15 80: add $0x28,%rbp 84: leaveq 85: retq [...] With linum: [root@arch-fb-vm1 bpf]# ~/devshare/fb-kernel/linux/tools/bpf/bpftool/bpftool prog dump jited pinned /sys/fs/bpf/test_btf_haskv linum int _dummy_tracepoint(struct dummy_tracepoint_args * arg): bpf_prog_b07ccb89267cf242__dummy_tracepoint: ; return test_long_fname_1(arg); [file:/data/users/kafai/fb-kernel/linux/tools/testing/selftests/bpf/test_btf_haskv.c line_num:54 line_col:9] 0: push %rbp 1: mov %rsp,%rbp 4: sub $0x28,%rsp b: sub $0x28,%rbp f: mov %rbx,0x0(%rbp) 13: mov %r13,0x8(%rbp) 17: mov %r14,0x10(%rbp) 1b: mov %r15,0x18(%rbp) 1f: xor %eax,%eax 21: mov %rax,0x20(%rbp) 25: callq 0x000000000000851e ; return test_long_fname_1(arg); [file:/data/users/kafai/fb-kernel/linux/tools/testing/selftests/bpf/test_btf_haskv.c line_num:54 line_col:2] 2a: xor %eax,%eax 2c: mov 0x0(%rbp),%rbx 30: mov 0x8(%rbp),%r13 34: mov 0x10(%rbp),%r14 38: mov 0x18(%rbp),%r15 3c: add $0x28,%rbp 40: leaveq 41: retq [...] Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 16 +- tools/bpf/bpftool/bash-completion/bpftool | 6 +- tools/bpf/bpftool/btf_dumper.c | 64 ++++++ tools/bpf/bpftool/jit_disasm.c | 23 ++- tools/bpf/bpftool/main.h | 23 ++- tools/bpf/bpftool/prog.c | 100 ++++++++- tools/bpf/bpftool/xlated_dumper.c | 30 ++- tools/bpf/bpftool/xlated_dumper.h | 7 +- tools/lib/bpf/Build | 2 +- tools/lib/bpf/bpf_prog_linfo.c | 253 +++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 13 ++ tools/lib/bpf/libbpf.map | 4 + 12 files changed, 516 insertions(+), 25 deletions(-) create mode 100644 tools/lib/bpf/bpf_prog_linfo.c (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 5524b6dccd85..7c30731a9b73 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -22,8 +22,8 @@ MAP COMMANDS ============= | **bpftool** **prog { show | list }** [*PROG*] -| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}] -| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] +| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}] +| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}] | **bpftool** **prog pin** *PROG* *FILE* | **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] @@ -56,7 +56,7 @@ DESCRIPTION Output will start with program ID followed by program type and zero or more named attributes (depending on kernel version). - **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }] + **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] Dump eBPF instructions of the program from the kernel. By default, eBPF will be disassembled and printed to standard output in human-readable format. In this case, **opcodes** @@ -69,13 +69,21 @@ DESCRIPTION built instead, and eBPF instructions will be presented with CFG in DOT format, on standard output. - **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** }] + If the prog has line_info available, the source line will + be displayed by default. If **linum** is specified, + the filename, line number and line column will also be + displayed on top of the source line. + **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] Dump jited image (host machine code) of the program. If *FILE* is specified image will be written to a file, otherwise it will be disassembled and printed to stdout. **opcodes** controls if raw opcodes will be printed. + If the prog has line_info available, the source line will + be displayed by default. If **linum** is specified, + the filename, line number and line column will also be + displayed on top of the source line. **bpftool prog pin** *PROG* *FILE* Pin program *PROG* as *FILE*. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 44c189ba072a..a57febd6abb1 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -191,7 +191,7 @@ _bpftool() # Deal with simplest keywords case $prev in - help|hex|opcodes|visual) + help|hex|opcodes|visual|linum) return 0 ;; tag) @@ -278,10 +278,10 @@ _bpftool() *) _bpftool_once_attr 'file' if _bpftool_search_list 'xlated'; then - COMPREPLY+=( $( compgen -W 'opcodes visual' -- \ + COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ "$cur" ) ) else - COMPREPLY+=( $( compgen -W 'opcodes' -- \ + COMPREPLY+=( $( compgen -W 'opcodes linum' -- \ "$cur" ) ) fi return 0 diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index c3fd3a7cb787..dbbf6ece6760 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -385,3 +385,67 @@ void btf_dumper_type_only(const struct btf *btf, __u32 type_id, char *func_sig, if (err < 0) func_sig[0] = '\0'; } + +static const char *ltrim(const char *s) +{ + while (isspace(*s)) + s++; + + return s; +} + +void btf_dump_linfo_plain(const struct btf *btf, + const struct bpf_line_info *linfo, + const char *prefix, bool linum) +{ + const char *line = btf__name_by_offset(btf, linfo->line_off); + + if (!line) + return; + line = ltrim(line); + + if (!prefix) + prefix = ""; + + if (linum) { + const char *file = btf__name_by_offset(btf, linfo->file_name_off); + + /* More forgiving on file because linum option is + * expected to provide more info than the already + * available src line. + */ + if (!file) + file = ""; + + printf("%s%s [file:%s line_num:%u line_col:%u]\n", + prefix, line, file, + BPF_LINE_INFO_LINE_NUM(linfo->line_col), + BPF_LINE_INFO_LINE_COL(linfo->line_col)); + } else { + printf("%s%s\n", prefix, line); + } +} + +void btf_dump_linfo_json(const struct btf *btf, + const struct bpf_line_info *linfo, bool linum) +{ + const char *line = btf__name_by_offset(btf, linfo->line_off); + + if (line) + jsonw_string_field(json_wtr, "src", ltrim(line)); + + if (linum) { + const char *file = btf__name_by_offset(btf, linfo->file_name_off); + + if (file) + jsonw_string_field(json_wtr, "file", file); + + if (BPF_LINE_INFO_LINE_NUM(linfo->line_col)) + jsonw_int_field(json_wtr, "line_num", + BPF_LINE_INFO_LINE_NUM(linfo->line_col)); + + if (BPF_LINE_INFO_LINE_COL(linfo->line_col)) + jsonw_int_field(json_wtr, "line_col", + BPF_LINE_INFO_LINE_COL(linfo->line_col)); + } +} diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 545a92471c33..f381f8628ce9 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "json_writer.h" #include "main.h" @@ -68,10 +69,16 @@ static int fprintf_json(void *out, const char *fmt, ...) } void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options) + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) { + const struct bpf_line_info *linfo = NULL; disassembler_ftype disassemble; struct disassemble_info info; + unsigned int nr_skip = 0; int count, i, pc = 0; char tpath[PATH_MAX]; bfd *bfdf; @@ -127,12 +134,26 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (json_output) jsonw_start_array(json_wtr); do { + if (prog_linfo) { + linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, + func_ksym + pc, + func_idx, + nr_skip); + if (linfo) + nr_skip++; + } + if (json_output) { jsonw_start_object(json_wtr); oper_count = 0; + if (linfo) + btf_dump_linfo_json(btf, linfo, linum); jsonw_name(json_wtr, "pc"); jsonw_printf(json_wtr, "\"0x%x\"", pc); } else { + if (linfo) + btf_dump_linfo_plain(btf, linfo, "; ", + linum); printf("%4x:\t", pc); } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 0be0dd8f467f..d9393abdba78 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -138,6 +138,9 @@ struct pinned_obj { struct hlist_node hash; }; +struct btf; +struct bpf_line_info; + int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); @@ -175,13 +178,23 @@ int map_parse_fd(int *argc, char ***argv); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); #ifdef HAVE_LIBBFD_SUPPORT +struct bpf_prog_linfo; void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options); + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum); int disasm_init(void); #else static inline void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options) + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) + { } static inline int disasm_init(void) @@ -217,6 +230,12 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, void btf_dumper_type_only(const struct btf *btf, __u32 func_type_id, char *func_only, int size); +void btf_dump_linfo_plain(const struct btf *btf, + const struct bpf_line_info *linfo, + const char *prefix, bool linum); +void btf_dump_linfo_json(const struct btf *btf, + const struct bpf_line_info *linfo, bool linum); + struct nlattr; struct ifinfomsg; struct tcmsg; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index a9a51123454c..65b921ffd10a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -423,24 +423,26 @@ static int do_show(int argc, char **argv) static int do_dump(int argc, char **argv) { + unsigned int finfo_rec_size, linfo_rec_size, jited_linfo_rec_size; + void *func_info = NULL, *linfo = NULL, *jited_linfo = NULL; + unsigned int finfo_cnt, linfo_cnt = 0, jited_linfo_cnt = 0; + struct bpf_prog_linfo *prog_linfo = NULL; unsigned long *func_ksyms = NULL; struct bpf_prog_info info = {}; unsigned int *func_lens = NULL; const char *disasm_opt = NULL; - unsigned int finfo_rec_size; unsigned int nr_func_ksyms; unsigned int nr_func_lens; struct dump_data dd = {}; __u32 len = sizeof(info); struct btf *btf = NULL; - void *func_info = NULL; - unsigned int finfo_cnt; unsigned int buf_size; char *filepath = NULL; bool opcodes = false; bool visual = false; char func_sig[1024]; unsigned char *buf; + bool linum = false; __u32 *member_len; __u64 *member_ptr; ssize_t n; @@ -484,6 +486,9 @@ static int do_dump(int argc, char **argv) } else if (is_prefix(*argv, "visual")) { visual = true; NEXT_ARG(); + } else if (is_prefix(*argv, "linum")) { + linum = true; + NEXT_ARG(); } if (argc) { @@ -543,6 +548,32 @@ static int do_dump(int argc, char **argv) } } + linfo_rec_size = info.line_info_rec_size; + if (info.line_info_cnt && linfo_rec_size && info.btf_id) { + linfo_cnt = info.line_info_cnt; + linfo = malloc(linfo_cnt * linfo_rec_size); + if (!linfo) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + + jited_linfo_rec_size = info.jited_line_info_rec_size; + if (info.jited_line_info_cnt && + jited_linfo_rec_size && + info.nr_jited_ksyms && + info.nr_jited_func_lens && + info.btf_id) { + jited_linfo_cnt = info.jited_line_info_cnt; + jited_linfo = malloc(jited_linfo_cnt * jited_linfo_rec_size); + if (!jited_linfo) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + memset(&info, 0, sizeof(info)); *member_ptr = ptr_to_u64(buf); @@ -554,6 +585,13 @@ static int do_dump(int argc, char **argv) info.func_info_cnt = finfo_cnt; info.func_info_rec_size = finfo_rec_size; info.func_info = ptr_to_u64(func_info); + info.line_info_cnt = linfo_cnt; + info.line_info_rec_size = linfo_rec_size; + info.line_info = ptr_to_u64(linfo); + info.jited_line_info_cnt = jited_linfo_cnt; + info.jited_line_info_rec_size = jited_linfo_rec_size; + info.jited_line_info = ptr_to_u64(jited_linfo); + err = bpf_obj_get_info_by_fd(fd, &info, &len); close(fd); @@ -596,6 +634,30 @@ static int do_dump(int argc, char **argv) finfo_cnt = 0; } + if (linfo && info.line_info_cnt != linfo_cnt) { + p_err("incorrect line_info_cnt %u vs. expected %u", + info.line_info_cnt, linfo_cnt); + goto err_free; + } + + if (info.line_info_rec_size != linfo_rec_size) { + p_err("incorrect line_info_rec_size %u vs. expected %u", + info.line_info_rec_size, linfo_rec_size); + goto err_free; + } + + if (jited_linfo && info.jited_line_info_cnt != jited_linfo_cnt) { + p_err("incorrect jited_line_info_cnt %u vs. expected %u", + info.jited_line_info_cnt, jited_linfo_cnt); + goto err_free; + } + + if (info.jited_line_info_rec_size != jited_linfo_rec_size) { + p_err("incorrect jited_line_info_rec_size %u vs. expected %u", + info.jited_line_info_rec_size, jited_linfo_rec_size); + goto err_free; + } + if ((member_len == &info.jited_prog_len && info.jited_prog_insns == 0) || (member_len == &info.xlated_prog_len && @@ -609,6 +671,12 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (linfo_cnt) { + prog_linfo = bpf_prog_linfo__new(&info); + if (!prog_linfo) + p_err("error in processing bpf_line_info. continue without it."); + } + if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { @@ -690,8 +758,11 @@ static int do_dump(int argc, char **argv) printf("%s:\n", sym_name); } - disasm_print_insn(img, lens[i], opcodes, name, - disasm_opt); + disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum); + img += lens[i]; if (json_output) @@ -704,7 +775,7 @@ static int do_dump(int argc, char **argv) jsonw_end_array(json_wtr); } else { disasm_print_insn(buf, *member_len, opcodes, name, - disasm_opt); + disasm_opt, btf, NULL, 0, 0, false); } } else if (visual) { if (json_output) @@ -718,11 +789,14 @@ static int do_dump(int argc, char **argv) dd.btf = btf; dd.func_info = func_info; dd.finfo_rec_size = finfo_rec_size; + dd.prog_linfo = prog_linfo; if (json_output) - dump_xlated_json(&dd, buf, *member_len, opcodes); + dump_xlated_json(&dd, buf, *member_len, opcodes, + linum); else - dump_xlated_plain(&dd, buf, *member_len, opcodes); + dump_xlated_plain(&dd, buf, *member_len, opcodes, + linum); kernel_syms_destroy(&dd); } @@ -730,6 +804,9 @@ static int do_dump(int argc, char **argv) free(func_ksyms); free(func_lens); free(func_info); + free(linfo); + free(jited_linfo); + bpf_prog_linfo__free(prog_linfo); return 0; err_free: @@ -737,6 +814,9 @@ err_free: free(func_ksyms); free(func_lens); free(func_info); + free(linfo); + free(jited_linfo); + bpf_prog_linfo__free(prog_linfo); return -1; } @@ -1138,8 +1218,8 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s %s { show | list } [PROG]\n" - " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n" - " %s %s dump jited PROG [{ file FILE | opcodes }]\n" + " %s %s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n" + " %s %s dump jited PROG [{ file FILE | opcodes | linum }]\n" " %s %s pin PROG FILE\n" " %s %s { load | loadall } OBJ PATH \\\n" " [type TYPE] [dev NAME] \\\n" diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 131ecd175533..aef628dcccb6 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "disasm.h" #include "json_writer.h" @@ -234,8 +235,9 @@ static const char *print_imm(void *private_data, } void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes) + bool opcodes, bool linum) { + const struct bpf_prog_linfo *prog_linfo = dd->prog_linfo; const struct bpf_insn_cbs cbs = { .cb_print = print_insn_json, .cb_call = print_call, @@ -246,6 +248,7 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, struct bpf_insn *insn = buf; struct btf *btf = dd->btf; bool double_insn = false; + unsigned int nr_skip = 0; char func_sig[1024]; unsigned int i; @@ -273,6 +276,16 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, } } + if (prog_linfo) { + const struct bpf_line_info *linfo; + + linfo = bpf_prog_linfo__lfind(prog_linfo, i, nr_skip); + if (linfo) { + btf_dump_linfo_json(btf, linfo, linum); + nr_skip++; + } + } + jsonw_name(json_wtr, "disasm"); print_bpf_insn(&cbs, insn + i, true); @@ -307,8 +320,9 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, } void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes) + bool opcodes, bool linum) { + const struct bpf_prog_linfo *prog_linfo = dd->prog_linfo; const struct bpf_insn_cbs cbs = { .cb_print = print_insn, .cb_call = print_call, @@ -318,6 +332,7 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, struct bpf_func_info *record; struct bpf_insn *insn = buf; struct btf *btf = dd->btf; + unsigned int nr_skip = 0; bool double_insn = false; char func_sig[1024]; unsigned int i; @@ -340,6 +355,17 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, } } + if (prog_linfo) { + const struct bpf_line_info *linfo; + + linfo = bpf_prog_linfo__lfind(prog_linfo, i, nr_skip); + if (linfo) { + btf_dump_linfo_plain(btf, linfo, "; ", + linum); + nr_skip++; + } + } + double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); printf("% 4d: ", i); diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index aec31723e1e5..a24f89df8cb2 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -40,6 +40,8 @@ #define SYM_MAX_NAME 256 +struct bpf_prog_linfo; + struct kernel_sym { unsigned long address; char name[SYM_MAX_NAME]; @@ -54,6 +56,7 @@ struct dump_data { struct btf *btf; void *func_info; __u32 finfo_rec_size; + const struct bpf_prog_linfo *prog_linfo; char scratch_buff[SYM_MAX_NAME + 8]; }; @@ -61,9 +64,9 @@ void kernel_syms_load(struct dump_data *dd); void kernel_syms_destroy(struct dump_data *dd); struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key); void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes); + bool opcodes, bool linum); void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes); + bool opcodes, bool linum); void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end, unsigned int start_index); diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 7bc31c905018..197b40f5b5c6 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c new file mode 100644 index 000000000000..b8af65145408 --- /dev/null +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2018 Facebook */ + +#include +#include +#include +#include +#include "libbpf.h" + +#ifndef min +#define min(x, y) ((x) < (y) ? (x) : (y)) +#endif + +struct bpf_prog_linfo { + void *raw_linfo; + void *raw_jited_linfo; + __u32 *nr_jited_linfo_per_func; + __u32 *jited_linfo_func_idx; + __u32 nr_linfo; + __u32 nr_jited_func; + __u32 rec_size; + __u32 jited_rec_size; +}; + +static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo, + const __u64 *ksym_func, const __u32 *ksym_len) +{ + __u32 nr_jited_func, nr_linfo; + const void *raw_jited_linfo; + const __u64 *jited_linfo; + __u64 last_jited_linfo; + /* + * Index to raw_jited_linfo: + * i: Index for searching the next ksym_func + * prev_i: Index to the last found ksym_func + */ + __u32 i, prev_i; + __u32 f; /* Index to ksym_func */ + + raw_jited_linfo = prog_linfo->raw_jited_linfo; + jited_linfo = raw_jited_linfo; + if (ksym_func[0] != *jited_linfo) + goto errout; + + prog_linfo->jited_linfo_func_idx[0] = 0; + nr_jited_func = prog_linfo->nr_jited_func; + nr_linfo = prog_linfo->nr_linfo; + + for (prev_i = 0, i = 1, f = 1; + i < nr_linfo && f < nr_jited_func; + i++) { + raw_jited_linfo += prog_linfo->jited_rec_size; + last_jited_linfo = *jited_linfo; + jited_linfo = raw_jited_linfo; + + if (ksym_func[f] == *jited_linfo) { + prog_linfo->jited_linfo_func_idx[f] = i; + + /* Sanity check */ + if (last_jited_linfo - ksym_func[f - 1] + 1 > + ksym_len[f - 1]) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[f - 1] = + i - prev_i; + prev_i = i; + + /* + * The ksym_func[f] is found in jited_linfo. + * Look for the next one. + */ + f++; + } else if (*jited_linfo <= last_jited_linfo) { + /* Ensure the addr is increasing _within_ a func */ + goto errout; + } + } + + if (f != nr_jited_func) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] = + nr_linfo - prev_i; + + return 0; + +errout: + return -EINVAL; +} + +void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo) +{ + if (!prog_linfo) + return; + + free(prog_linfo->raw_linfo); + free(prog_linfo->raw_jited_linfo); + free(prog_linfo->nr_jited_linfo_per_func); + free(prog_linfo->jited_linfo_func_idx); + free(prog_linfo); +} + +struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) +{ + struct bpf_prog_linfo *prog_linfo; + __u32 nr_linfo, nr_jited_func; + + nr_linfo = info->line_info_cnt; + + /* + * Test !info->line_info because the kernel may NULL + * the ptr if kernel.kptr_restrict is set. + */ + if (!nr_linfo || !info->line_info) + return NULL; + + /* + * The min size that bpf_prog_linfo has to access for + * searching purpose. + */ + if (info->line_info_rec_size < + offsetof(struct bpf_line_info, file_name_off)) + return NULL; + + prog_linfo = calloc(1, sizeof(*prog_linfo)); + if (!prog_linfo) + return NULL; + + /* Copy xlated line_info */ + prog_linfo->nr_linfo = nr_linfo; + prog_linfo->rec_size = info->line_info_rec_size; + prog_linfo->raw_linfo = malloc(nr_linfo * prog_linfo->rec_size); + if (!prog_linfo->raw_linfo) + goto err_free; + memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, + nr_linfo * prog_linfo->rec_size); + + nr_jited_func = info->nr_jited_ksyms; + if (!nr_jited_func || + !info->jited_line_info || + info->jited_line_info_cnt != nr_linfo || + info->jited_line_info_rec_size < sizeof(__u64) || + info->nr_jited_func_lens != nr_jited_func || + !info->jited_ksyms || + !info->jited_func_lens) + /* Not enough info to provide jited_line_info */ + return prog_linfo; + + /* Copy jited_line_info */ + prog_linfo->nr_jited_func = nr_jited_func; + prog_linfo->jited_rec_size = info->jited_line_info_rec_size; + prog_linfo->raw_jited_linfo = malloc(nr_linfo * + prog_linfo->jited_rec_size); + if (!prog_linfo->raw_jited_linfo) + goto err_free; + memcpy(prog_linfo->raw_jited_linfo, + (void *)(long)info->jited_line_info, + nr_linfo * prog_linfo->jited_rec_size); + + /* Number of jited_line_info per jited func */ + prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->nr_jited_linfo_per_func) + goto err_free; + + /* + * For each jited func, + * the start idx to the "linfo" and "jited_linfo" array, + */ + prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->jited_linfo_func_idx) + goto err_free; + + if (dissect_jited_func(prog_linfo, + (__u64 *)(long)info->jited_ksyms, + (__u32 *)(long)info->jited_func_lens)) + goto err_free; + + return prog_linfo; + +err_free: + bpf_prog_linfo__free(prog_linfo); + return NULL; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip) +{ + __u32 jited_rec_size, rec_size, nr_linfo, start, i; + const void *raw_jited_linfo, *raw_linfo; + const __u64 *jited_linfo; + + if (func_idx >= prog_linfo->nr_jited_func) + return NULL; + + nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx]; + if (nr_skip >= nr_linfo) + return NULL; + + start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip; + jited_rec_size = prog_linfo->jited_rec_size; + raw_jited_linfo = prog_linfo->raw_jited_linfo + + (start * jited_rec_size); + jited_linfo = raw_jited_linfo; + if (addr < *jited_linfo) + return NULL; + + nr_linfo -= nr_skip; + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (start * rec_size); + for (i = 0; i < nr_linfo; i++) { + if (addr < *jited_linfo) + break; + + raw_linfo += rec_size; + raw_jited_linfo += jited_rec_size; + jited_linfo = raw_jited_linfo; + } + + return raw_linfo - rec_size; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip) +{ + const struct bpf_line_info *linfo; + __u32 rec_size, nr_linfo, i; + const void *raw_linfo; + + nr_linfo = prog_linfo->nr_linfo; + if (nr_skip >= nr_linfo) + return NULL; + + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size); + linfo = raw_linfo; + if (insn_off < linfo->insn_off) + return NULL; + + nr_linfo -= nr_skip; + for (i = 0; i < nr_linfo; i++) { + if (insn_off < linfo->insn_off) + break; + + raw_linfo += rec_size; + linfo = raw_linfo; + } + + return raw_linfo - rec_size; +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index f30c3d07bb7d..5f68d7b75215 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -342,6 +342,19 @@ int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); +struct bpf_prog_linfo; +struct bpf_prog_info; + +LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo); +LIBBPF_API struct bpf_prog_linfo * +bpf_prog_linfo__new(const struct bpf_prog_info *info); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 8deff22d61bb..cd02cd4e2cc3 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -99,6 +99,10 @@ LIBBPF_0.0.1 { bpf_program__unload; bpf_program__unpin; bpf_program__unpin_instance; + bpf_prog_linfo__free; + bpf_prog_linfo__new; + bpf_prog_linfo__lfind_addr_func; + bpf_prog_linfo__lfind; bpf_raw_tracepoint_open; bpf_set_link_xdp_fd; bpf_task_fd_query; -- cgit v1.2.3 From 01d3240a04f4c09392e13c77b54d4423ebce2d72 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 6 Dec 2018 13:01:03 +0000 Subject: media: bpf: add bpf function to report mouse movement Some IR remotes have a directional pad or other pointer-like thing that can be used as a mouse. Make it possible to decode these types of IR protocols in BPF. Cc: netdev@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Alexei Starovoitov --- drivers/media/rc/bpf-lirc.c | 24 ++++++++ include/uapi/linux/bpf.h | 17 +++++- tools/include/uapi/linux/bpf.h | 18 +++++- tools/testing/selftests/bpf/bpf_helpers.h | 2 + tools/testing/selftests/bpf/test_lirc_mode2.sh | 3 +- tools/testing/selftests/bpf/test_lirc_mode2_kern.c | 3 + tools/testing/selftests/bpf/test_lirc_mode2_user.c | 65 +++++++++++++++------- 7 files changed, 109 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 8b97fd1f0cea..390a722e6211 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -59,6 +59,28 @@ static const struct bpf_func_proto rc_keydown_proto = { .arg4_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_rc_pointer_rel, u32*, sample, s32, rel_x, s32, rel_y) +{ + struct ir_raw_event_ctrl *ctrl; + + ctrl = container_of(sample, struct ir_raw_event_ctrl, bpf_sample); + + input_report_rel(ctrl->dev->input_dev, REL_X, rel_x); + input_report_rel(ctrl->dev->input_dev, REL_Y, rel_y); + input_sync(ctrl->dev->input_dev); + + return 0; +} + +static const struct bpf_func_proto rc_pointer_rel_proto = { + .func = bpf_rc_pointer_rel, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -67,6 +89,8 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &rc_repeat_proto; case BPF_FUNC_rc_keydown: return &rc_keydown_proto; + case BPF_FUNC_rc_pointer_rel: + return &rc_pointer_rel_proto; case BPF_FUNC_map_lookup_elem: return &bpf_map_lookup_elem_proto; case BPF_FUNC_map_update_elem: diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7a66db8d15d5..1bee1135866a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2301,6 +2301,20 @@ union bpf_attr { * payload and/or *pop* value being to large. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2394,7 +2408,8 @@ union bpf_attr { FN(map_pop_elem), \ FN(map_peek_elem), \ FN(msg_push_data), \ - FN(msg_pop_data), + FN(msg_pop_data), \ + FN(rc_pointer_rel), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7973c28b24a0..6ad50b6471d3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2298,9 +2298,22 @@ union bpf_attr { * if possible. Other errors can occur if input parameters are * invalid either due to *start* byte not being valid part of msg * payload and/or *pop* value being to large. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". * Return - * 0 on success, or a negative erro in case of failure. + * 0 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2394,7 +2407,8 @@ union bpf_attr { FN(map_pop_elem), \ FN(map_peek_elem), \ FN(msg_push_data), \ - FN(msg_pop_data), + FN(msg_pop_data), \ + FN(rc_pointer_rel), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 7b69519a09b1..04c060e8f10a 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -170,6 +170,8 @@ static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) = (void *) BPF_FUNC_skb_vlan_push; static int (*bpf_skb_vlan_pop)(void *ctx) = (void *) BPF_FUNC_skb_vlan_pop; +static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) = + (void *) BPF_FUNC_rc_pointer_rel; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh index 677686198df3..ec4e15948e40 100755 --- a/tools/testing/selftests/bpf/test_lirc_mode2.sh +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh @@ -21,13 +21,14 @@ do if grep -q DRV_NAME=rc-loopback $i/uevent then LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q) + INPUTDEV=$(grep DEVNAME= $i/input*/event*/uevent | sed sQDEVNAME=Q/dev/Q) fi done if [ -n $LIRCDEV ]; then TYPE=lirc_mode2 - ./test_lirc_mode2_user $LIRCDEV + ./test_lirc_mode2_user $LIRCDEV $INPUTDEV ret=$? if [ $ret -ne 0 ]; then echo -e ${RED}"FAIL: $TYPE"${NC} diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c index ba26855563a5..4147130cc3b7 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c @@ -15,6 +15,9 @@ int bpf_decoder(unsigned int *sample) if (duration & 0x10000) bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0); + if (duration & 0x20000) + bpf_rc_pointer_rel(sample, (duration >> 8) & 0xff, + duration & 0xff); } return 0; diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index d470d63c33db..fb5fd6841ef3 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -47,12 +48,13 @@ int main(int argc, char **argv) { struct bpf_object *obj; - int ret, lircfd, progfd, mode; - int testir = 0x1dead; + int ret, lircfd, progfd, inputfd; + int testir1 = 0x1dead; + int testir2 = 0x20101; u32 prog_ids[10], prog_flags[10], prog_cnt; - if (argc != 2) { - printf("Usage: %s /dev/lircN\n", argv[0]); + if (argc != 3) { + printf("Usage: %s /dev/lircN /dev/input/eventM\n", argv[0]); return 2; } @@ -76,9 +78,9 @@ int main(int argc, char **argv) return 1; } - mode = LIRC_MODE_SCANCODE; - if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) { - printf("failed to set rec mode: %m\n"); + inputfd = open(argv[2], O_RDONLY | O_NONBLOCK); + if (inputfd == -1) { + printf("failed to open input device %s: %m\n", argv[1]); return 1; } @@ -102,29 +104,54 @@ int main(int argc, char **argv) } /* Write raw IR */ - ret = write(lircfd, &testir, sizeof(testir)); - if (ret != sizeof(testir)) { + ret = write(lircfd, &testir1, sizeof(testir1)); + if (ret != sizeof(testir1)) { printf("Failed to send test IR message: %m\n"); return 1; } - struct pollfd pfd = { .fd = lircfd, .events = POLLIN }; - struct lirc_scancode lsc; + struct pollfd pfd = { .fd = inputfd, .events = POLLIN }; + struct input_event event; - poll(&pfd, 1, 100); + for (;;) { + poll(&pfd, 1, 100); - /* Read decoded IR */ - ret = read(lircfd, &lsc, sizeof(lsc)); - if (ret != sizeof(lsc)) { - printf("Failed to read decoded IR: %m\n"); - return 1; + /* Read decoded IR */ + ret = read(inputfd, &event, sizeof(event)); + if (ret != sizeof(event)) { + printf("Failed to read decoded IR: %m\n"); + return 1; + } + + if (event.type == EV_MSC && event.code == MSC_SCAN && + event.value == 0xdead) { + break; + } } - if (lsc.scancode != 0xdead || lsc.rc_proto != 64) { - printf("Incorrect scancode decoded\n"); + /* Write raw IR */ + ret = write(lircfd, &testir2, sizeof(testir2)); + if (ret != sizeof(testir2)) { + printf("Failed to send test IR message: %m\n"); return 1; } + for (;;) { + poll(&pfd, 1, 100); + + /* Read decoded IR */ + ret = read(inputfd, &event, sizeof(event)); + if (ret != sizeof(event)) { + printf("Failed to read decoded IR: %m\n"); + return 1; + } + + if (event.type == EV_REL && event.code == REL_Y && + event.value == 1 ) { + break; + } + } + prog_cnt = 10; ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids, &prog_cnt); -- cgit v1.2.3 From e434b8cdf788568ba65a0a0fd9f3cb41f3ca1803 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 7 Dec 2018 12:16:18 -0500 Subject: bpf: relax verifier restriction on BPF_MOV | BPF_ALU Currently, the destination register is marked as unknown for 32-bit sub-register move (BPF_MOV | BPF_ALU) whenever the source register type is SCALAR_VALUE. This is too conservative that some valid cases will be rejected. Especially, this may turn a constant scalar value into unknown value that could break some assumptions of verifier. For example, test_l4lb_noinline.c has the following C code: struct real_definition *dst 1: if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) 2: return TC_ACT_SHOT; 3: 4: if (dst->flags & F_IPV6) { get_packet_dst is responsible for initializing "dst" into valid pointer and return true (1), otherwise return false (0). The compiled instruction sequence using alu32 will be: 412: (54) (u32) r7 &= (u32) 1 413: (bc) (u32) r0 = (u32) r7 414: (95) exit insn 413, a BPF_MOV | BPF_ALU, however will turn r0 into unknown value even r7 contains SCALAR_VALUE 1. This causes trouble when verifier is walking the code path that hasn't initialized "dst" inside get_packet_dst, for which case 0 is returned and we would then expect verifier concluding line 1 in the above C code pass the "if" check, therefore would skip fall through path starting at line 4. Now, because r0 returned from callee has became unknown value, so verifier won't skip analyzing path starting at line 4 and "dst->flags" requires dereferencing the pointer "dst" which actually hasn't be initialized for this path. This patch relaxed the code marking sub-register move destination. For a SCALAR_VALUE, it is safe to just copy the value from source then truncate it into 32-bit. A unit test also included to demonstrate this issue. This test will fail before this patch. This relaxation could let verifier skipping more paths for conditional comparison against immediate. It also let verifier recording a more accurate/strict value for one register at one state, if this state end up with going through exit without rejection and it is used for state comparison later, then it is possible an inaccurate/permissive value is better. So the real impact on verifier processed insn number is complex. But in all, without this fix, valid program could be rejected. >From real benchmarking on kernel selftests and Cilium bpf tests, there is no impact on processed instruction number when tests ares compiled with default compilation options. There is slightly improvements when they are compiled with -mattr=+alu32 after this patch. Also, test_xdp_noinline/-mattr=+alu32 now passed verification. It is rejected before this fix. Insn processed before/after this patch: default -mattr=+alu32 Kernel selftest === test_xdp.o 371/371 369/369 test_l4lb.o 6345/6345 5623/5623 test_xdp_noinline.o 2971/2971 rejected/2727 test_tcp_estates.o 429/429 430/430 Cilium bpf === bpf_lb-DLB_L3.o: 2085/2085 1685/1687 bpf_lb-DLB_L4.o: 2287/2287 1986/1982 bpf_lb-DUNKNOWN.o: 690/690 622/622 bpf_lxc.o: 95033/95033 N/A bpf_netdev.o: 7245/7245 N/A bpf_overlay.o: 2898/2898 3085/2947 NOTE: - bpf_lxc.o and bpf_netdev.o compiled by -mattr=+alu32 are rejected by verifier due to another issue inside verifier on supporting alu32 binary. - Each cilium bpf program could generate several processed insn number, above number is sum of them. v1->v2: - Restrict the change on SCALAR_VALUE. - Update benchmark numbers on Cilium bpf tests. Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 16 ++++++++++++---- tools/testing/selftests/bpf/test_verifier.c | 13 +++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9d25506bd55a..2e70b813a1a7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3583,12 +3583,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; if (BPF_SRC(insn->code) == BPF_X) { + struct bpf_reg_state *src_reg = regs + insn->src_reg; + struct bpf_reg_state *dst_reg = regs + insn->dst_reg; + if (BPF_CLASS(insn->code) == BPF_ALU64) { /* case: R1 = R2 * copy register state to dest reg */ - regs[insn->dst_reg] = regs[insn->src_reg]; - regs[insn->dst_reg].live |= REG_LIVE_WRITTEN; + *dst_reg = *src_reg; + dst_reg->live |= REG_LIVE_WRITTEN; } else { /* R1 = (u32) R2 */ if (is_pointer_value(env, insn->src_reg)) { @@ -3596,9 +3599,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) "R%d partial copy of pointer\n", insn->src_reg); return -EACCES; + } else if (src_reg->type == SCALAR_VALUE) { + *dst_reg = *src_reg; + dst_reg->live |= REG_LIVE_WRITTEN; + } else { + mark_reg_unknown(env, regs, + insn->dst_reg); } - mark_reg_unknown(env, regs, insn->dst_reg); - coerce_reg_to_size(®s[insn->dst_reg], 4); + coerce_reg_to_size(dst_reg, 4); } } else { /* case: R = imm diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 36ce58b4933e..957e4711c46c 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2959,6 +2959,19 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "alu32: mov u32 const", + .insns = { + BPF_MOV32_IMM(BPF_REG_7, 0), + BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1), + BPF_MOV32_REG(BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, { "unpriv: partial copy of pointer", .insns = { -- cgit v1.2.3 From 10a5ce98539948affbdc28dc0f39a1b6b2307f9d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 10 Dec 2018 10:53:24 -0800 Subject: bpf: bpftool: Fix newline and p_err issue This patch fixes a few newline issues and also replaces p_err with p_info in prog.c Fixes: b053b439b72a ("bpf: libbpf: bpftool: Print bpf_line_info during prog dump") Cc: Jakub Kicinski Signed-off-by: Martin KaFai Lau Acked-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 ++ tools/bpf/bpftool/main.h | 1 - tools/bpf/bpftool/prog.c | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 7c30731a9b73..bb1aeb98b6da 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -73,6 +73,7 @@ DESCRIPTION be displayed by default. If **linum** is specified, the filename, line number and line column will also be displayed on top of the source line. + **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] Dump jited image (host machine code) of the program. If *FILE* is specified image will be written to a file, @@ -84,6 +85,7 @@ DESCRIPTION be displayed by default. If **linum** is specified, the filename, line number and line column will also be displayed on top of the source line. + **bpftool prog pin** *PROG* *FILE* Pin program *PROG* as *FILE*. diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index d9393abdba78..0b37599f8cda 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -194,7 +194,6 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, const struct bpf_prog_linfo *prog_linfo, __u64 func_ksym, unsigned int func_idx, bool linum) - { } static inline int disasm_init(void) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 65b921ffd10a..9a78ebbcea1d 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -592,7 +592,6 @@ static int do_dump(int argc, char **argv) info.jited_line_info_rec_size = jited_linfo_rec_size; info.jited_line_info = ptr_to_u64(jited_linfo); - err = bpf_obj_get_info_by_fd(fd, &info, &len); close(fd); if (err) { @@ -674,7 +673,7 @@ static int do_dump(int argc, char **argv) if (linfo_cnt) { prog_linfo = bpf_prog_linfo__new(&info); if (!prog_linfo) - p_err("error in processing bpf_line_info. continue without it."); + p_info("error in processing bpf_line_info. continue without it."); } if (filepath) { -- cgit v1.2.3 From b4f8623c0cefdfb818f99b3eb3c1e4c7708dd84e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 10 Dec 2018 14:14:09 -0800 Subject: tools/bpf: sync kernel uapi bpf.h to tools directory Sync kernel uapi bpf.h "*_info_cnt => nr_*_info" changes to tools directory. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6ad50b6471d3..620ee1f919cf 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2695,11 +2695,11 @@ struct bpf_prog_info { __u32 btf_id; __u32 func_info_rec_size; __aligned_u64 func_info; - __u32 func_info_cnt; - __u32 line_info_cnt; + __u32 nr_func_info; + __u32 nr_line_info; __aligned_u64 line_info; __aligned_u64 jited_line_info; - __u32 jited_line_info_cnt; + __u32 nr_jited_line_info; __u32 line_info_rec_size; __u32 jited_line_info_rec_size; } __attribute__((aligned(8))); -- cgit v1.2.3 From cfc542411bd40ff4f8a70b3d061bd6acdfb05629 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 10 Dec 2018 14:14:10 -0800 Subject: tools/bpf: rename *_info_cnt to nr_*_info Rename all occurances of *_info_cnt field access to nr_*_info in tools directory. The local variables finfo_cnt, linfo_cnt and jited_linfo_cnt in function do_dump() of tools/bpf/bpftool/prog.c are also changed to nr_finfo, nr_linfo and nr_jited_linfo to keep naming convention consistent. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/prog.c | 48 +++++++++++++-------------- tools/lib/bpf/bpf_prog_linfo.c | 4 +-- tools/testing/selftests/bpf/test_btf.c | 60 +++++++++++++++++----------------- 3 files changed, 56 insertions(+), 56 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 9a78ebbcea1d..b73b4e473948 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -425,7 +425,7 @@ static int do_dump(int argc, char **argv) { unsigned int finfo_rec_size, linfo_rec_size, jited_linfo_rec_size; void *func_info = NULL, *linfo = NULL, *jited_linfo = NULL; - unsigned int finfo_cnt, linfo_cnt = 0, jited_linfo_cnt = 0; + unsigned int nr_finfo, nr_linfo = 0, nr_jited_linfo = 0; struct bpf_prog_linfo *prog_linfo = NULL; unsigned long *func_ksyms = NULL; struct bpf_prog_info info = {}; @@ -537,10 +537,10 @@ static int do_dump(int argc, char **argv) } } - finfo_cnt = info.func_info_cnt; + nr_finfo = info.nr_func_info; finfo_rec_size = info.func_info_rec_size; - if (finfo_cnt && finfo_rec_size) { - func_info = malloc(finfo_cnt * finfo_rec_size); + if (nr_finfo && finfo_rec_size) { + func_info = malloc(nr_finfo * finfo_rec_size); if (!func_info) { p_err("mem alloc failed"); close(fd); @@ -549,9 +549,9 @@ static int do_dump(int argc, char **argv) } linfo_rec_size = info.line_info_rec_size; - if (info.line_info_cnt && linfo_rec_size && info.btf_id) { - linfo_cnt = info.line_info_cnt; - linfo = malloc(linfo_cnt * linfo_rec_size); + if (info.nr_line_info && linfo_rec_size && info.btf_id) { + nr_linfo = info.nr_line_info; + linfo = malloc(nr_linfo * linfo_rec_size); if (!linfo) { p_err("mem alloc failed"); close(fd); @@ -560,13 +560,13 @@ static int do_dump(int argc, char **argv) } jited_linfo_rec_size = info.jited_line_info_rec_size; - if (info.jited_line_info_cnt && + if (info.nr_jited_line_info && jited_linfo_rec_size && info.nr_jited_ksyms && info.nr_jited_func_lens && info.btf_id) { - jited_linfo_cnt = info.jited_line_info_cnt; - jited_linfo = malloc(jited_linfo_cnt * jited_linfo_rec_size); + nr_jited_linfo = info.nr_jited_line_info; + jited_linfo = malloc(nr_jited_linfo * jited_linfo_rec_size); if (!jited_linfo) { p_err("mem alloc failed"); close(fd); @@ -582,13 +582,13 @@ static int do_dump(int argc, char **argv) info.nr_jited_ksyms = nr_func_ksyms; info.jited_func_lens = ptr_to_u64(func_lens); info.nr_jited_func_lens = nr_func_lens; - info.func_info_cnt = finfo_cnt; + info.nr_func_info = nr_finfo; info.func_info_rec_size = finfo_rec_size; info.func_info = ptr_to_u64(func_info); - info.line_info_cnt = linfo_cnt; + info.nr_line_info = nr_linfo; info.line_info_rec_size = linfo_rec_size; info.line_info = ptr_to_u64(linfo); - info.jited_line_info_cnt = jited_linfo_cnt; + info.nr_jited_line_info = nr_jited_linfo; info.jited_line_info_rec_size = jited_linfo_rec_size; info.jited_line_info = ptr_to_u64(jited_linfo); @@ -614,9 +614,9 @@ static int do_dump(int argc, char **argv) goto err_free; } - if (info.func_info_cnt != finfo_cnt) { - p_err("incorrect func_info_cnt %d vs. expected %d", - info.func_info_cnt, finfo_cnt); + if (info.nr_func_info != nr_finfo) { + p_err("incorrect nr_func_info %d vs. expected %d", + info.nr_func_info, nr_finfo); goto err_free; } @@ -630,12 +630,12 @@ static int do_dump(int argc, char **argv) /* kernel.kptr_restrict is set. No func_info available. */ free(func_info); func_info = NULL; - finfo_cnt = 0; + nr_finfo = 0; } - if (linfo && info.line_info_cnt != linfo_cnt) { - p_err("incorrect line_info_cnt %u vs. expected %u", - info.line_info_cnt, linfo_cnt); + if (linfo && info.nr_line_info != nr_linfo) { + p_err("incorrect nr_line_info %u vs. expected %u", + info.nr_line_info, nr_linfo); goto err_free; } @@ -645,9 +645,9 @@ static int do_dump(int argc, char **argv) goto err_free; } - if (jited_linfo && info.jited_line_info_cnt != jited_linfo_cnt) { - p_err("incorrect jited_line_info_cnt %u vs. expected %u", - info.jited_line_info_cnt, jited_linfo_cnt); + if (jited_linfo && info.nr_jited_line_info != nr_jited_linfo) { + p_err("incorrect nr_jited_line_info %u vs. expected %u", + info.nr_jited_line_info, nr_jited_linfo); goto err_free; } @@ -670,7 +670,7 @@ static int do_dump(int argc, char **argv) goto err_free; } - if (linfo_cnt) { + if (nr_linfo) { prog_linfo = bpf_prog_linfo__new(&info); if (!prog_linfo) p_info("error in processing bpf_line_info. continue without it."); diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index b8af65145408..addd6e9971cc 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -105,7 +105,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) struct bpf_prog_linfo *prog_linfo; __u32 nr_linfo, nr_jited_func; - nr_linfo = info->line_info_cnt; + nr_linfo = info->nr_line_info; /* * Test !info->line_info because the kernel may NULL @@ -138,7 +138,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) nr_jited_func = info->nr_jited_ksyms; if (!nr_jited_func || !info->jited_line_info || - info->jited_line_info_cnt != nr_linfo || + info->nr_jited_line_info != nr_linfo || info->jited_line_info_rec_size < sizeof(__u64) || info->nr_jited_func_lens != nr_jited_func || !info->jited_ksyms || diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 7707273736ac..d4c63316c862 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -2548,9 +2548,9 @@ static int do_test_file(unsigned int test_num) err = -1; goto done; } - if (CHECK(info.func_info_cnt != 3, - "incorrect info.func_info_cnt (1st) %d", - info.func_info_cnt)) { + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { err = -1; goto done; } @@ -2561,7 +2561,7 @@ static int do_test_file(unsigned int test_num) goto done; } - func_info = malloc(info.func_info_cnt * rec_size); + func_info = malloc(info.nr_func_info * rec_size); if (CHECK(!func_info, "out of memory")) { err = -1; goto done; @@ -2569,7 +2569,7 @@ static int do_test_file(unsigned int test_num) /* reset info to only retrieve func_info related data */ memset(&info, 0, sizeof(info)); - info.func_info_cnt = 3; + info.nr_func_info = 3; info.func_info_rec_size = rec_size; info.func_info = ptr_to_u64(func_info); @@ -2580,9 +2580,9 @@ static int do_test_file(unsigned int test_num) err = -1; goto done; } - if (CHECK(info.func_info_cnt != 3, - "incorrect info.func_info_cnt (2nd) %d", - info.func_info_cnt)) { + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { err = -1; goto done; } @@ -3544,9 +3544,9 @@ static int test_get_finfo(const struct prog_info_raw_test *test, fprintf(stderr, "%s\n", btf_log_buf); return -1; } - if (CHECK(info.func_info_cnt != test->func_info_cnt, - "incorrect info.func_info_cnt (1st) %d", - info.func_info_cnt)) { + if (CHECK(info.nr_func_info != test->func_info_cnt, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { return -1; } @@ -3556,16 +3556,16 @@ static int test_get_finfo(const struct prog_info_raw_test *test, return -1; } - if (!info.func_info_cnt) + if (!info.nr_func_info) return 0; - func_info = malloc(info.func_info_cnt * rec_size); + func_info = malloc(info.nr_func_info * rec_size); if (CHECK(!func_info, "out of memory")) return -1; /* reset info to only retrieve func_info related data */ memset(&info, 0, sizeof(info)); - info.func_info_cnt = test->func_info_cnt; + info.nr_func_info = test->func_info_cnt; info.func_info_rec_size = rec_size; info.func_info = ptr_to_u64(func_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); @@ -3574,9 +3574,9 @@ static int test_get_finfo(const struct prog_info_raw_test *test, err = -1; goto done; } - if (CHECK(info.func_info_cnt != test->func_info_cnt, - "incorrect info.func_info_cnt (2nd) %d", - info.func_info_cnt)) { + if (CHECK(info.nr_func_info != test->func_info_cnt, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { err = -1; goto done; } @@ -3648,14 +3648,14 @@ static int test_get_linfo(const struct prog_info_raw_test *test, nr_jited_func_lens = 1; } - if (CHECK(info.line_info_cnt != cnt || - info.jited_line_info_cnt != jited_cnt || + if (CHECK(info.nr_line_info != cnt || + info.nr_jited_line_info != jited_cnt || info.nr_jited_ksyms != nr_jited_ksyms || info.nr_jited_func_lens != nr_jited_func_lens || - (!info.line_info_cnt && info.jited_line_info_cnt), - "info: line_info_cnt:%u(expected:%u) jited_line_info_cnt:%u(expected:%u) nr_jited_ksyms:%u(expected:%u) nr_jited_func_lens:%u(expected:%u)", - info.line_info_cnt, cnt, - info.jited_line_info_cnt, jited_cnt, + (!info.nr_line_info && info.nr_jited_line_info), + "info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) nr_jited_ksyms:%u(expected:%u) nr_jited_func_lens:%u(expected:%u)", + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, info.nr_jited_ksyms, nr_jited_ksyms, info.nr_jited_func_lens, nr_jited_func_lens)) { err = -1; @@ -3684,7 +3684,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, err = -1; goto done; } - info.line_info_cnt = cnt; + info.nr_line_info = cnt; info.line_info_rec_size = rec_size; info.line_info = ptr_to_u64(linfo); @@ -3700,7 +3700,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, goto done; } - info.jited_line_info_cnt = jited_cnt; + info.nr_jited_line_info = jited_cnt; info.jited_line_info_rec_size = jited_rec_size; info.jited_line_info = ptr_to_u64(jited_linfo); info.nr_jited_ksyms = nr_jited_ksyms; @@ -3717,15 +3717,15 @@ static int test_get_linfo(const struct prog_info_raw_test *test, */ if (CHECK(err == -1 || !info.line_info || - info.line_info_cnt != cnt || + info.nr_line_info != cnt || (jited_cnt && !info.jited_line_info) || - info.jited_line_info_cnt != jited_cnt || + info.nr_jited_line_info != jited_cnt || info.line_info_rec_size != rec_size || info.jited_line_info_rec_size != jited_rec_size, - "err:%d errno:%d info: line_info_cnt:%u(expected:%u) jited_line_info_cnt:%u(expected:%u) line_info_rec_size:%u(expected:%u) jited_linfo_rec_size:%u(expected:%u) line_info:%p jited_line_info:%p", + "err:%d errno:%d info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) line_info_rec_size:%u(expected:%u) jited_linfo_rec_size:%u(expected:%u) line_info:%p jited_line_info:%p", err, errno, - info.line_info_cnt, cnt, - info.jited_line_info_cnt, jited_cnt, + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, info.line_info_rec_size, rec_size, info.jited_line_info_rec_size, jited_rec_size, (void *)(long)info.line_info, -- cgit v1.2.3 From 0bd72117fba2dd51a65eaa7b480adc0eea9a4409 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 Dec 2018 10:26:33 +0100 Subject: bpf: fix up uapi helper description and sync bpf header with tools Minor markup fixup from bpf-next into net-next merge in the BPF helper description of bpf_sk_lookup_tcp() and bpf_sk_lookup_udp(). Also sync up the copy of bpf.h from tooling infrastructure. Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 12 +++--- tools/include/uapi/linux/bpf.h | 87 +++++++++++++++++++++--------------------- 2 files changed, 50 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 92e962ba0c47..aa582cd5bfcf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2218,9 +2218,9 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description @@ -2254,9 +2254,9 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * * int bpf_sk_release(struct bpf_sock *sock) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 94c002584068..aa582cd5bfcf 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -502,18 +502,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) - * Description - * Pop an element from *map*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) - * Description - * Get an element from *map* without removing it. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1937,9 +1925,9 @@ union bpf_attr { * is set to metric from route (IPv4/IPv6 only), and ifindex * is set to the device index of the nexthop from the FIB lookup. * - * *plen* argument is the size of the passed in struct. - * *flags* argument can be a combination of one or more of the - * following values: + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: * * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB @@ -1948,9 +1936,9 @@ union bpf_attr { * Perform lookup from an egress perspective (default is * ingress). * - * *ctx* is either **struct xdp_md** for XDP programs or - * **struct sk_buff** tc cls_act programs. - * Return + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return * * < 0 if any input argument is invalid * * 0 on success (packet is forwarded, nexthop neighbor exists) * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the @@ -2095,8 +2083,8 @@ union bpf_attr { * translated to a keycode using the rc keymap, and reported as * an input key down event. After a period a key up event is * generated. This period can be extended by calling either - * **bpf_rc_keydown** () again with the same values, or calling - * **bpf_rc_repeat** (). + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. @@ -2179,21 +2167,22 @@ union bpf_attr { * The *flags* meaning is specific for each map type, * and has to be 0 for cgroup local storage. * - * Depending on the bpf program type, a local storage area - * can be shared between multiple instances of the bpf program, + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the BPF_STX_XADD instruction to alter + * For example, by using the **BPF_STX_XADD** instruction to alter * the shared data. * Return - * Pointer to the local storage area. + * A pointer to the local storage area. * * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description - * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map - * It checks the selected sk is matching the incoming - * request in the skb. + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. * Return * 0 on success, or a negative error in case of failure. * @@ -2201,7 +2190,7 @@ union bpf_attr { * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2229,15 +2218,15 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2265,42 +2254,54 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. - * For sockets with reuseport option, the *struct bpf_sock* - * result is from reuse->socks[] using the hash of the tuple. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * - * int bpf_sk_release(struct bpf_sock *sk) + * int bpf_sk_release(struct bpf_sock *sock) * Description - * Release the reference held by *sock*. *sock* must be a non-NULL - * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) * Description - * For socket policies, insert *len* bytes into msg at offset + * For socket policies, insert *len* bytes into *msg* at offset * *start*. * * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a - * *msg* it may want to insert metadata or options into the msg. + * *msg* it may want to insert metadata or options into the *msg*. * This can later be read and used by any of the lower layer BPF * hooks. * * This helper may fail if under memory pressure (a malloc * fails) in these cases BPF programs will get an appropriate * error and BPF programs will need to handle them. - * * Return * 0 on success, or a negative error in case of failure. * * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) - * Description + * Description * Will remove *pop* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if * an allocation and copy are required due to a full ring buffer. * However, the helper will try to avoid doing the allocation * if possible. Other errors can occur if input parameters are - * invalid either due to *start* byte not being valid part of msg + * invalid either due to *start* byte not being valid part of *msg* * payload and/or *pop* value being to large. * Return * 0 on success, or a negative error in case of failure. -- cgit v1.2.3 From 6d4efada3b823b3e9019e50c40114abca43a677b Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Mon, 10 Dec 2018 07:11:46 +0000 Subject: selftests: forwarding: Add multicast routing test Introduce basic testing for both IPv4 and IPv6 multicast. The test creates an (S,G) type route, sends traffic and verifies traffic arrives when the route is present and then verifies traffic does not arrive after deleting the route. This test requires smcroute - https://github.com/troglobit/smcroute which is a tool that allows creation of static multicast routes. Signed-off-by: Nir Dotan Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 2 + .../selftests/net/forwarding/router_multicast.sh | 311 +++++++++++++++++++++ 2 files changed, 313 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/router_multicast.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 7af5a03bcb32..3f248d1f5b91 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -15,6 +15,8 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no} NETIF_TYPE=${NETIF_TYPE:=veth} NETIF_CREATE=${NETIF_CREATE:=yes} +MCD=${MCD:=smcrouted} +MC_CLI=${MC_CLI:=smcroutectl} relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh new file mode 100755 index 000000000000..109e6d785169 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -0,0 +1,311 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +------------------+ +# | H1 (v$h1) | +# | 2001:db8:1::2/64 | +# | 198.51.100.2/28 | +# | $h1 + | +# +-------------|----+ +# | +# +-------------|-------------------------------+ +# | SW1 | | +# | $rp1 + | +# | 198.51.100.1/28 | +# | 2001:db8:1::1/64 | +# | | +# | 2001:db8:2::1/64 2001:db8:3::1/64 | +# | 198.51.100.17/28 198.51.100.33/28 | +# | $rp2 + $rp3 + | +# +--------------|--------------------------|---+ +# | | +# | | +# +--------------|---+ +--------------|---+ +# | H2 (v$h2) | | | H3 (v$h3) | | +# | $h2 + | | $h3 + | +# | 198.51.100.18/28 | | 198.51.100.34/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# +------------------+ +------------------+ +# + +ALL_TESTS="mcast_v4 mcast_v6" +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +require_command $MCD +require_command $MC_CLI +table_name=selftests + +h1_create() +{ + simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 + + ip route add 198.51.100.16/28 vrf v$h1 nexthop via 198.51.100.1 + ip route add 198.51.100.32/28 vrf v$h1 nexthop via 198.51.100.1 + + ip route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::1 + ip route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:3::/64 vrf v$h1 + ip route del 2001:db8:2::/64 vrf v$h1 + + ip route del 198.51.100.32/28 vrf v$h1 + ip route del 198.51.100.16/28 vrf v$h1 + + simple_if_fini $h1 198.51.100.2/28 2001:db8:1::2/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.18/28 2001:db8:2::2/64 + + ip route add 198.51.100.0/28 vrf v$h2 nexthop via 198.51.100.17 + ip route add 198.51.100.32/28 vrf v$h2 nexthop via 198.51.100.17 + + ip route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 + ip route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:2::1 + + tc qdisc add dev $h2 ingress +} + +h2_destroy() +{ + tc qdisc del dev $h2 ingress + + ip route del 2001:db8:3::/64 vrf v$h2 + ip route del 2001:db8:1::/64 vrf v$h2 + + ip route del 198.51.100.32/28 vrf v$h2 + ip route del 198.51.100.0/28 vrf v$h2 + + simple_if_fini $h2 198.51.100.18/28 2001:db8:2::2/64 +} + +h3_create() +{ + simple_if_init $h3 198.51.100.34/28 2001:db8:3::2/64 + + ip route add 198.51.100.0/28 vrf v$h3 nexthop via 198.51.100.33 + ip route add 198.51.100.16/28 vrf v$h3 nexthop via 198.51.100.33 + + ip route add 2001:db8:1::/64 vrf v$h3 nexthop via 2001:db8:3::1 + ip route add 2001:db8:2::/64 vrf v$h3 nexthop via 2001:db8:3::1 + + tc qdisc add dev $h3 ingress +} + +h3_destroy() +{ + tc qdisc del dev $h3 ingress + + ip route del 2001:db8:2::/64 vrf v$h3 + ip route del 2001:db8:1::/64 vrf v$h3 + + ip route del 198.51.100.16/28 vrf v$h3 + ip route del 198.51.100.0/28 vrf v$h3 + + simple_if_fini $h3 198.51.100.34/28 2001:db8:3::2/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + ip link set dev $rp3 up + + ip address add 198.51.100.1/28 dev $rp1 + ip address add 198.51.100.17/28 dev $rp2 + ip address add 198.51.100.33/28 dev $rp3 + + ip address add 2001:db8:1::1/64 dev $rp1 + ip address add 2001:db8:2::1/64 dev $rp2 + ip address add 2001:db8:3::1/64 dev $rp3 +} + +router_destroy() +{ + ip address del 2001:db8:3::1/64 dev $rp3 + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 2001:db8:1::1/64 dev $rp1 + + ip address del 198.51.100.33/28 dev $rp3 + ip address del 198.51.100.17/28 dev $rp2 + ip address del 198.51.100.1/28 dev $rp1 + + ip link set dev $rp3 down + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +start_mcd() +{ + SMCROUTEDIR="$(mktemp -d)" + + for ((i = 1; i <= $NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + $SMCROUTEDIR/$table_name.conf + done + + $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ + -P $SMCROUTEDIR/$table_name.pid +} + +kill_mcd() +{ + pkill $MCD + rm -rf $SMCROUTEDIR +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + start_mcd + + vrf_prepare + + h1_create + h2_create + h3_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + kill_mcd +} + +create_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs +} + +delete_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs +} + +mcast_v4() +{ + # Add two interfaces to an MC group, send a packet to the MC group and + # verify packets are received on both. Then delete the route and verify + # packets are no longer received. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 122 flower \ + dst_ip 225.1.2.3 action drop + tc filter add dev $h3 ingress protocol ip pref 1 handle 133 flower \ + dst_ip 225.1.2.3 action drop + + create_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + # Send frames with the corresponding L2 destination address. + $MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + $MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast received on host although deleted" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast received on second host although deleted" + + tc filter del dev $h3 ingress protocol ip pref 1 handle 133 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 122 flower + + log_test "mcast IPv4" +} + +mcast_v6() +{ + # Add two interfaces to an MC group, send a packet to the MC group and + # verify packets are received on both. Then delete the route and verify + # packets are no longer received. + + RET=0 + + tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 122 flower \ + dst_ip ff0e::3 action drop + tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 133 flower \ + dst_ip ff0e::3 action drop + + create_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + # Send frames with the corresponding L2 destination address. + $MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \ + -b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + $MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \ + -b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast received on first host although deleted" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast received on second host although deleted" + + tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 133 flower + tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 122 flower + + log_test "mcast IPv6" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From a0517a0f7ef23550b4484c37e2b9c2d32abebf64 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 11 Dec 2018 19:20:52 -0800 Subject: selftests/bpf: use __bpf_constant_htons in test_prog.c For some reason, my older GCC (< 4.8) isn't smart enough to optimize the !__builtin_constant_p() branch in bpf_htons, I see: error: implicit declaration of function '__builtin_bswap16' Let's use __bpf_constant_htons as suggested by Daniel Borkmann. I tried to use simple htons, but it produces the following: test_progs.c:54:17: error: braced-group within expression allowed only inside a function .eth.h_proto = htons(ETH_P_IP), Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 26f1fdf3e2bf..126fc624290d 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -51,10 +51,10 @@ static struct { struct iphdr iph; struct tcphdr tcp; } __packed pkt_v4 = { - .eth.h_proto = bpf_htons(ETH_P_IP), + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = 6, - .iph.tot_len = bpf_htons(MAGIC_BYTES), + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, }; @@ -64,9 +64,9 @@ static struct { struct ipv6hdr iph; struct tcphdr tcp; } __packed pkt_v6 = { - .eth.h_proto = bpf_htons(ETH_P_IPV6), + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = 6, - .iph.payload_len = bpf_htons(MAGIC_BYTES), + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, }; -- cgit v1.2.3 From 8f9a8a61931197d32ae31960f4179ff60ccbe3db Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 10 Dec 2018 15:43:02 -0800 Subject: selftests/bpf: add btf annotations for cgroup_local_storage maps Add btf annotations to cgroup local storage maps (per-cpu and shared) in the network packet counting example. Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/netcnt_prog.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/netcnt_prog.c index 1198abca1360..9f741e69cebe 100644 --- a/tools/testing/selftests/bpf/netcnt_prog.c +++ b/tools/testing/selftests/bpf/netcnt_prog.c @@ -16,12 +16,18 @@ struct bpf_map_def SEC("maps") percpu_netcnt = { .value_size = sizeof(struct percpu_net_cnt), }; +BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key, + struct percpu_net_cnt); + struct bpf_map_def SEC("maps") netcnt = { .type = BPF_MAP_TYPE_CGROUP_STORAGE, .key_size = sizeof(struct bpf_cgroup_storage_key), .value_size = sizeof(struct net_cnt), }; +BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key, + struct net_cnt); + SEC("cgroup/skb") int bpf_nextcnt(struct __sk_buff *skb) { -- cgit v1.2.3 From b498dc7d29bd6b4a108451671f81d712fba008ec Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 12 Dec 2018 17:03:04 +0000 Subject: selftests: mlxsw: extack: Test VLAN add on a VXLAN device Test mapping a VLAN at a VXLAN device that can't be offloaded. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/extack.sh | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh index 101a5508bdfd..d9e02624c70b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -7,6 +7,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" netdev_pre_up_test + vxlan_vlan_add_test " NUM_NETIFS=2 source $lib_dir/lib.sh @@ -74,6 +75,36 @@ netdev_pre_up_test() ip link del dev br1 } +vxlan_vlan_add_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - map VLAN at VXLAN device" + + ip link del dev vx1 + ip link del dev br1 +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From 06a2fc8f1498dc978e856ef5ded6667b476a3b5b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 12 Dec 2018 17:03:06 +0000 Subject: selftests: mlxsw: extack: Test VLAN add on a port device Test mapping a VLAN at a port device such that on the same VLAN, there already is an unoffloadable VXLAN device. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/extack.sh | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh index d9e02624c70b..d72d8488a3b2 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -8,6 +8,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" netdev_pre_up_test vxlan_vlan_add_test + port_vlan_add_test " NUM_NETIFS=2 source $lib_dir/lib.sh @@ -105,6 +106,35 @@ vxlan_vlan_add_test() ip link del dev br1 } +port_vlan_add_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + ip link set dev $swp1 master br1 + check_err $? + + bridge vlan del dev $swp1 vid 1 + + ip link set dev vx1 master br1 + check_err $? + + bridge vlan add dev $swp1 vid 1 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - map VLAN at port" + + ip link del dev vx1 + ip link del dev br1 +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From afc7c944ef828b431c564818f8cbc80f3e16f0d6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 12 Dec 2018 19:59:24 -0800 Subject: tools: bpftool: fix SPDX format in headers Documentation/process/license-rules.rst sayeth: 2. Style: The SPDX license identifier is added in form of a comment. The comment style depends on the file type:: C source: // SPDX-License-Identifier: C header: /* SPDX-License-Identifier: */ Headers should use C comment style. Signed-off-by: Jakub Kicinski Acked-by: Roman Gushchin Acked-by: YueHaibing Acked-by: Yonghong Song Acked-by: Stanislav Fomichev Acked-by: Sean Young Acked-by: Jiri Benc Acked-by: David Calavera Acked-by: Andrey Ignatov Acked-by: Joe Stringer Acked-by: David Ahern Acked-by: Alexei Starovoitov Acked-by: Petar Penkov Acked-by: Sandipan Das Acked-by: Prashant Bhole Acked-by: Stephen Hemminger Acked-by: John Fastabend Acked-by: Taeung Song Acked-by: Jiri Olsa Acked-by: Daniel Borkmann CC: okash.khawaja@gmail.com Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/cfg.h | 2 +- tools/bpf/bpftool/netlink_dumper.h | 2 +- tools/bpf/bpftool/xlated_dumper.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h index 2cc9bd990b13..493e2389d3cc 100644 --- a/tools/bpf/bpftool/cfg.h +++ b/tools/bpf/bpftool/cfg.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* * Copyright (C) 2018 Netronome Systems, Inc. * diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h index e3516b586a34..fb1b1a685f7e 100644 --- a/tools/bpf/bpftool/netlink_dumper.h +++ b/tools/bpf/bpftool/netlink_dumper.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (C) 2018 Facebook #ifndef _NETLINK_DUMPER_H_ diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index a24f89df8cb2..f950ab2dafbf 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* * Copyright (C) 2018 Netronome Systems, Inc. * -- cgit v1.2.3 From 02ff58dcf70ad7d11b01523dc404166ed11021da Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 12 Dec 2018 19:59:25 -0800 Subject: tools: bpftool: replace Netronome boilerplate with SPDX license headers Replace the repeated license text with SDPX identifiers. Signed-off-by: Jakub Kicinski Acked-by: Roman Gushchin Acked-by: YueHaibing Acked-by: Yonghong Song Acked-by: Stanislav Fomichev Acked-by: Sean Young Acked-by: Jiri Benc Acked-by: David Calavera Acked-by: Andrey Ignatov Acked-by: Joe Stringer Acked-by: David Ahern Acked-by: Alexei Starovoitov Acked-by: Petar Penkov Acked-by: Sandipan Das Acked-by: Prashant Bhole Acked-by: Stephen Hemminger Acked-by: John Fastabend Acked-by: Taeung Song Acked-by: Jiri Olsa Acked-by: Daniel Borkmann CC: okash.khawaja@gmail.com Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/bash-completion/bpftool | 31 +------------------------- tools/bpf/bpftool/cfg.c | 36 +------------------------------ tools/bpf/bpftool/cfg.h | 36 +------------------------------ tools/bpf/bpftool/common.c | 34 ++--------------------------- tools/bpf/bpftool/main.c | 34 ++--------------------------- tools/bpf/bpftool/main.h | 34 ++--------------------------- tools/bpf/bpftool/map.c | 34 ++--------------------------- tools/bpf/bpftool/prog.c | 34 ++--------------------------- tools/bpf/bpftool/xlated_dumper.c | 36 +------------------------------ tools/bpf/bpftool/xlated_dumper.h | 36 +------------------------------ 10 files changed, 15 insertions(+), 330 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index a57febd6abb1..e4e4fab1b8c7 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -1,37 +1,8 @@ # bpftool(8) bash completion -*- shell-script -*- # +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) # Copyright (C) 2017-2018 Netronome Systems, Inc. # -# This software is dual licensed under the GNU General License -# Version 2, June 1991 as shown in the file COPYING in the top-level -# directory of this source tree or the BSD 2-Clause License provided -# below. You have the option to license this software under the -# complete terms of either license. -# -# The BSD 2-Clause License: -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# 1. Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# disclaimer. -# -# 2. Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# # Author: Quentin Monnet # Takes a list of words in argument; each one of them is added to COMPREPLY if diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c index f30b3a4a840b..31f0db41513f 100644 --- a/tools/bpf/bpftool/cfg.c +++ b/tools/bpf/bpftool/cfg.c @@ -1,39 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright (C) 2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ #include #include diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h index 493e2389d3cc..e144257ea6d2 100644 --- a/tools/bpf/bpftool/cfg.h +++ b/tools/bpf/bpftool/cfg.h @@ -1,39 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ -/* - * Copyright (C) 2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ #ifndef __BPF_TOOL_CFG_H #define __BPF_TOOL_CFG_H diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 172d3761d9ab..99e4027dde75 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ #include #include diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 5c4c1cd5a7ba..9e657e7d5172 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ #include #include diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 0b37599f8cda..d2beb88f0e2e 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ #ifndef __BPF_TOOL_H #define __BPF_TOOL_H diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 8469ea6cf1c8..2037e3dc864b 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ #include #include diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index b73b4e473948..eddf7fba41c6 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ #define _GNU_SOURCE #include diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index aef628dcccb6..640ffe86a655 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -1,39 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright (C) 2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ #define _GNU_SOURCE #include diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index f950ab2dafbf..54847e174273 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -1,39 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ -/* - * Copyright (C) 2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ #ifndef __BPF_TOOL_XLATED_DUMPER_H #define __BPF_TOOL_XLATED_DUMPER_H -- cgit v1.2.3 From 907b22365115fb86196152909915108d7c8243d1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 12 Dec 2018 19:59:26 -0800 Subject: tools: bpftool: dual license all files Currently bpftool contains a mix of GPL-only and GPL or BSD2 licensed files. Make sure all files are dual licensed under GPLv2 and BSD-2-Clause. Signed-off-by: Jakub Kicinski Acked-by: Roman Gushchin Acked-by: YueHaibing Acked-by: Yonghong Song Acked-by: Stanislav Fomichev Acked-by: Sean Young Acked-by: Jiri Benc Acked-by: David Calavera Acked-by: Andrey Ignatov Acked-by: Joe Stringer Acked-by: David Ahern Acked-by: Alexei Starovoitov Acked-by: Petar Penkov Acked-by: Sandipan Das Acked-by: Prashant Bhole Acked-by: Stephen Hemminger Acked-by: John Fastabend Acked-by: Taeung Song Acked-by: Jiri Olsa Acked-by: Daniel Borkmann CC: okash.khawaja@gmail.com Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/btf_dumper.c | 2 +- tools/bpf/bpftool/cgroup.c | 2 +- tools/bpf/bpftool/jit_disasm.c | 1 + tools/bpf/bpftool/json_writer.c | 1 + tools/bpf/bpftool/json_writer.h | 1 + tools/bpf/bpftool/map_perf_ring.c | 2 +- tools/bpf/bpftool/net.c | 2 +- tools/bpf/bpftool/netlink_dumper.c | 2 +- tools/bpf/bpftool/netlink_dumper.h | 2 +- tools/bpf/bpftool/perf.c | 2 +- 10 files changed, 10 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 2392ccdc918f..5cdb2ef8b6f1 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (c) 2018 Facebook */ #include diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index ee7a9765c6b3..4b5c8da2a7c0 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2017 Facebook // Author: Roman Gushchin diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index f381f8628ce9..3ef3093560ba 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* * Based on: * diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index c6eef76322ae..a07d17918725 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* * Simple streaming JSON writer * diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h index 0fa2fb1b6351..c1ab51aed99c 100644 --- a/tools/bpf/bpftool/json_writer.h +++ b/tools/bpf/bpftool/json_writer.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* * Simple streaming JSON writer * diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index bdaf4062e26e..0507dfaf7a8f 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2018 Netronome Systems, Inc. */ /* This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index d441bb7035ca..db0e7de49d49 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2018 Facebook #define _GNU_SOURCE diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c index 4e9f4531269f..550a0f537eed 100644 --- a/tools/bpf/bpftool/netlink_dumper.c +++ b/tools/bpf/bpftool/netlink_dumper.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2018 Facebook #include diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h index fb1b1a685f7e..774af6c62ef5 100644 --- a/tools/bpf/bpftool/netlink_dumper.h +++ b/tools/bpf/bpftool/netlink_dumper.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ // Copyright (C) 2018 Facebook #ifndef _NETLINK_DUMPER_H_ diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index b76b77dcfd1f..f2a545e667c4 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2018 Facebook // Author: Yonghong Song -- cgit v1.2.3 From 177e77169b0b71587c74382d5f2207a16da34790 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 12 Dec 2018 10:18:22 -0800 Subject: bpf: Remove !func_info and !line_info check from test_btf and bpftool kernel can provide the func_info and line_info even it fails the btf_dump_raw_ok() test because they don't contain kernel address. This patch removes the corresponding '== 0' test. Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 7 ------- tools/lib/bpf/bpf_prog_linfo.c | 6 +----- tools/testing/selftests/bpf/test_btf.c | 7 ------- 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index eddf7fba41c6..ee51279be9c7 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -596,13 +596,6 @@ static int do_dump(int argc, char **argv) goto err_free; } - if (func_info && !info.func_info) { - /* kernel.kptr_restrict is set. No func_info available. */ - free(func_info); - func_info = NULL; - nr_finfo = 0; - } - if (linfo && info.nr_line_info != nr_linfo) { p_err("incorrect nr_line_info %u vs. expected %u", info.nr_line_info, nr_linfo); diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index addd6e9971cc..6978314ea7f6 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -107,11 +107,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) nr_linfo = info->nr_line_info; - /* - * Test !info->line_info because the kernel may NULL - * the ptr if kernel.kptr_restrict is set. - */ - if (!nr_linfo || !info->line_info) + if (!nr_linfo) return NULL; /* diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index f570e0a39959..8478316aaf9a 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -3948,12 +3948,6 @@ static int test_get_finfo(const struct prog_info_raw_test *test, goto done; } - if (CHECK(!info.func_info, - "info.func_info == 0. kernel.kptr_restrict is set?")) { - err = -1; - goto done; - } - finfo = func_info; for (i = 0; i < test->func_info_cnt; i++) { if (CHECK(finfo->type_id != test->func_info[i][1], @@ -4077,7 +4071,6 @@ static int test_get_linfo(const struct prog_info_raw_test *test, * Other fields are not the concern of this test. */ if (CHECK(err == -1 || - !info.line_info || info.nr_line_info != cnt || (jited_cnt && !info.jited_line_info) || info.nr_jited_line_info != jited_cnt || -- cgit v1.2.3 From eb896a69a0cf3afa69f0983470cc41f9cb04c017 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 12 Dec 2018 09:37:47 -0800 Subject: bpf: sync tools/include/uapi/linux/bpf.h Sync bpf.h for nr_prog_tags and prog_tags. Signed-off-by: Song Liu Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index aa582cd5bfcf..e7d57e89f25f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2717,6 +2717,8 @@ struct bpf_prog_info { __u32 nr_jited_line_info; __u32 line_info_rec_size; __u32 jited_line_info_rec_size; + __u32 nr_prog_tags; + __aligned_u64 prog_tags; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From 28c1272914613b6c5a0129d6d502c790d4f23d6c Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 13 Dec 2018 13:19:01 -0800 Subject: selftests/bpf: Fix sk lookup usage in test_sock_addr Semantic of netns_id argument of bpf_sk_lookup_tcp and bpf_sk_lookup_udp was changed (fixed) in f71c6143c203. Corresponding changes have to be applied to all call sites in selftests. The patch fixes corresponding call sites in test_sock_addr test: pass BPF_F_CURRENT_NETNS instead of 0 in netns_id argument. Fixes: f71c6143c203 ("bpf: Support sk lookup in netns with id 0") Reported-by: Yonghong Song Signed-off-by: Andrey Ignatov Acked-by: Joe Stringer Tested-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/connect4_prog.c | 6 ++++-- tools/testing/selftests/bpf/connect6_prog.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c index b8395f3c43e9..1fd244d35ba9 100644 --- a/tools/testing/selftests/bpf/connect4_prog.c +++ b/tools/testing/selftests/bpf/connect4_prog.c @@ -35,9 +35,11 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM) - sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0); + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), + BPF_F_CURRENT_NETNS, 0); else - sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), 0, 0); + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), + BPF_F_CURRENT_NETNS, 0); if (!sk) return 0; diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c index 25f5dc7b7aa0..26397ab7b3c7 100644 --- a/tools/testing/selftests/bpf/connect6_prog.c +++ b/tools/testing/selftests/bpf/connect6_prog.c @@ -47,9 +47,11 @@ int connect_v6_prog(struct bpf_sock_addr *ctx) if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM) - sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0); + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6), + BPF_F_CURRENT_NETNS, 0); else - sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6), 0, 0); + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6), + BPF_F_CURRENT_NETNS, 0); if (!sk) return 0; -- cgit v1.2.3 From 555afaae121ada40d10e538e888c6ecf3cd731eb Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 13 Dec 2018 11:54:54 +0000 Subject: selftests: mlxsw: Test RIF MAC vetoing Test that attempts to change address in a way that violates Spectrum requirements are vetoed with extack. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 91 ++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh new file mode 100755 index 000000000000..bc8b44c77108 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various interface configuration scenarios. Observe that configurations +# deemed valid by mlxsw succeed, invalid configurations fail and that no traces +# are produced. To prevent the test from passing in case traces are produced, +# the user can set the 'kernel.panic_on_warn' and 'kernel.panic_on_oops' +# sysctls in its environment. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + rif_set_addr_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +rif_set_addr_test() +{ + local swp1_mac=$(mac_get $swp1) + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # $swp1 and $swp2 likely got their IPv6 local addresses already, but + # here we need to test the transition to RIF. + ip addr flush dev $swp1 + ip addr flush dev $swp2 + sleep .1 + + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + ip link set dev $swp1 addr 00:11:22:33:44:55 + check_err $? + + # IP address enablement should be rejected if the MAC address prefix + # doesn't match other RIFs. + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_fail $? "IP address addition passed for a device with a wrong MAC" + ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for IP address addition" + + ip link set dev $swp2 addr 00:11:22:33:44:66 + check_err $? + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_err $? + + # Change of MAC address of a RIF should be forbidden if the new MAC + # doesn't share the prefix with other MAC addresses. + ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null + check_fail $? "change of MAC address passed for a wrong MAC" + ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for MAC address change" + + log_test "RIF - bad MAC change" + + ip addr del dev $swp2 192.0.2.2/28 + ip addr del dev $swp1 192.0.2.1/28 + + ip link set dev $swp2 addr $swp2_mac + ip link set dev $swp1 addr $swp1_mac +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 9651ee10ce3bc1e7a71242727711da93441602ac Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 13 Dec 2018 11:54:56 +0000 Subject: selftests: mlxsw: Test FID RIF MAC vetoing When a FID RIF is created for a bridge with IP address, its MAC address must obey the same requirements as other RIFs. Test that attempts to change the address incompatibly by attaching a device are vetoed with extack. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 79 ++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index bc8b44c77108..7f78b96279f3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -11,6 +11,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" rif_set_addr_test + rif_inherit_bridge_addr_test + rif_non_inherit_bridge_addr_test " NUM_NETIFS=2 source $lib_dir/lib.sh @@ -81,6 +83,83 @@ rif_set_addr_test() ip link set dev $swp1 addr $swp1_mac } +rif_inherit_bridge_addr_test() +{ + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. That prompts bridge address change, which + # should be vetoed, thus preventing the attachment. + ip link set dev d master br1 &>/dev/null + check_fail $? "Device with low MAC was permitted to attach a bridge with RIF" + ip link set dev d master br1 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge attach rejection" + + ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null + check_fail $? "Changing swp2's MAC address permitted" + ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge port MAC address change rejection" + + log_test "RIF - attach port with bad MAC to bridge" + + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + +rif_non_inherit_bridge_addr_test() +{ + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev br1 addr $swp2_mac + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. Since the bridge address was set, it should + # work. + ip link set dev d master br1 &>/dev/null + check_err $? "Could not attach a device with low MAC to a bridge with RIF" + + # Port MAC address change should be allowed for a bridge with set MAC. + ip link set dev $swp2 addr 00:11:22:33:44:55 + check_err $? "Changing swp2's MAC address not permitted" + + log_test "RIF - attach port with bad MAC to bridge with set MAC" + + ip link set dev $swp2 addr $swp2_mac + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From 6254e5c6a8d7c19e51e671e2648de2db06f8d504 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 12 Dec 2018 13:15:37 -0800 Subject: selftests: net: test that listening sockets match on address properly This patch adds a selftest that verifies that a socket listening on a specific address is chosen in preference over sockets that listen on any address. The test covers UDP/UDP6/TCP/TCP6. It is based on, and similar to, reuseport_dualstack.c selftest. Signed-off-by: Peter Oskolkov Signed-off-by: David S. Miller --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 4 +- tools/testing/selftests/net/reuseport_addr_any.c | 264 ++++++++++++++++++++++ tools/testing/selftests/net/reuseport_addr_any.sh | 4 + 4 files changed, 271 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/net/reuseport_addr_any.c create mode 100755 tools/testing/selftests/net/reuseport_addr_any.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 7f57b916e6b2..6f81130605d7 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -3,6 +3,7 @@ socket psock_fanout psock_snd psock_tpacket +reuseport_addr_any reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ee2e27b1cd0d..aeecc3ef53d0 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -7,10 +7,10 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh -TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh +TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket -TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy +TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c new file mode 100644 index 000000000000..f5e01d989519 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test that sockets listening on a specific address are preferred + * over sockets listening on addr_any. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const char *IP4_ADDR = "127.0.0.1"; +static const char *IP6_ADDR = "::1"; +static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; + +static const int PORT = 8888; + +static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, + const char *addr_str) +{ + struct sockaddr_in addr4 = {0}; + struct sockaddr_in6 addr6 = {0}; + struct sockaddr *addr; + int opt, i, sz; + + memset(&addr, 0, sizeof(addr)); + + switch (family) { + case AF_INET: + addr4.sin_family = family; + if (!addr_str) + addr4.sin_addr.s_addr = htonl(INADDR_ANY); + else if (!inet_pton(family, addr_str, &addr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", addr_str); + addr4.sin_port = htons(PORT); + sz = sizeof(addr4); + addr = (struct sockaddr *)&addr4; + break; + case AF_INET6: + addr6.sin6_family = AF_INET6; + if (!addr_str) + addr6.sin6_addr = in6addr_any; + else if (!inet_pton(family, addr_str, &addr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", addr_str); + addr6.sin6_port = htons(PORT); + sz = sizeof(addr6); + addr = (struct sockaddr *)&addr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < count; ++i) { + rcv_fds[i] = socket(family, proto, 0); + if (rcv_fds[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fds[i], addr, sz)) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static int connect_and_send(int family, int proto) +{ + struct sockaddr_in saddr4 = {0}; + struct sockaddr_in daddr4 = {0}; + struct sockaddr_in6 saddr6 = {0}; + struct sockaddr_in6 daddr6 = {0}; + struct sockaddr *saddr, *daddr; + int fd, sz; + + switch (family) { + case AF_INET: + saddr4.sin_family = AF_INET; + saddr4.sin_addr.s_addr = htonl(INADDR_ANY); + saddr4.sin_port = 0; + + daddr4.sin_family = AF_INET; + if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", IP4_ADDR); + daddr4.sin_port = htons(PORT); + + sz = sizeof(saddr4); + saddr = (struct sockaddr *)&saddr4; + daddr = (struct sockaddr *)&daddr4; + break; + case AF_INET6: + saddr6.sin6_family = AF_INET6; + saddr6.sin6_addr = in6addr_any; + + daddr6.sin6_family = AF_INET6; + if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", IP6_ADDR); + daddr6.sin6_port = htons(PORT); + + sz = sizeof(saddr6); + saddr = (struct sockaddr *)&saddr6; + daddr = (struct sockaddr *)&daddr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, saddr, sz)) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, daddr, sz)) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + return fd; +} + +static int receive_once(int epfd, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, 3); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + return ev.data.fd; +} + +static void test(int *rcv_fds, int count, int family, int proto, int fd) +{ + struct epoll_event ev; + int epfd, i, send_fd, recv_fd; + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + + ev.events = EPOLLIN; + for (i = 0; i < count; ++i) { + ev.data.fd = rcv_fds[i]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + send_fd = connect_and_send(family, proto); + + recv_fd = receive_once(epfd, proto); + if (recv_fd != fd) + error(1, 0, "received on an unexpected socket"); + + close(send_fd); + close(epfd); +} + +int main(void) +{ + /* Below we test that a socket listening on a specific address + * is always selected in preference over a socket listening + * on addr_any. Bugs where this is not the case often result + * in sockets created first or last to get picked. So below + * we make sure that there are always addr_any sockets created + * before and after a specific socket is created. + */ + int rcv_fds[10], i; + + fprintf(stderr, "---- UDP IPv4 ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 4, 1, IP4_ADDR); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- UDP IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP6_ADDR); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET6, SOCK_DGRAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- UDP IPv4 mapped to IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP4_MAPPED6); + build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv4 ----\n"); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 4, 1, IP4_ADDR); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP6_ADDR); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET6, SOCK_STREAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- TCP IPv4 mapped to IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP4_MAPPED6); + build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} diff --git a/tools/testing/selftests/net/reuseport_addr_any.sh b/tools/testing/selftests/net/reuseport_addr_any.sh new file mode 100755 index 000000000000..104592f62ad4 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_addr_any.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +./in_netns.sh ./reuseport_addr_any -- cgit v1.2.3 From 730ff40f80c5b09b2402958321f2762a3f852e30 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 Dec 2018 11:42:32 -0800 Subject: selftests/bpf: check insn processed in test_verifier Teach test_verifier to parse verifier output for insn processed and compare with expected number. Signed-off-by: Alexei Starovoitov Acked-by: Edward Cree Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a08c67c8767e..82359cdbc805 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -76,7 +76,7 @@ struct bpf_test { int fixup_percpu_cgroup_storage[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; - uint32_t retval, retval_unpriv; + uint32_t retval, retval_unpriv, insn_processed; enum { UNDEF, ACCEPT, @@ -14444,6 +14444,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } } + if (test->insn_processed) { + uint32_t insn_processed; + char *proc; + + proc = strstr(bpf_vlog, "processed "); + insn_processed = atoi(proc + 10); + if (test->insn_processed != insn_processed) { + printf("FAIL\nUnexpected insn_processed %u vs %u\n", + insn_processed, test->insn_processed); + goto fail_log; + } + } + if (fd_prog >= 0) { __u8 tmp[TEST_DATA_LEN << 2]; __u32 size_tmp = sizeof(tmp); -- cgit v1.2.3 From 19e2dbb7dd978d24505e918ac54d6f7dfdc88b1d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 13 Dec 2018 11:42:33 -0800 Subject: bpf: improve stacksafe state comparison "if (old->allocated_stack > cur->allocated_stack)" check is too conservative. In some cases explored stack could have allocated more space, but that stack space was not live. The test case improves from 19 to 15 processed insns and improvement on real programs is significant as well: before after bpf_lb-DLB_L3.o 1940 1831 bpf_lb-DLB_L4.o 3089 3029 bpf_lb-DUNKNOWN.o 1065 1064 bpf_lxc-DDROP_ALL.o 28052 26309 bpf_lxc-DUNKNOWN.o 35487 33517 bpf_netdev.o 10864 9713 bpf_overlay.o 6643 6184 bpf_lcx_jit.o 38437 37335 Signed-off-by: Alexei Starovoitov Acked-by: Edward Cree Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 13 +++++++------ tools/testing/selftests/bpf/test_verifier.c | 22 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0c6deb3f2be4..e4724fe8120f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5191,12 +5191,6 @@ static bool stacksafe(struct bpf_func_state *old, { int i, spi; - /* if explored stack has more populated slots than current stack - * such stacks are not equivalent - */ - if (old->allocated_stack > cur->allocated_stack) - return false; - /* walk slots of the explored stack and ignore any additional * slots in the current stack, since explored(safe) state * didn't use them @@ -5212,6 +5206,13 @@ static bool stacksafe(struct bpf_func_state *old, if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) continue; + + /* explored stack has more populated slots than current stack + * and these slots were used + */ + if (i >= cur->allocated_stack) + return false; + /* if old state was safe with misc data in the stack * it will be safe with zero-initialized stack. * The opposite is not true diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 82359cdbc805..f9de7fe0c26d 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -13647,6 +13647,28 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, + { + "allocated_stack", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, -8), + BPF_STX_MEM(BPF_B, BPF_REG_10, BPF_REG_7, -9), + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_10, -9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = ACCEPT, + .insn_processed = 15, + }, { "reference tracking in call: free reference in subprog and outside", .insns = { -- cgit v1.2.3 From bc6cd664609b92aa437ed3f384e332faab5949c8 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 14 Dec 2018 13:55:57 +0000 Subject: tools: bpftool: add doc for -m option to bpftool-prog.rst The --mapcompat|-m option has been documented on the main bpftool.rst page, and on the interactive help. As this option is useful for loading programs with maps with the "bpftool prog load" command, it should also appear in the related bpftool-prog.rst documentation page. Let's add it. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index bb1aeb98b6da..b41e96ac0dfe 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -158,6 +158,9 @@ OPTIONS When showing BPF programs, show file names of pinned programs. + -m, --mapcompat + Allow loading maps with unknown map definitions. + EXAMPLES ======== **# bpftool prog show** -- cgit v1.2.3 From 32870ba4078c3b4deac7ccb35b078c9ab69488a3 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 14 Dec 2018 13:55:58 +0000 Subject: tools: bpftool: fix examples in documentation for bpftool prog Bring various fixes to the manual page for "bpftool prog" set of commands: - Fix typos ("dum" -> "dump") - Harmonise indentation and format for command output - Update date format for program load time - Add instruction numbers on program dumps - Fix JSON format for the example program listing Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 98 +++++++++++++----------- 1 file changed, 53 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index b41e96ac0dfe..53920ffc48dd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -164,80 +164,88 @@ OPTIONS EXAMPLES ======== **# bpftool prog show** + :: - 10: xdp name some_prog tag 005a3d2123620c8b gpl - loaded_at Sep 29/20:11 uid 0 - xlated 528B jited 370B memlock 4096B map_ids 10 + 10: xdp name some_prog tag 005a3d2123620c8b gpl + loaded_at 2017-09-29T20:11:00+0000 uid 0 + xlated 528B jited 370B memlock 4096B map_ids 10 **# bpftool --json --pretty prog show** :: - { - "programs": [{ - "id": 10, - "type": "xdp", - "tag": "005a3d2123620c8b", - "gpl_compatible": true, - "loaded_at": "Sep 29/20:11", - "uid": 0, - "bytes_xlated": 528, - "jited": true, - "bytes_jited": 370, - "bytes_memlock": 4096, - "map_ids": [10 - ] - } - ] - } + [{ + "id": 10, + "type": "xdp", + "tag": "005a3d2123620c8b", + "gpl_compatible": true, + "loaded_at": 1506715860, + "uid": 0, + "bytes_xlated": 528, + "jited": true, + "bytes_jited": 370, + "bytes_memlock": 4096, + "map_ids": [10 + ] + } + ] | | **# bpftool prog dump xlated id 10 file /tmp/t** | **# ls -l /tmp/t** -| -rw------- 1 root root 560 Jul 22 01:42 /tmp/t -**# bpftool prog dum jited tag 005a3d2123620c8b** +:: + + -rw------- 1 root root 560 Jul 22 01:42 /tmp/t + +**# bpftool prog dump jited tag 005a3d2123620c8b** :: - push %rbp - mov %rsp,%rbp - sub $0x228,%rsp - sub $0x28,%rbp - mov %rbx,0x0(%rbp) + 0: push %rbp + 1: mov %rsp,%rbp + 2: sub $0x228,%rsp + 3: sub $0x28,%rbp + 4: mov %rbx,0x0(%rbp) | | **# mount -t bpf none /sys/fs/bpf/** | **# bpftool prog pin id 10 /sys/fs/bpf/prog** | **# bpftool prog load ./my_prog.o /sys/fs/bpf/prog2** | **# ls -l /sys/fs/bpf/** -| -rw------- 1 root root 0 Jul 22 01:43 prog -| -rw------- 1 root root 0 Jul 22 01:44 prog2 -**# bpftool prog dum jited pinned /sys/fs/bpf/prog opcodes** +:: + + -rw------- 1 root root 0 Jul 22 01:43 prog + -rw------- 1 root root 0 Jul 22 01:44 prog2 + +**# bpftool prog dump jited pinned /sys/fs/bpf/prog opcodes** :: - push %rbp - 55 - mov %rsp,%rbp - 48 89 e5 - sub $0x228,%rsp - 48 81 ec 28 02 00 00 - sub $0x28,%rbp - 48 83 ed 28 - mov %rbx,0x0(%rbp) - 48 89 5d 00 + 0: push %rbp + 55 + 1: mov %rsp,%rbp + 48 89 e5 + 4: sub $0x228,%rsp + 48 81 ec 28 02 00 00 + b: sub $0x28,%rbp + 48 83 ed 28 + f: mov %rbx,0x0(%rbp) + 48 89 5d 00 | | **# bpftool prog load xdp1_kern.o /sys/fs/bpf/xdp1 type xdp map name rxcnt id 7** | **# bpftool prog show pinned /sys/fs/bpf/xdp1** -| 9: xdp name xdp_prog1 tag 539ec6ce11b52f98 gpl -| loaded_at 2018-06-25T16:17:31-0700 uid 0 -| xlated 488B jited 336B memlock 4096B map_ids 7 -| **# rm /sys/fs/bpf/xdp1** -| + +:: + + 9: xdp name xdp_prog1 tag 539ec6ce11b52f98 gpl + loaded_at 2018-06-25T16:17:31-0700 uid 0 + xlated 488B jited 336B memlock 4096B map_ids 7 + +**# rm /sys/fs/bpf/xdp1** SEE ALSO ======== -- cgit v1.2.3 From bd0fb9d0078edbfa2c983ad61901d90f1c0f5747 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 14 Dec 2018 13:55:59 +0000 Subject: tools: bpftool: add a prog array map update example to documentation Add an example in map documentation to show how to use bpftool in order to update the references to programs hold by prog array maps. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 55 +++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 5318dcb2085e..3221be1b9ccc 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -170,6 +170,61 @@ The following three commands are equivalent: | **# bpftool map pin id 10 /sys/fs/bpf/map** | **# bpftool map del pinned /sys/fs/bpf/map key 13 00 07 00** +Note that map update can also be used in order to change the program references +hold by a program array map. This can be used, for example, to change the +programs used for tail-call jumps at runtime, without having to reload the +entry-point program. Below is an example for this use case: we load a program +defining a prog array map, and with a main function that contains a tail call +to other programs that can be used either to "process" packets or to "debug" +processing. Note that the prog array map MUST be pinned into the BPF virtual +file system for the map update to work successfully, as kernel flushes prog +array maps when they have no more references from user space (and the update +would be lost as soon as bpftool exits). + +| +| **# bpftool prog loadall tail_calls.o /sys/fs/bpf/foo type xdp** +| **# bpftool prog --bpffs** + +:: + + 545: xdp name main_func tag 674b4b5597193dc3 gpl + loaded_at 2018-12-12T15:02:58+0000 uid 0 + xlated 240B jited 257B memlock 4096B map_ids 294 + pinned /sys/fs/bpf/foo/xdp + 546: xdp name bpf_func_process tag e369a529024751fc gpl + loaded_at 2018-12-12T15:02:58+0000 uid 0 + xlated 200B jited 164B memlock 4096B + pinned /sys/fs/bpf/foo/process + 547: xdp name bpf_func_debug tag 0b597868bc7f0976 gpl + loaded_at 2018-12-12T15:02:58+0000 uid 0 + xlated 200B jited 164B memlock 4096B + pinned /sys/fs/bpf/foo/debug + +**# bpftool map** + +:: + + 294: prog_array name jmp_table flags 0x0 + key 4B value 4B max_entries 1 memlock 4096B + owner_prog_type xdp owner jited + +| +| **# bpftool map pin id 294 /sys/fs/bpf/bar** +| **# bpftool map dump pinned /sys/fs/bpf/bar** + +:: + + Found 0 elements + +| +| **# bpftool map update pinned /sys/fs/bpf/bar key 0 0 0 0 value pinned /sys/fs/bpf/foo/debug** +| **# bpftool map dump pinned /sys/fs/bpf/bar** + +:: + + key: 00 00 00 00 value: 22 02 00 00 + Found 1 element + SEE ALSO ======== **bpf**\ (2), -- cgit v1.2.3 From 8c03ecf712f5bbbb9d353ed69715aef3cc2b4029 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 14 Dec 2018 13:56:00 +0000 Subject: tools: bpftool: fix warning on struct bpf_prog_linfo definition The following warning appears when compiling bpftool without BFD support: main.h:198:23: warning: 'struct bpf_prog_linfo' declared inside parameter list will not be visible outside of this definition or declaration const struct bpf_prog_linfo *prog_linfo, Fix it by declaring struct bpf_prog_linfo even in the case BFD is not supported. Fixes: b053b439b72a ("bpf: libbpf: bpftool: Print bpf_line_info during prog dump") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index d2beb88f0e2e..9487345b04a7 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -147,8 +147,8 @@ int prog_parse_fd(int *argc, char ***argv); int map_parse_fd(int *argc, char ***argv); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); -#ifdef HAVE_LIBBFD_SUPPORT struct bpf_prog_linfo; +#ifdef HAVE_LIBBFD_SUPPORT void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, const char *arch, const char *disassembler_options, const struct btf *btf, -- cgit v1.2.3 From c101189bc9680675a2686bafe908015a07a0da51 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 14 Dec 2018 13:56:01 +0000 Subject: tools: bpftool: fix -Wmissing declaration warnings Help compiler check arguments for several utility functions used to print items to the console by adding the "printf" attribute when declaring those functions. Also, declare as "static" two functions that are only used in prog.c. All of them discovered by compiling bpftool with -Wmissing-format-attribute -Wmissing-declarations. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 4 ++-- tools/bpf/bpftool/json_writer.c | 6 ++++-- tools/bpf/bpftool/prog.c | 4 ++-- tools/bpf/bpftool/xlated_dumper.c | 7 ++++--- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 99e4027dde75..24582e8a96fb 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -28,7 +28,7 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -void p_err(const char *fmt, ...) +void __printf(1, 2) p_err(const char *fmt, ...) { va_list ap; @@ -46,7 +46,7 @@ void p_err(const char *fmt, ...) va_end(ap); } -void p_info(const char *fmt, ...) +void __printf(1, 2) p_info(const char *fmt, ...) { va_list ap; diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index a07d17918725..bff7ee026680 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "json_writer.h" @@ -157,7 +158,8 @@ void jsonw_name(json_writer_t *self, const char *name) putc(' ', self->out); } -void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) +void __printf(2, 0) +jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) { jsonw_eor(self); putc('"', self->out); @@ -165,7 +167,7 @@ void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) putc('"', self->out); } -void jsonw_printf(json_writer_t *self, const char *fmt, ...) +void __printf(2, 3) jsonw_printf(json_writer_t *self, const char *fmt, ...) { va_list ap; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index ee51279be9c7..2d1bb7d6ff51 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -32,7 +32,7 @@ static const char * const attach_type_strings[] = { [__MAX_BPF_ATTACH_TYPE] = NULL, }; -enum bpf_attach_type parse_attach_type(const char *str) +static enum bpf_attach_type parse_attach_type(const char *str) { enum bpf_attach_type type; @@ -798,7 +798,7 @@ struct map_replace { char *name; }; -int map_replace_compar(const void *p1, const void *p2) +static int map_replace_compar(const void *p1, const void *p2) { const struct map_replace *a = p1, *b = p2; diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 640ffe86a655..7073dbe1ff27 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -81,7 +81,7 @@ struct kernel_sym *kernel_syms_search(struct dump_data *dd, sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL; } -static void print_insn(void *private_data, const char *fmt, ...) +static void __printf(2, 3) print_insn(void *private_data, const char *fmt, ...) { va_list args; @@ -90,7 +90,7 @@ static void print_insn(void *private_data, const char *fmt, ...) va_end(args); } -static void +static void __printf(2, 3) print_insn_for_graph(void *private_data, const char *fmt, ...) { char buf[64], *p; @@ -121,7 +121,8 @@ print_insn_for_graph(void *private_data, const char *fmt, ...) printf("%s", buf); } -static void print_insn_json(void *private_data, const char *fmt, ...) +static void __printf(2, 3) +print_insn_json(void *private_data, const char *fmt, ...) { unsigned int l = strlen(fmt); char chomped_fmt[l]; -- cgit v1.2.3 From 11fb60d1089f52dd9003d02cf2590c9b56eda840 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Sat, 15 Dec 2018 14:27:24 -0800 Subject: selftests: net: reuseport_addr_any: add DCCP This patch adds coverage of DCCP to reuseport_addr_any selftest. Signed-off-by: Peter Oskolkov Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_addr_any.c | 49 +++++++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index f5e01d989519..4ac3e24b7cfc 100644 --- a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -75,7 +76,16 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, error(1, errno, "failed to bind receive socket"); if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) - error(1, errno, "failed to listen on receive port"); + error(1, errno, "tcp: failed to listen on receive port"); + else if (proto == SOCK_DCCP) { + if (setsockopt(rcv_fds[i], SOL_DCCP, + DCCP_SOCKOPT_SERVICE, + &(int) {htonl(42)}, sizeof(int))) + error(1, errno, "failed to setsockopt"); + + if (listen(rcv_fds[i], 10)) + error(1, errno, "dccp: failed to listen on receive port"); + } } } @@ -124,6 +134,11 @@ static int connect_and_send(int family, int proto) if (fd < 0) error(1, errno, "failed to create send socket"); + if (proto == SOCK_DCCP && + setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, + &(int){htonl(42)}, sizeof(int))) + error(1, errno, "failed to setsockopt"); + if (bind(fd, saddr, sz)) error(1, errno, "failed to bind send socket"); @@ -146,7 +161,7 @@ static int receive_once(int epfd, int proto) if (i < 0) error(1, errno, "epoll_wait failed"); - if (proto == SOCK_STREAM) { + if (proto == SOCK_STREAM || proto == SOCK_DCCP) { fd = accept(ev.data.fd, NULL, NULL); if (fd < 0) error(1, errno, "failed to accept"); @@ -259,6 +274,36 @@ int main(void) for (i = 0; i < 9; ++i) close(rcv_fds[i]); + fprintf(stderr, "---- DCCP IPv4 ----\n"); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 4, 1, IP4_ADDR); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_DCCP, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- DCCP IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 4, 1, IP6_ADDR); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET6, SOCK_DCCP, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + + fprintf(stderr, "---- DCCP IPv4 mapped to IPv6 ----\n"); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 4, 1, IP4_MAPPED6); + build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, AF_INET, SOCK_DCCP, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + fprintf(stderr, "SUCCESS\n"); return 0; } -- cgit v1.2.3 From 31d31951d00a3ec6335dad36cd49a4767e2bb304 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sat, 15 Dec 2018 22:35:11 -0800 Subject: selftests: net: rtnetlink.sh: add fdb get test tests the below three cases of bridge fdb get: [bridge, mac, vlan] [bridge_port, mac, vlan, flags=[NTF_MASTER]] [vxlandev, mac, flags=NTF_SELF] depends on iproute2 support for bridge fdb get. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 53 ++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index e101af52d1d6..bb3436f8807f 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -955,6 +955,58 @@ kci_test_ip6erspan() ip netns del "$testns" } +kci_test_fdb_get() +{ + IP="ip -netns testns" + BRIDGE="bridge -netns testns" + brdev="test-br0" + vxlandev="vxlan10" + test_mac=de:ad:be:ef:13:37 + localip="10.0.2.2" + dstip="10.0.2.3" + ret=0 + + bridge fdb help 2>&1 |grep -q 'bridge fdb get' + if [ $? -ne 0 ];then + echo "SKIP: fdb get tests: iproute2 too old" + return $ksft_skip + fi + + ip netns add testns + if [ $? -ne 0 ]; then + echo "SKIP fdb get tests: cannot add net namespace $testns" + return $ksft_skip + fi + + $IP link add "$vxlandev" type vxlan id 10 local $localip \ + dstport 4789 2>/dev/null + check_err $? + $IP link add name "$brdev" type bridge &>/dev/null + check_err $? + $IP link set dev "$vxlandev" master "$brdev" &>/dev/null + check_err $? + $BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null + check_err $? + $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null + check_err $? + + $BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" + check_err $? + $BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" + check_err $? + $BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip" + check_err $? + + ip netns del testns &>/dev/null + + if [ $ret -ne 0 ]; then + echo "FAIL: bridge fdb get" + return 1 + fi + + echo "PASS: bridge fdb get" +} + kci_test_rtnl() { kci_add_dummy @@ -979,6 +1031,7 @@ kci_test_rtnl() kci_test_macsec kci_test_ipsec kci_test_ipsec_offload + kci_test_fdb_get kci_del_dummy } -- cgit v1.2.3 From 095c720807499953b7e16528f705d09675c96c0c Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Sun, 16 Dec 2018 08:49:35 +0000 Subject: selftests: mlxsw: Add Bloom filter simple test Add a test that exercises Bloom filter code. Activate eRP table in the region by adding multiple rule patterns which with very high probability use different entries in the Bloom filter. Then send packets in order to check lookup hits on all relevant rules. Signed-off-by: Nir Dotan Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/spectrum-2/tc_flower.sh | 57 +++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 00ae99fbc253..ad66e3e94cc9 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -8,7 +8,8 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ - multiple_masks_test ctcam_edge_cases_test delta_simple_test" + multiple_masks_test ctcam_edge_cases_test delta_simple_test \ + bloom_simple_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -404,6 +405,60 @@ delta_simple_test() log_test "delta simple test ($tcflags)" } +bloom_simple_test() +{ + # Bloom filter requires that the eRP table is used. This test + # verifies that Bloom filter is not harming correctness of ACLs. + # First, make sure that eRP table is used and then set rule patterns + # which are distant enough and will result skipping a lookup after + # consulting the Bloom filter. Although some eRP lookups are skipped, + # the correct filter should be hit. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 5 handle 104 flower \ + $tcflags dst_ip 198.51.100.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Two filters - did not match highest priority" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Single filter - did not match" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Low prio filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 198.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Two filters - did not match highest priority after add" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 5 handle 104 flower + + log_test "bloom simple test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} -- cgit v1.2.3 From 5118ca4edf9939ed3812ffbfabe7b97164f29635 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Sun, 16 Dec 2018 08:49:36 +0000 Subject: selftests: mlxsw: Add Bloom filter complex test Bloom filter index computation is based on the values of {rule & mask, mask ID, region ID} and the computation also varies according to the region key size. Add a test that exercises the possible combinations by creating multiple chains using different key sizes and then pass a frame that is supposed to to produce a hit on all of the regions. Signed-off-by: Nir Dotan Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/spectrum-2/tc_flower.sh | 85 +++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index ad66e3e94cc9..a7667ebc578f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -9,7 +9,7 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ - bloom_simple_test" + bloom_simple_test bloom_complex_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -459,6 +459,89 @@ bloom_simple_test() log_test "bloom simple test ($tcflags)" } +bloom_complex_test() +{ + # Bloom filter index computation is affected from region ID, eRP + # ID and from the region key size. In order to excercise those parts + # of the Bloom filter code, use a series of regions, each with a + # different key size and send packet that should hit all of them. + local index + + RET=0 + NUM_CHAINS=4 + BASE_INDEX=100 + + # Create chain with up to 2 key blocks (ip_proto only) + tc chain add dev $h2 ingress chain 1 protocol ip flower \ + ip_proto tcp &> /dev/null + # Create chain with 2-4 key blocks (ip_proto, src MAC) + tc chain add dev $h2 ingress chain 2 protocol ip flower \ + ip_proto tcp \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null + # Create chain with 4-8 key blocks (ip_proto, src & dst MAC, IPv4 dest) + tc chain add dev $h2 ingress chain 3 protocol ip flower \ + ip_proto tcp \ + dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + dst_ip 0.0.0.0/32 &> /dev/null + # Default chain contains all fields and therefore is 8-12 key blocks + tc chain add dev $h2 ingress chain 4 + + # We need at least 2 rules in every region to have eRP table active + # so create a dummy rule per chain using a different pattern + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX - 1 - i)) + tc filter add dev $h2 ingress chain $i protocol ip \ + pref 2 handle $index flower \ + $tcflags ip_proto tcp action drop + done + + # Add rules to test Bloom filter, each in a different chain + index=$BASE_INDEX + tc filter add dev $h2 ingress protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/16 action goto chain 1 + tc filter add dev $h2 ingress chain 1 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags action goto chain 2 + tc filter add dev $h2 ingress chain 2 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_mac $h1mac action goto chain 3 + tc filter add dev $h2 ingress chain 3 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/8 action goto chain 4 + tc filter add dev $h2 ingress chain 4 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_ip 192.0.2.0/24 action drop + + # Send a packet that is supposed to hit all chains + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX + i + 1)) + tc_check_packets "dev $h2 ingress" $index 1 + check_err $? "Did not match chain $i" + done + + # Rules cleanup + for i in $(eval echo {$NUM_CHAINS..0}); do + index=$((BASE_INDEX - i - 1)) + tc filter del dev $h2 ingress chain $i \ + pref 2 handle $index flower + index=$((BASE_INDEX + i + 1)) + tc filter del dev $h2 ingress chain $i \ + pref 1 handle $index flower + done + + # Chains cleanup + for i in $(eval echo {$NUM_CHAINS..1}); do + tc chain del dev $h2 ingress chain $i + done + + log_test "bloom complex test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} -- cgit v1.2.3 From 5d06a76d9e6d2d00eadba72548636ff0ff6fabde Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Sun, 16 Dec 2018 08:49:37 +0000 Subject: selftests: mlxsw: Add Bloom delta test The eRP table is active when there is more than a single rule pattern. It may be that the patterns are close enough and use delta mechanism. Bloom filter index computation is based on the values of {rule & mask, mask ID, region ID} where the rule delta bits must be cleared. Add a test that exercises Bloom filter with delta mechanism. Configure rules within delta range and pass a packet which is supposed to hit the correct rule. Signed-off-by: Nir Dotan Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/spectrum-2/tc_flower.sh | 37 +++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index a7667ebc578f..b41d6256b2d0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -9,7 +9,7 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ - bloom_simple_test bloom_complex_test" + bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -542,6 +542,41 @@ bloom_complex_test() log_test "bloom complex test ($tcflags)" } + +bloom_delta_test() +{ + # When multiple masks are used, the eRP table is activated. When + # masks are close enough (delta) the masks reside on the same + # eRP table. This test verifies that the eRP table is correctly + # allocated and used in delta condition and that Bloom filter is + # still functional with delta. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.1.0.0/16 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.1.2.1 -B 192.1.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Single filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.2.1.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.2.1.1 -B 192.2.1.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Delta filters - did not match second filter" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "bloom delta test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} -- cgit v1.2.3 From 6c4fc209fcf9d27efbaa48368773e4d2bfbd59aa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 16 Dec 2018 00:49:47 +0100 Subject: bpf: remove useless version check for prog load Existing libraries and tracing frameworks work around this kernel version check by automatically deriving the kernel version from uname(3) or similar such that the user does not need to do it manually; these workarounds also make the version check useless at the same time. Moreover, most other BPF tracing types enabling bpf_probe_read()-like functionality have /not/ adapted this check, and in general these days it is well understood anyway that all the tracing programs are not stable with regards to future kernels as kernel internal data structures are subject to change from release to release. Back at last netconf we discussed [0] and agreed to remove this check from bpf_prog_load() and instead document it here in the uapi header that there is no such guarantee for stable API for these programs. [0] http://vger.kernel.org/netconf2018_files/DanielBorkmann_netconf2018.pdf Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 10 +++++++++- kernel/bpf/syscall.c | 5 ----- tools/include/uapi/linux/bpf.h | 10 +++++++++- 3 files changed, 18 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e7d57e89f25f..1d324c2cbca2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -133,6 +133,14 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK, }; +/* Note that tracing related programs such as + * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} + * are not subject to a stable API since kernel internal data + * structures can change from release to release and may + * therefore break existing tracing BPF programs. Tracing BPF + * programs correspond to /a/ specific kernel which is to be + * analyzed, and not /a/ specific kernel /and/ all future ones. + */ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, @@ -343,7 +351,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 kern_version; /* not used */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6ae062f1cf20..5db31067d85e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1473,11 +1473,6 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS) return -E2BIG; - - if (type == BPF_PROG_TYPE_KPROBE && - attr->kern_version != LINUX_VERSION_CODE) - return -EINVAL; - if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && !capable(CAP_SYS_ADMIN)) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e7d57e89f25f..1d324c2cbca2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -133,6 +133,14 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK, }; +/* Note that tracing related programs such as + * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} + * are not subject to a stable API since kernel internal data + * structures can change from release to release and may + * therefore break existing tracing BPF programs. Tracing BPF + * programs correspond to /a/ specific kernel which is to be + * analyzed, and not /a/ specific kernel /and/ all future ones. + */ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, @@ -343,7 +351,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 kern_version; /* not used */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ -- cgit v1.2.3 From 128b343dbef543e75793deed5cb3e5eeb9b69d0e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Dec 2018 22:13:53 -0800 Subject: tools/bpf: sync btf.h header from kernel to tools Sync include/uapi/linux/btf.h to tools/include/uapi/linux/btf.h. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/btf.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 14f66948fc95..7b7475ef2f17 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -34,7 +34,9 @@ struct btf_type { * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-31: unused + * bits 28-30: unused + * bit 31: kind_flag, currently used by + * struct, union and fwd */ __u32 info; /* "size" is used by INT, ENUM, STRUCT and UNION. @@ -52,6 +54,7 @@ struct btf_type { #define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) #define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KFLAG(info) ((info) >> 31) #define BTF_KIND_UNKN 0 /* Unknown */ #define BTF_KIND_INT 1 /* Integer */ @@ -110,9 +113,22 @@ struct btf_array { struct btf_member { __u32 name_off; __u32 type; - __u32 offset; /* offset in bits */ + /* If the type info kind_flag is set, the btf_member offset + * contains both member bitfield size and bit offset. The + * bitfield size is set for bitfield members. If the type + * info kind_flag is not set, the offset contains only bit + * offset. + */ + __u32 offset; }; +/* If the struct/union type info kind_flag is set, the + * following two macros are used to access bitfield_size + * and bit_offset from btf_member.offset. + */ +#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) +#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) + /* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". * The exact number of btf_param is stored in the vlen (of the * info in "struct btf_type"). -- cgit v1.2.3 From cd9de5d3d64b472f67a5ac8520f79ce42f7583b8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Dec 2018 22:13:55 -0800 Subject: tools/bpf: add test_btf unit tests for kind_flag This patch added unit tests for different types handling type->info.kind_flag. The following new tests are added: $ test_btf ... BTF raw test[82] (invalid int kind_flag): OK BTF raw test[83] (invalid ptr kind_flag): OK BTF raw test[84] (invalid array kind_flag): OK BTF raw test[85] (invalid enum kind_flag): OK BTF raw test[86] (valid fwd kind_flag): OK BTF raw test[87] (invalid typedef kind_flag): OK BTF raw test[88] (invalid volatile kind_flag): OK BTF raw test[89] (invalid const kind_flag): OK BTF raw test[90] (invalid restrict kind_flag): OK BTF raw test[91] (invalid func kind_flag): OK BTF raw test[92] (invalid func_proto kind_flag): OK BTF raw test[93] (valid struct kind_flag, bitfield_size = 0): OK BTF raw test[94] (valid struct kind_flag, int member, bitfield_size != 0): OK BTF raw test[95] (valid union kind_flag, int member, bitfield_size != 0): OK BTF raw test[96] (valid struct kind_flag, enum member, bitfield_size != 0): OK BTF raw test[97] (valid union kind_flag, enum member, bitfield_size != 0): OK BTF raw test[98] (valid struct kind_flag, typedef member, bitfield_size != 0): OK BTF raw test[99] (valid union kind_flag, typedef member, bitfield_size != 0): OK BTF raw test[100] (invalid struct type, bitfield_size greater than struct size): OK BTF raw test[101] (invalid struct type, kind_flag bitfield base_type int not regular): OK BTF raw test[102] (invalid struct type, kind_flag base_type int not regular): OK BTF raw test[103] (invalid union type, bitfield_size greater than struct size): OK ... PASS:122 SKIP:0 FAIL:0 The second parameter name of macro BTF_INFO_ENC(kind, root, vlen) in selftests test_btf.c is also renamed from "root" to "kind_flag". Note that before this patch "root" is not used and always 0. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_btf.c | 496 ++++++++++++++++++++++++++++++++- 1 file changed, 494 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 8478316aaf9a..5346d1fa8a4d 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -65,8 +65,8 @@ static int __base_pr(const char *format, ...) return err; } -#define BTF_INFO_ENC(kind, root, vlen) \ - ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) \ (name), (info), (size_or_type) @@ -86,6 +86,8 @@ static int __base_pr(const char *format, ...) #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) +#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \ + ((bitfield_size) << 24 | (bits_offset)) #define BTF_TYPEDEF_ENC(name, type) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type) @@ -2215,6 +2217,496 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid type_id", }, +{ + .descr = "invalid int kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 1, 0), 4), /* [2] */ + BTF_INT_ENC(0, 0, 32), + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "int_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid ptr kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid array kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 1, 0), 0), /* [2] */ + BTF_ARRAY_ENC(1, 1, 1), + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid enum kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "valid fwd kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "invalid typedef kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_TYPEDEF, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid volatile kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "volatile_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid const kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "const_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid restrict kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "restrict_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid func kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 0), 0), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 1, 0), 2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid func_proto kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 1, 0), 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "valid struct, kind_flag, bitfield_size = 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 32)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, int member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 4)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, int member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, enum member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 4)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, enum member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, typedef member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 4)), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, typedef member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 0)), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "invalid struct, kind_flag, bitfield_size greater than struct size", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 20)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +{ + .descr = "invalid struct, kind_flag, bitfield base_type int not regular", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 20, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 20)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member base type", +}, + +{ + .descr = "invalid struct, kind_flag, base_type int not regular", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 12, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 8)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member base type", +}, + +{ + .descr = "invalid union, kind_flag, bitfield_size greater than struct size", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 2), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(8, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +{ + .descr = "invalid struct, kind_flag, int member, bitfield_size = 0, wrong byte alignment", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member offset", +}, + +{ + .descr = "invalid struct, kind_flag, enum member, bitfield_size = 0, wrong byte alignment", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member offset", +}, + }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) -- cgit v1.2.3 From d0ebce687edc5d5a899b3de6b1adbfa0c5b64021 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Dec 2018 22:13:56 -0800 Subject: tools/bpf: test kernel bpffs map pretty print with struct kind_flag The new tests are added to test bpffs map pretty print in kernel with kind_flag for structure type. $ test_btf -p ...... BTF pretty print array(#1)......OK BTF pretty print array(#2)......OK PASS:8 SKIP:0 FAIL:0 Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_btf.c | 168 ++++++++++++++++++++++++++++++--- 1 file changed, 154 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 5346d1fa8a4d..b783aa2212cc 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -3528,7 +3528,8 @@ struct pprint_mapv { } aenum; }; -static struct btf_raw_test pprint_test_template = { +static struct btf_raw_test pprint_test_template[] = { +{ .raw_types = { /* unsighed char */ /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), @@ -3578,13 +3579,140 @@ static struct btf_raw_test pprint_test_template = { BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */ BTF_END_RAW, }, - .str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum", - .str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128 * 1024, +}, + +{ + /* this type will have the same type as the + * first .raw_types definition, but struct type will + * be encoded with kind_flag set. + */ + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 8), 32), + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 126)), /* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ .value_type_id = 16, /* struct pprint_mapv */ .max_entries = 128 * 1024, +}, + +{ + /* this type will have the same layout as the + * first .raw_types definition. The struct type will + * be encoded with kind_flag set, bitfield members + * are added typedef/const/volatile, and bitfield members + * will have both int and enum types. + */ + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 8), 32), + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 19, BTF_MEMBER_OFFSET(2, 126)),/* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + /* typedef unsigned int ___int */ /* [17] */ + BTF_TYPEDEF_ENC(NAME_TBD, 18), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6), /* [18] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15), /* [19] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0___int"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128 * 1024, +}, + }; static struct btf_pprint_test_meta { @@ -3687,9 +3815,9 @@ static int check_line(const char *expected_line, int nexpected_line, } -static int do_test_pprint(void) +static int do_test_pprint(int test_num) { - const struct btf_raw_test *test = &pprint_test_template; + const struct btf_raw_test *test = &pprint_test_template[test_num]; struct bpf_create_map_attr create_attr = {}; bool ordered_map, lossless_map, percpu_map; int err, ret, num_cpus, rounded_value_size; @@ -3705,7 +3833,7 @@ static int do_test_pprint(void) uint8_t *raw_btf; ssize_t nread; - fprintf(stderr, "%s......", test->descr); + fprintf(stderr, "%s(#%d)......", test->descr, test_num); raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, &raw_btf_size, NULL); @@ -3898,15 +4026,27 @@ static int test_pprint(void) unsigned int i; int err = 0; + /* test various maps with the first test template */ for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) { - pprint_test_template.descr = pprint_tests_meta[i].descr; - pprint_test_template.map_type = pprint_tests_meta[i].map_type; - pprint_test_template.map_name = pprint_tests_meta[i].map_name; - pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map; - pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map; - pprint_test_template.percpu_map = pprint_tests_meta[i].percpu_map; - - err |= count_result(do_test_pprint()); + pprint_test_template[0].descr = pprint_tests_meta[i].descr; + pprint_test_template[0].map_type = pprint_tests_meta[i].map_type; + pprint_test_template[0].map_name = pprint_tests_meta[i].map_name; + pprint_test_template[0].ordered_map = pprint_tests_meta[i].ordered_map; + pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map; + pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map; + + err |= count_result(do_test_pprint(0)); + } + + /* test rest test templates with the first map */ + for (i = 1; i < ARRAY_SIZE(pprint_test_template); i++) { + pprint_test_template[i].descr = pprint_tests_meta[0].descr; + pprint_test_template[i].map_type = pprint_tests_meta[0].map_type; + pprint_test_template[i].map_name = pprint_tests_meta[0].map_name; + pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map; + pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map; + pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map; + err |= count_result(do_test_pprint(i)); } return err; -- cgit v1.2.3 From 9f95e37e31a4eabd7c8152f2ddcdbea5eafdb7c8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Dec 2018 22:13:57 -0800 Subject: tools: bpftool: refactor btf_dumper_int_bits() The core dump funcitonality in btf_dumper_int_bits() is refactored into a separate function btf_dumper_bitfield() which will be used by the next patch. Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/btf_dumper.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 5cdb2ef8b6f1..06e3d2bbf6d1 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -73,20 +73,17 @@ static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id, return ret; } -static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset, +static void btf_dumper_bitfield(__u32 nr_bits, __u8 bit_offset, const void *data, json_writer_t *jw, bool is_plain_text) { int left_shift_bits, right_shift_bits; - int nr_bits = BTF_INT_BITS(int_type); - int total_bits_offset; int bytes_to_copy; int bits_to_copy; __u64 print_num; - total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type); - data += BITS_ROUNDDOWN_BYTES(total_bits_offset); - bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset); + data += BITS_ROUNDDOWN_BYTES(bit_offset); + bit_offset = BITS_PER_BYTE_MASKED(bit_offset); bits_to_copy = bit_offset + nr_bits; bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy); @@ -109,6 +106,22 @@ static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset, jsonw_printf(jw, "%llu", print_num); } + +static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset, + const void *data, json_writer_t *jw, + bool is_plain_text) +{ + int nr_bits = BTF_INT_BITS(int_type); + int total_bits_offset; + + /* bits_offset is at most 7. + * BTF_INT_OFFSET() cannot exceed 64 bits. + */ + total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type); + btf_dumper_bitfield(nr_bits, total_bits_offset, data, jw, + is_plain_text); +} + static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset, const void *data, json_writer_t *jw, bool is_plain_text) -- cgit v1.2.3 From 8772c8bc093b2d5823125475e1206b80664f8acb Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 15 Dec 2018 22:13:58 -0800 Subject: tools: bpftool: support pretty print with kind_flag set The following example shows map pretty print with structures which include bitfield members. enum A { A1, A2, A3, A4, A5 }; typedef enum A ___A; struct tmp_t { char a1:4; int a2:4; int :4; __u32 a3:4; int b; ___A b1:4; enum A b2:4; }; struct bpf_map_def SEC("maps") tmpmap = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct tmp_t), .max_entries = 1, }; BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t); and the following map update in the bpf program: key = 0; struct tmp_t t = {}; t.a1 = 2; t.a2 = 4; t.a3 = 6; t.b = 7; t.b1 = 8; t.b2 = 10; bpf_map_update_elem(&tmpmap, &key, &t, 0); With this patch, I am able to print out the map values correctly with this patch: bpftool map dump id 187 [{ "key": 0, "value": { "a1": 0x2, "a2": 0x4, "a3": 0x6, "b": 7, "b1": 0x8, "b2": 0xa } } ] Previously, if a function prototype argument has a typedef type, the prototype is not printed since function __btf_dumper_type_only() bailed out with error if the type is a typedef. This commit corrected this behavior by printing out typedef properly. The following example shows forward type and typedef type can be properly printed in function prototype with modified test_btf_haskv.c. struct t; union u; __attribute__((noinline)) static int test_long_fname_1(struct dummy_tracepoint_args *arg, struct t *p1, union u *p2, __u32 unused) ... int _dummy_tracepoint(struct dummy_tracepoint_args *arg) { return test_long_fname_1(arg, 0, 0, 0); } $ bpftool p d xlated id 24 ... int test_long_fname_1(struct dummy_tracepoint_args * arg, struct t * p1, union u * p2, __u32 unused) ... Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/btf_dumper.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 06e3d2bbf6d1..3f0629edbca5 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -193,6 +193,7 @@ static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id, const struct btf_type *t; struct btf_member *m; const void *data_off; + int kind_flag; int ret = 0; int i, vlen; @@ -200,18 +201,32 @@ static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id, if (!t) return -EINVAL; + kind_flag = BTF_INFO_KFLAG(t->info); vlen = BTF_INFO_VLEN(t->info); jsonw_start_object(d->jw); m = (struct btf_member *)(t + 1); for (i = 0; i < vlen; i++) { - data_off = data + BITS_ROUNDDOWN_BYTES(m[i].offset); + __u32 bit_offset = m[i].offset; + __u32 bitfield_size = 0; + + if (kind_flag) { + bitfield_size = BTF_MEMBER_BITFIELD_SIZE(bit_offset); + bit_offset = BTF_MEMBER_BIT_OFFSET(bit_offset); + } + jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off)); - ret = btf_dumper_do_type(d, m[i].type, - BITS_PER_BYTE_MASKED(m[i].offset), - data_off); - if (ret) - break; + if (bitfield_size) { + btf_dumper_bitfield(bitfield_size, bit_offset, + data, d->jw, d->is_plain_text); + } else { + data_off = data + BITS_ROUNDDOWN_BYTES(bit_offset); + ret = btf_dumper_do_type(d, m[i].type, + BITS_PER_BYTE_MASKED(bit_offset), + data_off); + if (ret) + break; + } } jsonw_end_object(d->jw); @@ -298,6 +313,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_INT: + case BTF_KIND_TYPEDEF: BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off)); break; case BTF_KIND_STRUCT: @@ -321,10 +337,11 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, BTF_PRINT_TYPE(t->type); BTF_PRINT_ARG("* "); break; - case BTF_KIND_UNKN: case BTF_KIND_FWD: - case BTF_KIND_TYPEDEF: - return -1; + BTF_PRINT_ARG("%s %s ", + BTF_INFO_KFLAG(t->info) ? "union" : "struct", + btf__name_by_offset(btf, t->name_off)); + break; case BTF_KIND_VOLATILE: BTF_PRINT_ARG("volatile "); BTF_PRINT_TYPE(t->type); @@ -348,6 +365,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, if (pos == -1) return -1; break; + case BTF_KIND_UNKN: default: return -1; } -- cgit v1.2.3 From 07a09d1b73c9651289d35449460d10e195e2f197 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Mon, 17 Dec 2018 16:57:50 +0900 Subject: bpf: libbpf: fix memleak by freeing line_info This patch fixes a memory leak in libbpf by freeing up line_info member of struct bpf_program while unloading a program. Fixes: 3d65014146c6 ("bpf: libbpf: Add btf_line_info support to libbpf") Signed-off-by: Prashant Bhole Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2bc75ee1614..169e347c76f6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -266,6 +266,7 @@ void bpf_program__unload(struct bpf_program *prog) zclose(prog->btf_fd); zfree(&prog->func_info); + zfree(&prog->line_info); } static void bpf_program__exit(struct bpf_program *prog) -- cgit v1.2.3 From 0d7410ea6efcdfda56773999f692bbd5d4e4bc00 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 17 Dec 2018 17:31:57 -0800 Subject: tools/bpf: check precise {func, line, jited_line}_info_rec_size in test_btf Current btf func_info, line_info and jited_line are designed to be extensible. The record sizes for {func,line}_info are passed to kernel, and the record sizes for {func,line,jited_line}_info are returned to userspace during bpf_prog_info query. In bpf selftests test_btf.c, when testing whether kernel returns a legitimate {func,line, jited_line)_info rec_size, the test only compares to the minimum allowed size. If the returned rec_size is smaller than the minimum allowed size, it is considered incorrect. The minimum allowed size for these three info sizes are equal to current value of sizeof(struct bpf_func_info), sizeof(struct bpf_line_info) and sizeof(__u64). The original thinking was that in the future when rec_size is increased in kernel, the same test should run correctly. But this sacrificed the precision of testing under the very kernel the test is shipped with, and bpf selftest is typically run with the same repo kernel. So this patch changed the testing of rec_size such that the kernel returned value should be equal to the size defined by tools uapi header bpf.h which syncs with kernel uapi header. Martin discovered a bug in one of rec_size comparisons. Instead of comparing to minimum func_info rec_size 8, it compares to 4. This patch fixed that issue as well. Fixes: 999d82cbc044 ("tools/bpf: enhance test_btf file testing to test func info") Fixes: 05687352c600 ("bpf: Refactor and bug fix in test_func_type in test_btf.c") Fixes: 4d6304c76355 ("bpf: Add unit tests for bpf_line_info") Suggested-by: Martin KaFai Lau Acked-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_btf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index b783aa2212cc..8024b7d4c354 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -3408,7 +3408,7 @@ static int do_test_file(unsigned int test_num) goto done; } rec_size = info.func_info_rec_size; - if (CHECK(rec_size < 4, + if (CHECK(rec_size != sizeof(struct bpf_func_info), "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) { err = -1; goto done; @@ -4544,7 +4544,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, } rec_size = info.func_info_rec_size; - if (CHECK(rec_size < 8, + if (CHECK(rec_size != sizeof(struct bpf_func_info), "incorrect info.func_info_rec_size (1st) %d", rec_size)) { return -1; } @@ -4573,7 +4573,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, err = -1; goto done; } - if (CHECK(info.func_info_rec_size < 8, + if (CHECK(info.func_info_rec_size != rec_size, "incorrect info.func_info_rec_size (2nd) %d", info.func_info_rec_size)) { err = -1; @@ -4649,8 +4649,8 @@ static int test_get_linfo(const struct prog_info_raw_test *test, goto done; } - if (CHECK(info.line_info_rec_size < 16 || - info.jited_line_info_rec_size < 8, + if (CHECK(info.line_info_rec_size != sizeof(struct bpf_line_info) || + info.jited_line_info_rec_size != sizeof(__u64), "info: line_info_rec_size:%u(userspace expected:%u) jited_line_info_rec_size:%u(userspace expected:%u)", info.line_info_rec_size, rec_size, info.jited_line_info_rec_size, jited_rec_size)) { -- cgit v1.2.3 From be3245e22d227ad68ab97785d506561374daa028 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 18 Dec 2018 10:13:18 +0000 Subject: tools: bpftool: attempt to mount tracefs if required for tracelog cmd As a follow-up to commit 30da46b5dc3a ("tools: bpftool: add a command to dump the trace pipe"), attempt to mount the tracefs virtual file system if it is not detected on the system before trying to dump content of the tracing pipe on an invocation of "bpftool prog tracelog". Usually, tracefs in automatically mounted by debugfs when the user tries to access it (e.g. "ls /sys/kernel/debug/tracing" mounts the tracefs). So if we failed to find it, it is probably that debugfs is not here either. Therefore, we just attempt a single mount, at a location that does not involve debugfs: /sys/kernel/tracing. Suggested-by: Daniel Borkmann Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 25 ++++++++++++++++++++----- tools/bpf/bpftool/main.h | 2 ++ tools/bpf/bpftool/tracelog.c | 20 +++++++++++++------- 3 files changed, 35 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 24582e8a96fb..1bad6602a282 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -76,7 +76,8 @@ void set_max_rlimit(void) setrlimit(RLIMIT_MEMLOCK, &rinf); } -static int mnt_bpffs(const char *target, char *buff, size_t bufflen) +static int +mnt_fs(const char *target, const char *type, char *buff, size_t bufflen) { bool bind_done = false; @@ -98,15 +99,29 @@ static int mnt_bpffs(const char *target, char *buff, size_t bufflen) bind_done = true; } - if (mount("bpf", target, "bpf", 0, "mode=0700")) { - snprintf(buff, bufflen, "mount -t bpf bpf %s failed: %s", - target, strerror(errno)); + if (mount(type, target, type, 0, "mode=0700")) { + snprintf(buff, bufflen, "mount -t %s %s %s failed: %s", + type, type, target, strerror(errno)); return -1; } return 0; } +int mount_tracefs(const char *target) +{ + char err_str[ERR_MAX_LEN]; + int err; + + err = mnt_fs(target, "tracefs", err_str, ERR_MAX_LEN); + if (err) { + err_str[ERR_MAX_LEN - 1] = '\0'; + p_err("can't mount tracefs: %s", err_str); + } + + return err; +} + int open_obj_pinned(char *path, bool quiet) { int fd; @@ -162,7 +177,7 @@ int mount_bpffs_for_pin(const char *name) /* nothing to do if already mounted */ goto out_free; - err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); + err = mnt_fs(dir, "bpf", err_str, ERR_MAX_LEN); if (err) { err_str[ERR_MAX_LEN - 1] = '\0'; p_err("can't mount BPF file system to pin the object (%s): %s", diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 9487345b04a7..9e4499c926fa 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -98,6 +98,8 @@ void usage(void) __noreturn; void set_max_rlimit(void); +int mount_tracefs(const char *target); + struct pinned_obj_table { DECLARE_HASHTABLE(table, 16); }; diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c index 1fa8e513f590..2dc36dfa0896 100644 --- a/tools/bpf/bpftool/tracelog.c +++ b/tools/bpf/bpftool/tracelog.c @@ -54,7 +54,7 @@ find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt) return true; } -static bool find_tracefs_pipe(char *mnt) +static bool get_tracefs_pipe(char *mnt) { static const char * const known_mnts[] = { "/sys/kernel/debug/tracing", @@ -88,7 +88,17 @@ static bool find_tracefs_pipe(char *mnt) fclose(fp); /* The string from fscanf() might be truncated, check mnt is valid */ - if (!found || validate_tracefs_mnt(mnt, TRACEFS_MAGIC)) + if (found && validate_tracefs_mnt(mnt, TRACEFS_MAGIC)) + goto exit_found; + + p_info("could not find tracefs, attempting to mount it now"); + /* Most of the time, tracefs is automatically mounted by debugfs at + * /sys/kernel/debug/tracing when we try to access it. If we could not + * find it, it is likely that debugfs is not mounted. Let's give one + * attempt at mounting just tracefs at /sys/kernel/tracing. + */ + strcpy(mnt, known_mnts[1]); + if (mount_tracefs(mnt)) return false; exit_found: @@ -115,17 +125,13 @@ int do_tracelog(int argc, char **argv) .sa_handler = exit_tracelog }; char trace_pipe[PATH_MAX]; - bool found_trace_pipe; size_t buff_len = 0; if (json_output) jsonw_start_array(json_wtr); - found_trace_pipe = find_tracefs_pipe(trace_pipe); - if (!found_trace_pipe) { - p_err("could not find trace pipe, tracefs not mounted?"); + if (!get_tracefs_pipe(trace_pipe)) return -1; - } trace_pipe_fd = fopen(trace_pipe, "r"); if (!trace_pipe_fd) { -- cgit v1.2.3 From 33221307c3f993500a9cfc6900811058c6bfc152 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 18 Dec 2018 10:13:19 +0000 Subject: tools: bpftool: add an option to prevent auto-mount of bpffs, tracefs In order to make life easier for users, bpftool automatically attempts to mount the BPF virtual file system, if it is not mounted already, before trying to pin objects in it. Similarly, it attempts to mount tracefs if necessary before trying to dump the trace pipe to the console. While mounting file systems on-the-fly can improve user experience, some administrators might prefer to avoid that. Let's add an option to block these mount attempts. Note that it does not prevent automatic mounting of tracefs by debugfs for the "bpftool prog tracelog" command. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 4 ++++ tools/bpf/bpftool/Documentation/bpftool-prog.rst | 4 ++++ tools/bpf/bpftool/Documentation/bpftool.rst | 4 ++++ tools/bpf/bpftool/common.c | 6 ++++++ tools/bpf/bpftool/main.c | 8 +++++++- tools/bpf/bpftool/main.h | 4 +++- tools/bpf/bpftool/tracelog.c | 3 +++ 7 files changed, 31 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 3221be1b9ccc..64b001b4f777 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -128,6 +128,10 @@ OPTIONS -f, --bpffs Show file names of pinned maps. + -n, --nomount + Do not automatically attempt to mount any virtual file system + (such as tracefs or BPF virtual file system) when necessary. + EXAMPLES ======== **# bpftool map show** diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 53920ffc48dd..58c8369b77dd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -161,6 +161,10 @@ OPTIONS -m, --mapcompat Allow loading maps with unknown map definitions. + -n, --nomount + Do not automatically attempt to mount any virtual file system + (such as tracefs or BPF virtual file system) when necessary. + EXAMPLES ======== **# bpftool prog show** diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 129b7a9c0f9b..e1677e81ed59 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -60,6 +60,10 @@ OPTIONS -m, --mapcompat Allow loading maps with unknown map definitions. + -n, --nomount + Do not automatically attempt to mount any virtual file system + (such as tracefs or BPF virtual file system) when necessary. + SEE ALSO ======== diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 1bad6602a282..897483457bf0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -177,6 +177,12 @@ int mount_bpffs_for_pin(const char *name) /* nothing to do if already mounted */ goto out_free; + if (block_mount) { + p_err("no BPF file system found, not mounting it due to --nomount option"); + err = -1; + goto out_free; + } + err = mnt_fs(dir, "bpf", err_str, ERR_MAX_LEN); if (err) { err_str[ERR_MAX_LEN - 1] = '\0'; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 9e657e7d5172..f44a1c2c4ea0 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -24,6 +24,7 @@ json_writer_t *json_wtr; bool pretty_output; bool json_output; bool show_pinned; +bool block_mount; int bpf_flags; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; @@ -313,6 +314,7 @@ int main(int argc, char **argv) { "version", no_argument, NULL, 'V' }, { "bpffs", no_argument, NULL, 'f' }, { "mapcompat", no_argument, NULL, 'm' }, + { "nomount", no_argument, NULL, 'n' }, { 0 } }; int opt, ret; @@ -321,13 +323,14 @@ int main(int argc, char **argv) pretty_output = false; json_output = false; show_pinned = false; + block_mount = false; bin_name = argv[0]; hash_init(prog_table.table); hash_init(map_table.table); opterr = 0; - while ((opt = getopt_long(argc, argv, "Vhpjfm", + while ((opt = getopt_long(argc, argv, "Vhpjfmn", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -354,6 +357,9 @@ int main(int argc, char **argv) case 'm': bpf_flags = MAPS_RELAX_COMPAT; break; + case 'n': + block_mount = true; + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 9e4499c926fa..052c91d4dc55 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -44,7 +44,8 @@ #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} | {-m|--mapcompat}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \ + "\t {-m|--mapcompat} | {-n|--nomount} }" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE }" @@ -85,6 +86,7 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; extern bool show_pinned; +extern bool block_mount; extern int bpf_flags; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c index 2dc36dfa0896..e80a5c79b38f 100644 --- a/tools/bpf/bpftool/tracelog.c +++ b/tools/bpf/bpftool/tracelog.c @@ -91,6 +91,9 @@ static bool get_tracefs_pipe(char *mnt) if (found && validate_tracefs_mnt(mnt, TRACEFS_MAGIC)) goto exit_found; + if (block_mount) + return false; + p_info("could not find tracefs, attempting to mount it now"); /* Most of the time, tracefs is automatically mounted by debugfs at * /sys/kernel/debug/tracing when we try to access it. If we could not -- cgit v1.2.3 From 0bae2d4d62d523f06ff1a8e88ce38b45400acd28 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 15 Dec 2018 03:34:40 -0500 Subject: bpf: correct slot_type marking logic to allow more stack slot sharing Verifier is supposed to support sharing stack slot allocated to ptr with SCALAR_VALUE for privileged program. However this doesn't happen for some cases. The reason is verifier is not clearing slot_type STACK_SPILL for all bytes, it only clears part of them, while verifier is using: slot_type[0] == STACK_SPILL as a convention to check one slot is ptr type. So, the consequence of partial clearing slot_type is verifier could treat a partially overridden ptr slot, which should now be a SCALAR_VALUE slot, still as ptr slot, and rejects some valid programs. Before this patch, test_xdp_noinline.o under bpf selftests, bpf_lxc.o and bpf_netdev.o under Cilium bpf repo, when built with -mattr=+alu32 are rejected due to this issue. After this patch, they all accepted. There is no processed insn number change before and after this patch on Cilium bpf programs. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Reviewed-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 5 +++++ tools/testing/selftests/bpf/test_verifier.c | 34 +++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0125731e2512..e0e77ffeefb8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1286,6 +1286,10 @@ static int check_stack_write(struct bpf_verifier_env *env, /* regular write of data into stack destroys any spilled ptr */ state->stack[spi].spilled_ptr.type = NOT_INIT; + /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ + if (state->stack[spi].slot_type[0] == STACK_SPILL) + for (i = 0; i < BPF_REG_SIZE; i++) + state->stack[spi].slot_type[i] = STACK_MISC; /* only mark the slot as written if all 8 bytes were written * otherwise read propagation may incorrectly stop too soon @@ -1303,6 +1307,7 @@ static int check_stack_write(struct bpf_verifier_env *env, register_is_null(&cur->regs[value_regno])) type = STACK_ZERO; + /* Mark slots affected by this stack write. */ for (i = 0; i < size; i++) state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index f9de7fe0c26d..cf242734e2eb 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1001,14 +1001,44 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), /* mess up with R1 pointer on stack */ BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23), - /* fill back into R0 should fail */ + /* fill back into R0 is fine for priv. + * R0 now becomes SCALAR_VALUE. + */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + /* Load from R0 should fail. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), BPF_EXIT_INSN(), }, .errstr_unpriv = "attempt to corrupt spilled", - .errstr = "corrupted spill", + .errstr = "R0 invalid mem access 'inv", .result = REJECT, }, + { + "check corrupted spill/fill, LSB", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + }, + { + "check corrupted spill/fill, MSB", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + }, { "invalid src register in STX", .insns = { -- cgit v1.2.3 From 584e46813e689ff1a348343b73122c43d81d0ef7 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 16 Dec 2018 15:47:05 -0800 Subject: bpf: add tools lib/include support sk_msg_md size field Add the size field to sk_msg_md for tools. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1d324c2cbca2..91c43884f295 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2665,6 +2665,7 @@ struct sk_msg_md { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 size; /* Total size of sk_msg */ }; struct sk_reuseport_md { -- cgit v1.2.3 From 945a47d87cee24a95e11fdc0cd868b872f9b9616 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 16 Dec 2018 15:47:06 -0800 Subject: bpf: sk_msg, add tests for size field This adds tests to read the size field to test_verifier. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index cf242734e2eb..7865b94c02c4 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1843,10 +1843,20 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SK_SKB, }, { - "invalid 64B read of family in SK_MSG", + "valid access size in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, size)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "invalid 64B read of size in SK_MSG", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, family)), + offsetof(struct sk_msg_md, size)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", @@ -1857,7 +1867,7 @@ static struct bpf_test tests[] = { "invalid read past end of SK_MSG", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, local_port) + 4), + offsetof(struct sk_msg_md, size) + 4), BPF_EXIT_INSN(), }, .errstr = "R0 !read_ok", -- cgit v1.2.3 From 489c25f9a375ba1e780dcf445ca74f6b3766d1b5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 19 Dec 2018 06:08:51 +0000 Subject: selftests: mlxsw: Add rtnetlink tests Add a new test that is focused on rtnetlink configuration. Its purpose is to test valid and invalid (as deemed by mlxsw) configurations and make sure that they succeed / fail without producing a trace. Some of the test cases are derived from recent fixes in order to make sure that the fixed bugs are not introduced again. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 392 +++++++++++++++++++++ 1 file changed, 392 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 7f78b96279f3..9040bfbddfba 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -13,9 +13,23 @@ ALL_TESTS=" rif_set_addr_test rif_inherit_bridge_addr_test rif_non_inherit_bridge_addr_test + vlan_interface_deletion_test + bridge_deletion_test + bridge_vlan_flags_test + vlan_1_test + lag_bridge_upper_test + duplicate_vlans_test + vlan_rif_refcount_test + subport_rif_refcount_test + vlan_dev_deletion_test + lag_unlink_slaves_test + lag_dev_deletion_test + vlan_interface_uppers_test + devlink_reload_test " NUM_NETIFS=2 source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh setup_prepare() { @@ -160,6 +174,384 @@ rif_non_inherit_bridge_addr_test() ip addr del dev $swp1 192.0.2.1/28 } +vlan_interface_deletion_test() +{ + # Test that when a VLAN interface is deleted, its associated router + # interface (RIF) is correctly deleted and not leaked. See commit + # c360867ec46a ("mlxsw: spectrum: Delete RIF when VLAN device is + # removed") for more details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + ip link del dev br0.10 + + # If we leaked the previous RIF, then this should produce a trace + ip link add link br0 name br0.20 type vlan id 20 + ip -6 address add 2001:db8:1::1/64 dev br0.20 + ip link del dev br0.20 + + log_test "vlan interface deletion" + + ip link del dev br0 +} + +bridge_deletion_test() +{ + # Test that when a bridge with VLAN interfaces is deleted, we correctly + # delete the associated RIFs. See commit 602b74eda813 ("mlxsw: + # spectrum_switchdev: Do not leak RIFs when removing bridge") for more + # details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + ip -6 address add 2001:db8::1/64 dev br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + + ip link add link br0 name br0.20 type vlan id 20 + ip -6 address add 2001:db8:2::1/64 dev br0.20 + + ip link del dev br0 + + # If we leaked previous RIFs, then this should produce a trace + ip -6 address add 2001:db8:1::1/64 dev $swp1 + ip -6 address del 2001:db8:1::1/64 dev $swp1 + + log_test "bridge deletion" +} + +bridge_vlan_flags_test() +{ + # Test that when bridge VLAN flags are toggled, we do not take + # unnecessary references on related structs. See commit 9e25826ffc94 + # ("mlxsw: spectrum_switchdev: Fix port_vlan refcounting") for more + # details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + bridge vlan add vid 10 dev $swp1 pvid untagged + bridge vlan add vid 10 dev $swp1 untagged + bridge vlan add vid 10 dev $swp1 pvid + bridge vlan add vid 10 dev $swp1 + ip link del dev br0 + + # If we did not handle references correctly, then this should produce a + # trace + devlink dev reload "$DEVLINK_DEV" + + # Allow netdevices to be re-created following the reload + sleep 20 + + log_test "bridge vlan flags" +} + +vlan_1_test() +{ + # Test that VLAN 1 cannot be configured, as it is used internally for + # untagged traffic. See commit 47bf9df2e820 ("mlxsw: spectrum: Forbid + # creation of VLAN 1 over port/LAG") for more details + RET=0 + + ip link add link $swp1 name $swp1.1 type vlan id 1 &> /dev/null + check_fail $? "managed to create vlan 1 when should not" + + log_test "vlan 1" +} + +lag_bridge_upper_test() +{ + # Test that ports cannot be enslaved to LAG devices that have uppers + # and that failure is handled gracefully. See commit b3529af6bb0d + # ("spectrum: Reference count VLAN entries") for more details + RET=0 + + ip link add name bond1 type bond mode 802.3ad + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev bond1 master br0 + + ip link set dev $swp1 down + ip link set dev $swp1 master bond1 &> /dev/null + check_fail $? "managed to enslave port to lag when should not" + + # This might generate a trace, if we did not handle the failure + # correctly + ip -6 address add 2001:db8:1::1/64 dev $swp1 + ip -6 address del 2001:db8:1::1/64 dev $swp1 + + log_test "lag with bridge upper" + + ip link del dev br0 + ip link del dev bond1 +} + +duplicate_vlans_test() +{ + # Test that on a given port a VLAN is only used once. Either as VLAN + # in a VLAN-aware bridge or as a VLAN device + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 + + ip link add link $swp1 name $swp1.10 type vlan id 10 &> /dev/null + check_fail $? "managed to create vlan device when should not" + + bridge vlan del vid 10 dev $swp1 + ip link add link $swp1 name $swp1.10 type vlan id 10 + check_err $? "did not manage to create vlan device when should" + bridge vlan add vid 10 dev $swp1 &> /dev/null + check_fail $? "managed to add bridge vlan when should not" + + log_test "duplicate vlans" + + ip link del dev $swp1.10 + ip link del dev br0 +} + +vlan_rif_refcount_test() +{ + # Test that RIFs representing VLAN interfaces are not affected from + # ports member in the VLAN. We use the offload indication on routes + # configured on the RIF to understand if it was created / destroyed + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link set dev $swp1 up + ip link set dev br0 up + + ip link add link br0 name br0.10 up type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_err $? "vlan rif was not created before adding port to vlan" + + bridge vlan add vid 10 dev $swp1 + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_err $? "vlan rif was destroyed after adding port to vlan" + + bridge vlan del vid 10 dev $swp1 + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_err $? "vlan rif was destroyed after removing port from vlan" + + ip link set dev $swp1 nomaster + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_fail $? "vlan rif was not destroyed after unlinking port from bridge" + + log_test "vlan rif refcount" + + ip link del dev br0.10 + ip link set dev $swp1 down + ip link del dev br0 +} + +subport_rif_refcount_test() +{ + # Test that RIFs representing upper devices of physical ports are + # reference counted correctly and destroyed when should. We use the + # offload indication on routes configured on the RIF to understand if + # it was created / destroyed + RET=0 + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link set dev bond1 up + ip link add link bond1 name bond1.10 up type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev bond1 + ip -6 address add 2001:db8:2::1/64 dev bond1.10 + + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + check_err $? "subport rif was not created on lag device" + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + check_err $? "subport rif was not created on vlan device" + + ip link set dev $swp1 nomaster + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + check_err $? "subport rif of lag device was destroyed when should not" + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + check_err $? "subport rif of vlan device was destroyed when should not" + + ip link set dev $swp2 nomaster + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + check_fail $? "subport rif of lag device was not destroyed when should" + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + check_fail $? "subport rif of vlan device was not destroyed when should" + + log_test "subport rif refcount" + + ip link del dev bond1.10 + ip link del dev bond1 +} + +vlan_dev_deletion_test() +{ + # Test that VLAN devices are correctly deleted / unlinked when enslaved + # to bridge + RET=0 + + ip link add name br10 type bridge + ip link add name br20 type bridge + ip link add name br30 type bridge + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip link add link $swp1 name $swp1.20 type vlan id 20 + ip link add link $swp1 name $swp1.30 type vlan id 30 + ip link set dev $swp1.10 master br10 + ip link set dev $swp1.20 master br20 + ip link set dev $swp1.30 master br30 + + # If we did not handle the situation correctly, then these operations + # might produce a trace + ip link set dev $swp1.30 nomaster + ip link del dev $swp1.20 + # Deletion via ioctl uses different code paths from netlink + vconfig rem $swp1.10 &> /dev/null + + log_test "vlan device deletion" + + ip link del dev $swp1.30 + ip link del dev br30 + ip link del dev br20 + ip link del dev br10 +} + +lag_create() +{ + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link add link bond1 name bond1.10 type vlan id 10 + ip link add link bond1 name bond1.20 type vlan id 20 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev bond1 master br0 + + ip link add name br10 type bridge + ip link set dev bond1.10 master br10 + + ip link add name br20 type bridge + ip link set dev bond1.20 master br20 +} + +lag_unlink_slaves_test() +{ + # Test that ports are correctly unlinked from their LAG master, when + # the LAG and its VLAN uppers are enslaved to bridges + RET=0 + + lag_create + + ip link set dev $swp1 nomaster + check_err $? "lag slave $swp1 was not unlinked from master" + ip link set dev $swp2 nomaster + check_err $? "lag slave $swp2 was not unlinked from master" + + # Try to configure corresponding VLANs as router interfaces + ip -6 address add 2001:db8:1::1/64 dev $swp1 + check_err $? "failed to configure ip address on $swp1" + + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip -6 address add 2001:db8:10::1/64 dev $swp1.10 + check_err $? "failed to configure ip address on $swp1.10" + + ip link add link $swp1 name $swp1.20 type vlan id 20 + ip -6 address add 2001:db8:20::1/64 dev $swp1.20 + check_err $? "failed to configure ip address on $swp1.20" + + log_test "lag slaves unlinking" + + ip link del dev $swp1.20 + ip link del dev $swp1.10 + ip address flush dev $swp1 + + ip link del dev br20 + ip link del dev br10 + ip link del dev br0 + ip link del dev bond1 +} + +lag_dev_deletion_test() +{ + # Test that LAG device is correctly deleted, when the LAG and its VLAN + # uppers are enslaved to bridges + RET=0 + + lag_create + + ip link del dev bond1 + + log_test "lag device deletion" + + ip link del dev br20 + ip link del dev br10 + ip link del dev br0 +} + +vlan_interface_uppers_test() +{ + # Test that uppers of a VLAN interface are correctly sanitized + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip link add link br0.10 name macvlan0 \ + type macvlan mode private &> /dev/null + check_fail $? "managed to create a macvlan when should not" + + ip -6 address add 2001:db8:1::1/64 dev br0.10 + ip link add link br0.10 name macvlan0 type macvlan mode private + check_err $? "did not manage to create a macvlan when should" + + ip link del dev macvlan0 + + ip link add name vrf-test type vrf table 10 + ip link set dev br0.10 master vrf-test + check_err $? "did not manage to enslave vlan interface to vrf" + ip link del dev vrf-test + + ip link add name br-test type bridge + ip link set dev br0.10 master br-test &> /dev/null + check_fail $? "managed to enslave vlan interface to bridge when should not" + ip link del dev br-test + + log_test "vlan interface uppers" + + ip link del dev br0 +} + +devlink_reload_test() +{ + # Test that after executing all the above configuration tests, a + # devlink reload can be performed without errors + RET=0 + + devlink dev reload "$DEVLINK_DEV" + check_err $? "devlink reload failed" + + log_test "devlink reload - last test" + + sleep 20 +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From 9e88b9312acb9b80554c48b58668fb144720333a Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Wed, 19 Dec 2018 12:08:03 -0800 Subject: tools: bpftool: do not force gcc as CC This allows transparent cross-compilation with CROSS_COMPILE by relying on 7ed1c1901fe5 ("tools: fix cross-compile var clobbering"). Signed-off-by: Ivan Babrou Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 1bea6b979082..492f0f24e2d3 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -35,8 +35,6 @@ $(LIBBPF)-clean: prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions -CC = gcc - CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -- cgit v1.2.3 From 8deecf3557cca7d80fb5157f0858b738f8eebbc7 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 19 Dec 2018 12:51:39 -0800 Subject: selftests: rtnetlink.sh: add testcase for neigh get Signed-off-by: Roopa Prabhu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 54 ++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index bb3436f8807f..c5cbea9b317d 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1007,6 +1007,59 @@ kci_test_fdb_get() echo "PASS: bridge fdb get" } +kci_test_neigh_get() +{ + dstmac=de:ad:be:ef:13:37 + dstip=10.0.2.4 + dstip6=dead::2 + ret=0 + + ip neigh help 2>&1 |grep -q 'ip neigh get' + if [ $? -ne 0 ];then + echo "SKIP: fdb get tests: iproute2 too old" + return $ksft_skip + fi + + # ipv4 + ip neigh add $dstip lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac" + check_err $? + ip neigh del $dstip lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + + # ipv4 proxy + ip neigh add proxy $dstip dev "$devdummy" > /dev/null + check_err $? + ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip" + check_err $? + ip neigh del proxy $dstip dev "$devdummy" > /dev/null + check_err $? + + # ipv6 + ip neigh add $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac" + check_err $? + ip neigh del $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + + # ipv6 proxy + ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null + check_err $? + ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6" + check_err $? + ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: neigh get" + return 1 + fi + + echo "PASS: neigh get" +} + kci_test_rtnl() { kci_add_dummy @@ -1032,6 +1085,7 @@ kci_test_rtnl() kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get + kci_test_neigh_get kci_del_dummy } -- cgit v1.2.3 From e30f5640e32455e02ba08983ebe0b46054f1f6f0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 19 Dec 2018 13:01:02 -0800 Subject: bpf: Add BPF_LD_IMM64 to the line_info test This patch adds a BPF_LD_IMM64 case to the line_info test to ensure the kernel rejects linfo_info.insn_off pointing to the 2nd 8 bytes of the BPF_LD_IMM64. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 8024b7d4c354..8bcd38010582 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -4253,6 +4253,33 @@ static struct prog_info_raw_test { .expected_prog_load_failure = true, }, +{ + .descr = "line_info (Zero bpf insn code)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0unsigned long\0u64\0u64 a=1;\0return a;"), + .insns = { + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, 0, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .err_str = "Invalid insn code at line_info[1]", + .expected_prog_load_failure = true, +}, + { .descr = "line_info (No subprog. zero tailing line_info", .raw_types = { -- cgit v1.2.3 From 3f2eadb10886340f4d67fe683c2e53b4df005461 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 19 Dec 2018 10:20:09 -0800 Subject: selftests: net: refactor reuseport_addr_any test This patch refactors reuseport_add_any selftest a bit: - makes it more modular (eliminates several copy/pasted blocks); - skips DCCP tests if DCCP is not supported V2: added "Signed-off-by" tag. Signed-off-by: Peter Oskolkov Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_addr_any.c | 121 +++++++---------------- 1 file changed, 38 insertions(+), 83 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index 4ac3e24b7cfc..6f54d425dba9 100644 --- a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -203,7 +203,9 @@ static void test(int *rcv_fds, int count, int family, int proto, int fd) close(epfd); } -int main(void) + +static void run_one_test(int fam_send, int fam_rcv, int proto, + const char *addr_str) { /* Below we test that a socket listening on a specific address * is always selected in preference over a socket listening @@ -214,95 +216,48 @@ int main(void) */ int rcv_fds[10], i; - fprintf(stderr, "---- UDP IPv4 ----\n"); - build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL); - build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 4, 1, IP4_ADDR); - build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL); - test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]); - for (i = 0; i < 9; ++i) - close(rcv_fds[i]); - - fprintf(stderr, "---- UDP IPv6 ----\n"); - build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP6_ADDR); - build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL); - test(rcv_fds, 9, AF_INET6, SOCK_DGRAM, rcv_fds[4]); - for (i = 0; i < 9; ++i) - close(rcv_fds[i]); - - fprintf(stderr, "---- UDP IPv4 mapped to IPv6 ----\n"); - build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 2, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 4, 1, IP4_MAPPED6); - build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds + 5, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds + 7, 2, NULL); - test(rcv_fds, 9, AF_INET, SOCK_DGRAM, rcv_fds[4]); - for (i = 0; i < 9; ++i) - close(rcv_fds[i]); - - fprintf(stderr, "---- TCP IPv4 ----\n"); - build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL); - build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 4, 1, IP4_ADDR); - build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL); - test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]); + build_rcv_fd(AF_INET, proto, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, proto, rcv_fds + 2, 2, NULL); + build_rcv_fd(fam_rcv, proto, rcv_fds + 4, 1, addr_str); + build_rcv_fd(AF_INET, proto, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, proto, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, fam_send, proto, rcv_fds[4]); for (i = 0; i < 9; ++i) close(rcv_fds[i]); + fprintf(stderr, "pass\n"); +} - fprintf(stderr, "---- TCP IPv6 ----\n"); - build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP6_ADDR); - build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL); - test(rcv_fds, 9, AF_INET6, SOCK_STREAM, rcv_fds[4]); - for (i = 0; i < 9; ++i) - close(rcv_fds[i]); +static void test_proto(int proto, const char *proto_str) +{ + if (proto == SOCK_DCCP) { + int test_fd; + + test_fd = socket(AF_INET, proto, 0); + if (test_fd < 0) { + if (errno == ESOCKTNOSUPPORT) { + fprintf(stderr, "DCCP not supported: skipping DCCP tests\n"); + return; + } else + error(1, errno, "failed to create a DCCP socket"); + } + close(test_fd); + } - fprintf(stderr, "---- TCP IPv4 mapped to IPv6 ----\n"); - build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 2, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 4, 1, IP4_MAPPED6); - build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds + 5, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds + 7, 2, NULL); - test(rcv_fds, 9, AF_INET, SOCK_STREAM, rcv_fds[4]); - for (i = 0; i < 9; ++i) - close(rcv_fds[i]); + fprintf(stderr, "%s IPv4 ... ", proto_str); + run_one_test(AF_INET, AF_INET, proto, IP4_ADDR); - fprintf(stderr, "---- DCCP IPv4 ----\n"); - build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL); - build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 4, 1, IP4_ADDR); - build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL); - test(rcv_fds, 9, AF_INET, SOCK_DCCP, rcv_fds[4]); - for (i = 0; i < 9; ++i) - close(rcv_fds[i]); + fprintf(stderr, "%s IPv6 ... ", proto_str); + run_one_test(AF_INET6, AF_INET6, proto, IP6_ADDR); - fprintf(stderr, "---- DCCP IPv6 ----\n"); - build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 4, 1, IP6_ADDR); - build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL); - test(rcv_fds, 9, AF_INET6, SOCK_DCCP, rcv_fds[4]); - for (i = 0; i < 9; ++i) - close(rcv_fds[i]); + fprintf(stderr, "%s IPv4 mapped to IPv6 ... ", proto_str); + run_one_test(AF_INET, AF_INET6, proto, IP4_MAPPED6); +} - fprintf(stderr, "---- DCCP IPv4 mapped to IPv6 ----\n"); - build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 2, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 4, 1, IP4_MAPPED6); - build_rcv_fd(AF_INET, SOCK_DCCP, rcv_fds + 5, 2, NULL); - build_rcv_fd(AF_INET6, SOCK_DCCP, rcv_fds + 7, 2, NULL); - test(rcv_fds, 9, AF_INET, SOCK_DCCP, rcv_fds[4]); - for (i = 0; i < 9; ++i) - close(rcv_fds[i]); +int main(void) +{ + test_proto(SOCK_DGRAM, "UDP"); + test_proto(SOCK_STREAM, "TCP"); + test_proto(SOCK_DCCP, "DCCP"); fprintf(stderr, "SUCCESS\n"); return 0; -- cgit v1.2.3 From 5a8d5209ac0228c9cf8f335dfdfffa71c54b3201 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 19 Dec 2018 22:13:03 -0800 Subject: selftests: bpf: add trivial JSET tests We seem to have no JSET instruction test, and LLVM does not generate it at all, so let's add a simple hand-coded test to make sure JIT implementations are correct. v2: - extend test_verifier to handle multiple inputs and add the sample there (Daniel) - add a sign extension case Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 209 +++++++++++++++++++++++----- 1 file changed, 178 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 7865b94c02c4..580fc9429147 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -49,6 +49,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_FIXUPS 8 #define MAX_NR_MAPS 13 +#define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -86,6 +87,14 @@ struct bpf_test { uint8_t flags; __u8 data[TEST_DATA_LEN]; void (*fill_helper)(struct bpf_test *self); + uint8_t runs; + struct { + uint32_t retval, retval_unpriv; + union { + __u8 data[TEST_DATA_LEN]; + __u64 data64[TEST_DATA_LEN / 8]; + }; + } retvals[MAX_TEST_RUNS]; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -14161,6 +14170,101 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R1 leaks addr", .result = REJECT, }, + { + "jset: functional", + .insns = { + /* r0 = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* prep for direct packet access via r2 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1), + BPF_EXIT_INSN(), + + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + + /* reg, bit 63 or bit 0 set, taken */ + BPF_LD_IMM64(BPF_REG_8, 0x8000000000000001), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + + /* reg, bit 62, not taken */ + BPF_LD_IMM64(BPF_REG_8, 0x4000000000000000), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + + /* imm, any bit set, taken */ + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + + /* imm, bit 31 set, taken */ + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), + BPF_EXIT_INSN(), + + /* all good - return r0 == 2 */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 7, + .retvals = { + { .retval = 2, + .data64 = { (1ULL << 63) | (1U << 31) | (1U << 0), } + }, + { .retval = 2, + .data64 = { (1ULL << 63) | (1U << 31), } + }, + { .retval = 2, + .data64 = { (1ULL << 31) | (1U << 0), } + }, + { .retval = 2, + .data64 = { (__u32)-1, } + }, + { .retval = 2, + .data64 = { ~0x4000000000000000ULL, } + }, + { .retval = 0, + .data64 = { 0, } + }, + { .retval = 0, + .data64 = { ~0ULL, } + }, + }, + }, + { + "jset: sign-extend", + .insns = { + /* r0 = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* prep for direct packet access via r2 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1), + BPF_EXIT_INSN(), + + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + .data = { 1, 0, 0, 0, 0, 0, 0, 1, }, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -14443,16 +14547,42 @@ out: return ret; } +static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, + void *data, size_t size_data) +{ + __u8 tmp[TEST_DATA_LEN << 2]; + __u32 size_tmp = sizeof(tmp); + uint32_t retval; + int err; + + if (unpriv) + set_admin(true); + err = bpf_prog_test_run(fd_prog, 1, data, size_data, + tmp, &size_tmp, &retval, NULL); + if (unpriv) + set_admin(false); + if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { + printf("Unexpected bpf_prog_test_run error "); + return err; + } + if (!err && retval != expected_val && + expected_val != POINTER_VALUE) { + printf("FAIL retval %d != %d ", retval, expected_val); + return 1; + } + + return 0; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { int fd_prog, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; + int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; - uint32_t expected_val; - uint32_t retval; __u32 pflags; int i, err; @@ -14476,8 +14606,6 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? test->errstr_unpriv : test->errstr; - expected_val = unpriv && test->retval_unpriv ? - test->retval_unpriv : test->retval; alignment_prevented_execution = 0; @@ -14489,10 +14617,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS if (fd_prog >= 0 && - (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) { + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) alignment_prevented_execution = 1; - goto test_ok; - } #endif } else { if (fd_prog >= 0) { @@ -14519,33 +14645,54 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } } - if (fd_prog >= 0) { - __u8 tmp[TEST_DATA_LEN << 2]; - __u32 size_tmp = sizeof(tmp); - - if (unpriv) - set_admin(true); - err = bpf_prog_test_run(fd_prog, 1, test->data, - sizeof(test->data), tmp, &size_tmp, - &retval, NULL); - if (unpriv) - set_admin(false); - if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { - printf("Unexpected bpf_prog_test_run error\n"); - goto fail_log; + run_errs = 0; + run_successes = 0; + if (!alignment_prevented_execution && fd_prog >= 0) { + uint32_t expected_val; + int i; + + if (!test->runs) { + expected_val = unpriv && test->retval_unpriv ? + test->retval_unpriv : test->retval; + + err = do_prog_test_run(fd_prog, unpriv, expected_val, + test->data, sizeof(test->data)); + if (err) + run_errs++; + else + run_successes++; } - if (!err && retval != expected_val && - expected_val != POINTER_VALUE) { - printf("FAIL retval %d != %d\n", retval, expected_val); - goto fail_log; + + for (i = 0; i < test->runs; i++) { + if (unpriv && test->retvals[i].retval_unpriv) + expected_val = test->retvals[i].retval_unpriv; + else + expected_val = test->retvals[i].retval; + + err = do_prog_test_run(fd_prog, unpriv, expected_val, + test->retvals[i].data, + sizeof(test->retvals[i].data)); + if (err) { + printf("(run %d/%d) ", i + 1, test->runs); + run_errs++; + } else { + run_successes++; + } } } -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -test_ok: -#endif - (*passes)++; - printf("OK%s\n", alignment_prevented_execution ? - " (NOTE: not executed due to unknown alignment)" : ""); + + if (!run_errs) { + (*passes)++; + if (run_successes > 1) + printf("%d cases ", run_successes); + printf("OK"); + if (alignment_prevented_execution) + printf(" (NOTE: not executed due to unknown alignment)"); + printf("\n"); + } else { + printf("\n"); + goto fail_log; + } close_fds: close(fd_prog); for (i = 0; i < MAX_NR_MAPS; i++) -- cgit v1.2.3 From 14507e35bd9dedcd14c24f9e5e0e0dd48e972e53 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 19 Dec 2018 22:13:05 -0800 Subject: selftests: bpf: verifier: add tests for JSET interpretation Validate that the verifier reasons correctly about the bounds and removes dead code based on results of JSET instruction. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 96 +++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 580fc9429147..b246931c46ef 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -14265,6 +14265,102 @@ static struct bpf_test tests[] = { .retval = 2, .data = { 1, 0, 0, 0, 0, 0, 0, 1, }, }, + { + "jset: known const compare", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .retval_unpriv = 1, + .result_unpriv = ACCEPT, + .retval = 1, + .result = ACCEPT, + }, + { + "jset: known const compare bad", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: unknown const compare taken", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: unknown const compare not taken", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: half-known const compare", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .result_unpriv = ACCEPT, + .result = ACCEPT, + }, + { + "jset: range", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff), + BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3), + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .result_unpriv = ACCEPT, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 489c066cfdf1cad776c5e041f32a2e3cdb05050a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 19 Dec 2018 22:13:09 -0800 Subject: selftests: bpf: add missing executables to .gitignore commit 435f90a338ae ("selftests/bpf: add a test case for sock_ops perf-event notification") missed adding new test to gitignore. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 1b799e30c06d..4a9785043a39 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -27,3 +27,4 @@ test_flow_dissector flow_dissector_load test_netcnt test_section_names +test_tcpnotify_user -- cgit v1.2.3 From 9ee79a65d176e5815877aa148acc956e4cc90c53 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 20 Dec 2018 11:35:29 -0800 Subject: bpf: sk_msg, fix sk_msg_md access past end test Currently, the test to ensure reads past the end of the sk_msg_md data structure fail is incorrectly expecting success. Fix this typo and use correct expected error. Fixes: 945a47d87cee ("bpf: sk_msg, add tests for size field") Reported-by: Alexei Starovoitov Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b246931c46ef..dbd31750b214 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1879,7 +1879,7 @@ static struct bpf_test tests[] = { offsetof(struct sk_msg_md, size) + 4), BPF_EXIT_INSN(), }, - .errstr = "R0 !read_ok", + .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, }, -- cgit v1.2.3 From 9d15dceb8a36919897d52ec8e4a6e6efa57f9f19 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 20 Dec 2018 19:42:25 +0000 Subject: selftests: mlxsw: Add a test case for L3 VNI Previous patch added the ability to offload a VXLAN tunnel used for L3 VNI when it is present in the VLAN-aware bridge before the corresponding VLAN interface is configured. This patch adds a test case to verify that. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/vxlan.sh | 60 ++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index ea11535f5a6e..dcf9f4e913e0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -1021,6 +1021,65 @@ offload_indication_vlan_aware_join_vxlan_last() ip link del dev br0 } +offload_indication_vlan_aware_l3vni_test() +{ + local zmac=00:00:00:00:00:00 + + RET=0 + + sysctl_set net.ipv6.conf.default.disable_ipv6 1 + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip link set dev $swp1 master br0 + + # The test will use the offload indication on the FDB entry to + # understand if the tunnel is offloaded or not + bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1 + + ip link set dev vxlan0 master br0 + bridge vlan add dev vxlan0 vid 10 pvid untagged + + # No local port or router port is member in the VLAN, so tunnel should + # not be offloaded + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_fail $? "vxlan tunnel offloaded when should not" + + # Configure a VLAN interface and make sure tunnel is offloaded + ip link add link br0 name br10 up type vlan id 10 + sysctl_set net.ipv6.conf.br10.disable_ipv6 0 + ip -6 address add 2001:db8:1::1/64 dev br10 + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_err $? "vxlan tunnel not offloaded when should" + + # Unlink the VXLAN device, make sure tunnel is no longer offloaded, + # then add it back to the bridge and make sure it is offloaded + ip link set dev vxlan0 nomaster + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_fail $? "vxlan tunnel offloaded after unlinked from bridge" + + ip link set dev vxlan0 master br0 + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_fail $? "vxlan tunnel offloaded despite no matching vid" + + bridge vlan add dev vxlan0 vid 10 pvid untagged + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_err $? "vxlan tunnel not offloaded after adding vid" + + log_test "vxlan - l3 vni" + + ip link del dev vxlan0 + ip link del dev br0 + sysctl_restore net.ipv6.conf.default.disable_ipv6 +} + offload_indication_vlan_aware_test() { offload_indication_vlan_aware_setup_create @@ -1031,6 +1090,7 @@ offload_indication_vlan_aware_test() log_info "offload indication - replay & cleanup - vlan aware" offload_indication_vlan_aware_join_vxlan_first offload_indication_vlan_aware_join_vxlan_last + offload_indication_vlan_aware_l3vni_test } trap cleanup EXIT -- cgit v1.2.3 From 29b1e34efd39ee9f6bc2db3fafad0e4615f15475 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 20 Dec 2018 19:42:35 +0000 Subject: selftests: mlxsw: Adjust test regarding VID 1 Previous patches made it possible to create VLAN devices with VID 1 over mlxsw ports. Adjust the test to verify such an operation succeeds. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 9040bfbddfba..94fdbf215c14 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -255,15 +255,18 @@ bridge_vlan_flags_test() vlan_1_test() { - # Test that VLAN 1 cannot be configured, as it is used internally for - # untagged traffic. See commit 47bf9df2e820 ("mlxsw: spectrum: Forbid - # creation of VLAN 1 over port/LAG") for more details + # Test that VLAN 1 can be configured over mlxsw ports. In the past it + # was used internally for untagged traffic. See commit 47bf9df2e820 + # ("mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG") for more + # details RET=0 - ip link add link $swp1 name $swp1.1 type vlan id 1 &> /dev/null - check_fail $? "managed to create vlan 1 when should not" + ip link add link $swp1 name $swp1.1 type vlan id 1 + check_err $? "did not manage to create vlan 1 when should" log_test "vlan 1" + + ip link del dev $swp1.1 } lag_bridge_upper_test() -- cgit v1.2.3 From 03a84ea3d8c537f04b862972e2132dff86a4e93d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 20 Dec 2018 19:42:37 +0000 Subject: selftests: forwarding: Add router test with VID 1 Previous patches made it possible to setup VLAN devices with VID 1 over mlxsw ports. Verify this functionality actually works by conducting a simple router test over VID 1. Adding this test as a generic test since it can be run using veth pairs and it can also be useful for other physical devices where VID 1 was considered reserved (knowingly or not). Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/net/forwarding/router_vid_1.sh | 135 +++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/router_vid_1.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh new file mode 100755 index 000000000000..a7306c7ac06d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="ping_ipv4 ping_ipv6" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev vrf-h1 up + + ip link set dev $h1 up + vlan_create $h1 1 vrf-h1 192.0.2.2/24 2001:db8:1::2/64 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + vlan_destroy $h1 1 + ip link set dev $h1 down + + ip link set dev vrf-h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev vrf-h2 up + + ip link set dev $h2 up + vlan_create $h2 1 vrf-h2 198.51.100.2/24 2001:db8:2::2/64 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + vlan_destroy $h2 1 + ip link set dev $h2 down + + ip link set dev vrf-h2 down + vrf_destroy "vrf-h2" +} + +router_create() +{ + ip link set dev $rp1 up + ip link add link $rp1 name $rp1.1 up type vlan id 1 + + ip address add 192.0.2.1/24 dev $rp1.1 + ip address add 2001:db8:1::1/64 dev $rp1.1 + + ip link set dev $rp2 up + ip link add link $rp2 name $rp2.1 up type vlan id 1 + + ip address add 198.51.100.1/24 dev $rp2.1 + ip address add 2001:db8:2::1/64 dev $rp2.1 +} + +router_destroy() +{ + ip address del 2001:db8:2::1/64 dev $rp2.1 + ip address del 198.51.100.1/24 dev $rp2.1 + + ip link del dev $rp2.1 + ip link set dev $rp2 down + + ip address del 2001:db8:1::1/64 dev $rp1.1 + ip address del 192.0.2.1/24 dev $rp1.1 + + ip link del dev $rp1.1 + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1.1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 676f4bb168ca855adb957ad7f470984d9391c320 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 20 Dec 2018 17:03:29 +0000 Subject: selftests: rtnetlink: Add a test case for multipath route get Without previous patch a warning would be generated upon multipath route get when FIB multipath hash policy is to use a 5-tuple for multipath hash calculation. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index c5cbea9b317d..78fc593dfe40 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -205,6 +205,8 @@ kci_test_polrouting() kci_test_route_get() { + local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) + ret=0 ip route get 127.0.0.1 > /dev/null @@ -223,6 +225,19 @@ kci_test_route_get() check_err $? ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null check_err $? + ip route add 10.23.8.0/24 \ + nexthop via 10.23.7.13 dev "$devdummy" \ + nexthop via 10.23.7.14 dev "$devdummy" + check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy=0 + ip route get 10.23.8.11 > /dev/null + check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy=1 + ip route get 10.23.8.11 > /dev/null + check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy" + ip route del 10.23.8.0/24 + check_err $? ip addr del dev "$devdummy" 10.23.7.11/24 check_err $? -- cgit v1.2.3 From 7085f47fcdc599cb387a472e4cd9def739954119 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 20 Dec 2018 16:22:52 -0500 Subject: selftests: expand txtimestamp with cmsg support Commit 3dd17e63f513 ("sock: accept SO_TIMESTAMPING flags in socket cmsg") added support for passing tx timestamping options per-call in sendmsg. Expand the txtimestamp test with support for this feature. Signed-off-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- .../networking/timestamping/txtimestamp.c | 67 +++++++++++++++++----- 1 file changed, 53 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index 81a98a240456..ca5fad15a855 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -73,6 +73,7 @@ static bool cfg_show_payload; static bool cfg_do_pktinfo; static bool cfg_loop_nodata; static bool cfg_no_delay; +static bool cfg_use_cmsg; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; @@ -269,8 +270,13 @@ static int recv_errmsg(int fd) return ret == -1; } -static void do_test(int family, unsigned int opt) +static void do_test(int family, unsigned int report_opt) { + char control[CMSG_SPACE(sizeof(uint32_t))]; + unsigned int sock_opt; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; char *buf; int fd, i, val = 1, total_len; @@ -321,17 +327,22 @@ static void do_test(int family, unsigned int opt) } } - opt |= SOF_TIMESTAMPING_SOFTWARE | - SOF_TIMESTAMPING_OPT_CMSG | - SOF_TIMESTAMPING_OPT_ID; + sock_opt = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_CMSG | + SOF_TIMESTAMPING_OPT_ID; + + if (!cfg_use_cmsg) + sock_opt |= report_opt; + if (cfg_loop_nodata) - opt |= SOF_TIMESTAMPING_OPT_TSONLY; + sock_opt |= SOF_TIMESTAMPING_OPT_TSONLY; if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, - (char *) &opt, sizeof(opt))) + (char *) &sock_opt, sizeof(sock_opt))) error(1, 0, "setsockopt timestamping"); for (i = 0; i < cfg_num_pkts; i++) { + memset(&msg, 0, sizeof(msg)); memset(&ts_prev, 0, sizeof(ts_prev)); memset(buf, 'a' + i, total_len); @@ -361,14 +372,38 @@ static void do_test(int family, unsigned int opt) } print_timestamp_usr(); + + iov.iov_base = buf; + iov.iov_len = total_len; + if (cfg_proto != SOCK_STREAM) { - if (family == PF_INET) - val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr)); - else - val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6)); - } else { - val = send(fd, buf, cfg_payload_len, 0); + if (family == PF_INET) { + msg.msg_name = (void *)&daddr; + msg.msg_namelen = sizeof(daddr); + } else { + msg.msg_name = (void *)&daddr6; + msg.msg_namelen = sizeof(daddr6); + } + } + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (cfg_use_cmsg) { + memset(control, 0, sizeof(control)); + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *) CMSG_DATA(cmsg)) = report_opt; } + + val = sendmsg(fd, &msg, 0); if (val != total_len) error(1, errno, "send"); @@ -396,6 +431,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -6: only IPv6\n" " -h: show this message\n" " -c N: number of packets for each test\n" + " -C: use cmsg to set tstamp recording options\n" " -D: no delay between packets\n" " -F: poll() waits forever for an event\n" " -I: request PKTINFO\n" @@ -413,9 +449,9 @@ static void __attribute__((noreturn)) usage(const char *filepath) static void parse_opt(int argc, char **argv) { int proto_count = 0; - char c; + int c; - while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) { + while ((c = getopt(argc, argv, "46c:CDFhIl:np:rRux")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -426,6 +462,9 @@ static void parse_opt(int argc, char **argv) case 'c': cfg_num_pkts = strtoul(optarg, NULL, 10); break; + case 'C': + cfg_use_cmsg = true; + break; case 'D': cfg_no_delay = true; break; -- cgit v1.2.3 From b52354aa068ee362a58ebb92e8bd0d0e18008364 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 20 Dec 2018 16:22:53 -0500 Subject: selftests: expand txtimestamp with ipv6 dgram + raw and pf_packet Expand the transmit timestamp regression test with support for missing protocols: ipv6 datagram and raw and pf_packet. Also refine resolve_hostname to independently request AF_INET or AF_INET6 addresses. Else, ipv4 addresses may be returned as AF_INET6. Signed-off-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- .../networking/timestamping/txtimestamp.c | 189 ++++++++++++++++----- 1 file changed, 146 insertions(+), 43 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index ca5fad15a855..ecc1db5dcfca 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ static bool cfg_do_pktinfo; static bool cfg_loop_nodata; static bool cfg_no_delay; static bool cfg_use_cmsg; +static bool cfg_use_pf_packet; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; @@ -195,7 +197,9 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } else if ((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) || (cm->cmsg_level == SOL_IPV6 && - cm->cmsg_type == IPV6_RECVERR)) { + cm->cmsg_type == IPV6_RECVERR) || + (cm->cmsg_level = SOL_PACKET && + cm->cmsg_type == PACKET_TX_TIMESTAMP)) { serr = (void *) CMSG_DATA(cm); if (serr->ee_errno != ENOMSG || serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { @@ -270,9 +274,89 @@ static int recv_errmsg(int fd) return ret == -1; } +static uint16_t get_ip_csum(const uint16_t *start, int num_words, + unsigned long sum) +{ + int i; + + for (i = 0; i < num_words; i++) + sum += start[i]; + + while (sum >> 16) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t get_udp_csum(const struct udphdr *udph, int alen) +{ + unsigned long pseudo_sum, csum_len; + const void *csum_start = udph; + + pseudo_sum = htons(IPPROTO_UDP); + pseudo_sum += udph->len; + + /* checksum ip(v6) addresses + udp header + payload */ + csum_start -= alen * 2; + csum_len = ntohs(udph->len) + alen * 2; + + return get_ip_csum(csum_start, csum_len >> 1, pseudo_sum); +} + +static int fill_header_ipv4(void *p) +{ + struct iphdr *iph = p; + + memset(iph, 0, sizeof(*iph)); + + iph->ihl = 5; + iph->version = 4; + iph->ttl = 2; + iph->saddr = daddr.sin_addr.s_addr; /* set for udp csum calc */ + iph->daddr = daddr.sin_addr.s_addr; + iph->protocol = IPPROTO_UDP; + + /* kernel writes saddr, csum, len */ + + return sizeof(*iph); +} + +static int fill_header_ipv6(void *p) +{ + struct ipv6hdr *ip6h = p; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(sizeof(struct udphdr) + cfg_payload_len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 64; + + ip6h->saddr = daddr6.sin6_addr; + ip6h->daddr = daddr6.sin6_addr; + + /* kernel does not write saddr in case of ipv6 */ + + return sizeof(*ip6h); +} + +static void fill_header_udp(void *p, bool is_ipv4) +{ + struct udphdr *udph = p; + + udph->source = ntohs(dest_port + 1); /* spoof */ + udph->dest = ntohs(dest_port); + udph->len = ntohs(sizeof(*udph) + cfg_payload_len); + udph->check = 0; + + udph->check = get_udp_csum(udph, is_ipv4 ? sizeof(struct in_addr) : + sizeof(struct in6_addr)); +} + static void do_test(int family, unsigned int report_opt) { char control[CMSG_SPACE(sizeof(uint32_t))]; + struct sockaddr_ll laddr; unsigned int sock_opt; struct cmsghdr *cmsg; struct msghdr msg; @@ -280,24 +364,28 @@ static void do_test(int family, unsigned int report_opt) char *buf; int fd, i, val = 1, total_len; - if (family == AF_INET6 && cfg_proto != SOCK_STREAM) { - /* due to lack of checksum generation code */ - fprintf(stderr, "test: skipping datagram over IPv6\n"); - return; - } - total_len = cfg_payload_len; - if (cfg_proto == SOCK_RAW) { + if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); - if (cfg_ipproto == IPPROTO_RAW) - total_len += sizeof(struct iphdr); + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (family == PF_INET) + total_len += sizeof(struct iphdr); + else + total_len += sizeof(struct ipv6hdr); + + /* special case, only rawv6_sendmsg: + * pass proto in sin6_port if not connected + * also see ANK comment in net/ipv4/raw.c + */ + daddr6.sin6_port = htons(cfg_ipproto); } buf = malloc(total_len); if (!buf) error(1, 0, "malloc"); - fd = socket(family, cfg_proto, cfg_ipproto); + fd = socket(cfg_use_pf_packet ? PF_PACKET : family, + cfg_proto, cfg_ipproto); if (fd < 0) error(1, errno, "socket"); @@ -346,29 +434,17 @@ static void do_test(int family, unsigned int report_opt) memset(&ts_prev, 0, sizeof(ts_prev)); memset(buf, 'a' + i, total_len); - if (cfg_proto == SOCK_RAW) { - struct udphdr *udph; + if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { int off = 0; - if (cfg_ipproto == IPPROTO_RAW) { - struct iphdr *iph = (void *) buf; - - memset(iph, 0, sizeof(*iph)); - iph->ihl = 5; - iph->version = 4; - iph->ttl = 2; - iph->daddr = daddr.sin_addr.s_addr; - iph->protocol = IPPROTO_UDP; - /* kernel writes saddr, csum, len */ - - off = sizeof(*iph); + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { + if (family == PF_INET) + off = fill_header_ipv4(buf); + else + off = fill_header_ipv6(buf); } - udph = (void *) buf + off; - udph->source = ntohs(9000); /* random spoof */ - udph->dest = ntohs(dest_port); - udph->len = ntohs(sizeof(*udph) + cfg_payload_len); - udph->check = 0; /* not allowed for IPv6 */ + fill_header_udp(buf + off, family == PF_INET); } print_timestamp_usr(); @@ -377,7 +453,17 @@ static void do_test(int family, unsigned int report_opt) iov.iov_len = total_len; if (cfg_proto != SOCK_STREAM) { - if (family == PF_INET) { + if (cfg_use_pf_packet) { + memset(&laddr, 0, sizeof(laddr)); + + laddr.sll_family = AF_PACKET; + laddr.sll_ifindex = 1; + laddr.sll_protocol = htons(family == AF_INET ? ETH_P_IP : ETH_P_IPV6); + laddr.sll_halen = ETH_ALEN; + + msg.msg_name = (void *)&laddr; + msg.msg_namelen = sizeof(laddr); + } else if (family == PF_INET) { msg.msg_name = (void *)&daddr; msg.msg_namelen = sizeof(daddr); } else { @@ -437,9 +523,10 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" " -n: set no-payload option\n" + " -p N: connect to port N\n" + " -P: use PF_PACKET\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" - " -p N: connect to port N\n" " -u: use udp\n" " -x: show payload (up to 70 bytes)\n", filepath); @@ -451,7 +538,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; int c; - while ((c = getopt(argc, argv, "46c:CDFhIl:np:rRux")) != -1) { + while ((c = getopt(argc, argv, "46c:CDFhIl:np:PrRux")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -474,9 +561,21 @@ static void parse_opt(int argc, char **argv) case 'I': cfg_do_pktinfo = true; break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 10); + break; case 'n': cfg_loop_nodata = true; break; + case 'p': + dest_port = strtoul(optarg, NULL, 10); + break; + case 'P': + proto_count++; + cfg_use_pf_packet = true; + cfg_proto = SOCK_DGRAM; + cfg_ipproto = 0; + break; case 'r': proto_count++; cfg_proto = SOCK_RAW; @@ -492,12 +591,6 @@ static void parse_opt(int argc, char **argv) cfg_proto = SOCK_DGRAM; cfg_ipproto = IPPROTO_UDP; break; - case 'l': - cfg_payload_len = strtoul(optarg, NULL, 10); - break; - case 'p': - dest_port = strtoul(optarg, NULL, 10); - break; case 'x': cfg_show_payload = true; break; @@ -514,7 +607,9 @@ static void parse_opt(int argc, char **argv) if (!do_ipv4 && !do_ipv6) error(1, 0, "pass -4 or -6, not both"); if (proto_count > 1) - error(1, 0, "pass -r, -R or -u, not multiple"); + error(1, 0, "pass -P, -r, -R or -u, not multiple"); + if (cfg_do_pktinfo && cfg_use_pf_packet) + error(1, 0, "cannot ask for pktinfo over pf_packet"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); @@ -522,10 +617,12 @@ static void parse_opt(int argc, char **argv) static void resolve_hostname(const char *hostname) { + struct addrinfo hints = { .ai_family = do_ipv4 ? AF_INET : AF_INET6 }; struct addrinfo *addrs, *cur; int have_ipv4 = 0, have_ipv6 = 0; - if (getaddrinfo(hostname, NULL, NULL, &addrs)) +retry: + if (getaddrinfo(hostname, NULL, &hints, &addrs)) error(1, errno, "getaddrinfo"); cur = addrs; @@ -545,14 +642,20 @@ static void resolve_hostname(const char *hostname) if (addrs) freeaddrinfo(addrs); + if (do_ipv6 && hints.ai_family != AF_INET6) { + hints.ai_family = AF_INET6; + goto retry; + } + do_ipv4 &= have_ipv4; do_ipv6 &= have_ipv6; } static void do_main(int family) { - fprintf(stderr, "family: %s\n", - family == PF_INET ? "INET" : "INET6"); + fprintf(stderr, "family: %s %s\n", + family == PF_INET ? "INET" : "INET6", + cfg_use_pf_packet ? "(PF_PACKET)" : ""); fprintf(stderr, "test SND\n"); do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE); -- cgit v1.2.3 From cda261f421ba2682e25f46beb229172706fc7fcd Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 20 Dec 2018 16:22:54 -0500 Subject: selftests: add txtimestamp kselftest Run the transmit timestamp tests as part of kselftests. Add a txtimestamp.sh test script that runs most variants: ipv4/ipv6, tcp/udp/raw/raw_ipproto/pf_packet, data/nodata, setsockopt/cmsg. The script runs tests with netem delays. Refine txtimestamp.c to validate results. Take expected netem delays as input and compare against real timestamps. To run without dependencies, add a listener socket to be able to connect in the case of TCP. Add the timestamping directory to the kselftests Makefile. Build all the binaries. Only run verified txtimestamp.sh. Signed-off-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- tools/testing/selftests/Makefile | 1 + .../selftests/networking/timestamping/Makefile | 5 +- .../selftests/networking/timestamping/config | 2 + .../networking/timestamping/txtimestamp.c | 136 +++++++++++++++++---- .../networking/timestamping/txtimestamp.sh | 57 +++++++++ 5 files changed, 173 insertions(+), 28 deletions(-) create mode 100644 tools/testing/selftests/networking/timestamping/config create mode 100755 tools/testing/selftests/networking/timestamping/txtimestamp.sh (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f0017c831e57..24b9934fb269 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -25,6 +25,7 @@ TARGETS += mount TARGETS += mqueue TARGETS += net TARGETS += netfilter +TARGETS += networking/timestamping TARGETS += nsfs TARGETS += powerpc TARGETS += proc diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile index 14cfcf006936..6c17a93c4e03 100644 --- a/tools/testing/selftests/networking/timestamping/Makefile +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -I../../../../../usr/include -TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_GEN_FILES := hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_PROGS := txtimestamp.sh all: $(TEST_PROGS) @@ -9,4 +10,4 @@ top_srcdir = ../../../../.. include ../../lib.mk clean: - rm -fr $(TEST_PROGS) + rm -fr $(TEST_GEN_FILES) diff --git a/tools/testing/selftests/networking/timestamping/config b/tools/testing/selftests/networking/timestamping/config new file mode 100644 index 000000000000..a13e3169b0a4 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/config @@ -0,0 +1,2 @@ +CONFIG_IFB=y +CONFIG_NET_SCH_NETEM=y diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index ecc1db5dcfca..2e563d17cf0c 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -70,17 +70,67 @@ static int do_ipv4 = 1; static int do_ipv6 = 1; static int cfg_payload_len = 10; static int cfg_poll_timeout = 100; +static int cfg_delay_snd; +static int cfg_delay_ack; static bool cfg_show_payload; static bool cfg_do_pktinfo; static bool cfg_loop_nodata; static bool cfg_no_delay; static bool cfg_use_cmsg; static bool cfg_use_pf_packet; +static bool cfg_do_listen; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; -static struct timespec ts_prev; +static struct timespec ts_usr; + +static int saved_tskey = -1; +static int saved_tskey_type = -1; + +static bool test_failed; + +static int64_t timespec_to_us64(struct timespec *ts) +{ + return ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000; +} + +static void validate_key(int tskey, int tstype) +{ + int stepsize; + + /* compare key for each subsequent request + * must only test for one type, the first one requested + */ + if (saved_tskey == -1) + saved_tskey_type = tstype; + else if (saved_tskey_type != tstype) + return; + + stepsize = cfg_proto == SOCK_STREAM ? cfg_payload_len : 1; + if (tskey != saved_tskey + stepsize) { + fprintf(stderr, "ERROR: key %d, expected %d\n", + tskey, saved_tskey + stepsize); + test_failed = true; + } + + saved_tskey = tskey; +} + +static void validate_timestamp(struct timespec *cur, int min_delay) +{ + int max_delay = min_delay + 500 /* processing time upper bound */; + int64_t cur64, start64; + + cur64 = timespec_to_us64(cur); + start64 = timespec_to_us64(&ts_usr); + + if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { + fprintf(stderr, "ERROR: delay %lu expected between %d and %d\n", + cur64 - start64, min_delay, max_delay); + test_failed = true; + } +} static void __print_timestamp(const char *name, struct timespec *cur, uint32_t key, int payload_len) @@ -92,32 +142,19 @@ static void __print_timestamp(const char *name, struct timespec *cur, name, cur->tv_sec, cur->tv_nsec / 1000, key, payload_len); - if ((ts_prev.tv_sec | ts_prev.tv_nsec)) { - int64_t cur_ms, prev_ms; - - cur_ms = (long) cur->tv_sec * 1000 * 1000; - cur_ms += cur->tv_nsec / 1000; - - prev_ms = (long) ts_prev.tv_sec * 1000 * 1000; - prev_ms += ts_prev.tv_nsec / 1000; + if (cur != &ts_usr) + fprintf(stderr, " (USR %+" PRId64 " us)", + timespec_to_us64(cur) - timespec_to_us64(&ts_usr)); - fprintf(stderr, " (%+" PRId64 " us)", cur_ms - prev_ms); - } - - ts_prev = *cur; fprintf(stderr, "\n"); } static void print_timestamp_usr(void) { - struct timespec ts; - struct timeval tv; /* avoid dependency on -lrt */ - - gettimeofday(&tv, NULL); - ts.tv_sec = tv.tv_sec; - ts.tv_nsec = tv.tv_usec * 1000; + if (clock_gettime(CLOCK_REALTIME, &ts_usr)) + error(1, errno, "clock_gettime"); - __print_timestamp(" USR", &ts, 0, 0); + __print_timestamp(" USR", &ts_usr, 0, 0); } static void print_timestamp(struct scm_timestamping *tss, int tstype, @@ -125,15 +162,20 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, { const char *tsname; + validate_key(tskey, tstype); + switch (tstype) { case SCM_TSTAMP_SCHED: tsname = " ENQ"; + validate_timestamp(&tss->ts[0], 0); break; case SCM_TSTAMP_SND: tsname = " SND"; + validate_timestamp(&tss->ts[0], cfg_delay_snd); break; case SCM_TSTAMP_ACK: tsname = " ACK"; + validate_timestamp(&tss->ts[0], cfg_delay_ack); break; default: error(1, 0, "unknown timestamp type: %u", @@ -389,6 +431,9 @@ static void do_test(int family, unsigned int report_opt) if (fd < 0) error(1, errno, "socket"); + /* reset expected key on each new socket */ + saved_tskey = -1; + if (cfg_proto == SOCK_STREAM) { if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char*) &val, sizeof(val))) @@ -431,7 +476,6 @@ static void do_test(int family, unsigned int report_opt) for (i = 0; i < cfg_num_pkts; i++) { memset(&msg, 0, sizeof(msg)); - memset(&ts_prev, 0, sizeof(ts_prev)); memset(buf, 'a' + i, total_len); if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { @@ -506,7 +550,7 @@ static void do_test(int family, unsigned int report_opt) error(1, errno, "close"); free(buf); - usleep(400 * 1000); + usleep(100 * 1000); } static void __attribute__((noreturn)) usage(const char *filepath) @@ -522,12 +566,15 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -F: poll() waits forever for an event\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" + " -L listen on hostname and port\n" " -n: set no-payload option\n" " -p N: connect to port N\n" " -P: use PF_PACKET\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" " -u: use udp\n" + " -v: validate SND delay (usec)\n" + " -V: validate ACK delay (usec)\n" " -x: show payload (up to 70 bytes)\n", filepath); exit(1); @@ -538,7 +585,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; int c; - while ((c = getopt(argc, argv, "46c:CDFhIl:np:PrRux")) != -1) { + while ((c = getopt(argc, argv, "46c:CDFhIl:Lnp:PrRuv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -564,6 +611,9 @@ static void parse_opt(int argc, char **argv) case 'l': cfg_payload_len = strtoul(optarg, NULL, 10); break; + case 'L': + cfg_do_listen = true; + break; case 'n': cfg_loop_nodata = true; break; @@ -591,6 +641,12 @@ static void parse_opt(int argc, char **argv) cfg_proto = SOCK_DGRAM; cfg_ipproto = IPPROTO_UDP; break; + case 'v': + cfg_delay_snd = strtoul(optarg, NULL, 10); + break; + case 'V': + cfg_delay_ack = strtoul(optarg, NULL, 10); + break; case 'x': cfg_show_payload = true; break; @@ -651,6 +707,27 @@ retry: do_ipv6 &= have_ipv6; } +static void do_listen(int family, void *addr, int alen) +{ + int fd, type; + + type = cfg_proto == SOCK_RAW ? SOCK_DGRAM : cfg_proto; + + fd = socket(family, type, 0); + if (fd == -1) + error(1, errno, "socket rx"); + + if (bind(fd, addr, alen)) + error(1, errno, "bind rx"); + + if (type == SOCK_STREAM && listen(fd, 10)) + error(1, errno, "listen rx"); + + /* leave fd open, will be closed on process exit. + * this enables connect() to succeed and avoids icmp replies + */ +} + static void do_main(int family) { fprintf(stderr, "family: %s %s\n", @@ -697,10 +774,17 @@ int main(int argc, char **argv) fprintf(stderr, "server port: %u\n", dest_port); fprintf(stderr, "\n"); - if (do_ipv4) + if (do_ipv4) { + if (cfg_do_listen) + do_listen(PF_INET, &daddr, sizeof(daddr)); do_main(PF_INET); - if (do_ipv6) + } + + if (do_ipv6) { + if (cfg_do_listen) + do_listen(PF_INET6, &daddr6, sizeof(daddr6)); do_main(PF_INET6); + } - return 0; + return test_failed; } diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.sh b/tools/testing/selftests/networking/timestamping/txtimestamp.sh new file mode 100755 index 000000000000..df0d86ca72b7 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Send packets with transmit timestamps over loopback with netem +# Verify that timestamps correspond to netem delay + +set -e + +setup() { + # set 1ms delay on lo egress + tc qdisc add dev lo root netem delay 1ms + + # set 2ms delay on ifb0 egress + modprobe ifb + ip link add ifb_netem0 type ifb + ip link set dev ifb_netem0 up + tc qdisc add dev ifb_netem0 root netem delay 2ms + + # redirect lo ingress through ifb0 egress + tc qdisc add dev lo handle ffff: ingress + tc filter add dev lo parent ffff: \ + u32 match mark 0 0xffff \ + action mirred egress redirect dev ifb_netem0 +} + +run_test_v4v6() { + # SND will be delayed 1000us + # ACK will be delayed 6000us: 1 + 2 ms round-trip + local -r args="$@ -v 1000 -V 6000" + + ./txtimestamp ${args} -4 -L 127.0.0.1 + ./txtimestamp ${args} -6 -L ::1 +} + +run_test_tcpudpraw() { + local -r args=$@ + + run_test_v4v6 ${args} # tcp + run_test_v4v6 ${args} -u # udp + run_test_v4v6 ${args} -r # raw + run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) + run_test_v4v6 ${args} -P # pf_packet +} + +run_test_all() { + run_test_tcpudpraw # setsockopt + run_test_tcpudpraw -C # cmsg + run_test_tcpudpraw -n # timestamp w/o data +} + +if [[ "$(ip netns identify)" == "root" ]]; then + ../../net/in_netns.sh $0 $@ +else + setup + run_test_all + echo "OK. All tests passed" +fi -- cgit v1.2.3 From fa2323325e8b0c61ded1e3d2c6f12f3aa43c4e30 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Thu, 20 Dec 2018 16:41:52 -0800 Subject: selftests: net: reuseport_addr_any: silence clang warning Clang does not recognize that calls to error() terminate execution and complains about uninitialized variable use that happens after calls to error(). This noop patchset fixes this. Signed-off-by: Peter Oskolkov Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_addr_any.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index 6f54d425dba9..c6233935fed1 100644 --- a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -60,6 +60,11 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, break; default: error(1, 0, "Unsupported family %d", family); + /* clang does not recognize error() above as terminating + * the program, so it complains that saddr, sz are + * not initialized when this code path is taken. Silence it. + */ + return; } for (i = 0; i < count; ++i) { @@ -128,6 +133,11 @@ static int connect_and_send(int family, int proto) break; default: error(1, 0, "Unsupported family %d", family); + /* clang does not recognize error() above as terminating + * the program, so it complains that saddr, daddr, sz are + * not initialized when this code path is taken. Silence it. + */ + return -1; } fd = socket(family, proto, 0); -- cgit v1.2.3 From 7fa4bd739fc0fa209fa5120383aab17f6e9600f7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Dec 2018 14:04:46 +0100 Subject: bpf: fix segfault in test_verifier selftest Minor fallout from merge resolution, test_verifier was segfaulting because the REJECT result was correct, but errstr was NULL. Properly fix it as in 339bbff2d6e0. Fixes: 339bbff2d6e0 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index baafe5c76aca..33f7d38849b8 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -14194,7 +14194,8 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, .errstr_unpriv = "function calls to other bpf functions are allowed for root only", - .result = REJECT, + .errstr = "!read_ok", + .result = REJECT, }, { "jset: functional", -- cgit v1.2.3