From 2bf8c7e735ac8ab81eebd672caee41c77ec1b793 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 18 Aug 2020 14:16:41 +0900 Subject: samples: bpf: Fix broken bpf programs due to removed symbol >From commit f1394b798814 ("block: mark blk_account_io_completion static") symbol blk_account_io_completion() has been marked as static, which makes it no longer possible to attach kprobe to this event. Currently, there are broken samples due to this reason. As a solution to this, attach kprobe events to blk_account_io_done() to modify them to perform the same behavior as before. Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200818051641.21724-1-danieltimlee@gmail.com --- samples/bpf/task_fd_query_kern.c | 2 +- samples/bpf/task_fd_query_user.c | 2 +- samples/bpf/tracex3_kern.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'samples') diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c index 278ade5427c8..c821294e1774 100644 --- a/samples/bpf/task_fd_query_kern.c +++ b/samples/bpf/task_fd_query_kern.c @@ -10,7 +10,7 @@ int bpf_prog1(struct pt_regs *ctx) return 0; } -SEC("kretprobe/blk_account_io_completion") +SEC("kretprobe/blk_account_io_done") int bpf_prog2(struct pt_regs *ctx) { return 0; diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index ff2e9c1c7266..4a74531dc403 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -314,7 +314,7 @@ int main(int argc, char **argv) /* test two functions in the corresponding *_kern.c file */ CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request", BPF_FD_TYPE_KPROBE)); - CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion", + CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_done", BPF_FD_TYPE_KRETPROBE)); /* test nondebug fs kprobe */ diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c index 659613c19a82..710a4410b2fb 100644 --- a/samples/bpf/tracex3_kern.c +++ b/samples/bpf/tracex3_kern.c @@ -49,7 +49,7 @@ struct { __uint(max_entries, SLOTS); } lat_map SEC(".maps"); -SEC("kprobe/blk_account_io_completion") +SEC("kprobe/blk_account_io_done") int bpf_prog2(struct pt_regs *ctx) { long rq = PT_REGS_PARM1(ctx); -- cgit v1.2.3 From 35a8b6dd339f04cbcb0b2d085334263542a12b70 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sun, 23 Aug 2020 17:53:32 +0900 Subject: samples: bpf: Cleanup bpf_load.o from Makefile Since commit cc7f641d637b ("samples: bpf: Refactor BPF map performance test with libbpf") has ommited the removal of bpf_load.o from Makefile, this commit removes the bpf_load.o rule for targets where bpf_load.o is not used. Fixes: cc7f641d637b ("samples: bpf: Refactor BPF map performance test with libbpf") Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200823085334.9413-2-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index f87ee02073ba..0cac89230c6d 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -76,7 +76,7 @@ trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS) lathist-objs := bpf_load.o lathist_user.o offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS) spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS) -map_perf_test-objs := bpf_load.o map_perf_test_user.o +map_perf_test-objs := map_perf_test_user.o test_overhead-objs := bpf_load.o test_overhead_user.o test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o test_cgrp2_attach-objs := test_cgrp2_attach.o -- cgit v1.2.3 From 3677d0a13171bb1dc8db0af84d48dea14a899962 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sun, 23 Aug 2020 17:53:33 +0900 Subject: samples: bpf: Refactor kprobe tracing programs with libbpf For the problem of increasing fragmentation of the bpf loader programs, instead of using bpf_loader.o, which is used in samples/bpf, this commit refactors the existing kprobe tracing programs with libbbpf bpf loader. - For kprobe events pointing to system calls, the SYSCALL() macro in trace_common.h was used. - Adding a kprobe event and attaching a bpf program to it was done through bpf_program_attach(). - Instead of using the existing BPF MAP definition, MAP definition has been refactored with the new BTF-defined MAP format. Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200823085334.9413-3-danieltimlee@gmail.com --- samples/bpf/Makefile | 10 ++-- samples/bpf/lathist_kern.c | 24 ++++---- samples/bpf/lathist_user.c | 42 ++++++++++++-- samples/bpf/spintest_kern.c | 36 ++++++------ samples/bpf/spintest_user.c | 68 ++++++++++++++++++----- samples/bpf/test_current_task_under_cgroup_kern.c | 27 ++++----- samples/bpf/test_current_task_under_cgroup_user.c | 52 +++++++++++++---- samples/bpf/test_probe_write_user_kern.c | 12 ++-- samples/bpf/test_probe_write_user_user.c | 49 ++++++++++++---- samples/bpf/trace_output_kern.c | 15 ++--- samples/bpf/trace_output_user.c | 55 ++++++++++++------ 11 files changed, 272 insertions(+), 118 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0cac89230c6d..c74d477474e2 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -71,11 +71,11 @@ tracex4-objs := tracex4_user.o tracex5-objs := tracex5_user.o $(TRACE_HELPERS) tracex6-objs := tracex6_user.o tracex7-objs := tracex7_user.o -test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o -trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS) -lathist-objs := bpf_load.o lathist_user.o +test_probe_write_user-objs := test_probe_write_user_user.o +trace_output-objs := trace_output_user.o $(TRACE_HELPERS) +lathist-objs := lathist_user.o offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS) -spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS) +spintest-objs := spintest_user.o $(TRACE_HELPERS) map_perf_test-objs := map_perf_test_user.o test_overhead-objs := bpf_load.o test_overhead_user.o test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o @@ -86,7 +86,7 @@ xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := xdp1_user.o xdp_router_ipv4-objs := xdp_router_ipv4_user.o -test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \ +test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := trace_event_user.o $(TRACE_HELPERS) sampleip-objs := sampleip_user.o $(TRACE_HELPERS) diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c index ca9c2e4e69aa..4adfcbbe6ef4 100644 --- a/samples/bpf/lathist_kern.c +++ b/samples/bpf/lathist_kern.c @@ -18,12 +18,12 @@ * trace_preempt_[on|off] tracepoints hooks is not supported. */ -struct bpf_map_def SEC("maps") my_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u64), - .max_entries = MAX_CPU, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, u64); + __uint(max_entries, MAX_CPU); +} my_map SEC(".maps"); SEC("kprobe/trace_preempt_off") int bpf_prog1(struct pt_regs *ctx) @@ -61,12 +61,12 @@ static unsigned int log2l(unsigned long v) return log2(v); } -struct bpf_map_def SEC("maps") my_lat = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(long), - .max_entries = MAX_CPU * MAX_ENTRIES, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, long); + __uint(max_entries, MAX_CPU * MAX_ENTRIES); +} my_lat SEC(".maps"); SEC("kprobe/trace_preempt_on") int bpf_prog2(struct pt_regs *ctx) diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c index 2ff2839a52d5..7d8ff2418303 100644 --- a/samples/bpf/lathist_user.c +++ b/samples/bpf/lathist_user.c @@ -6,9 +6,8 @@ #include #include #include -#include +#include #include -#include "bpf_load.h" #define MAX_ENTRIES 20 #define MAX_CPU 4 @@ -81,20 +80,51 @@ static void get_data(int fd) int main(int argc, char **argv) { + struct bpf_link *links[2]; + struct bpf_program *prog; + struct bpf_object *obj; char filename[256]; + int map_fd, i = 0; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + map_fd = bpf_object__find_map_fd_by_name(obj, "my_lat"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + links[i] = bpf_program__attach(prog); + if (libbpf_get_error(links[i])) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + links[i] = NULL; + goto cleanup; + } + i++; } while (1) { - get_data(map_fd[1]); + get_data(map_fd); print_hist(); sleep(5); } +cleanup: + for (i--; i >= 0; i--) + bpf_link__destroy(links[i]); + + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c index f508af357251..455da77319d9 100644 --- a/samples/bpf/spintest_kern.c +++ b/samples/bpf/spintest_kern.c @@ -12,25 +12,25 @@ #include #include -struct bpf_map_def SEC("maps") my_map = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(long), - .value_size = sizeof(long), - .max_entries = 1024, -}; -struct bpf_map_def SEC("maps") my_map2 = { - .type = BPF_MAP_TYPE_PERCPU_HASH, - .key_size = sizeof(long), - .value_size = sizeof(long), - .max_entries = 1024, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, long); + __type(value, long); + __uint(max_entries, 1024); +} my_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(key_size, sizeof(long)); + __uint(value_size, sizeof(long)); + __uint(max_entries, 1024); +} my_map2 SEC(".maps"); -struct bpf_map_def SEC("maps") stackmap = { - .type = BPF_MAP_TYPE_STACK_TRACE, - .key_size = sizeof(u32), - .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), - .max_entries = 10000, -}; +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(key_size, sizeof(u32)); + __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64)); + __uint(max_entries, 10000); +} stackmap SEC(".maps"); #define PROG(foo) \ int foo(struct pt_regs *ctx) \ diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index fb430ea2ef51..847da9284fa8 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -1,40 +1,77 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include #include #include -#include "bpf_load.h" +#include #include "trace_helpers.h" int main(int ac, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256], symbol[256]; + struct bpf_object *obj = NULL; + struct bpf_link *links[20]; long key, next_key, value; - char filename[256]; + struct bpf_program *prog; + int map_fd, i, j = 0; + const char *title; struct ksym *sym; - int i; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return 2; } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + obj = NULL; + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd = bpf_object__find_map_fd_by_name(obj, "my_map"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + title = bpf_program__title(prog, false); + if (sscanf(title, "kprobe/%s", symbol) != 1) + continue; + + /* Attach prog only when symbol exists */ + if (ksym_get_addr(symbol)) { + links[j] = bpf_program__attach(prog); + if (libbpf_get_error(links[j])) { + fprintf(stderr, "bpf_program__attach failed\n"); + links[j] = NULL; + goto cleanup; + } + j++; + } } for (i = 0; i < 5; i++) { key = 0; printf("kprobing funcs:"); - while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_map_lookup_elem(map_fd[0], &next_key, &value); + while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd, &next_key, &value); assert(next_key == value); sym = ksym_search(value); key = next_key; @@ -48,10 +85,15 @@ int main(int ac, char **argv) if (key) printf("\n"); key = 0; - while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) - bpf_map_delete_elem(map_fd[0], &next_key); + while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) + bpf_map_delete_elem(map_fd, &next_key); sleep(1); } +cleanup: + for (j--; j >= 0; j--) + bpf_link__destroy(links[j]); + + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c index 6dc4f41bb6cb..fbd43e2bb4d3 100644 --- a/samples/bpf/test_current_task_under_cgroup_kern.c +++ b/samples/bpf/test_current_task_under_cgroup_kern.c @@ -10,23 +10,24 @@ #include #include #include +#include "trace_common.h" -struct bpf_map_def SEC("maps") cgroup_map = { - .type = BPF_MAP_TYPE_CGROUP_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u32), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); + __uint(max_entries, 1); +} cgroup_map SEC(".maps"); -struct bpf_map_def SEC("maps") perf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, 1); +} perf_map SEC(".maps"); /* Writes the last PID that called sync to a map at index 0 */ -SEC("kprobe/sys_sync") +SEC("kprobe/" SYSCALL(sys_sync)) int bpf_prog1(struct pt_regs *ctx) { u64 pid = bpf_get_current_pid_tgid(); diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c index 06e9f8ce42e2..ac251a417f45 100644 --- a/samples/bpf/test_current_task_under_cgroup_user.c +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -4,10 +4,9 @@ #define _GNU_SOURCE #include -#include #include #include -#include "bpf_load.h" +#include #include "cgroup_helpers.h" #define CGROUP_PATH "/my-cgroup" @@ -15,13 +14,44 @@ int main(int argc, char **argv) { pid_t remote_pid, local_pid = getpid(); - int cg2, idx = 0, rc = 0; + struct bpf_link *link = NULL; + struct bpf_program *prog; + int cg2, idx = 0, rc = 1; + struct bpf_object *obj; char filename[256]; + int map_fd[2]; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } + + prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); + if (!prog) { + printf("finding a prog in obj file failed\n"); + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd[0] = bpf_object__find_map_fd_by_name(obj, "cgroup_map"); + map_fd[1] = bpf_object__find_map_fd_by_name(obj, "perf_map"); + if (map_fd[0] < 0 || map_fd[1] < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } if (setup_cgroup_environment()) @@ -70,12 +100,14 @@ int main(int argc, char **argv) goto err; } - goto out; -err: - rc = 1; + rc = 0; -out: +err: close(cg2); cleanup_cgroup_environment(); + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); return rc; } diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c index fd651a65281e..220a96438d75 100644 --- a/samples/bpf/test_probe_write_user_kern.c +++ b/samples/bpf/test_probe_write_user_kern.c @@ -13,12 +13,12 @@ #include #include "trace_common.h" -struct bpf_map_def SEC("maps") dnat_map = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct sockaddr_in), - .value_size = sizeof(struct sockaddr_in), - .max_entries = 256, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct sockaddr_in); + __type(value, struct sockaddr_in); + __uint(max_entries, 256); +} dnat_map SEC(".maps"); /* kprobe is NOT a stable ABI * kernel functions can be removed, renamed or completely change semantics. diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c index 045eb5e30f54..00ccfb834e45 100644 --- a/samples/bpf/test_probe_write_user_user.c +++ b/samples/bpf/test_probe_write_user_user.c @@ -1,21 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include -#include "bpf_load.h" +#include #include -#include #include #include int main(int ac, char **argv) { - int serverfd, serverconnfd, clientfd; - socklen_t sockaddr_len; - struct sockaddr serv_addr, mapped_addr, tmp_addr; struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in; + struct sockaddr serv_addr, mapped_addr, tmp_addr; + int serverfd, serverconnfd, clientfd, map_fd; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_object *obj; + socklen_t sockaddr_len; char filename[256]; char *ip; @@ -24,10 +25,35 @@ int main(int ac, char **argv) tmp_addr_in = (struct sockaddr_in *)&tmp_addr; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } + + prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); + if (libbpf_get_error(prog)) { + fprintf(stderr, "ERROR: finding a prog in obj file failed\n"); + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd = bpf_object__find_map_fd_by_name(obj, "dnat_map"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); @@ -51,7 +77,7 @@ int main(int ac, char **argv) mapped_addr_in->sin_port = htons(5555); mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255"); - assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); + assert(!bpf_map_update_elem(map_fd, &mapped_addr, &serv_addr, BPF_ANY)); assert(listen(serverfd, 5) == 0); @@ -75,5 +101,8 @@ int main(int ac, char **argv) /* Is the server's getsockname = the socket getpeername */ assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0); +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c index 1d7d422cae6f..b64815af0943 100644 --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output_kern.c @@ -2,15 +2,16 @@ #include #include #include +#include "trace_common.h" -struct bpf_map_def SEC("maps") my_map = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = 2, -}; +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(u32)); + __uint(max_entries, 2); +} my_map SEC(".maps"); -SEC("kprobe/sys_write") +SEC("kprobe/" SYSCALL(sys_write)) int bpf_prog1(struct pt_regs *ctx) { struct S { diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 60a17dd05345..364b98764d54 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -1,23 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only #include -#include -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include "bpf_load.h" -#include "perf-sys.h" static __u64 time_get_ns(void) { @@ -57,20 +44,48 @@ static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size) int main(int argc, char **argv) { struct perf_buffer_opts pb_opts = {}; + struct bpf_link *link = NULL; + struct bpf_program *prog; struct perf_buffer *pb; + struct bpf_object *obj; + int map_fd, ret = 0; char filename[256]; FILE *f; - int ret; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd = bpf_object__find_map_fd_by_name(obj, "my_map"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); + if (libbpf_get_error(prog)) { + fprintf(stderr, "ERROR: finding a prog in obj file failed\n"); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } pb_opts.sample_cb = print_bpf_output; - pb = perf_buffer__new(map_fd[0], 8, &pb_opts); + pb = perf_buffer__new(map_fd, 8, &pb_opts); ret = libbpf_get_error(pb); if (ret) { printf("failed to setup perf_buffer: %d\n", ret); @@ -84,5 +99,9 @@ int main(int argc, char **argv) while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) { } kill(0, SIGINT); + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); return ret; } -- cgit v1.2.3 From f0c328f8af5d920a68f8217aec76d9a45288cef1 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sun, 23 Aug 2020 17:53:34 +0900 Subject: samples: bpf: Refactor tracepoint tracing programs with libbpf For the problem of increasing fragmentation of the bpf loader programs, instead of using bpf_loader.o, which is used in samples/bpf, this commit refactors the existing tracepoint tracing programs with libbbpf bpf loader. - Adding a tracepoint event and attaching a bpf program to it was done through bpf_program_attach(). - Instead of using the existing BPF MAP definition, MAP definition has been refactored with the new BTF-defined MAP format. Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200823085334.9413-4-danieltimlee@gmail.com --- samples/bpf/Makefile | 6 ++-- samples/bpf/cpustat_kern.c | 36 +++++++++++------------ samples/bpf/cpustat_user.c | 47 +++++++++++++++++++++++++----- samples/bpf/offwaketime_kern.c | 52 ++++++++++++++++----------------- samples/bpf/offwaketime_user.c | 66 ++++++++++++++++++++++++++++++++---------- samples/bpf/syscall_tp_kern.c | 24 +++++++-------- samples/bpf/syscall_tp_user.c | 54 ++++++++++++++++++++++++++-------- 7 files changed, 192 insertions(+), 93 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index c74d477474e2..a6d3646b3818 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -74,7 +74,7 @@ tracex7-objs := tracex7_user.o test_probe_write_user-objs := test_probe_write_user_user.o trace_output-objs := trace_output_user.o $(TRACE_HELPERS) lathist-objs := lathist_user.o -offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS) +offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS) spintest-objs := spintest_user.o $(TRACE_HELPERS) map_perf_test-objs := map_perf_test_user.o test_overhead-objs := bpf_load.o test_overhead_user.o @@ -100,8 +100,8 @@ xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o xdp_monitor-objs := bpf_load.o xdp_monitor_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o -syscall_tp-objs := bpf_load.o syscall_tp_user.o -cpustat-objs := bpf_load.o cpustat_user.o +syscall_tp-objs := syscall_tp_user.o +cpustat-objs := cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := xdpsock_user.o xdp_fwd-objs := xdp_fwd_user.o diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c index a86a19d5f033..5aefd19cdfa1 100644 --- a/samples/bpf/cpustat_kern.c +++ b/samples/bpf/cpustat_kern.c @@ -51,28 +51,28 @@ static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 }; #define MAP_OFF_PSTATE_IDX 3 #define MAP_OFF_NUM 4 -struct bpf_map_def SEC("maps") my_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = MAX_CPU * MAP_OFF_NUM, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CPU * MAP_OFF_NUM); +} my_map SEC(".maps"); /* cstate_duration records duration time for every idle state per CPU */ -struct bpf_map_def SEC("maps") cstate_duration = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES); +} cstate_duration SEC(".maps"); /* pstate_duration records duration time for every operating point per CPU */ -struct bpf_map_def SEC("maps") pstate_duration = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES); +} pstate_duration SEC(".maps"); /* * The trace events for cpu_idle and cpu_frequency are taken from: diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c index 869a99406dbf..96675985e9e0 100644 --- a/samples/bpf/cpustat_user.c +++ b/samples/bpf/cpustat_user.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -18,7 +17,9 @@ #include #include -#include "bpf_load.h" +#include + +static int cstate_map_fd, pstate_map_fd; #define MAX_CPU 8 #define MAX_PSTATE_ENTRIES 5 @@ -181,21 +182,50 @@ static void int_exit(int sig) { cpu_stat_inject_cpu_idle_event(); cpu_stat_inject_cpu_frequency_event(); - cpu_stat_update(map_fd[1], map_fd[2]); + cpu_stat_update(cstate_map_fd, pstate_map_fd); cpu_stat_print(); exit(0); } int main(int argc, char **argv) { + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_object *obj; char filename[256]; int ret; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); + if (!prog) { + printf("finding a prog in obj file failed\n"); + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration"); + pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration"); + if (cstate_map_fd < 0 || pstate_map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } ret = cpu_stat_inject_cpu_idle_event(); @@ -210,10 +240,13 @@ int main(int argc, char **argv) signal(SIGTERM, int_exit); while (1) { - cpu_stat_update(map_fd[1], map_fd[2]); + cpu_stat_update(cstate_map_fd, pstate_map_fd); cpu_stat_print(); sleep(5); } +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index e74ee1cd4b9c..14b792915a9c 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -28,38 +28,38 @@ struct key_t { u32 tret; }; -struct bpf_map_def SEC("maps") counts = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct key_t), - .value_size = sizeof(u64), - .max_entries = 10000, -}; - -struct bpf_map_def SEC("maps") start = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = 10000, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct key_t); + __type(value, u64); + __uint(max_entries, 10000); +} counts SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u64); + __uint(max_entries, 10000); +} start SEC(".maps"); struct wokeby_t { char name[TASK_COMM_LEN]; u32 ret; }; -struct bpf_map_def SEC("maps") wokeby = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u32), - .value_size = sizeof(struct wokeby_t), - .max_entries = 10000, -}; - -struct bpf_map_def SEC("maps") stackmap = { - .type = BPF_MAP_TYPE_STACK_TRACE, - .key_size = sizeof(u32), - .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), - .max_entries = 10000, -}; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, struct wokeby_t); + __uint(max_entries, 10000); +} wokeby SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(key_size, sizeof(u32)); + __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64)); + __uint(max_entries, 10000); +} stackmap SEC(".maps"); #define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 51c7da5341cc..5734cfdaaacb 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -5,19 +5,19 @@ #include #include #include -#include -#include #include #include -#include #include #include #include -#include "bpf_load.h" +#include #include "trace_helpers.h" #define PRINT_RAW_ADDR 0 +/* counts, stackmap */ +static int map_fd[2]; + static void print_ksym(__u64 addr) { struct ksym *sym; @@ -52,14 +52,14 @@ static void print_stack(struct key_t *key, __u64 count) int i; printf("%s;", key->target); - if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[1], &key->tret, ip) != 0) { printf("---;"); } else { for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) print_ksym(ip[i]); } printf("-;"); - if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) { + if (bpf_map_lookup_elem(map_fd[1], &key->wret, ip) != 0) { printf("---;"); } else { for (i = 0; i < PERF_MAX_STACK_DEPTH; i++) @@ -96,23 +96,54 @@ static void int_exit(int sig) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj = NULL; + struct bpf_link *links[2]; + struct bpf_program *prog; + int delay = 1, i = 0; char filename[256]; - int delay = 1; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return 2; } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + obj = NULL; + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts"); + map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap"); + if (map_fd[0] < 0 || map_fd[1] < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + bpf_object__for_each_program(prog, obj) { + links[i] = bpf_program__attach(prog); + if (libbpf_get_error(links[i])) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + links[i] = NULL; + goto cleanup; + } + i++; } if (argc > 1) @@ -120,5 +151,10 @@ int main(int argc, char **argv) sleep(delay); print_stacks(map_fd[0]); +cleanup: + for (i--; i >= 0; i--) + bpf_link__destroy(links[i]); + + bpf_object__close(obj); return 0; } diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c index 5a62b03b1f88..50231c2eff9c 100644 --- a/samples/bpf/syscall_tp_kern.c +++ b/samples/bpf/syscall_tp_kern.c @@ -18,19 +18,19 @@ struct syscalls_exit_open_args { long ret; }; -struct bpf_map_def SEC("maps") enter_open_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u32), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 1); +} enter_open_map SEC(".maps"); -struct bpf_map_def SEC("maps") exit_open_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u32), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 1); +} exit_open_map SEC(".maps"); static __always_inline void count(void *map) { diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 57014bab7cbe..76a1d00128fb 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -5,16 +5,12 @@ #include #include #include -#include -#include #include #include #include -#include -#include #include +#include #include -#include "bpf_load.h" /* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*. * This requires kernel CONFIG_FTRACE_SYSCALLS to be set. @@ -49,16 +45,44 @@ static void verify_map(int map_id) static int test(char *filename, int num_progs) { - int i, fd, map0_fds[num_progs], map1_fds[num_progs]; + int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0; + struct bpf_link *links[num_progs * 4]; + struct bpf_object *objs[num_progs]; + struct bpf_program *prog; for (i = 0; i < num_progs; i++) { - if (load_bpf_file(filename)) { - fprintf(stderr, "%s", bpf_log_buf); - return 1; + objs[i] = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(objs[i])) { + fprintf(stderr, "opening BPF object file failed\n"); + objs[i] = NULL; + goto cleanup; } - printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]); - map0_fds[i] = map_fd[0]; - map1_fds[i] = map_fd[1]; + + /* load BPF program */ + if (bpf_object__load(objs[i])) { + fprintf(stderr, "loading BPF object file failed\n"); + goto cleanup; + } + + map0_fds[i] = bpf_object__find_map_fd_by_name(objs[i], + "enter_open_map"); + map1_fds[i] = bpf_object__find_map_fd_by_name(objs[i], + "exit_open_map"); + if (map0_fds[i] < 0 || map1_fds[i] < 0) { + fprintf(stderr, "finding a map in obj file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, objs[i]) { + links[j] = bpf_program__attach(prog); + if (libbpf_get_error(links[j])) { + fprintf(stderr, "bpf_program__attach failed\n"); + links[j] = NULL; + goto cleanup; + } + j++; + } + printf("prog #%d: map ids %d %d\n", i, map0_fds[i], map1_fds[i]); } /* current load_bpf_file has perf_event_open default pid = -1 @@ -80,6 +104,12 @@ static int test(char *filename, int num_progs) verify_map(map1_fds[i]); } +cleanup: + for (j--; j >= 0; j--) + bpf_link__destroy(links[j]); + + for (i--; i >= 0; i--) + bpf_object__close(objs[i]); return 0; } -- cgit v1.2.3 From 35149b2c048e43562a598fd8ff91467d429bc666 Mon Sep 17 00:00:00 2001 From: Cristian Dumitrescu Date: Fri, 28 Aug 2020 10:26:28 +0200 Subject: samples/bpf: Add new sample xsk_fwd.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This sample code illustrates the packet forwarding between multiple AF_XDP sockets in multi-threading environment. All the threads and sockets are sharing a common buffer pool, with each socket having its own private buffer cache. The sockets are created with the xsk_socket__create_shared() function, which allows multiple AF_XDP sockets to share the same UMEM object. Example 1: Single thread handling two sockets. Packets received from socket A (on top of interface IFA, queue QA) are forwarded to socket B (on top of interface IFB, queue QB) and vice-versa. The thread is affinitized to CPU core C: ./xsk_fwd -i IFA -q QA -i IFB -q QB -c C Example 2: Two threads, each handling two sockets. Packets from socket A are sent to socket B (by thread X), packets from socket B are sent to socket A (by thread X); packets from socket C are sent to socket D (by thread Y), packets from socket D are sent to socket C (by thread Y). The two threads are bound to CPU cores CX and CY: ./xdp_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD -c CX -c CY Signed-off-by: Cristian Dumitrescu Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/1598603189-32145-15-git-send-email-magnus.karlsson@intel.com --- samples/bpf/Makefile | 3 + samples/bpf/xsk_fwd.c | 1085 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1088 insertions(+) create mode 100644 samples/bpf/xsk_fwd.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index a6d3646b3818..4f1ed0e3cf9f 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -48,6 +48,7 @@ tprogs-y += syscall_tp tprogs-y += cpustat tprogs-y += xdp_adjust_tail tprogs-y += xdpsock +tprogs-y += xsk_fwd tprogs-y += xdp_fwd tprogs-y += task_fd_query tprogs-y += xdp_sample_pkts @@ -104,6 +105,7 @@ syscall_tp-objs := syscall_tp_user.o cpustat-objs := cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := xdpsock_user.o +xsk_fwd-objs := xsk_fwd.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) @@ -203,6 +205,7 @@ TPROGLDLIBS_trace_output += -lrt TPROGLDLIBS_map_perf_test += -lrt TPROGLDLIBS_test_overhead += -lrt TPROGLDLIBS_xdpsock += -pthread +TPROGLDLIBS_xsk_fwd += -pthread # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c new file mode 100644 index 000000000000..1cd97c84c337 --- /dev/null +++ b/samples/bpf/xsk_fwd.c @@ -0,0 +1,1085 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 Intel Corporation. */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef __u64 u64; +typedef __u32 u32; +typedef __u16 u16; +typedef __u8 u8; + +/* This program illustrates the packet forwarding between multiple AF_XDP + * sockets in multi-threaded environment. All threads are sharing a common + * buffer pool, with each socket having its own private buffer cache. + * + * Example 1: Single thread handling two sockets. The packets received by socket + * A (interface IFA, queue QA) are forwarded to socket B (interface IFB, queue + * QB), while the packets received by socket B are forwarded to socket A. The + * thread is running on CPU core X: + * + * ./xsk_fwd -i IFA -q QA -i IFB -q QB -c X + * + * Example 2: Two threads, each handling two sockets. The thread running on CPU + * core X forwards all the packets received by socket A to socket B, and all the + * packets received by socket B to socket A. The thread running on CPU core Y is + * performing the same packet forwarding between sockets C and D: + * + * ./xsk_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD + * -c CX -c CY + */ + +/* + * Buffer pool and buffer cache + * + * For packet forwarding, the packet buffers are typically allocated from the + * pool for packet reception and freed back to the pool for further reuse once + * the packet transmission is completed. + * + * The buffer pool is shared between multiple threads. In order to minimize the + * access latency to the shared buffer pool, each thread creates one (or + * several) buffer caches, which, unlike the buffer pool, are private to the + * thread that creates them and therefore cannot be shared with other threads. + * The access to the shared pool is only needed either (A) when the cache gets + * empty due to repeated buffer allocations and it needs to be replenished from + * the pool, or (B) when the cache gets full due to repeated buffer free and it + * needs to be flushed back to the pull. + * + * In a packet forwarding system, a packet received on any input port can + * potentially be transmitted on any output port, depending on the forwarding + * configuration. For AF_XDP sockets, for this to work with zero-copy of the + * packet buffers when, it is required that the buffer pool memory fits into the + * UMEM area shared by all the sockets. + */ + +struct bpool_params { + u32 n_buffers; + u32 buffer_size; + int mmap_flags; + + u32 n_users_max; + u32 n_buffers_per_slab; +}; + +/* This buffer pool implementation organizes the buffers into equally sized + * slabs of *n_buffers_per_slab*. Initially, there are *n_slabs* slabs in the + * pool that are completely filled with buffer pointers (full slabs). + * + * Each buffer cache has a slab for buffer allocation and a slab for buffer + * free, with both of these slabs initially empty. When the cache's allocation + * slab goes empty, it is swapped with one of the available full slabs from the + * pool, if any is available. When the cache's free slab goes full, it is + * swapped for one of the empty slabs from the pool, which is guaranteed to + * succeed. + * + * Partially filled slabs never get traded between the cache and the pool + * (except when the cache itself is destroyed), which enables fast operation + * through pointer swapping. + */ +struct bpool { + struct bpool_params params; + pthread_mutex_t lock; + void *addr; + + u64 **slabs; + u64 **slabs_reserved; + u64 *buffers; + u64 *buffers_reserved; + + u64 n_slabs; + u64 n_slabs_reserved; + u64 n_buffers; + + u64 n_slabs_available; + u64 n_slabs_reserved_available; + + struct xsk_umem_config umem_cfg; + struct xsk_ring_prod umem_fq; + struct xsk_ring_cons umem_cq; + struct xsk_umem *umem; +}; + +static struct bpool * +bpool_init(struct bpool_params *params, + struct xsk_umem_config *umem_cfg) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved; + u64 slabs_size, slabs_reserved_size; + u64 buffers_size, buffers_reserved_size; + u64 total_size, i; + struct bpool *bp; + u8 *p; + int status; + + /* mmap prep. */ + if (setrlimit(RLIMIT_MEMLOCK, &r)) + return NULL; + + /* bpool internals dimensioning. */ + n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) / + params->n_buffers_per_slab; + n_slabs_reserved = params->n_users_max * 2; + n_buffers = n_slabs * params->n_buffers_per_slab; + n_buffers_reserved = n_slabs_reserved * params->n_buffers_per_slab; + + slabs_size = n_slabs * sizeof(u64 *); + slabs_reserved_size = n_slabs_reserved * sizeof(u64 *); + buffers_size = n_buffers * sizeof(u64); + buffers_reserved_size = n_buffers_reserved * sizeof(u64); + + total_size = sizeof(struct bpool) + + slabs_size + slabs_reserved_size + + buffers_size + buffers_reserved_size; + + /* bpool memory allocation. */ + p = calloc(total_size, sizeof(u8)); + if (!p) + return NULL; + + /* bpool memory initialization. */ + bp = (struct bpool *)p; + memcpy(&bp->params, params, sizeof(*params)); + bp->params.n_buffers = n_buffers; + + bp->slabs = (u64 **)&p[sizeof(struct bpool)]; + bp->slabs_reserved = (u64 **)&p[sizeof(struct bpool) + + slabs_size]; + bp->buffers = (u64 *)&p[sizeof(struct bpool) + + slabs_size + slabs_reserved_size]; + bp->buffers_reserved = (u64 *)&p[sizeof(struct bpool) + + slabs_size + slabs_reserved_size + buffers_size]; + + bp->n_slabs = n_slabs; + bp->n_slabs_reserved = n_slabs_reserved; + bp->n_buffers = n_buffers; + + for (i = 0; i < n_slabs; i++) + bp->slabs[i] = &bp->buffers[i * params->n_buffers_per_slab]; + bp->n_slabs_available = n_slabs; + + for (i = 0; i < n_slabs_reserved; i++) + bp->slabs_reserved[i] = &bp->buffers_reserved[i * + params->n_buffers_per_slab]; + bp->n_slabs_reserved_available = n_slabs_reserved; + + for (i = 0; i < n_buffers; i++) + bp->buffers[i] = i * params->buffer_size; + + /* lock. */ + status = pthread_mutex_init(&bp->lock, NULL); + if (status) { + free(p); + return NULL; + } + + /* mmap. */ + bp->addr = mmap(NULL, + n_buffers * params->buffer_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | params->mmap_flags, + -1, + 0); + if (bp->addr == MAP_FAILED) { + pthread_mutex_destroy(&bp->lock); + free(p); + return NULL; + } + + /* umem. */ + status = xsk_umem__create(&bp->umem, + bp->addr, + bp->params.n_buffers * bp->params.buffer_size, + &bp->umem_fq, + &bp->umem_cq, + umem_cfg); + if (status) { + munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size); + pthread_mutex_destroy(&bp->lock); + free(p); + return NULL; + } + memcpy(&bp->umem_cfg, umem_cfg, sizeof(*umem_cfg)); + + return bp; +} + +static void +bpool_free(struct bpool *bp) +{ + if (!bp) + return; + + xsk_umem__delete(bp->umem); + munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size); + pthread_mutex_destroy(&bp->lock); + free(bp); +} + +struct bcache { + struct bpool *bp; + + u64 *slab_cons; + u64 *slab_prod; + + u64 n_buffers_cons; + u64 n_buffers_prod; +}; + +static u32 +bcache_slab_size(struct bcache *bc) +{ + struct bpool *bp = bc->bp; + + return bp->params.n_buffers_per_slab; +} + +static struct bcache * +bcache_init(struct bpool *bp) +{ + struct bcache *bc; + + bc = calloc(1, sizeof(struct bcache)); + if (!bc) + return NULL; + + bc->bp = bp; + bc->n_buffers_cons = 0; + bc->n_buffers_prod = 0; + + pthread_mutex_lock(&bp->lock); + if (bp->n_slabs_reserved_available == 0) { + pthread_mutex_unlock(&bp->lock); + free(bc); + return NULL; + } + + bc->slab_cons = bp->slabs_reserved[bp->n_slabs_reserved_available - 1]; + bc->slab_prod = bp->slabs_reserved[bp->n_slabs_reserved_available - 2]; + bp->n_slabs_reserved_available -= 2; + pthread_mutex_unlock(&bp->lock); + + return bc; +} + +static void +bcache_free(struct bcache *bc) +{ + struct bpool *bp; + + if (!bc) + return; + + /* In order to keep this example simple, the case of freeing any + * existing buffers from the cache back to the pool is ignored. + */ + + bp = bc->bp; + pthread_mutex_lock(&bp->lock); + bp->slabs_reserved[bp->n_slabs_reserved_available] = bc->slab_prod; + bp->slabs_reserved[bp->n_slabs_reserved_available + 1] = bc->slab_cons; + bp->n_slabs_reserved_available += 2; + pthread_mutex_unlock(&bp->lock); + + free(bc); +} + +/* To work correctly, the implementation requires that the *n_buffers* input + * argument is never greater than the buffer pool's *n_buffers_per_slab*. This + * is typically the case, with one exception taking place when large number of + * buffers are allocated at init time (e.g. for the UMEM fill queue setup). + */ +static inline u32 +bcache_cons_check(struct bcache *bc, u32 n_buffers) +{ + struct bpool *bp = bc->bp; + u64 n_buffers_per_slab = bp->params.n_buffers_per_slab; + u64 n_buffers_cons = bc->n_buffers_cons; + u64 n_slabs_available; + u64 *slab_full; + + /* + * Consumer slab is not empty: Use what's available locally. Do not + * look for more buffers from the pool when the ask can only be + * partially satisfied. + */ + if (n_buffers_cons) + return (n_buffers_cons < n_buffers) ? + n_buffers_cons : + n_buffers; + + /* + * Consumer slab is empty: look to trade the current consumer slab + * (full) for a full slab from the pool, if any is available. + */ + pthread_mutex_lock(&bp->lock); + n_slabs_available = bp->n_slabs_available; + if (!n_slabs_available) { + pthread_mutex_unlock(&bp->lock); + return 0; + } + + n_slabs_available--; + slab_full = bp->slabs[n_slabs_available]; + bp->slabs[n_slabs_available] = bc->slab_cons; + bp->n_slabs_available = n_slabs_available; + pthread_mutex_unlock(&bp->lock); + + bc->slab_cons = slab_full; + bc->n_buffers_cons = n_buffers_per_slab; + return n_buffers; +} + +static inline u64 +bcache_cons(struct bcache *bc) +{ + u64 n_buffers_cons = bc->n_buffers_cons - 1; + u64 buffer; + + buffer = bc->slab_cons[n_buffers_cons]; + bc->n_buffers_cons = n_buffers_cons; + return buffer; +} + +static inline void +bcache_prod(struct bcache *bc, u64 buffer) +{ + struct bpool *bp = bc->bp; + u64 n_buffers_per_slab = bp->params.n_buffers_per_slab; + u64 n_buffers_prod = bc->n_buffers_prod; + u64 n_slabs_available; + u64 *slab_empty; + + /* + * Producer slab is not yet full: store the current buffer to it. + */ + if (n_buffers_prod < n_buffers_per_slab) { + bc->slab_prod[n_buffers_prod] = buffer; + bc->n_buffers_prod = n_buffers_prod + 1; + return; + } + + /* + * Producer slab is full: trade the cache's current producer slab + * (full) for an empty slab from the pool, then store the current + * buffer to the new producer slab. As one full slab exists in the + * cache, it is guaranteed that there is at least one empty slab + * available in the pool. + */ + pthread_mutex_lock(&bp->lock); + n_slabs_available = bp->n_slabs_available; + slab_empty = bp->slabs[n_slabs_available]; + bp->slabs[n_slabs_available] = bc->slab_prod; + bp->n_slabs_available = n_slabs_available + 1; + pthread_mutex_unlock(&bp->lock); + + slab_empty[0] = buffer; + bc->slab_prod = slab_empty; + bc->n_buffers_prod = 1; +} + +/* + * Port + * + * Each of the forwarding ports sits on top of an AF_XDP socket. In order for + * packet forwarding to happen with no packet buffer copy, all the sockets need + * to share the same UMEM area, which is used as the buffer pool memory. + */ +#ifndef MAX_BURST_RX +#define MAX_BURST_RX 64 +#endif + +#ifndef MAX_BURST_TX +#define MAX_BURST_TX 64 +#endif + +struct burst_rx { + u64 addr[MAX_BURST_RX]; + u32 len[MAX_BURST_RX]; +}; + +struct burst_tx { + u64 addr[MAX_BURST_TX]; + u32 len[MAX_BURST_TX]; + u32 n_pkts; +}; + +struct port_params { + struct xsk_socket_config xsk_cfg; + struct bpool *bp; + const char *iface; + u32 iface_queue; +}; + +struct port { + struct port_params params; + + struct bcache *bc; + + struct xsk_ring_cons rxq; + struct xsk_ring_prod txq; + struct xsk_ring_prod umem_fq; + struct xsk_ring_cons umem_cq; + struct xsk_socket *xsk; + int umem_fq_initialized; + + u64 n_pkts_rx; + u64 n_pkts_tx; +}; + +static void +port_free(struct port *p) +{ + if (!p) + return; + + /* To keep this example simple, the code to free the buffers from the + * socket's receive and transmit queues, as well as from the UMEM fill + * and completion queues, is not included. + */ + + if (p->xsk) + xsk_socket__delete(p->xsk); + + bcache_free(p->bc); + + free(p); +} + +static struct port * +port_init(struct port_params *params) +{ + struct port *p; + u32 umem_fq_size, pos = 0; + int status, i; + + /* Memory allocation and initialization. */ + p = calloc(sizeof(struct port), 1); + if (!p) + return NULL; + + memcpy(&p->params, params, sizeof(p->params)); + umem_fq_size = params->bp->umem_cfg.fill_size; + + /* bcache. */ + p->bc = bcache_init(params->bp); + if (!p->bc || + (bcache_slab_size(p->bc) < umem_fq_size) || + (bcache_cons_check(p->bc, umem_fq_size) < umem_fq_size)) { + port_free(p); + return NULL; + } + + /* xsk socket. */ + status = xsk_socket__create_shared(&p->xsk, + params->iface, + params->iface_queue, + params->bp->umem, + &p->rxq, + &p->txq, + &p->umem_fq, + &p->umem_cq, + ¶ms->xsk_cfg); + if (status) { + port_free(p); + return NULL; + } + + /* umem fq. */ + xsk_ring_prod__reserve(&p->umem_fq, umem_fq_size, &pos); + + for (i = 0; i < umem_fq_size; i++) + *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) = + bcache_cons(p->bc); + + xsk_ring_prod__submit(&p->umem_fq, umem_fq_size); + p->umem_fq_initialized = 1; + + return p; +} + +static inline u32 +port_rx_burst(struct port *p, struct burst_rx *b) +{ + u32 n_pkts, pos, i; + + /* Free buffers for FQ replenish. */ + n_pkts = ARRAY_SIZE(b->addr); + + n_pkts = bcache_cons_check(p->bc, n_pkts); + if (!n_pkts) + return 0; + + /* RXQ. */ + n_pkts = xsk_ring_cons__peek(&p->rxq, n_pkts, &pos); + if (!n_pkts) { + if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) { + struct pollfd pollfd = { + .fd = xsk_socket__fd(p->xsk), + .events = POLLIN, + }; + + poll(&pollfd, 1, 0); + } + return 0; + } + + for (i = 0; i < n_pkts; i++) { + b->addr[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->addr; + b->len[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->len; + } + + xsk_ring_cons__release(&p->rxq, n_pkts); + p->n_pkts_rx += n_pkts; + + /* UMEM FQ. */ + for ( ; ; ) { + int status; + + status = xsk_ring_prod__reserve(&p->umem_fq, n_pkts, &pos); + if (status == n_pkts) + break; + + if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) { + struct pollfd pollfd = { + .fd = xsk_socket__fd(p->xsk), + .events = POLLIN, + }; + + poll(&pollfd, 1, 0); + } + } + + for (i = 0; i < n_pkts; i++) + *xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) = + bcache_cons(p->bc); + + xsk_ring_prod__submit(&p->umem_fq, n_pkts); + + return n_pkts; +} + +static inline void +port_tx_burst(struct port *p, struct burst_tx *b) +{ + u32 n_pkts, pos, i; + int status; + + /* UMEM CQ. */ + n_pkts = p->params.bp->umem_cfg.comp_size; + + n_pkts = xsk_ring_cons__peek(&p->umem_cq, n_pkts, &pos); + + for (i = 0; i < n_pkts; i++) { + u64 addr = *xsk_ring_cons__comp_addr(&p->umem_cq, pos + i); + + bcache_prod(p->bc, addr); + } + + xsk_ring_cons__release(&p->umem_cq, n_pkts); + + /* TXQ. */ + n_pkts = b->n_pkts; + + for ( ; ; ) { + status = xsk_ring_prod__reserve(&p->txq, n_pkts, &pos); + if (status == n_pkts) + break; + + if (xsk_ring_prod__needs_wakeup(&p->txq)) + sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, + NULL, 0); + } + + for (i = 0; i < n_pkts; i++) { + xsk_ring_prod__tx_desc(&p->txq, pos + i)->addr = b->addr[i]; + xsk_ring_prod__tx_desc(&p->txq, pos + i)->len = b->len[i]; + } + + xsk_ring_prod__submit(&p->txq, n_pkts); + if (xsk_ring_prod__needs_wakeup(&p->txq)) + sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + p->n_pkts_tx += n_pkts; +} + +/* + * Thread + * + * Packet forwarding threads. + */ +#ifndef MAX_PORTS_PER_THREAD +#define MAX_PORTS_PER_THREAD 16 +#endif + +struct thread_data { + struct port *ports_rx[MAX_PORTS_PER_THREAD]; + struct port *ports_tx[MAX_PORTS_PER_THREAD]; + u32 n_ports_rx; + struct burst_rx burst_rx; + struct burst_tx burst_tx[MAX_PORTS_PER_THREAD]; + u32 cpu_core_id; + int quit; +}; + +static void swap_mac_addresses(void *data) +{ + struct ether_header *eth = (struct ether_header *)data; + struct ether_addr *src_addr = (struct ether_addr *)ð->ether_shost; + struct ether_addr *dst_addr = (struct ether_addr *)ð->ether_dhost; + struct ether_addr tmp; + + tmp = *src_addr; + *src_addr = *dst_addr; + *dst_addr = tmp; +} + +static void * +thread_func(void *arg) +{ + struct thread_data *t = arg; + cpu_set_t cpu_cores; + u32 i; + + CPU_ZERO(&cpu_cores); + CPU_SET(t->cpu_core_id, &cpu_cores); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_cores); + + for (i = 0; !t->quit; i = (i + 1) & (t->n_ports_rx - 1)) { + struct port *port_rx = t->ports_rx[i]; + struct port *port_tx = t->ports_tx[i]; + struct burst_rx *brx = &t->burst_rx; + struct burst_tx *btx = &t->burst_tx[i]; + u32 n_pkts, j; + + /* RX. */ + n_pkts = port_rx_burst(port_rx, brx); + if (!n_pkts) + continue; + + /* Process & TX. */ + for (j = 0; j < n_pkts; j++) { + u64 addr = xsk_umem__add_offset_to_addr(brx->addr[j]); + u8 *pkt = xsk_umem__get_data(port_rx->params.bp->addr, + addr); + + swap_mac_addresses(pkt); + + btx->addr[btx->n_pkts] = brx->addr[j]; + btx->len[btx->n_pkts] = brx->len[j]; + btx->n_pkts++; + + if (btx->n_pkts == MAX_BURST_TX) { + port_tx_burst(port_tx, btx); + btx->n_pkts = 0; + } + } + } + + return NULL; +} + +/* + * Process + */ +static const struct bpool_params bpool_params_default = { + .n_buffers = 64 * 1024, + .buffer_size = XSK_UMEM__DEFAULT_FRAME_SIZE, + .mmap_flags = 0, + + .n_users_max = 16, + .n_buffers_per_slab = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, +}; + +static const struct xsk_umem_config umem_cfg_default = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, + .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, + .flags = 0, +}; + +static const struct port_params port_params_default = { + .xsk_cfg = { + .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .libbpf_flags = 0, + .xdp_flags = XDP_FLAGS_DRV_MODE, + .bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY, + }, + + .bp = NULL, + .iface = NULL, + .iface_queue = 0, +}; + +#ifndef MAX_PORTS +#define MAX_PORTS 64 +#endif + +#ifndef MAX_THREADS +#define MAX_THREADS 64 +#endif + +static struct bpool_params bpool_params; +static struct xsk_umem_config umem_cfg; +static struct bpool *bp; + +static struct port_params port_params[MAX_PORTS]; +static struct port *ports[MAX_PORTS]; +static u64 n_pkts_rx[MAX_PORTS]; +static u64 n_pkts_tx[MAX_PORTS]; +static int n_ports; + +static pthread_t threads[MAX_THREADS]; +static struct thread_data thread_data[MAX_THREADS]; +static int n_threads; + +static void +print_usage(char *prog_name) +{ + const char *usage = + "Usage:\n" + "\t%s [ -b SIZE ] -c CORE -i INTERFACE [ -q QUEUE ]\n" + "\n" + "-c CORE CPU core to run a packet forwarding thread\n" + " on. May be invoked multiple times.\n" + "\n" + "-b SIZE Number of buffers in the buffer pool shared\n" + " by all the forwarding threads. Default: %u.\n" + "\n" + "-i INTERFACE Network interface. Each (INTERFACE, QUEUE)\n" + " pair specifies one forwarding port. May be\n" + " invoked multiple times.\n" + "\n" + "-q QUEUE Network interface queue for RX and TX. Each\n" + " (INTERFACE, QUEUE) pair specified one\n" + " forwarding port. Default: %u. May be invoked\n" + " multiple times.\n" + "\n"; + printf(usage, + prog_name, + bpool_params_default.n_buffers, + port_params_default.iface_queue); +} + +static int +parse_args(int argc, char **argv) +{ + struct option lgopts[] = { + { NULL, 0, 0, 0 } + }; + int opt, option_index; + + /* Parse the input arguments. */ + for ( ; ;) { + opt = getopt_long(argc, argv, "c:i:q:", lgopts, &option_index); + if (opt == EOF) + break; + + switch (opt) { + case 'b': + bpool_params.n_buffers = atoi(optarg); + break; + + case 'c': + if (n_threads == MAX_THREADS) { + printf("Max number of threads (%d) reached.\n", + MAX_THREADS); + return -1; + } + + thread_data[n_threads].cpu_core_id = atoi(optarg); + n_threads++; + break; + + case 'i': + if (n_ports == MAX_PORTS) { + printf("Max number of ports (%d) reached.\n", + MAX_PORTS); + return -1; + } + + port_params[n_ports].iface = optarg; + port_params[n_ports].iface_queue = 0; + n_ports++; + break; + + case 'q': + if (n_ports == 0) { + printf("No port specified for queue.\n"); + return -1; + } + port_params[n_ports - 1].iface_queue = atoi(optarg); + break; + + default: + printf("Illegal argument.\n"); + return -1; + } + } + + optind = 1; /* reset getopt lib */ + + /* Check the input arguments. */ + if (!n_ports) { + printf("No ports specified.\n"); + return -1; + } + + if (!n_threads) { + printf("No threads specified.\n"); + return -1; + } + + if (n_ports % n_threads) { + printf("Ports cannot be evenly distributed to threads.\n"); + return -1; + } + + return 0; +} + +static void +print_port(u32 port_id) +{ + struct port *port = ports[port_id]; + + printf("Port %u: interface = %s, queue = %u\n", + port_id, port->params.iface, port->params.iface_queue); +} + +static void +print_thread(u32 thread_id) +{ + struct thread_data *t = &thread_data[thread_id]; + u32 i; + + printf("Thread %u (CPU core %u): ", + thread_id, t->cpu_core_id); + + for (i = 0; i < t->n_ports_rx; i++) { + struct port *port_rx = t->ports_rx[i]; + struct port *port_tx = t->ports_tx[i]; + + printf("(%s, %u) -> (%s, %u), ", + port_rx->params.iface, + port_rx->params.iface_queue, + port_tx->params.iface, + port_tx->params.iface_queue); + } + + printf("\n"); +} + +static void +print_port_stats_separator(void) +{ + printf("+-%4s-+-%12s-+-%13s-+-%12s-+-%13s-+\n", + "----", + "------------", + "-------------", + "------------", + "-------------"); +} + +static void +print_port_stats_header(void) +{ + print_port_stats_separator(); + printf("| %4s | %12s | %13s | %12s | %13s |\n", + "Port", + "RX packets", + "RX rate (pps)", + "TX packets", + "TX_rate (pps)"); + print_port_stats_separator(); +} + +static void +print_port_stats_trailer(void) +{ + print_port_stats_separator(); + printf("\n"); +} + +static void +print_port_stats(int port_id, u64 ns_diff) +{ + struct port *p = ports[port_id]; + double rx_pps, tx_pps; + + rx_pps = (p->n_pkts_rx - n_pkts_rx[port_id]) * 1000000000. / ns_diff; + tx_pps = (p->n_pkts_tx - n_pkts_tx[port_id]) * 1000000000. / ns_diff; + + printf("| %4d | %12llu | %13.0f | %12llu | %13.0f |\n", + port_id, + p->n_pkts_rx, + rx_pps, + p->n_pkts_tx, + tx_pps); + + n_pkts_rx[port_id] = p->n_pkts_rx; + n_pkts_tx[port_id] = p->n_pkts_tx; +} + +static void +print_port_stats_all(u64 ns_diff) +{ + int i; + + print_port_stats_header(); + for (i = 0; i < n_ports; i++) + print_port_stats(i, ns_diff); + print_port_stats_trailer(); +} + +static int quit; + +static void +signal_handler(int sig) +{ + quit = 1; +} + +static void remove_xdp_program(void) +{ + int i; + + for (i = 0 ; i < n_ports; i++) + bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1, + port_params[i].xsk_cfg.xdp_flags); +} + +int main(int argc, char **argv) +{ + struct timespec time; + u64 ns0; + int i; + + /* Parse args. */ + memcpy(&bpool_params, &bpool_params_default, + sizeof(struct bpool_params)); + memcpy(&umem_cfg, &umem_cfg_default, + sizeof(struct xsk_umem_config)); + for (i = 0; i < MAX_PORTS; i++) + memcpy(&port_params[i], &port_params_default, + sizeof(struct port_params)); + + if (parse_args(argc, argv)) { + print_usage(argv[0]); + return -1; + } + + /* Buffer pool initialization. */ + bp = bpool_init(&bpool_params, &umem_cfg); + if (!bp) { + printf("Buffer pool initialization failed.\n"); + return -1; + } + printf("Buffer pool created successfully.\n"); + + /* Ports initialization. */ + for (i = 0; i < MAX_PORTS; i++) + port_params[i].bp = bp; + + for (i = 0; i < n_ports; i++) { + ports[i] = port_init(&port_params[i]); + if (!ports[i]) { + printf("Port %d initialization failed.\n", i); + return -1; + } + print_port(i); + } + printf("All ports created successfully.\n"); + + /* Threads. */ + for (i = 0; i < n_threads; i++) { + struct thread_data *t = &thread_data[i]; + u32 n_ports_per_thread = n_ports / n_threads, j; + + for (j = 0; j < n_ports_per_thread; j++) { + t->ports_rx[j] = ports[i * n_ports_per_thread + j]; + t->ports_tx[j] = ports[i * n_ports_per_thread + + (j + 1) % n_ports_per_thread]; + } + + t->n_ports_rx = n_ports_per_thread; + + print_thread(i); + } + + for (i = 0; i < n_threads; i++) { + int status; + + status = pthread_create(&threads[i], + NULL, + thread_func, + &thread_data[i]); + if (status) { + printf("Thread %d creation failed.\n", i); + return -1; + } + } + printf("All threads created successfully.\n"); + + /* Print statistics. */ + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + signal(SIGABRT, signal_handler); + + clock_gettime(CLOCK_MONOTONIC, &time); + ns0 = time.tv_sec * 1000000000UL + time.tv_nsec; + for ( ; !quit; ) { + u64 ns1, ns_diff; + + sleep(1); + clock_gettime(CLOCK_MONOTONIC, &time); + ns1 = time.tv_sec * 1000000000UL + time.tv_nsec; + ns_diff = ns1 - ns0; + ns0 = ns1; + + print_port_stats_all(ns_diff); + } + + /* Threads completion. */ + printf("Quit.\n"); + for (i = 0; i < n_threads; i++) + thread_data[i].quit = 1; + + for (i = 0; i < n_threads; i++) + pthread_join(threads[i], NULL); + + for (i = 0; i < n_ports; i++) + port_free(ports[i]); + + bpool_free(bp); + + remove_xdp_program(); + + return 0; +} -- cgit v1.2.3 From c8a039a47ffe06077929f29c9d4c7cba01a2104b Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 28 Aug 2020 14:51:05 +0200 Subject: samples/bpf: Optimize l2fwd performance in xdpsock Optimize the throughput performance of the l2fwd sub-app in the xdpsock sample application by removing a duplicate syscall and increasing the size of the fill ring. The latter needs some further explanation. We recommend that you set the fill ring size >= HW RX ring size + AF_XDP RX ring size. Make sure you fill up the fill ring with buffers at regular intervals, and you will with this setting avoid allocation failures in the driver. These are usually quite expensive since drivers have not been written to assume that allocation failures are common. For regular sockets, kernel allocated memory is used that only runs out in OOM situations that should be rare. These two performance optimizations together lead to a 6% percent improvement for the l2fwd app on my machine. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/1598619065-1944-1-git-send-email-magnus.karlsson@intel.com --- samples/bpf/xdpsock_user.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 19c679456a0e..9f54df768d35 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -613,7 +613,16 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) { struct xsk_umem_info *umem; struct xsk_umem_config cfg = { - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + /* We recommend that you set the fill ring size >= HW RX ring size + + * AF_XDP RX ring size. Make sure you fill up the fill ring + * with buffers at regular intervals, and you will with this setting + * avoid allocation failures in the driver. These are usually quite + * expensive since drivers have not been written to assume that + * allocation failures are common. For regular sockets, kernel + * allocated memory is used that only runs out in OOM situations + * that should be rare. + */ + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = opt_xsk_frame_size, .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, @@ -640,13 +649,13 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem) u32 idx; ret = xsk_ring_prod__reserve(&umem->fq, - XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); - if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) + XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx); + if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2) exit_with_error(-ret); - for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) + for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++) *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * opt_xsk_frame_size; - xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); + xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2); } static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, @@ -888,9 +897,6 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, if (!xsk->outstanding_tx) return; - if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) - kick_tx(xsk); - ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size : xsk->outstanding_tx; -- cgit v1.2.3 From b69e56cf765155dcac0037d7d0f162a2afab76c2 Mon Sep 17 00:00:00 2001 From: Weqaar Janjua Date: Sat, 29 Aug 2020 00:17:17 +0800 Subject: samples/bpf: Fix to xdpsock to avoid recycling frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The txpush program in the xdpsock sample application is supposed to send out all packets in the umem in a round-robin fashion. The problem is that it only cycled through the first BATCH_SIZE worth of packets. Fixed this so that it cycles through all buffers in the umem as intended. Fixes: 248c7f9c0e21 ("samples/bpf: convert xdpsock to use libbpf for AF_XDP access") Signed-off-by: Weqaar Janjua Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20200828161717.42705-1-weqaar.a.janjua@intel.com --- samples/bpf/xdpsock_user.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 9f54df768d35..4cead341ae57 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1010,7 +1010,7 @@ static void rx_drop_all(void) } } -static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size) +static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) { u32 idx; unsigned int i; @@ -1023,14 +1023,14 @@ static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size) for (i = 0; i < batch_size; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; + tx_desc->addr = (*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; tx_desc->len = PKT_SIZE; } xsk_ring_prod__submit(&xsk->tx, batch_size); xsk->outstanding_tx += batch_size; - frame_nb += batch_size; - frame_nb %= NUM_FRAMES; + *frame_nb += batch_size; + *frame_nb %= NUM_FRAMES; complete_tx_only(xsk, batch_size); } @@ -1086,7 +1086,7 @@ static void tx_only_all(void) } for (i = 0; i < num_socks; i++) - tx_only(xsks[i], frame_nb[i], batch_size); + tx_only(xsks[i], &frame_nb[i], batch_size); pkt_cnt += batch_size; -- cgit v1.2.3 From 698584dffd4bca834ac4733fc6a659a0a0d213e7 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Fri, 4 Sep 2020 15:34:33 +0900 Subject: samples, bpf: Replace bpf_program__title() with bpf_program__section_name() From commit 521095842027 ("libbpf: Deprecate notion of BPF program "title" in favor of "section name""), the term title has been replaced with section name in libbpf. Since the bpf_program__title() has been deprecated, this commit switches this function to bpf_program__section_name(). Due to this commit, the compilation warning issue has also been resolved. Fixes: 521095842027 ("libbpf: Deprecate notion of BPF program "title" in favor of "section name"") Signed-off-by: Daniel T. Lee Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200904063434.24963-1-danieltimlee@gmail.com --- samples/bpf/sockex3_user.c | 6 +++--- samples/bpf/spintest_user.c | 6 +++--- samples/bpf/tracex5_user.c | 6 +++--- samples/bpf/xdp_redirect_cpu_user.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'samples') diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 4dbee7427d47..7793f6a6ae7e 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -29,8 +29,8 @@ int main(int argc, char **argv) struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_program *prog; struct bpf_object *obj; + const char *section; char filename[256]; - const char *title; FILE *f; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); @@ -58,8 +58,8 @@ int main(int argc, char **argv) bpf_object__for_each_program(prog, obj) { fd = bpf_program__fd(prog); - title = bpf_program__title(prog, false); - if (sscanf(title, "socket/%d", &key) != 1) { + section = bpf_program__section_name(prog); + if (sscanf(section, "socket/%d", &key) != 1) { fprintf(stderr, "ERROR: finding prog failed\n"); goto cleanup; } diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index 847da9284fa8..f090d0dc60d6 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -17,7 +17,7 @@ int main(int ac, char **argv) long key, next_key, value; struct bpf_program *prog; int map_fd, i, j = 0; - const char *title; + const char *section; struct ksym *sym; if (setrlimit(RLIMIT_MEMLOCK, &r)) { @@ -51,8 +51,8 @@ int main(int ac, char **argv) } bpf_object__for_each_program(prog, obj) { - title = bpf_program__title(prog, false); - if (sscanf(title, "kprobe/%s", symbol) != 1) + section = bpf_program__section_name(prog); + if (sscanf(section, "kprobe/%s", symbol) != 1) continue; /* Attach prog only when symbol exists */ diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index 98dad57a96c4..c17d3fb5fd64 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -39,8 +39,8 @@ int main(int ac, char **argv) struct bpf_program *prog; struct bpf_object *obj; int key, fd, progs_fd; + const char *section; char filename[256]; - const char *title; FILE *f; setrlimit(RLIMIT_MEMLOCK, &r); @@ -78,9 +78,9 @@ int main(int ac, char **argv) } bpf_object__for_each_program(prog, obj) { - title = bpf_program__title(prog, false); + section = bpf_program__section_name(prog); /* register only syscalls to PROG_ARRAY */ - if (sscanf(title, "kprobe/%d", &key) != 1) + if (sscanf(section, "kprobe/%d", &key) != 1) continue; fd = bpf_program__fd(prog); diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 004c0622c913..3dd366e9474d 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -111,7 +111,7 @@ static void print_avail_progs(struct bpf_object *obj) bpf_object__for_each_program(pos, obj) { if (bpf_program__is_xdp(pos)) - printf(" %s\n", bpf_program__title(pos, false)); + printf(" %s\n", bpf_program__section_name(pos)); } } -- cgit v1.2.3 From f9bec5d756b30d5b21aa5ff9b7d5d115741517c1 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Fri, 4 Sep 2020 15:34:34 +0900 Subject: samples, bpf: Add xsk_fwd test file to .gitignore This commit adds xsk_fwd test file to .gitignore which is newly added to samples/bpf. Signed-off-by: Daniel T. Lee Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200904063434.24963-2-danieltimlee@gmail.com --- samples/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'samples') diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 034800c4d1e6..b2f29bc8dc43 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -50,4 +50,5 @@ xdp_rxq_info xdp_sample_pkts xdp_tx_iptunnel xdpsock +xsk_fwd testfile.img -- cgit v1.2.3 From 3131cf66d3033aa40db5b5d72a2673315b61c862 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 10 Sep 2020 10:31:04 +0200 Subject: samples/bpf: Fix one packet sending in xdpsock Fix the sending of a single packet (or small burst) in xdpsock when executing in copy mode. Currently, the l2fwd application in xdpsock only transmits the packets after a batch of them has been received, which might be confusing if you only send one packet and expect that it is returned pronto. Fix this by calling sendto() more often and add a comment in the code that states that this can be optimized if needed. Reported-by: Tirthendu Sarkar Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1599726666-8431-2-git-send-email-magnus.karlsson@gmail.com --- samples/bpf/xdpsock_user.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 4cead341ae57..b6175cb9a31d 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -897,6 +897,14 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, if (!xsk->outstanding_tx) return; + /* In copy mode, Tx is driven by a syscall so we need to use e.g. sendto() to + * really send the packets. In zero-copy mode we do not have to do this, since Tx + * is driven by the NAPI loop. So as an optimization, we do not have to call + * sendto() all the time in zero-copy mode for l2fwd. + */ + if (opt_xdp_bind_flags & XDP_COPY) + kick_tx(xsk); + ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size : xsk->outstanding_tx; -- cgit v1.2.3 From 5a2a0dd88f0f267ac5953acd81050ae43a82201f Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 10 Sep 2020 10:31:05 +0200 Subject: samples/bpf: Fix possible deadlock in xdpsock Fix a possible deadlock in the l2fwd application in xdpsock that can occur when there is no space in the Tx ring. There are two ways to get the kernel to consume entries in the Tx ring: calling sendto() to make it send packets and freeing entries from the completion ring, as the kernel will not send a packet if there is no space for it to add a completion entry in the completion ring. The Tx loop in l2fwd only used to call sendto(). This patches adds cleaning the completion ring in that loop. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1599726666-8431-3-git-send-email-magnus.karlsson@gmail.com --- samples/bpf/xdpsock_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index b6175cb9a31d..b60bf4ef7cdb 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1125,6 +1125,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); + complete_tx_l2fwd(xsk, fds); if (xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); -- cgit v1.2.3 From 74e00676d7f1f3c0676fdfffcf404a09a037bfe3 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 10 Sep 2020 10:31:06 +0200 Subject: samples/bpf: Add quiet option to xdpsock Add a quiet option (-Q) that disables the statistics print outs of xdpsock. This is good to have when measuring 0% loss rate performance as it will be quite terrible if the application uses printfs. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1599726666-8431-4-git-send-email-magnus.karlsson@gmail.com --- samples/bpf/xdpsock_user.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index b60bf4ef7cdb..b220173dbe1e 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -78,6 +78,7 @@ static int opt_pkt_count; static u16 opt_pkt_size = MIN_PKT_SIZE; static u32 opt_pkt_fill_pattern = 0x12345678; static bool opt_extra_stats; +static bool opt_quiet; static int opt_poll; static int opt_interval = 1; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; @@ -718,6 +719,7 @@ static struct option long_options[] = { {"tx-pkt-size", required_argument, 0, 's'}, {"tx-pkt-pattern", required_argument, 0, 'P'}, {"extra-stats", no_argument, 0, 'x'}, + {"quiet", no_argument, 0, 'Q'}, {0, 0, 0, 0} }; @@ -753,6 +755,7 @@ static void usage(const char *prog) " Min size: %d, Max size %d.\n" " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" " -x, --extra-stats Display extra statistics.\n" + " -Q, --quiet Do not display any stats.\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, @@ -768,7 +771,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:x", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQ", long_options, &option_index); if (c == -1) break; @@ -852,6 +855,9 @@ static void parse_command_line(int argc, char **argv) case 'x': opt_extra_stats = 1; break; + case 'Q': + opt_quiet = 1; + break; default: usage(basename(argv[0])); } @@ -1286,9 +1292,11 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); - ret = pthread_create(&pt, NULL, poller, NULL); - if (ret) - exit_with_error(ret); + if (!opt_quiet) { + ret = pthread_create(&pt, NULL, poller, NULL); + if (ret) + exit_with_error(ret); + } prev_time = get_nsecs(); start_time = prev_time; @@ -1302,7 +1310,8 @@ int main(int argc, char **argv) benchmark_done = true; - pthread_join(pt, NULL); + if (!opt_quiet) + pthread_join(pt, NULL); xdpsock_cleanup(); -- cgit v1.2.3 From f55f4c349a03d820c27145bdf457013b42e4b487 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 15 Sep 2020 13:55:19 +0200 Subject: samples/bpf: Fix test_map_in_map on s390 s390 uses socketcall multiplexer instead of individual socket syscalls. Therefore, "kprobe/" SYSCALL(sys_connect) does not trigger and test_map_in_map fails. Fix by using "kprobe/__sys_connect" instead. Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200915115519.3769807-1-iii@linux.ibm.com --- samples/bpf/test_map_in_map_kern.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'samples') diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c index 8def45c5b697..b0200c8eac09 100644 --- a/samples/bpf/test_map_in_map_kern.c +++ b/samples/bpf/test_map_in_map_kern.c @@ -103,10 +103,9 @@ static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port) return result ? *result : -ENOENT; } -SEC("kprobe/" SYSCALL(sys_connect)) +SEC("kprobe/__sys_connect") int trace_sys_connect(struct pt_regs *ctx) { - struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); struct sockaddr_in6 *in6; u16 test_case, port, dst6[8]; int addrlen, ret, inline_ret, ret_key = 0; @@ -114,8 +113,8 @@ int trace_sys_connect(struct pt_regs *ctx) void *outer_map, *inner_map; bool inline_hash = false; - in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs); - addrlen = (int)PT_REGS_PARM3_CORE(real_regs); + in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx); + addrlen = (int)PT_REGS_PARM3_CORE(ctx); if (addrlen != sizeof(*in6)) return 0; -- cgit v1.2.3 From faef26fa444dc44eeff70c9a63ebe7fef00b6c37 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:19 +0200 Subject: bpf, selftests: Use bpf_tail_call_static where appropriate For those locations where we use an immediate tail call map index use the newly added bpf_tail_call_static() helper. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/3cfb2b799a62d22c6e7ae5897c23940bdcc24cbc.1601477936.git.daniel@iogearbox.net --- samples/bpf/sockex3_kern.c | 20 +++++++++------- tools/testing/selftests/bpf/progs/bpf_flow.c | 12 +++++----- tools/testing/selftests/bpf/progs/tailcall1.c | 28 +++++++++++----------- tools/testing/selftests/bpf/progs/tailcall2.c | 14 +++++------ tools/testing/selftests/bpf/progs/tailcall3.c | 4 ++-- .../selftests/bpf/progs/tailcall_bpf2bpf1.c | 4 ++-- .../selftests/bpf/progs/tailcall_bpf2bpf2.c | 6 ++--- .../selftests/bpf/progs/tailcall_bpf2bpf3.c | 6 ++--- .../selftests/bpf/progs/tailcall_bpf2bpf4.c | 6 ++--- 9 files changed, 51 insertions(+), 49 deletions(-) (limited to 'samples') diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index cab9cca0b8eb..8142d02b33e6 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -31,28 +31,30 @@ struct { #define PARSE_IP 3 #define PARSE_IPV6 4 -/* protocol dispatch routine. - * It tail-calls next BPF program depending on eth proto - * Note, we could have used: - * bpf_tail_call(skb, &jmp_table, proto); - * but it would need large prog_array +/* Protocol dispatch routine. It tail-calls next BPF program depending + * on eth proto. Note, we could have used ... + * + * bpf_tail_call(skb, &jmp_table, proto); + * + * ... but it would need large prog_array and cannot be optimised given + * the map key is not static. */ static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto) { switch (proto) { case ETH_P_8021Q: case ETH_P_8021AD: - bpf_tail_call(skb, &jmp_table, PARSE_VLAN); + bpf_tail_call_static(skb, &jmp_table, PARSE_VLAN); break; case ETH_P_MPLS_UC: case ETH_P_MPLS_MC: - bpf_tail_call(skb, &jmp_table, PARSE_MPLS); + bpf_tail_call_static(skb, &jmp_table, PARSE_MPLS); break; case ETH_P_IP: - bpf_tail_call(skb, &jmp_table, PARSE_IP); + bpf_tail_call_static(skb, &jmp_table, PARSE_IP); break; case ETH_P_IPV6: - bpf_tail_call(skb, &jmp_table, PARSE_IPV6); + bpf_tail_call_static(skb, &jmp_table, PARSE_IPV6); break; } } diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index de6de9221518..5a65f6b51377 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -118,18 +118,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) switch (proto) { case bpf_htons(ETH_P_IP): - bpf_tail_call(skb, &jmp_table, IP); + bpf_tail_call_static(skb, &jmp_table, IP); break; case bpf_htons(ETH_P_IPV6): - bpf_tail_call(skb, &jmp_table, IPV6); + bpf_tail_call_static(skb, &jmp_table, IPV6); break; case bpf_htons(ETH_P_MPLS_MC): case bpf_htons(ETH_P_MPLS_UC): - bpf_tail_call(skb, &jmp_table, MPLS); + bpf_tail_call_static(skb, &jmp_table, MPLS); break; case bpf_htons(ETH_P_8021Q): case bpf_htons(ETH_P_8021AD): - bpf_tail_call(skb, &jmp_table, VLAN); + bpf_tail_call_static(skb, &jmp_table, VLAN); break; default: /* Protocol not supported */ @@ -246,10 +246,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) switch (nexthdr) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: - bpf_tail_call(skb, &jmp_table, IPV6OP); + bpf_tail_call_static(skb, &jmp_table, IPV6OP); break; case IPPROTO_FRAGMENT: - bpf_tail_call(skb, &jmp_table, IPV6FR); + bpf_tail_call_static(skb, &jmp_table, IPV6FR); break; default: return parse_ip_proto(skb, nexthdr); diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c index 1f407e65ae52..7115bcefbe8a 100644 --- a/tools/testing/selftests/bpf/progs/tailcall1.c +++ b/tools/testing/selftests/bpf/progs/tailcall1.c @@ -26,20 +26,20 @@ int entry(struct __sk_buff *skb) /* Multiple locations to make sure we patch * all of them. */ - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return 3; } diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c index a093e739cf0e..0431e4fe7efd 100644 --- a/tools/testing/selftests/bpf/progs/tailcall2.c +++ b/tools/testing/selftests/bpf/progs/tailcall2.c @@ -13,14 +13,14 @@ struct { SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return 0; } SEC("classifier/1") int bpf_func_1(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return 1; } @@ -33,25 +33,25 @@ int bpf_func_2(struct __sk_buff *skb) SEC("classifier/3") int bpf_func_3(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 4); + bpf_tail_call_static(skb, &jmp_table, 4); return 3; } SEC("classifier/4") int bpf_func_4(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 3); + bpf_tail_call_static(skb, &jmp_table, 3); return 4; } SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); /* Check multi-prog update. */ - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); /* Check tail call limit. */ - bpf_tail_call(skb, &jmp_table, 3); + bpf_tail_call_static(skb, &jmp_table, 3); return 3; } diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c index cabda877cf0a..739dc2a51e74 100644 --- a/tools/testing/selftests/bpf/progs/tailcall3.c +++ b/tools/testing/selftests/bpf/progs/tailcall3.c @@ -16,14 +16,14 @@ SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) { count++; - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 1; } SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c index b5d9c8e778ae..0103f3dd9f02 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c @@ -21,7 +21,7 @@ TAIL_FUNC(1) static __noinline int subprog_tail(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return skb->len * 2; } @@ -29,7 +29,7 @@ int subprog_tail(struct __sk_buff *skb) SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return subprog_tail(skb); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c index a004ab28ce28..7b1c04183824 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c @@ -14,9 +14,9 @@ static __noinline int subprog_tail(struct __sk_buff *skb) { if (load_byte(skb, 0)) - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); else - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 1; } @@ -32,7 +32,7 @@ int bpf_func_0(struct __sk_buff *skb) SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c index 96dbef2b6b7c..0d5482bea6c9 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c @@ -16,9 +16,9 @@ int subprog_tail2(struct __sk_buff *skb) volatile char arr[64] = {}; if (load_word(skb, 0) || load_half(skb, 0)) - bpf_tail_call(skb, &jmp_table, 10); + bpf_tail_call_static(skb, &jmp_table, 10); else - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return skb->len; } @@ -28,7 +28,7 @@ int subprog_tail(struct __sk_buff *skb) { volatile char arr[64] = {}; - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return skb->len * 2; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c index 98b40a95bc67..9a1b166b7fbe 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -14,21 +14,21 @@ static volatile int count; __noinline int subprog_tail_2(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return skb->len * 3; } __noinline int subprog_tail_1(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return skb->len * 2; } __noinline int subprog_tail(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return skb->len; } -- cgit v1.2.3 From 9618bde489b2d23779b1e2d04c10983da21f3818 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 5 Oct 2020 21:34:26 -0700 Subject: samples/bpf: Change Makefile to cope with latest llvm With latest llvm trunk, bpf programs under samples/bpf directory, if using CORE, may experience the following errors: LLVM ERROR: Cannot select: intrinsic %llvm.preserve.struct.access.index PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace. Stack dump: 0. Program arguments: llc -march=bpf -filetype=obj -o samples/bpf/test_probe_write_user_kern.o 1. Running pass 'Function Pass Manager' on module ''. 2. Running pass 'BPF DAG->DAG Pattern Instruction Selection' on function '@bpf_prog1' #0 0x000000000183c26c llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/data/users/yhs/work/llvm-project/llvm/build.cur/install/bin/llc+0x183c26c) ... #7 0x00000000017c375e (/data/users/yhs/work/llvm-project/llvm/build.cur/install/bin/llc+0x17c375e) #8 0x00000000016a75c5 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/data/users/yhs/work/llvm-project/llvm/build.cur/install/bin/llc+0x16a75c5) #9 0x00000000016ab4f8 llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/data/users/yhs/work/llvm-project/llvm/build.cur/install/bin/llc+0x16ab4f8) ... Aborted (core dumped) | llc -march=bpf -filetype=obj -o samples/bpf/test_probe_write_user_kern.o The reason is due to llvm change https://reviews.llvm.org/D87153 where the CORE relocation global generation is moved from the beginning of target dependent optimization (llc) to the beginning of target independent optimization (opt). Since samples/bpf programs did not use vmlinux.h and its clang compilation uses native architecture, we need to adjust arch triple at opt level to do CORE relocation global generation properly. Otherwise, the above error will appear. This patch fixed the issue by introduce opt and llvm-dis to compilation chain, which will do proper CORE relocation global generation as well as O2 level optimization. Tested with llvm10, llvm11 and trunk/llvm12. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201006043427.1891742-1-yhs@fb.com --- samples/bpf/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4f1ed0e3cf9f..e29b1e89dd3e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -211,6 +211,8 @@ TPROGLDLIBS_xsk_fwd += -pthread # make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang LLC ?= llc CLANG ?= clang +OPT ?= opt +LLVM_DIS ?= llvm-dis LLVM_OBJCOPY ?= llvm-objcopy BTF_PAHOLE ?= pahole @@ -303,6 +305,11 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. +# below we use long chain of commands, clang | opt | llvm-dis | llc, +# to generate final object file. 'clang' compiles the source into IR +# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin +# processing (llvm12) and IR optimizations. 'llvm-dis' converts +# 'opt' output to IR, and finally 'llc' generates bpf byte code. $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ @@ -314,7 +321,9 @@ $(obj)/%.o: $(src)/%.c -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \ - -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ + -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ + $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ + $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif -- cgit v1.2.3 From 544d6adf3c3dddbf80e5757a3098c63612f5f8f8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 5 Oct 2020 21:34:27 -0700 Subject: samples/bpf: Fix a compilation error with fallthrough marking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling samples/bpf hits an error related to fallthrough marking. ... CC samples/bpf/hbm.o samples/bpf/hbm.c: In function ‘main’: samples/bpf/hbm.c:486:4: error: ‘fallthrough’ undeclared (first use in this function) fallthrough; ^~~~~~~~~~~ The "fallthrough" is not defined under tools/include directory. Rather, it is "__fallthrough" is defined in linux/compiler.h. Including "linux/compiler.h" and using "__fallthrough" fixed the issue. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201006043427.1891805-1-yhs@fb.com --- samples/bpf/hbm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index 4b22ace52f80..ff4c533dfac2 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -483,7 +484,7 @@ int main(int argc, char **argv) "Option -%c requires an argument.\n\n", optopt); case 'h': - fallthrough; + __fallthrough; default: Usage(); return 0; -- cgit v1.2.3 From 2e8806f032f5d9d5c59b790c73b83842bda78f23 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Fri, 2 Oct 2020 13:36:10 +0000 Subject: samples: bpf: Split xdpsock stats into new struct New statistics will be added in future commits. In preparation for this, let's split out the existing statistics into their own struct. Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201002133612.31536-1-ciara.loftus@intel.com --- samples/bpf/xdpsock_user.c | 123 +++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 54 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index b220173dbe1e..4c5022e6479e 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -91,18 +91,7 @@ static bool opt_need_wakeup = true; static u32 opt_num_xsks = 1; static u32 prog_id; -struct xsk_umem_info { - struct xsk_ring_prod fq; - struct xsk_ring_cons cq; - struct xsk_umem *umem; - void *buffer; -}; - -struct xsk_socket_info { - struct xsk_ring_cons rx; - struct xsk_ring_prod tx; - struct xsk_umem_info *umem; - struct xsk_socket *xsk; +struct xsk_ring_stats { unsigned long rx_npkts; unsigned long tx_npkts; unsigned long rx_dropped_npkts; @@ -119,6 +108,21 @@ struct xsk_socket_info { unsigned long prev_rx_full_npkts; unsigned long prev_rx_fill_empty_npkts; unsigned long prev_tx_empty_npkts; +}; + +struct xsk_umem_info { + struct xsk_ring_prod fq; + struct xsk_ring_cons cq; + struct xsk_umem *umem; + void *buffer; +}; + +struct xsk_socket_info { + struct xsk_ring_cons rx; + struct xsk_ring_prod tx; + struct xsk_umem_info *umem; + struct xsk_socket *xsk; + struct xsk_ring_stats ring_stats; u32 outstanding_tx; }; @@ -173,12 +177,12 @@ static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk) return err; if (optlen == sizeof(struct xdp_statistics)) { - xsk->rx_dropped_npkts = stats.rx_dropped; - xsk->rx_invalid_npkts = stats.rx_invalid_descs; - xsk->tx_invalid_npkts = stats.tx_invalid_descs; - xsk->rx_full_npkts = stats.rx_ring_full; - xsk->rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs; - xsk->tx_empty_npkts = stats.tx_ring_empty_descs; + xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped; + xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs; + xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs; + xsk->ring_stats.rx_full_npkts = stats.rx_ring_full; + xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs; + xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs; return 0; } @@ -198,9 +202,9 @@ static void dump_stats(void) double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps, tx_invalid_pps, tx_empty_pps; - rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) * + rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) * 1000000000. / dt; - tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) * + tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) * 1000000000. / dt; printf("\n sock%d@", i); @@ -209,47 +213,58 @@ static void dump_stats(void) printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts", dt / 1000000000.); - printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts); - printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts); + printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts); + printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts); - xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts; - xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts; + xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts; + xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts; if (opt_extra_stats) { if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) { - dropped_pps = (xsks[i]->rx_dropped_npkts - - xsks[i]->prev_rx_dropped_npkts) * 1000000000. / dt; - rx_invalid_pps = (xsks[i]->rx_invalid_npkts - - xsks[i]->prev_rx_invalid_npkts) * 1000000000. / dt; - tx_invalid_pps = (xsks[i]->tx_invalid_npkts - - xsks[i]->prev_tx_invalid_npkts) * 1000000000. / dt; - full_pps = (xsks[i]->rx_full_npkts - - xsks[i]->prev_rx_full_npkts) * 1000000000. / dt; - fill_empty_pps = (xsks[i]->rx_fill_empty_npkts - - xsks[i]->prev_rx_fill_empty_npkts) - * 1000000000. / dt; - tx_empty_pps = (xsks[i]->tx_empty_npkts - - xsks[i]->prev_tx_empty_npkts) * 1000000000. / dt; + dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts - + xsks[i]->ring_stats.prev_rx_dropped_npkts) * + 1000000000. / dt; + rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts - + xsks[i]->ring_stats.prev_rx_invalid_npkts) * + 1000000000. / dt; + tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts - + xsks[i]->ring_stats.prev_tx_invalid_npkts) * + 1000000000. / dt; + full_pps = (xsks[i]->ring_stats.rx_full_npkts - + xsks[i]->ring_stats.prev_rx_full_npkts) * + 1000000000. / dt; + fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts - + xsks[i]->ring_stats.prev_rx_fill_empty_npkts) * + 1000000000. / dt; + tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts - + xsks[i]->ring_stats.prev_tx_empty_npkts) * + 1000000000. / dt; printf(fmt, "rx dropped", dropped_pps, - xsks[i]->rx_dropped_npkts); + xsks[i]->ring_stats.rx_dropped_npkts); printf(fmt, "rx invalid", rx_invalid_pps, - xsks[i]->rx_invalid_npkts); + xsks[i]->ring_stats.rx_invalid_npkts); printf(fmt, "tx invalid", tx_invalid_pps, - xsks[i]->tx_invalid_npkts); + xsks[i]->ring_stats.tx_invalid_npkts); printf(fmt, "rx queue full", full_pps, - xsks[i]->rx_full_npkts); + xsks[i]->ring_stats.rx_full_npkts); printf(fmt, "fill ring empty", fill_empty_pps, - xsks[i]->rx_fill_empty_npkts); + xsks[i]->ring_stats.rx_fill_empty_npkts); printf(fmt, "tx ring empty", tx_empty_pps, - xsks[i]->tx_empty_npkts); - - xsks[i]->prev_rx_dropped_npkts = xsks[i]->rx_dropped_npkts; - xsks[i]->prev_rx_invalid_npkts = xsks[i]->rx_invalid_npkts; - xsks[i]->prev_tx_invalid_npkts = xsks[i]->tx_invalid_npkts; - xsks[i]->prev_rx_full_npkts = xsks[i]->rx_full_npkts; - xsks[i]->prev_rx_fill_empty_npkts = xsks[i]->rx_fill_empty_npkts; - xsks[i]->prev_tx_empty_npkts = xsks[i]->tx_empty_npkts; + xsks[i]->ring_stats.tx_empty_npkts); + + xsks[i]->ring_stats.prev_rx_dropped_npkts = + xsks[i]->ring_stats.rx_dropped_npkts; + xsks[i]->ring_stats.prev_rx_invalid_npkts = + xsks[i]->ring_stats.rx_invalid_npkts; + xsks[i]->ring_stats.prev_tx_invalid_npkts = + xsks[i]->ring_stats.tx_invalid_npkts; + xsks[i]->ring_stats.prev_rx_full_npkts = + xsks[i]->ring_stats.rx_full_npkts; + xsks[i]->ring_stats.prev_rx_fill_empty_npkts = + xsks[i]->ring_stats.rx_fill_empty_npkts; + xsks[i]->ring_stats.prev_tx_empty_npkts = + xsks[i]->ring_stats.tx_empty_npkts; } else { printf("%-15s\n", "Error retrieving extra stats"); } @@ -936,7 +951,7 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, xsk_ring_prod__submit(&xsk->umem->fq, rcvd); xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; - xsk->tx_npkts += rcvd; + xsk->ring_stats.tx_npkts += rcvd; } } @@ -956,7 +971,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk, if (rcvd > 0) { xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; - xsk->tx_npkts += rcvd; + xsk->ring_stats.tx_npkts += rcvd; } } @@ -996,7 +1011,7 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) xsk_ring_prod__submit(&xsk->umem->fq, rcvd); xsk_ring_cons__release(&xsk->rx, rcvd); - xsk->rx_npkts += rcvd; + xsk->ring_stats.rx_npkts += rcvd; } static void rx_drop_all(void) @@ -1155,7 +1170,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) xsk_ring_prod__submit(&xsk->tx, rcvd); xsk_ring_cons__release(&xsk->rx, rcvd); - xsk->rx_npkts += rcvd; + xsk->ring_stats.rx_npkts += rcvd; xsk->outstanding_tx += rcvd; } -- cgit v1.2.3 From 60dc609dbd545226e00c9e7b518d0f4873f0a3e1 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Fri, 2 Oct 2020 13:36:11 +0000 Subject: samples: bpf: Count syscalls in xdpsock Categorise and record syscalls issued in the xdpsock sample app. The categories recorded are: rx_empty_polls: polls when the rx ring is empty fill_fail_polls: polls when failed to get addr from fill ring copy_tx_sendtos: sendtos issued for tx when copy mode enabled tx_wakeup_sendtos: sendtos issued when tx ring needs waking up opt_polls: polls issued since the '-p' flag is set Print the stats using '-a' on the xdpsock command line. Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201002133612.31536-2-ciara.loftus@intel.com --- samples/bpf/xdpsock_user.c | 113 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 103 insertions(+), 10 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 4c5022e6479e..ff119ede4ab1 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -79,6 +79,7 @@ static u16 opt_pkt_size = MIN_PKT_SIZE; static u32 opt_pkt_fill_pattern = 0x12345678; static bool opt_extra_stats; static bool opt_quiet; +static bool opt_app_stats; static int opt_poll; static int opt_interval = 1; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; @@ -110,6 +111,19 @@ struct xsk_ring_stats { unsigned long prev_tx_empty_npkts; }; +struct xsk_app_stats { + unsigned long rx_empty_polls; + unsigned long fill_fail_polls; + unsigned long copy_tx_sendtos; + unsigned long tx_wakeup_sendtos; + unsigned long opt_polls; + unsigned long prev_rx_empty_polls; + unsigned long prev_fill_fail_polls; + unsigned long prev_copy_tx_sendtos; + unsigned long prev_tx_wakeup_sendtos; + unsigned long prev_opt_polls; +}; + struct xsk_umem_info { struct xsk_ring_prod fq; struct xsk_ring_cons cq; @@ -123,6 +137,7 @@ struct xsk_socket_info { struct xsk_umem_info *umem; struct xsk_socket *xsk; struct xsk_ring_stats ring_stats; + struct xsk_app_stats app_stats; u32 outstanding_tx; }; @@ -189,6 +204,45 @@ static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk) return -EINVAL; } +static void dump_app_stats(long dt) +{ + int i; + + for (i = 0; i < num_socks && xsks[i]; i++) { + char *fmt = "%-18s %'-14.0f %'-14lu\n"; + double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps, + tx_wakeup_sendtos_ps, opt_polls_ps; + + rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls - + xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt; + fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls - + xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt; + copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos - + xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt; + tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos - + xsks[i]->app_stats.prev_tx_wakeup_sendtos) + * 1000000000. / dt; + opt_polls_ps = (xsks[i]->app_stats.opt_polls - + xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt; + + printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count"); + printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls); + printf(fmt, "fill fail polls", fill_fail_polls_ps, + xsks[i]->app_stats.fill_fail_polls); + printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps, + xsks[i]->app_stats.copy_tx_sendtos); + printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps, + xsks[i]->app_stats.tx_wakeup_sendtos); + printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls); + + xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls; + xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls; + xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos; + xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos; + xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls; + } +} + static void dump_stats(void) { unsigned long now = get_nsecs(); @@ -198,7 +252,7 @@ static void dump_stats(void) prev_time = now; for (i = 0; i < num_socks && xsks[i]; i++) { - char *fmt = "%-15s %'-11.0f %'-11lu\n"; + char *fmt = "%-18s %'-14.0f %'-14lu\n"; double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps, tx_invalid_pps, tx_empty_pps; @@ -211,7 +265,7 @@ static void dump_stats(void) print_benchmark(false); printf("\n"); - printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts", + printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts", dt / 1000000000.); printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts); printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts); @@ -270,6 +324,9 @@ static void dump_stats(void) } } } + + if (opt_app_stats) + dump_app_stats(dt); } static bool is_benchmark_done(void) @@ -708,6 +765,17 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, if (ret) exit_with_error(-ret); + xsk->app_stats.rx_empty_polls = 0; + xsk->app_stats.fill_fail_polls = 0; + xsk->app_stats.copy_tx_sendtos = 0; + xsk->app_stats.tx_wakeup_sendtos = 0; + xsk->app_stats.opt_polls = 0; + xsk->app_stats.prev_rx_empty_polls = 0; + xsk->app_stats.prev_fill_fail_polls = 0; + xsk->app_stats.prev_copy_tx_sendtos = 0; + xsk->app_stats.prev_tx_wakeup_sendtos = 0; + xsk->app_stats.prev_opt_polls = 0; + return xsk; } @@ -735,6 +803,7 @@ static struct option long_options[] = { {"tx-pkt-pattern", required_argument, 0, 'P'}, {"extra-stats", no_argument, 0, 'x'}, {"quiet", no_argument, 0, 'Q'}, + {"app-stats", no_argument, 0, 'a'}, {0, 0, 0, 0} }; @@ -771,6 +840,7 @@ static void usage(const char *prog) " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" " -x, --extra-stats Display extra statistics.\n" " -Q, --quiet Do not display any stats.\n" + " -a, --app-stats Display application (syscall) statistics.\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, @@ -786,7 +856,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQ", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQa", long_options, &option_index); if (c == -1) break; @@ -873,6 +943,9 @@ static void parse_command_line(int argc, char **argv) case 'Q': opt_quiet = 1; break; + case 'a': + opt_app_stats = 1; + break; default: usage(basename(argv[0])); } @@ -923,8 +996,10 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, * is driven by the NAPI loop. So as an optimization, we do not have to call * sendto() all the time in zero-copy mode for l2fwd. */ - if (opt_xdp_bind_flags & XDP_COPY) + if (opt_xdp_bind_flags & XDP_COPY) { + xsk->app_stats.copy_tx_sendtos++; kick_tx(xsk); + } ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size : xsk->outstanding_tx; @@ -939,8 +1014,10 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&umem->fq)) + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + xsk->app_stats.fill_fail_polls++; ret = poll(fds, num_socks, opt_timeout); + } ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } @@ -964,8 +1041,10 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk, if (!xsk->outstanding_tx) return; - if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) + if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) { + xsk->app_stats.tx_wakeup_sendtos++; kick_tx(xsk); + } rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); if (rcvd > 0) { @@ -983,8 +1062,10 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + xsk->app_stats.rx_empty_polls++; ret = poll(fds, num_socks, opt_timeout); + } return; } @@ -992,8 +1073,10 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + xsk->app_stats.fill_fail_polls++; ret = poll(fds, num_socks, opt_timeout); + } ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); } @@ -1026,6 +1109,8 @@ static void rx_drop_all(void) for (;;) { if (opt_poll) { + for (i = 0; i < num_socks; i++) + xsks[i]->app_stats.opt_polls++; ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; @@ -1106,6 +1191,8 @@ static void tx_only_all(void) int batch_size = get_batch_size(pkt_cnt); if (opt_poll) { + for (i = 0; i < num_socks; i++) + xsks[i]->app_stats.opt_polls++; ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; @@ -1137,8 +1224,10 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + xsk->app_stats.rx_empty_polls++; ret = poll(fds, num_socks, opt_timeout); + } return; } @@ -1147,8 +1236,10 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) if (ret < 0) exit_with_error(-ret); complete_tx_l2fwd(xsk, fds); - if (xsk_ring_prod__needs_wakeup(&xsk->tx)) + if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { + xsk->app_stats.tx_wakeup_sendtos++; kick_tx(xsk); + } ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); } @@ -1186,6 +1277,8 @@ static void l2fwd_all(void) for (;;) { if (opt_poll) { + for (i = 0; i < num_socks; i++) + xsks[i]->app_stats.opt_polls++; ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; -- cgit v1.2.3 From 67ed375530e223624508382a9348a21c3b2ccc09 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Fri, 2 Oct 2020 13:36:12 +0000 Subject: samples: bpf: Driver interrupt statistics in xdpsock Add an option to count the number of interrupts generated per second and total number of interrupts during the lifetime of the application for a given interface. This information is extracted from /proc/interrupts. Since there is no naming convention across drivers, the user must provide the string which is specific to their interface in the /proc/interrupts file on the command line. Usage: ./xdpsock ... -I eg. for queue 0 of i40e device eth0: ./xdpsock ... -I i40e-eth0-TxRx-0 Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201002133612.31536-3-ciara.loftus@intel.com --- samples/bpf/xdpsock_user.c | 120 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index ff119ede4ab1..1149e94ca32f 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,9 @@ static u32 opt_pkt_fill_pattern = 0x12345678; static bool opt_extra_stats; static bool opt_quiet; static bool opt_app_stats; +static const char *opt_irq_str = ""; +static u32 irq_no; +static int irqs_at_init = -1; static int opt_poll; static int opt_interval = 1; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; @@ -111,6 +115,11 @@ struct xsk_ring_stats { unsigned long prev_tx_empty_npkts; }; +struct xsk_driver_stats { + unsigned long intrs; + unsigned long prev_intrs; +}; + struct xsk_app_stats { unsigned long rx_empty_polls; unsigned long fill_fail_polls; @@ -138,6 +147,7 @@ struct xsk_socket_info { struct xsk_socket *xsk; struct xsk_ring_stats ring_stats; struct xsk_app_stats app_stats; + struct xsk_driver_stats drv_stats; u32 outstanding_tx; }; @@ -243,6 +253,100 @@ static void dump_app_stats(long dt) } } +static bool get_interrupt_number(void) +{ + FILE *f_int_proc; + char line[4096]; + bool found = false; + + f_int_proc = fopen("/proc/interrupts", "r"); + if (f_int_proc == NULL) { + printf("Failed to open /proc/interrupts.\n"); + return found; + } + + while (!feof(f_int_proc) && !found) { + /* Make sure to read a full line at a time */ + if (fgets(line, sizeof(line), f_int_proc) == NULL || + line[strlen(line) - 1] != '\n') { + printf("Error reading from interrupts file\n"); + break; + } + + /* Extract interrupt number from line */ + if (strstr(line, opt_irq_str) != NULL) { + irq_no = atoi(line); + found = true; + break; + } + } + + fclose(f_int_proc); + + return found; +} + +static int get_irqs(void) +{ + char count_path[PATH_MAX]; + int total_intrs = -1; + FILE *f_count_proc; + char line[4096]; + + snprintf(count_path, sizeof(count_path), + "/sys/kernel/irq/%i/per_cpu_count", irq_no); + f_count_proc = fopen(count_path, "r"); + if (f_count_proc == NULL) { + printf("Failed to open %s\n", count_path); + return total_intrs; + } + + if (fgets(line, sizeof(line), f_count_proc) == NULL || + line[strlen(line) - 1] != '\n') { + printf("Error reading from %s\n", count_path); + } else { + static const char com[2] = ","; + char *token; + + total_intrs = 0; + token = strtok(line, com); + while (token != NULL) { + /* sum up interrupts across all cores */ + total_intrs += atoi(token); + token = strtok(NULL, com); + } + } + + fclose(f_count_proc); + + return total_intrs; +} + +static void dump_driver_stats(long dt) +{ + int i; + + for (i = 0; i < num_socks && xsks[i]; i++) { + char *fmt = "%-18s %'-14.0f %'-14lu\n"; + double intrs_ps; + int n_ints = get_irqs(); + + if (n_ints < 0) { + printf("error getting intr info for intr %i\n", irq_no); + return; + } + xsks[i]->drv_stats.intrs = n_ints - irqs_at_init; + + intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) * + 1000000000. / dt; + + printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count"); + printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs); + + xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs; + } +} + static void dump_stats(void) { unsigned long now = get_nsecs(); @@ -327,6 +431,8 @@ static void dump_stats(void) if (opt_app_stats) dump_app_stats(dt); + if (irq_no) + dump_driver_stats(dt); } static bool is_benchmark_done(void) @@ -804,6 +910,7 @@ static struct option long_options[] = { {"extra-stats", no_argument, 0, 'x'}, {"quiet", no_argument, 0, 'Q'}, {"app-stats", no_argument, 0, 'a'}, + {"irq-string", no_argument, 0, 'I'}, {0, 0, 0, 0} }; @@ -841,6 +948,7 @@ static void usage(const char *prog) " -x, --extra-stats Display extra statistics.\n" " -Q, --quiet Do not display any stats.\n" " -a, --app-stats Display application (syscall) statistics.\n" + " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, @@ -856,7 +964,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQa", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:", long_options, &option_index); if (c == -1) break; @@ -945,6 +1053,16 @@ static void parse_command_line(int argc, char **argv) break; case 'a': opt_app_stats = 1; + break; + case 'I': + opt_irq_str = optarg; + if (get_interrupt_number()) + irqs_at_init = get_irqs(); + if (irqs_at_init < 0) { + fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str); + usage(basename(argv[0])); + } + break; default: usage(basename(argv[0])); -- cgit v1.2.3 From 8ac91df6de164923c1025e340ea81d085fb4ab85 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sun, 11 Oct 2020 03:17:32 +0900 Subject: samples: bpf: Refactor xdp_monitor with libbpf To avoid confusion caused by the increasing fragmentation of the BPF Loader program, this commit would like to change to the libbpf loader instead of using the bpf_load. Thanks to libbpf's bpf_link interface, managing the tracepoint BPF program is much easier. bpf_program__attach_tracepoint manages the enable of tracepoint event and attach of BPF programs to it with a single interface bpf_link, so there is no need to manage event_fd and prog_fd separately. This commit refactors xdp_monitor with using this libbpf API, and the bpf_load is removed and migrated to libbpf. Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201010181734.1109-2-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/xdp_monitor_user.c | 159 +++++++++++++++++++++++++++++++---------- 2 files changed, 121 insertions(+), 40 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index e29b1e89dd3e..50ad7e8b05bc 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -99,7 +99,7 @@ per_socket_stats_example-objs := cookie_uid_helper_example.o xdp_redirect-objs := xdp_redirect_user.o xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o -xdp_monitor-objs := bpf_load.o xdp_monitor_user.o +xdp_monitor-objs := xdp_monitor_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o syscall_tp-objs := syscall_tp_user.o cpustat-objs := cpustat_user.o diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index ef53b93db573..03d0a182913f 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -26,12 +26,37 @@ static const char *__doc_err_only__= #include #include +#include #include -#include "bpf_load.h" +#include #include "bpf_util.h" +enum map_type { + REDIRECT_ERR_CNT, + EXCEPTION_CNT, + CPUMAP_ENQUEUE_CNT, + CPUMAP_KTHREAD_CNT, + DEVMAP_XMIT_CNT, +}; + +static const char *const map_type_strings[] = { + [REDIRECT_ERR_CNT] = "redirect_err_cnt", + [EXCEPTION_CNT] = "exception_cnt", + [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt", + [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt", + [DEVMAP_XMIT_CNT] = "devmap_xmit_cnt", +}; + +#define NUM_MAP 5 +#define NUM_TP 8 + +static int tp_cnt; +static int map_cnt; static int verbose = 1; static bool debug = false; +struct bpf_map *map_data[NUM_MAP] = {}; +struct bpf_link *tp_links[NUM_TP] = {}; +struct bpf_object *obj; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -41,6 +66,16 @@ static const struct option long_options[] = { {0, 0, NULL, 0 } }; +static void int_exit(int sig) +{ + /* Detach tracepoints */ + while (tp_cnt) + bpf_link__destroy(tp_links[--tp_cnt]); + + bpf_object__close(obj); + exit(0); +} + /* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */ #define EXIT_FAIL_MEM 5 @@ -483,23 +518,23 @@ static bool stats_collect(struct stats_record *rec) * this can happen by someone running perf-record -e */ - fd = map_data[0].fd; /* map0: redirect_err_cnt */ + fd = bpf_map__fd(map_data[REDIRECT_ERR_CNT]); for (i = 0; i < REDIR_RES_MAX; i++) map_collect_record_u64(fd, i, &rec->xdp_redirect[i]); - fd = map_data[1].fd; /* map1: exception_cnt */ + fd = bpf_map__fd(map_data[EXCEPTION_CNT]); for (i = 0; i < XDP_ACTION_MAX; i++) { map_collect_record_u64(fd, i, &rec->xdp_exception[i]); } - fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */ + fd = bpf_map__fd(map_data[CPUMAP_ENQUEUE_CNT]); for (i = 0; i < MAX_CPUS; i++) map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]); - fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */ + fd = bpf_map__fd(map_data[CPUMAP_KTHREAD_CNT]); map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); - fd = map_data[4].fd; /* map4: devmap_xmit_cnt */ + fd = bpf_map__fd(map_data[DEVMAP_XMIT_CNT]); map_collect_record(fd, 0, &rec->xdp_devmap_xmit); return true; @@ -598,8 +633,8 @@ static void stats_poll(int interval, bool err_only) /* TODO Need more advanced stats on error types */ if (verbose) { - printf(" - Stats map0: %s\n", map_data[0].name); - printf(" - Stats map1: %s\n", map_data[1].name); + printf(" - Stats map0: %s\n", bpf_map__name(map_data[0])); + printf(" - Stats map1: %s\n", bpf_map__name(map_data[1])); printf("\n"); } fflush(stdout); @@ -618,44 +653,51 @@ static void stats_poll(int interval, bool err_only) static void print_bpf_prog_info(void) { - int i; + struct bpf_program *prog; + struct bpf_map *map; + int i = 0; /* Prog info */ - printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt); - for (i = 0; i < prog_cnt; i++) { - printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]); + printf("Loaded BPF prog have %d bpf program(s)\n", tp_cnt); + bpf_object__for_each_program(prog, obj) { + printf(" - prog_fd[%d] = fd(%d)\n", i, bpf_program__fd(prog)); + i++; } + i = 0; /* Maps info */ - printf("Loaded BPF prog have %d map(s)\n", map_data_count); - for (i = 0; i < map_data_count; i++) { - char *name = map_data[i].name; - int fd = map_data[i].fd; + printf("Loaded BPF prog have %d map(s)\n", map_cnt); + bpf_object__for_each_map(map, obj) { + const char *name = bpf_map__name(map); + int fd = bpf_map__fd(map); printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name); + i++; } /* Event info */ - printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt); - for (i = 0; i < prog_cnt; i++) { - if (event_fd[i] != -1) - printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]); + printf("Searching for (max:%d) event file descriptor(s)\n", tp_cnt); + for (i = 0; i < tp_cnt; i++) { + int fd = bpf_link__fd(tp_links[i]); + + if (fd != -1) + printf(" - event_fd[%d] = fd(%d)\n", i, fd); } } int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_program *prog; int longindex = 0, opt; - int ret = EXIT_SUCCESS; - char bpf_obj_file[256]; + int ret = EXIT_FAILURE; + enum map_type type; + char filename[256]; /* Default settings: */ bool errors_only = true; int interval = 2; - snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]); - /* Parse commands line args */ while ((opt = getopt_long(argc, argv, "hDSs:", long_options, &longindex)) != -1) { @@ -672,40 +714,79 @@ int main(int argc, char **argv) case 'h': default: usage(argv); - return EXIT_FAILURE; + return ret; } } + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); if (setrlimit(RLIMIT_MEMLOCK, &r)) { perror("setrlimit(RLIMIT_MEMLOCK)"); - return EXIT_FAILURE; + return ret; } - if (load_bpf_file(bpf_obj_file)) { - printf("ERROR - bpf_log_buf: %s", bpf_log_buf); - return EXIT_FAILURE; + /* Remove tracepoint program when program is interrupted or killed */ + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + printf("ERROR: opening BPF object file failed\n"); + obj = NULL; + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + printf("ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + for (type = 0; type < NUM_MAP; type++) { + map_data[type] = + bpf_object__find_map_by_name(obj, map_type_strings[type]); + + if (libbpf_get_error(map_data[type])) { + printf("ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + map_cnt++; } - if (!prog_fd[0]) { - printf("ERROR - load_bpf_file: %s\n", strerror(errno)); - return EXIT_FAILURE; + + bpf_object__for_each_program(prog, obj) { + tp_links[tp_cnt] = bpf_program__attach(prog); + if (libbpf_get_error(tp_links[tp_cnt])) { + printf("ERROR: bpf_program__attach failed\n"); + tp_links[tp_cnt] = NULL; + goto cleanup; + } + tp_cnt++; } if (debug) { print_bpf_prog_info(); } - /* Unload/stop tracepoint event by closing fd's */ + /* Unload/stop tracepoint event by closing bpf_link's */ if (errors_only) { - /* The prog_fd[i] and event_fd[i] depend on the - * order the functions was defined in _kern.c + /* The bpf_link[i] depend on the order of + * the functions was defined in _kern.c */ - close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */ - close(prog_fd[2]); /* func: trace_xdp_redirect */ - close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */ - close(prog_fd[3]); /* func: trace_xdp_redirect_map */ + bpf_link__destroy(tp_links[2]); /* tracepoint/xdp/xdp_redirect */ + tp_links[2] = NULL; + + bpf_link__destroy(tp_links[3]); /* tracepoint/xdp/xdp_redirect_map */ + tp_links[3] = NULL; } stats_poll(interval, errors_only); + ret = EXIT_SUCCESS; + +cleanup: + /* Detach tracepoints */ + while (tp_cnt) + bpf_link__destroy(tp_links[--tp_cnt]); + + bpf_object__close(obj); return ret; } -- cgit v1.2.3 From 151936bf51afcdd6db52b3b73e339d021064b0ea Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sun, 11 Oct 2020 03:17:33 +0900 Subject: samples: bpf: Replace attach_tracepoint() to attach() in xdp_redirect_cpu >From commit d7a18ea7e8b6 ("libbpf: Add generic bpf_program__attach()"), for some BPF programs, it is now possible to attach BPF programs with __attach() instead of explicitly calling __attach_(). This commit refactors the __attach_tracepoint() with libbpf's generic __attach() method. In addition, this refactors the logic of setting the map FD to simplify the code. Also, the missing removal of bpf_load.o in Makefile has been fixed. Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201010181734.1109-3-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/xdp_redirect_cpu_user.c | 153 +++++++++++++++++------------------- 2 files changed, 73 insertions(+), 82 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 50ad7e8b05bc..aeebf5d12f32 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -98,7 +98,7 @@ test_map_in_map-objs := test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o xdp_redirect-objs := xdp_redirect_user.o xdp_redirect_map-objs := xdp_redirect_map_user.o -xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o +xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o xdp_monitor-objs := xdp_monitor_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o syscall_tp-objs := syscall_tp_user.o diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 3dd366e9474d..6fb8dbde62c5 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -37,18 +37,35 @@ static __u32 prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int n_cpus; -static int cpu_map_fd; -static int rx_cnt_map_fd; -static int redirect_err_cnt_map_fd; -static int cpumap_enqueue_cnt_map_fd; -static int cpumap_kthread_cnt_map_fd; -static int cpus_available_map_fd; -static int cpus_count_map_fd; -static int cpus_iterator_map_fd; -static int exception_cnt_map_fd; + +enum map_type { + CPU_MAP, + RX_CNT, + REDIRECT_ERR_CNT, + CPUMAP_ENQUEUE_CNT, + CPUMAP_KTHREAD_CNT, + CPUS_AVAILABLE, + CPUS_COUNT, + CPUS_ITERATOR, + EXCEPTION_CNT, +}; + +static const char *const map_type_strings[] = { + [CPU_MAP] = "cpu_map", + [RX_CNT] = "rx_cnt", + [REDIRECT_ERR_CNT] = "redirect_err_cnt", + [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt", + [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt", + [CPUS_AVAILABLE] = "cpus_available", + [CPUS_COUNT] = "cpus_count", + [CPUS_ITERATOR] = "cpus_iterator", + [EXCEPTION_CNT] = "exception_cnt", +}; #define NUM_TP 5 -struct bpf_link *tp_links[NUM_TP] = { 0 }; +#define NUM_MAP 9 +struct bpf_link *tp_links[NUM_TP] = {}; +static int map_fds[NUM_MAP]; static int tp_cnt = 0; /* Exit return codes */ @@ -527,20 +544,20 @@ static void stats_collect(struct stats_record *rec) { int fd, i; - fd = rx_cnt_map_fd; + fd = map_fds[RX_CNT]; map_collect_percpu(fd, 0, &rec->rx_cnt); - fd = redirect_err_cnt_map_fd; + fd = map_fds[REDIRECT_ERR_CNT]; map_collect_percpu(fd, 1, &rec->redir_err); - fd = cpumap_enqueue_cnt_map_fd; + fd = map_fds[CPUMAP_ENQUEUE_CNT]; for (i = 0; i < n_cpus; i++) map_collect_percpu(fd, i, &rec->enq[i]); - fd = cpumap_kthread_cnt_map_fd; + fd = map_fds[CPUMAP_KTHREAD_CNT]; map_collect_percpu(fd, 0, &rec->kthread); - fd = exception_cnt_map_fd; + fd = map_fds[EXCEPTION_CNT]; map_collect_percpu(fd, 0, &rec->exception); } @@ -565,7 +582,7 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, /* Add a CPU entry to cpumap, as this allocate a cpu entry in * the kernel for the cpu. */ - ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0); + ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0); if (ret) { fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); exit(EXIT_FAIL_BPF); @@ -574,21 +591,21 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, /* Inform bpf_prog's that a new CPU is available to select * from via some control maps. */ - ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0); + ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0); if (ret) { fprintf(stderr, "Add to avail CPUs failed\n"); exit(EXIT_FAIL_BPF); } /* When not replacing/updating existing entry, bump the count */ - ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count); + ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count); if (ret) { fprintf(stderr, "Failed reading curr cpus_count\n"); exit(EXIT_FAIL_BPF); } if (new) { curr_cpus_count++; - ret = bpf_map_update_elem(cpus_count_map_fd, &key, + ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count, 0); if (ret) { fprintf(stderr, "Failed write curr cpus_count\n"); @@ -612,7 +629,7 @@ static void mark_cpus_unavailable(void) int ret, i; for (i = 0; i < n_cpus; i++) { - ret = bpf_map_update_elem(cpus_available_map_fd, &i, + ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i, &invalid_cpu, 0); if (ret) { fprintf(stderr, "Failed marking CPU unavailable\n"); @@ -665,68 +682,37 @@ static void stats_poll(int interval, bool use_separators, char *prog_name, free_stats_record(prev); } -static struct bpf_link * attach_tp(struct bpf_object *obj, - const char *tp_category, - const char* tp_name) +static int init_tracepoints(struct bpf_object *obj) { struct bpf_program *prog; - struct bpf_link *link; - char sec_name[PATH_MAX]; - int len; - len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s", - tp_category, tp_name); - if (len < 0) - exit(EXIT_FAIL); + bpf_object__for_each_program(prog, obj) { + if (bpf_program__is_tracepoint(prog) != true) + continue; - prog = bpf_object__find_program_by_title(obj, sec_name); - if (!prog) { - fprintf(stderr, "ERR: finding progsec: %s\n", sec_name); - exit(EXIT_FAIL_BPF); + tp_links[tp_cnt] = bpf_program__attach(prog); + if (libbpf_get_error(tp_links[tp_cnt])) { + tp_links[tp_cnt] = NULL; + return -EINVAL; + } + tp_cnt++; } - link = bpf_program__attach_tracepoint(prog, tp_category, tp_name); - if (libbpf_get_error(link)) - exit(EXIT_FAIL_BPF); - - return link; -} - -static void init_tracepoints(struct bpf_object *obj) { - tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err"); - tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err"); - tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception"); - tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue"); - tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread"); + return 0; } static int init_map_fds(struct bpf_object *obj) { - /* Maps updated by tracepoints */ - redirect_err_cnt_map_fd = - bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt"); - exception_cnt_map_fd = - bpf_object__find_map_fd_by_name(obj, "exception_cnt"); - cpumap_enqueue_cnt_map_fd = - bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt"); - cpumap_kthread_cnt_map_fd = - bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt"); - - /* Maps used by XDP */ - rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); - cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); - cpus_available_map_fd = - bpf_object__find_map_fd_by_name(obj, "cpus_available"); - cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count"); - cpus_iterator_map_fd = - bpf_object__find_map_fd_by_name(obj, "cpus_iterator"); - - if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 || - redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 || - cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 || - cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 || - exception_cnt_map_fd < 0) - return -ENOENT; + enum map_type type; + + for (type = 0; type < NUM_MAP; type++) { + map_fds[type] = + bpf_object__find_map_fd_by_name(obj, + map_type_strings[type]); + + if (map_fds[type] < 0) + return -ENOENT; + } return 0; } @@ -795,13 +781,13 @@ int main(int argc, char **argv) bool stress_mode = false; struct bpf_program *prog; struct bpf_object *obj; + int err = EXIT_FAIL; char filename[256]; int added_cpus = 0; int longindex = 0; int interval = 2; int add_cpu = -1; - int opt, err; - int prog_fd; + int opt, prog_fd; int *cpu, i; __u32 qsize; @@ -824,24 +810,29 @@ int main(int argc, char **argv) } if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) - return EXIT_FAIL; + return err; if (prog_fd < 0) { fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", strerror(errno)); - return EXIT_FAIL; + return err; } - init_tracepoints(obj); + + if (init_tracepoints(obj) < 0) { + fprintf(stderr, "ERR: bpf_program__attach failed\n"); + return err; + } + if (init_map_fds(obj) < 0) { fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); - return EXIT_FAIL; + return err; } mark_cpus_unavailable(); cpu = malloc(n_cpus * sizeof(int)); if (!cpu) { fprintf(stderr, "failed to allocate cpu array\n"); - return EXIT_FAIL; + return err; } memset(cpu, 0, n_cpus * sizeof(int)); @@ -960,14 +951,12 @@ int main(int argc, char **argv) prog = bpf_object__find_program_by_title(obj, prog_name); if (!prog) { fprintf(stderr, "bpf_object__find_program_by_title failed\n"); - err = EXIT_FAIL; goto out; } prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { fprintf(stderr, "bpf_program__fd failed\n"); - err = EXIT_FAIL; goto out; } @@ -986,6 +975,8 @@ int main(int argc, char **argv) stats_poll(interval, use_separators, prog_name, mprog_name, &value, stress_mode); + + err = EXIT_OK; out: free(cpu); return err; -- cgit v1.2.3 From 321f6324500eb2ec75b6fcff7dcd66d64ba18529 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sun, 11 Oct 2020 03:17:34 +0900 Subject: samples: bpf: Refactor XDP kern program maps with BTF-defined map Most of the samples were converted to use the new BTF-defined MAP as they moved to libbpf, but some of the samples were missing. Instead of using the previous BPF MAP definition, this commit refactors xdp_monitor and xdp_sample_pkts_kern MAP definition with the new BTF-defined MAP format. Also, this commit removes the max_entries attribute at PERF_EVENT_ARRAY map type. The libbpf's bpf_object__create_map() will automatically set max_entries to the maximum configured number of CPUs on the host. Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201010181734.1109-4-danieltimlee@gmail.com --- samples/bpf/xdp_monitor_kern.c | 60 +++++++++++++++++++------------------- samples/bpf/xdp_sample_pkts_kern.c | 14 ++++----- samples/bpf/xdp_sample_pkts_user.c | 1 - 3 files changed, 36 insertions(+), 39 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c index 3d33cca2d48a..5c955b812c47 100644 --- a/samples/bpf/xdp_monitor_kern.c +++ b/samples/bpf/xdp_monitor_kern.c @@ -6,21 +6,21 @@ #include #include -struct bpf_map_def SEC("maps") redirect_err_cnt = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = 2, +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, 2); /* TODO: have entries for all possible errno's */ -}; +} redirect_err_cnt SEC(".maps"); #define XDP_UNKNOWN XDP_REDIRECT + 1 -struct bpf_map_def SEC("maps") exception_cnt = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(u64), - .max_entries = XDP_UNKNOWN + 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, XDP_UNKNOWN + 1); +} exception_cnt SEC(".maps"); /* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format * Code in: kernel/include/trace/events/xdp.h @@ -129,19 +129,19 @@ struct datarec { }; #define MAX_CPUS 64 -struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(struct datarec), - .max_entries = MAX_CPUS, -}; +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct datarec); + __uint(max_entries, MAX_CPUS); +} cpumap_enqueue_cnt SEC(".maps"); -struct bpf_map_def SEC("maps") cpumap_kthread_cnt = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(struct datarec), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct datarec); + __uint(max_entries, 1); +} cpumap_kthread_cnt SEC(".maps"); /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format * Code in: kernel/include/trace/events/xdp.h @@ -210,12 +210,12 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) return 0; } -struct bpf_map_def SEC("maps") devmap_xmit_cnt = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(u32), - .value_size = sizeof(struct datarec), - .max_entries = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct datarec); + __uint(max_entries, 1); +} devmap_xmit_cnt SEC(".maps"); /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format * Code in: kernel/include/trace/events/xdp.h diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c index 33377289e2a8..9cf76b340dd7 100644 --- a/samples/bpf/xdp_sample_pkts_kern.c +++ b/samples/bpf/xdp_sample_pkts_kern.c @@ -5,14 +5,12 @@ #include #define SAMPLE_SIZE 64ul -#define MAX_CPUS 128 - -struct bpf_map_def SEC("maps") my_map = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = MAX_CPUS, -}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(u32)); +} my_map SEC(".maps"); SEC("xdp_sample") int xdp_sample_prog(struct xdp_md *ctx) diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 991ef6f0880b..4b2a300c750c 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -18,7 +18,6 @@ #include "perf-sys.h" -#define MAX_CPUS 128 static int if_idx; static char *if_name; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -- cgit v1.2.3