From 028568d84da3cfca49f5f846eeeef01441d70451 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 2 Oct 2017 17:07:28 +0900 Subject: kbuild: revert $(realpath ...) to $(shell cd ... && /bin/pwd) I thought commit 8e9b46679923 ("kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)") was a safe conversion, but it changed the behavior. $(abspath ...) / $(realpath ...) does not expand shell special characters, such as '~'. Here is a simple Makefile example: ---------------->8---------------- $(info /bin/pwd: $(shell cd ~/; /bin/pwd)) $(info abspath: $(abspath ~/)) $(info realpath: $(realpath ~/)) all: @: ---------------->8---------------- $ make /bin/pwd: /home/masahiro abspath: /home/masahiro/workspace/~ realpath: This can be a real problem if 'make O=~/foo' is invoked from another Makefile or primitive shell like dash. This commit partially reverts 8e9b46679923. Fixes: 8e9b46679923 ("kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)") Reported-by: Julien Grall Signed-off-by: Masahiro Yamada Tested-by: Julien Grall --- tools/power/cpupower/Makefile | 2 +- tools/scripts/Makefile.include | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 4c5a481a850c..d6e1c02ddcfe 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -26,7 +26,7 @@ endif ifneq ($(OUTPUT),) # check that the output directory actually exists -OUTDIR := $(realpath $(OUTPUT)) +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 9dc8f078a83c..1e8b6116ba3c 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,7 +1,7 @@ ifneq ($(O),) ifeq ($(origin O), command line) - ABSOLUTE_O := $(realpath $(O)) - dummy := $(if $(ABSOLUTE_O),,$(error O=$(O) does not exist)) + dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(O) ; pwd) OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) COMMAND_O := O=$(ABSOLUTE_O) ifeq ($(objtree),) @@ -12,7 +12,7 @@ endif # check that the output directory actually exists ifneq ($(OUTPUT),) -OUTDIR := $(realpath $(OUTPUT)) +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif -- cgit v1.2.3 From 70b01dfd765dd2196d51f33a49df23954416f34a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 13 Oct 2017 10:37:28 +0200 Subject: perf hists: Fix crash in perf_hpp__reset_output_field() Du Changbin reported crash [1] when calling perf_hpp__reset_output_field() after unregistering field via perf_hpp__column_unregister(). This ends up in calling following list_del* sequence on the same format: perf_hpp__column_unregister: list_del(&format->list); perf_hpp__reset_output_field: list_del_init(&fmt->list); where the later list_del_init might touch already freed formats. Fixing this by replacing list_del() with list_del_init() in perf_hpp__column_unregister(). [1] http://marc.info/?l=linux-kernel&m=149059595826019&w=2 Reported-by: Changbin Du Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20171013083736.15037-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index ddb2c6fbdf91..6ee6b36bbc76 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -532,7 +532,7 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { - list_del(&format->list); + list_del_init(&format->list); } void perf_hpp__cancel_cumulate(void) -- cgit v1.2.3 From d0e35234f647631ddfa5fa8c8ec66c9bc698f0ab Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 13 Oct 2017 10:37:29 +0200 Subject: perf hists: Add extra integrity checks to fmt_free() Make sure the struct perf_hpp_fmt is properly unhooked before we free it. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20171013083736.15037-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 6ee6b36bbc76..db79017a6e56 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -606,6 +606,13 @@ next: static void fmt_free(struct perf_hpp_fmt *fmt) { + /* + * At this point fmt should be completely + * unhooked, if not it's a bug. + */ + BUG_ON(!list_empty(&fmt->list)); + BUG_ON(!list_empty(&fmt->sort_list)); + if (fmt->free) fmt->free(fmt); } -- cgit v1.2.3 From 29479bfe83bafb8aa37f36ca132ee8349d11da0c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 13 Oct 2017 10:37:35 +0200 Subject: perf tools: Check wether the eBPF file exists in event parsing Adding the check wether the eBPF file exists, to consider it as eBPF input file. This way we can differentiate eBPF events from events that end up with same suffix as eBPF file. Before: $ perf stat -e 'cpu/uops_executed.core/' true bpf: builtin compilation failed: -95, try external compiler WARNING: unable to get correct kernel building directory. Hint: Set correct kbuild directory using 'kbuild-dir' option in [llvm] section of ~/.perfconfig or set it to "" to suppress kbuild detection. event syntax error: 'cpu/uops_executed.core/' \___ Failed to load cpu/uops_executed.c from source: 'version' section incorrect or lost After: $ perf stat -e 'cpu/uops_executed.core/' true Performance counter stats for 'true': 181,533 cpu/uops_executed.core/:u 0.002795447 seconds time elapsed If user makes type in the eBPF file, we prioritize the event syntax and show following warning: $ perf stat -e 'krava.c//' true event syntax error: 'krava.c//' \___ Cannot find PMU `krava.c'. Missing kernel support? Reported-and-Tested-by: Andi Kleen Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20171013083736.15037-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index c42edeac451f..dcfdafdc2f1c 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -8,6 +8,9 @@ %{ #include +#include +#include +#include #include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" @@ -53,9 +56,8 @@ static int str(yyscan_t scanner, int token) return token; } -static bool isbpf(yyscan_t scanner) +static bool isbpf_suffix(char *text) { - char *text = parse_events_get_text(scanner); int len = strlen(text); if (len < 2) @@ -68,6 +70,17 @@ static bool isbpf(yyscan_t scanner) return false; } +static bool isbpf(yyscan_t scanner) +{ + char *text = parse_events_get_text(scanner); + struct stat st; + + if (!isbpf_suffix(text)) + return false; + + return stat(text, &st) == 0; +} + /* * This function is called when the parser gets two kind of input: * -- cgit v1.2.3 From 3f50f614d61f91ad30b1947c429d1f235493a7f9 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Sat, 14 Oct 2017 00:10:12 +0900 Subject: perf record: Fix documentation for a inexistent option '-l' 'perf record' had a '-l' option that meant "scale counter values" a very long time ago, but it currently belongs to 'perf stat' as '-c'. So remove it. I found this problem in the below case. $ perf record -e cycles -l sleep 3 Error: unknown switch `l Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1507907412-19813-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e397453e5a46..63526f4416ea 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -8,8 +8,8 @@ perf-record - Run a command and record its profile into perf.data SYNOPSIS -------- [verse] -'perf record' [-e | --event=EVENT] [-l] [-a] -'perf record' [-e | --event=EVENT] [-l] [-a] -- [] +'perf record' [-e | --event=EVENT] [-a] +'perf record' [-e | --event=EVENT] [-a] -- [] DESCRIPTION ----------- -- cgit v1.2.3 From 7f0cd23615040b9e53bb4980c986b721cba08bbc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Oct 2017 22:29:00 +0900 Subject: perf buildid-list: Fix crash when processing PERF_RECORD_NAMESPACE Thomas reported that 'perf buildid-list' gets a SEGFAULT due to NULL pointer deref when he ran it on a data with namespace events. It was because the buildid_id__mark_dso_hit_ops lacks the namespace event handler and perf_too__fill_default() didn't set it. Program received signal SIGSEGV, Segmentation fault. 0x0000000000000000 in ?? () Missing separate debuginfos, use: dnf debuginfo-install audit-libs-2.7.7-1.fc25.s390x bzip2-libs-1.0.6-21.fc25.s390x elfutils-libelf-0.169-1.fc25.s390x +elfutils-libs-0.169-1.fc25.s390x libcap-ng-0.7.8-1.fc25.s390x numactl-libs-2.0.11-2.ibm.fc25.s390x openssl-libs-1.1.0e-1.1.ibm.fc25.s390x perl-libs-5.24.1-386.fc25.s390x +python-libs-2.7.13-2.fc25.s390x slang-2.3.0-7.fc25.s390x xz-libs-5.2.3-2.fc25.s390x zlib-1.2.8-10.fc25.s390x (gdb) where #0 0x0000000000000000 in ?? () #1 0x00000000010fad6a in machines__deliver_event (machines=, machines@entry=0x2c6fd18, evlist=, event=event@entry=0x3fffdf00470, sample=0x3ffffffe880, sample@entry=0x3ffffffe888, tool=tool@entry=0x1312968 , file_offset=1136) at util/session.c:1287 #2 0x00000000010fbf4e in perf_session__deliver_event (file_offset=1136, tool=0x1312968 , sample=0x3ffffffe888, event=0x3fffdf00470, session=0x2c6fc30) at util/session.c:1340 #3 perf_session__process_event (session=0x2c6fc30, session@entry=0x0, event=event@entry=0x3fffdf00470, file_offset=file_offset@entry=1136) at util/session.c:1522 #4 0x00000000010fddde in __perf_session__process_events (file_size=11880, data_size=, data_offset=, session=0x0) at util/session.c:1899 #5 perf_session__process_events (session=0x0, session@entry=0x2c6fc30) at util/session.c:1953 #6 0x000000000103b2ac in perf_session__list_build_ids (with_hits=, force=) at builtin-buildid-list.c:83 #7 cmd_buildid_list (argc=, argv=) at builtin-buildid-list.c:115 #8 0x00000000010a026c in run_builtin (p=0x1311f78 , argc=argc@entry=2, argv=argv@entry=0x3fffffff3c0) at perf.c:296 #9 0x000000000102bc00 in handle_internal_command (argv=, argc=2) at perf.c:348 #10 run_argv (argcp=, argv=) at perf.c:392 #11 main (argc=, argv=0x3fffffff3c0) at perf.c:536 (gdb) Fix it by adding a stub event handler for namespace event. Committer testing: Further clarifying, plain using 'perf buildid-list' will not end up in a SEGFAULT when processing a perf.data file with namespace info: # perf record -a --namespaces sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.024 MB perf.data (1058 samples) ] # perf buildid-list | wc -l 38 # perf buildid-list | head -5 e2a171c7b905826fc8494f0711ba76ab6abbd604 /lib/modules/4.14.0-rc3+/build/vmlinux 874840a02d8f8a31cedd605d0b8653145472ced3 /lib/modules/4.14.0-rc3+/kernel/arch/x86/kvm/kvm-intel.ko ea7223776730cd8a22f320040aae4d54312984bc /lib/modules/4.14.0-rc3+/kernel/drivers/gpu/drm/i915/i915.ko 5961535e6732a8edb7f22b3f148bb2fa2e0be4b9 /lib/modules/4.14.0-rc3+/kernel/drivers/gpu/drm/drm.ko f045f54aa78cf1931cc893f78b6cbc52c72a8cb1 /usr/lib64/libc-2.25.so # It is only when one asks for checking what of those entries actually had samples, i.e. when we use either -H or --with-hits, that we will process all the PERF_RECORD_ events, and since tools/perf/builtin-buildid-list.c neither explicitely set a perf_tool.namespaces() callback nor the default stub was set that we end up, when processing a PERF_RECORD_NAMESPACE record, causing a SEGFAULT: # perf buildid-list -H Segmentation fault (core dumped) ^C # Reported-and-Tested-by: Thomas-Mich Richter Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Hari Bathini Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Thomas-Mich Richter Fixes: f3b3614a284d ("perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info") Link: http://lkml.kernel.org/r/20171017132900.11043-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a7ebd9fe8e40..76ab0709a20c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -374,6 +374,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->mmap2 = process_event_stub; if (tool->comm == NULL) tool->comm = process_event_stub; + if (tool->namespaces == NULL) + tool->namespaces = process_event_stub; if (tool->fork == NULL) tool->fork = process_event_stub; if (tool->exit == NULL) -- cgit v1.2.3 From c97cc7dbce2fe6f46e137f4b040f915a0181ee85 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 17 Oct 2017 16:00:02 -0400 Subject: Revert "tools/power turbostat: stop migrating, unless '-m'" This reverts commit c91fc8519d87715a3a173475ea3778794c139996. That change caused a C6 and PC6 residency regression on large idle systems. Users also complained about new output indicating jitter: turbostat: cpu6 jitter 3794 9142 Signed-off-by: Len Brown Cc: 4.13+ # v4.13+ Signed-off-by: Rafael J. Wysocki --- tools/power/x86/turbostat/turbostat.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dafba2c1e7d..bd9c6b31a504 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -92,7 +92,6 @@ unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; -int do_migrate; double discover_bclk(unsigned int family, unsigned int model); unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ @@ -303,9 +302,6 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int cpu_migrate(int cpu) { - if (!do_migrate) - return 0; - CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) @@ -5007,7 +5003,6 @@ void cmdline(int argc, char **argv) {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, {"list", no_argument, 0, 'l'}, - {"migrate", no_argument, 0, 'm'}, {"out", required_argument, 0, 'o'}, {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, @@ -5019,7 +5014,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -5062,9 +5057,6 @@ void cmdline(int argc, char **argv) list_header_only++; quiet++; break; - case 'm': - do_migrate = 1; - break; case 'o': outf = fopen_or_die(optarg, "w"); break; -- cgit v1.2.3 From 3d8bba9535ac6e79453c769dd0c8ea852a51ad60 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 18 Oct 2017 23:11:18 +0800 Subject: perf xyarray: Fix wrong processing when closing evsel fd In current xyarray code, xyarray__max_x() returns max_y, and xyarray__max_y() returns max_x. It's confusing and for code logic it looks not correct. Error happens when closing evsel fd. Let's see this scenario: 1. Allocate an fd (pseudo-code) perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); } xyarray__new(int xlen, int ylen, size_t entry_size) { size_t row_size = ylen * entry_size; struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size); xy->entry_size = entry_size; xy->row_size = row_size; xy->entries = xlen * ylen; xy->max_x = xlen; xy->max_y = ylen; ...... } So max_x is ncpus, max_y is nthreads and row_size = nthreads * 4. 2. Use perf syscall and get the fd int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], group_fd, flags); FD(evsel, cpu, thread) = fd; } } static inline void *xyarray__entry(struct xyarray *xy, int x, int y) { return &xy->contents[x * xy->row_size + y * xy->entry_size]; } These codes don't have issues. The issue happens in the closing of fd. 3. Close fd. void perf_evsel__close_fd(struct perf_evsel *evsel) { int cpu, thread; for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { close(FD(evsel, cpu, thread)); FD(evsel, cpu, thread) = -1; } } Since xyarray__max_x() returns max_y (nthreads) and xyarry__max_y() returns max_x (ncpus), so above code is actually to be: for (cpu = 0; cpu < nthreads; cpu++) for (thread = 0; thread < ncpus; ++thread) { close(FD(evsel, cpu, thread)); FD(evsel, cpu, thread) = -1; } It's not correct! This change is introduced by "475fb533fb7d" ("perf evsel: Fix buffer overflow while freeing events") This fix is to let xyarray__max_x() return max_x (ncpus) and let xyarry__max_y() return max_y (nthreads) Committer note: This was also fixed by Ravi Bangoria, who provided the same patch, noticing the problem with 'perf record': I see 'perf record -p ' crashes with following log: *** Error in `./perf': free(): invalid next size (normal): 0x000000000298b340 *** ======= Backtrace: ========= /lib/x86_64-linux-gnu/libc.so.6(+0x777e5)[0x7f7fd85c87e5] /lib/x86_64-linux-gnu/libc.so.6(+0x8037a)[0x7f7fd85d137a] /lib/x86_64-linux-gnu/libc.so.6(cfree+0x4c)[0x7f7fd85d553c] ./perf(perf_evsel__close+0xb4)[0x4b7614] ./perf(perf_evlist__delete+0x100)[0x4ab180] ./perf(cmd_record+0x1d9)[0x43a5a9] ./perf[0x49aa2f] ./perf(main+0x631)[0x427841] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f7fd8571830] ./perf(_start+0x29)[0x427a59] Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Cc: Ravi Bangoria Fixes: d74be4767367 ("perf xyarray: Save max_x, max_y") Link: http://lkml.kernel.org/r/1508339478-26674-1-git-send-email-yao.jin@linux.intel.com Link: http://lkml.kernel.org/r/1508327446-15302-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/xyarray.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h index 4ba726c90870..54af60462130 100644 --- a/tools/perf/util/xyarray.h +++ b/tools/perf/util/xyarray.h @@ -23,12 +23,12 @@ static inline void *xyarray__entry(struct xyarray *xy, int x, int y) static inline int xyarray__max_y(struct xyarray *xy) { - return xy->max_x; + return xy->max_y; } static inline int xyarray__max_x(struct xyarray *xy) { - return xy->max_y; + return xy->max_x; } #endif /* _PERF_XYARRAY_H_ */ -- cgit v1.2.3 From 74f8e22c153f4464060a0c2e4cfd1d6e51af2109 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 18 Oct 2017 16:34:09 +0800 Subject: perf test shell trace+probe_libc_inet_pton.sh: Be compatible with Debian/Ubuntu In debian/ubuntu, libc.so is located at a different place, /lib/x86_64-linux-gnu/libc-2.23.so, so it outputs like this when testing: PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.040 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.040/0.040/0.040/0.000 ms 0.000 probe_libc:inet_pton:(7f0e2db741c0)) __GI___inet_pton (/lib/x86_64-linux-gnu/libc-2.23.so) getaddrinfo (/lib/x86_64-linux-gnu/libc-2.23.so) [0xffffa9d40f34ff4d] (/bin/ping) Fix up the libc path to make sure this test works in more OSes. Committer testing: When this test fails one can use 'perf test -v', i.e. in verbose mode, where it'll show the expected backtrace, so, after applying this test: On Fedora 26: # perf test -v ping 62: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 23322 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.058 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.058/0.058/0.058/0.000 ms 0.000 probe_libc:inet_pton:(7fe344310d80)) __GI___inet_pton (/usr/lib64/libc-2.25.so) getaddrinfo (/usr/lib64/libc-2.25.so) _init (/usr/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok # Signed-off-by: Li Zhijian Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Kim Phillips Cc: Li Zhijian Cc: Peter Zijlstra Cc: Philip Li Link: http://lkml.kernel.org/r/1508315649-18836-1-git-send-email-lizhijian@cn.fujitsu.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_libc_inet_pton.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 462fc755092e..7a84d73324e3 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -10,6 +10,9 @@ . $(dirname $0)/lib/probe.sh +ld=$(realpath /lib64/ld*.so.* | uniq) +libc=$(echo $ld | sed 's/ld/libc/g') + trace_libc_inet_pton_backtrace() { idx=0 expected[0]="PING.*bytes" @@ -18,8 +21,8 @@ trace_libc_inet_pton_backtrace() { expected[3]=".*packets transmitted.*" expected[4]="rtt min.*" expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" - expected[6]=".*inet_pton[[:space:]]\(/usr/lib.*/libc-[0-9]+\.[0-9]+\.so\)$" - expected[7]="getaddrinfo[[:space:]]\(/usr/lib.*/libc-[0-9]+\.[0-9]+\.so\)$" + expected[6]=".*inet_pton[[:space:]]\($libc\)$" + expected[7]="getaddrinfo[[:space:]]\($libc\)$" expected[8]=".*\(.*/bin/ping.*\)$" perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do @@ -35,7 +38,7 @@ trace_libc_inet_pton_backtrace() { } skip_if_no_perf_probe && \ -perf probe -q /lib64/libc-*.so inet_pton && \ +perf probe -q $libc inet_pton && \ trace_libc_inet_pton_backtrace err=$? rm -f ${file} -- cgit v1.2.3 From 28e33f9d78eefe98ea86673ab31e988b37a9a738 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 11:16:55 -0700 Subject: bpf: disallow arithmetic operations on context pointer Commit f1174f77b50c ("bpf/verifier: rework value tracking") removed the crafty selection of which pointer types are allowed to be modified. This is OK for most pointer types since adjust_ptr_min_max_vals() will catch operations on immutable pointers. One exception is PTR_TO_CTX which is now allowed to be offseted freely. The intent of aforementioned commit was to allow context access via modified registers. The offset passed to ->is_valid_access() verifier callback has been adjusted by the value of the variable offset. What is missing, however, is taking the variable offset into account when the context register is used. Or in terms of the code adding the offset to the value passed to the ->convert_ctx_access() callback. This leads to the following eBPF user code: r1 += 68 r0 = *(u32 *)(r1 + 8) exit being translated to this in kernel space: 0: (07) r1 += 68 1: (61) r0 = *(u32 *)(r1 +180) 2: (95) exit Offset 8 is corresponding to 180 in the kernel, but offset 76 is valid too. Verifier will "accept" access to offset 68+8=76 but then "convert" access to offset 8 as 180. Effective access to offset 248 is beyond the kernel context. (This is a __sk_buff example on a debug-heavy kernel - packet mark is 8 -> 180, 76 would be data.) Dereferencing the modified context pointer is not as easy as dereferencing other types, because we have to translate the access to reading a field in kernel structures which is usually at a different offset and often of a different size. To allow modifying the pointer we would have to make sure that given eBPF instruction will always access the same field or the fields accessed are "compatible" in terms of offset and size... Disallow dereferencing modified context pointers and add to selftests the test case described here. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Edward Cree Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 8 ++++++-- tools/testing/selftests/bpf/test_verifier.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8b8d6ba39e23..20f3889c006e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn /* ctx accesses must be at a fixed offset, so that we can * determine what type of data were returned. */ - if (!tnum_is_const(reg->var_off)) { + if (reg->off) { + verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n", + regno, reg->off, off - reg->off); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); @@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn tn_buf, off, size); return -EACCES; } - off += reg->var_off.value; err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 26f3250bdcd2..3c7d3a45a3c5 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6645,6 +6645,20 @@ static struct bpf_test tests[] = { .errstr = "BPF_END uses reserved fields", .result = REJECT, }, + { + "arithmetic ops make PTR_TO_CTX unusable", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct __sk_buff, data) - + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From b703798386fb7288d5a995bd2284a984a5e24f3c Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Thu, 19 Oct 2017 11:27:24 -0500 Subject: objtool: Fix memory leak in decode_instructions() When an error occurs before adding an allocated insn to the list, free it before returning. Signed-off-by: Kamalesh Babulal Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/336da800bf6070eae11f4e0a3b9ca64c27658114.1508430423.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a0c518ecf085..c0e26ad1fa7e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -267,12 +267,13 @@ static int decode_instructions(struct objtool_file *file) &insn->immediate, &insn->stack_op); if (ret) - return ret; + goto err; if (!insn->type || insn->type > INSN_LAST) { WARN_FUNC("invalid instruction type %d", insn->sec, insn->offset, insn->type); - return -1; + ret = -1; + goto err; } hash_add(file->insn_hash, &insn->hash, insn->offset); @@ -296,6 +297,10 @@ static int decode_instructions(struct objtool_file *file) } return 0; + +err: + free(insn); + return ret; } /* -- cgit v1.2.3 From 435bf0d3f99a164df7e8c30428cef266b91d1d3b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:15 -0700 Subject: bpf: enforce TCP only support for sockmap Only TCP sockets have been tested and at the moment the state change callback only handles TCP sockets. This adds a check to ensure that sockets actually being added are TCP sockets. For net-next we can consider UDP support. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/sockmap.c | 6 ++++++ tools/testing/selftests/bpf/test_maps.c | 12 +++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6424ce0e4969..c68899d5b246 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -840,6 +840,12 @@ static int sock_map_update_elem(struct bpf_map *map, return -EINVAL; } + if (skops.sk->sk_type != SOCK_STREAM || + skops.sk->sk_protocol != IPPROTO_TCP) { + fput(socket->file); + return -EOPNOTSUPP; + } + err = sock_map_ctx_update_elem(&skops, map, key, flags); fput(socket->file); return err; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index fe3a443a1102..50ce52d2013d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data) int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; int ports[] = {50200, 50201, 50202, 50204}; - int err, i, fd, sfd[6] = {0xdeadbeef}; + int err, i, fd, udp, sfd[6] = {0xdeadbeef}; u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0}; int parse_prog, verdict_prog; struct sockaddr_in addr; @@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } + /* Test update with unsupported UDP socket */ + udp = socket(AF_INET, SOCK_DGRAM, 0); + i = 0; + err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY); + if (!err) { + printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n", + i, udp); + goto out_sockmap; + } + /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); -- cgit v1.2.3 From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:36 -0700 Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region SK_SKB BPF programs are run from the socket/tcp context but early in the stack before much of the TCP metadata is needed in tcp_skb_cb. So we can use some unused fields to place BPF metadata needed for SK_SKB programs when implementing the redirect function. This allows us to drop the preempt disable logic. It does however require an API change so sk_redirect_map() has been updated to additionally provide ctx_ptr to skb. Note, we do however continue to disable/enable preemption around actual BPF program running to account for map updates. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- include/net/tcp.h | 5 ++++ kernel/bpf/sockmap.c | 19 +++++++------- net/core/filter.c | 29 +++++++++++----------- samples/sockmap/sockmap_kern.c | 2 +- tools/include/uapi/linux/bpf.h | 3 ++- tools/testing/selftests/bpf/bpf_helpers.h | 2 +- tools/testing/selftests/bpf/sockmap_verdict_prog.c | 4 +-- 8 files changed, 36 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/include/linux/filter.h b/include/linux/filter.h index d29e58fde364..818a0b26249e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -728,7 +728,7 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); -struct sock *do_sk_redirect_map(void); +struct sock *do_sk_redirect_map(struct sk_buff *skb); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/net/tcp.h b/include/net/tcp.h index 89974c5286d8..b1ef98ebce53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -840,6 +840,11 @@ struct tcp_skb_cb { struct inet6_skb_parm h6; #endif } header; /* For incoming skbs */ + struct { + __u32 key; + __u32 flags; + struct bpf_map *map; + } bpf; }; }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index c68899d5b246..beaabb21c3a3 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -39,6 +39,7 @@ #include #include #include +#include struct bpf_stab { struct bpf_map map; @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) return SK_DROP; skb_orphan(skb); + /* We need to ensure that BPF metadata for maps is also cleared + * when we orphan the skb so that we don't have the possibility + * to reference a stale map. + */ + TCP_SKB_CB(skb)->bpf.map = NULL; skb->sk = psock->sock; bpf_compute_data_end(skb); + preempt_disable(); rc = (*prog->bpf_func)(skb, prog->insnsi); + preempt_enable(); skb->sk = NULL; return rc; @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) struct sock *sk; int rc; - /* Because we use per cpu values to feed input from sock redirect - * in BPF program to do_sk_redirect_map() call we need to ensure we - * are not preempted. RCU read lock is not sufficient in this case - * with CONFIG_PREEMPT_RCU enabled so we must be explicit here. - */ - preempt_disable(); rc = smap_verdict_func(psock, skb); switch (rc) { case SK_REDIRECT: - sk = do_sk_redirect_map(); - preempt_enable(); + sk = do_sk_redirect_map(skb); if (likely(sk)) { struct smap_psock *peer = smap_psock_sk(sk); @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) /* Fall through and free skb otherwise */ case SK_DROP: default: - if (rc != SK_REDIRECT) - preempt_enable(); kfree_skb(skb); } } diff --git a/net/core/filter.c b/net/core/filter.c index 74b8c91fb5f4..ca1ba0bbfbc2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags) +BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, + struct bpf_map *, map, u32, key, u64, flags) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); if (unlikely(flags)) return SK_ABORTED; - ri->ifindex = key; - ri->flags = flags; - ri->map = map; + tcb->bpf.key = key; + tcb->bpf.flags = flags; + tcb->bpf.map = map; return SK_REDIRECT; } -struct sock *do_sk_redirect_map(void) +struct sock *do_sk_redirect_map(struct sk_buff *skb) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); struct sock *sk = NULL; - if (ri->map) { - sk = __sock_map_lookup_elem(ri->map, ri->ifindex); + if (tcb->bpf.map) { + sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key); - ri->ifindex = 0; - ri->map = NULL; - /* we do not clear flags for future lookup */ + tcb->bpf.key = 0; + tcb->bpf.map = NULL; } return sk; @@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { .func = bpf_sk_redirect_map, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_ANYTHING, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c index f9b38ef82dc2..52b0053274f4 100644 --- a/samples/sockmap/sockmap_kern.c +++ b/samples/sockmap/sockmap_kern.c @@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb) ret = 1; bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); - return bpf_sk_redirect_map(&sock_map, ret, 0); + return bpf_sk_redirect_map(skb, &sock_map, ret, 0); } SEC("sockops") diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 43ab5c402f98..be9a631a69f7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -569,9 +569,10 @@ union bpf_attr { * @flags: reserved for future use * Return: 0 on success or negative error code * - * int bpf_sk_redirect_map(map, key, flags) + * int bpf_sk_redirect_map(skb, map, key, flags) * Redirect skb to a sock in map using key as a lookup key for the * sock in map. + * @skb: pointer to skb * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 36fb9161b34a..b2e02bdcd098 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; -static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = +static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, unsigned long long flags) = diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c index 9b99bd10807d..2cd2d552938b 100644 --- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c @@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb) bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk); if (!map) - return bpf_sk_redirect_map(&sock_map_rx, sk, 0); - return bpf_sk_redirect_map(&sock_map_tx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0); } char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From f7e9cb1ecb6d922584abff16db07930162c57155 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:58 -0700 Subject: bpf: remove mark access for SK_SKB program types The skb->mark field is a union with reserved_tailroom which is used in the TCP code paths from stream memory allocation. Allowing SK_SKB programs to set this field creates a conflict with future code optimizations, such as "gifting" the skb to the egress path instead of creating a new skb and doing a memcpy. Because we do not have a released version of SK_SKB yet lets just remove it for now. A more appropriate scratch pad to use at the socket layer is dev_scratch, but lets add that in future kernels when needed. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 +- tools/testing/selftests/bpf/test_verifier.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/net/core/filter.c b/net/core/filter.c index ca1ba0bbfbc2..aa0265997f93 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3684,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size, { if (type == BPF_WRITE) { switch (off) { - case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, tc_index): case bpf_ctx_range(struct __sk_buff, priority): break; @@ -3694,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size, } switch (off) { + case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, tc_classid): return false; case bpf_ctx_range(struct __sk_buff, data): diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3c7d3a45a3c5..50e15cedbb7f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", }, { - "check skb->mark is writeable by SK_SKB", + "invalid access of skb->mark for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", + }, + { + "check skb->mark is not writeable by SK_SKB", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = ACCEPT, + .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", }, { "check skb->tc_index is writeable by SK_SKB", -- cgit v1.2.3 From b37242c773b21edcd566e3bf995fb91d06b9537a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:23 +0200 Subject: bpf: add test cases to bpf selftests to cover all access tests Lets add test cases to cover really all possible direct packet access tests for good/bad access cases so we keep tracking them. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 480 ++++++++++++++++++++++++++++ 1 file changed, 480 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 50e15cedbb7f..64ae21f64489 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6671,6 +6671,486 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "XDP pkt read, pkt_end mangling, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end mangling, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' > pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' > pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' > pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end > pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end > pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end > pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' < pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' < pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' < pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end < pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end < pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end < pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end >= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end >= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end >= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end <= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end <= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end <= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From bfa640757e9378c2f26867e723f1287e94f5a7ad Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 27 Oct 2017 09:45:53 -0700 Subject: bpf: rename sk_actions to align with bpf infrastructure Recent additions to support multiple programs in cgroups impose a strict requirement, "all yes is yes, any no is no". To enforce this the infrastructure requires the 'no' return code, SK_DROP in this case, to be 0. To apply these rules to SK_SKB program types the sk_actions return codes need to be adjusted. This fix adds SK_PASS and makes 'SK_DROP = 0'. Finally, remove SK_ABORTED to remove any chance that the API may allow aborted program flows to be passed up the stack. This would be incorrect behavior and allow programs to break existing policies. Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 6 +++--- kernel/bpf/sockmap.c | 3 ++- net/core/filter.c | 5 +++-- tools/include/uapi/linux/bpf.h | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f90860d1f897..0d7948ce2128 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -575,7 +575,7 @@ union bpf_attr { * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use - * Return: SK_REDIRECT + * Return: SK_PASS * * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops @@ -786,8 +786,8 @@ struct xdp_md { }; enum sk_action { - SK_ABORTED = 0, - SK_DROP, + SK_DROP = 0, + SK_PASS, SK_REDIRECT, }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6778fb773934..66f00a2b27f4 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -122,7 +122,8 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) preempt_enable(); skb->sk = NULL; - return rc; + return rc == SK_PASS ? + (TCP_SKB_CB(skb)->bpf.map ? SK_REDIRECT : SK_PASS) : SK_DROP; } static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) diff --git a/net/core/filter.c b/net/core/filter.c index 68eaa2f81a8e..6ae94f825f72 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1844,14 +1844,15 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, { struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + /* If user passes invalid input drop the packet. */ if (unlikely(flags)) - return SK_ABORTED; + return SK_DROP; tcb->bpf.key = key; tcb->bpf.flags = flags; tcb->bpf.map = map; - return SK_REDIRECT; + return SK_PASS; } struct sock *do_sk_redirect_map(struct sk_buff *skb) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 24b35a1fd4d6..c174971afbe6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -787,8 +787,8 @@ struct xdp_md { }; enum sk_action { - SK_ABORTED = 0, - SK_DROP, + SK_DROP = 0, + SK_PASS, SK_REDIRECT, }; -- cgit v1.2.3 From 7f071998474a9e5f7b98103d3058a1b8ca5887e6 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 26 Oct 2017 18:24:42 -0700 Subject: selftests: Introduce a new script to generate tc batch file # ./tdc_batch.py -h usage: tdc_batch.py [-h] [-n NUMBER] [-o] [-s] [-p] device file TC batch file generator positional arguments: device device name file batch file name optional arguments: -h, --help show this help message and exit -n NUMBER, --number NUMBER how many lines in batch file -o, --skip_sw skip_sw (offload), by default skip_hw -s, --share_action all filters share the same action -p, --prio all filters have different prio Acked-by: Jamal Hadi Salim Acked-by: Lucas Bates Signed-off-by: Chris Mi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc_batch.py | 62 +++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100755 tools/testing/selftests/tc-testing/tdc_batch.py (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py new file mode 100755 index 000000000000..707c6bfef689 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tdc_batch.py @@ -0,0 +1,62 @@ +#!/usr/bin/python3 + +""" +tdc_batch.py - a script to generate TC batch file + +Copyright (C) 2017 Chris Mi +""" + +import argparse + +parser = argparse.ArgumentParser(description='TC batch file generator') +parser.add_argument("device", help="device name") +parser.add_argument("file", help="batch file name") +parser.add_argument("-n", "--number", type=int, + help="how many lines in batch file") +parser.add_argument("-o", "--skip_sw", + help="skip_sw (offload), by default skip_hw", + action="store_true") +parser.add_argument("-s", "--share_action", + help="all filters share the same action", + action="store_true") +parser.add_argument("-p", "--prio", + help="all filters have different prio", + action="store_true") +args = parser.parse_args() + +device = args.device +file = open(args.file, 'w') + +number = 1 +if args.number: + number = args.number + +skip = "skip_hw" +if args.skip_sw: + skip = "skip_sw" + +share_action = "" +if args.share_action: + share_action = "index 1" + +prio = "prio 1" +if args.prio: + prio = "" + if number > 0x4000: + number = 0x4000 + +index = 0 +for i in range(0x100): + for j in range(0x100): + for k in range(0x100): + mac = ("%02x:%02x:%02x" % (i, j, k)) + src_mac = "e4:11:00:" + mac + dst_mac = "e4:12:00:" + mac + cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s " + "src_mac %s dst_mac %s action drop %s" % + (device, prio, skip, src_mac, dst_mac, share_action)) + file.write("%s\n" % cmd) + index += 1 + if index >= number: + file.close() + exit(0) -- cgit v1.2.3 From 31c2611b66e01378b54f7ef641cb0d23fcd8502f Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 26 Oct 2017 18:24:43 -0700 Subject: selftests: Introduce a new test case to tc testsuite In this patchset, we fixed a tc bug. This patch adds the test case that reproduces the bug. To run this test case, user should specify an existing NIC device: # sudo ./tdc.py -d enp4s0f0 This test case belongs to category "flower". If user doesn't specify a NIC device, the test cases belong to "flower" will not be run. In this test case, we create 1M filters and all filters share the same action. When destroying all filters, kernel should not panic. It takes about 18s to run it. Acked-by: Jamal Hadi Salim Acked-by: Lucas Bates Signed-off-by: Chris Mi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/filters/tests.json | 23 +++++++++++++++++++++- tools/testing/selftests/tc-testing/tdc.py | 20 +++++++++++++++---- tools/testing/selftests/tc-testing/tdc_config.py | 2 ++ 3 files changed, 40 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index c727b96a59b0..5fa02d86b35f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -17,5 +17,26 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "d052", + "name": "Add 1M filters with the same action", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress", + "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" + ], + "cmdUnderTest": "$TC -b $BATCH_FILE", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm $BATCH_FILE" + ] } -] \ No newline at end of file +] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index cd61b7844c0d..5f11f5d7456e 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -88,7 +88,7 @@ def prepare_env(cmdlist): exit(1) -def test_runner(filtered_tests): +def test_runner(filtered_tests, args): """ Driver function for the unit tests. @@ -105,6 +105,8 @@ def test_runner(filtered_tests): for tidx in testlist: result = True tresult = "" + if "flower" in tidx["category"] and args.device == None: + continue print("Test " + tidx["id"] + ": " + tidx["name"]) prepare_env(tidx["setup"]) (p, procout) = exec_cmd(tidx["cmdUnderTest"]) @@ -152,6 +154,10 @@ def ns_create(): exec_cmd(cmd, False) cmd = 'ip -s $NS link set $DEV1 up' exec_cmd(cmd, False) + cmd = 'ip link set $DEV2 netns $NS' + exec_cmd(cmd, False) + cmd = 'ip -s $NS link set $DEV2 up' + exec_cmd(cmd, False) def ns_destroy(): @@ -211,7 +217,8 @@ def set_args(parser): help='Execute the single test case with specified ID') parser.add_argument('-i', '--id', action='store_true', dest='gen_id', help='Generate ID numbers for new test cases') - return parser + parser.add_argument('-d', '--device', + help='Execute the test case in flower category') return parser @@ -225,6 +232,8 @@ def check_default_settings(args): if args.path != None: NAMES['TC'] = args.path + if args.device != None: + NAMES['DEV2'] = args.device if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) @@ -381,14 +390,17 @@ def set_operation_mode(args): if (len(alltests) == 0): print("Cannot find a test case with ID matching " + target_id) exit(1) - catresults = test_runner(alltests) + catresults = test_runner(alltests, args) print("All test results: " + "\n\n" + catresults) elif (len(target_category) > 0): + if (target_category == "flower") and args.device == None: + print("Please specify a NIC device (-d) to run category flower") + exit(1) if (target_category not in ucat): print("Specified category is not present in this file.") exit(1) else: - catresults = test_runner(testcases[target_category]) + catresults = test_runner(testcases[target_category], args) print("Category " + target_category + "\n\n" + catresults) ns_destroy() diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index 01087375a7c3..b6352515c1b5 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -12,6 +12,8 @@ NAMES = { # Name of veth devices to be created for the namespace 'DEV0': 'v0p0', 'DEV1': 'v0p1', + 'DEV2': '', + 'BATCH_FILE': './batch.txt', # Name of the namespace to use 'NS': 'tcut' } -- cgit v1.2.3