From c1fbc0cf81f1c464f5fda322c1104d4bb1da6711 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 5 Oct 2017 14:42:34 +0530 Subject: perf callchain: Compare dsos (as well) for CCKEY_FUNCTION Two functions from different binaries can have same start address. Thus, comparing only start address in match_chain() leads to inconsistent callchains. Fix this by adding a check for dsos as well. Ex, https://www.spinics.net/lists/linux-perf-users/msg04067.html Reported-by: Alexander Pozdneev Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Krister Johansen Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yao Jin Cc: zhangmengting@huawei.com Link: http://lkml.kernel.org/r/20171005091234.5874-1-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index be09d77cade0..a971caf3759d 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -685,6 +685,8 @@ static enum match_result match_chain(struct callchain_cursor_node *node, { struct symbol *sym = node->sym; u64 left, right; + struct dso *left_dso = NULL; + struct dso *right_dso = NULL; if (callchain_param.key == CCKEY_SRCLINE) { enum match_result match = match_chain_srcline(node, cnode); @@ -696,12 +698,14 @@ static enum match_result match_chain(struct callchain_cursor_node *node, if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) { left = cnode->ms.sym->start; right = sym->start; + left_dso = cnode->ms.map->dso; + right_dso = node->map->dso; } else { left = cnode->ip; right = node->ip; } - if (left == right) { + if (left == right && left_dso == right_dso) { if (node->branch) { cnode->branch_count++; -- cgit v1.2.3 From 3346a6a4e5ba8c040360f753b26938cec31a4bdc Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 2 Oct 2017 16:16:13 -0600 Subject: selftests: x86: sysret_ss_attrs doesn't build on a PIE build sysret_ss_attrs fails to compile leading x86 test run to fail on systems configured to build using PIE by default. Add -no-pie fix it. Relocation might still fail if relocated above 4G. For now this change fixes the build and runs x86 tests. tools/testing/selftests/x86$ make gcc -m64 -o .../tools/testing/selftests/x86/single_step_syscall_64 -O2 -g -std=gnu99 -pthread -Wall single_step_syscall.c -lrt -ldl gcc -m64 -o .../tools/testing/selftests/x86/sysret_ss_attrs_64 -O2 -g -std=gnu99 -pthread -Wall sysret_ss_attrs.c thunks.S -lrt -ldl /usr/bin/ld: /tmp/ccS6pvIh.o: relocation R_X86_64_32S against `.text' can not be used when making a shared object; recompile with -fPIC /usr/bin/ld: final link failed: Nonrepresentable section on output collect2: error: ld returned 1 exit status Makefile:49: recipe for target '.../tools/testing/selftests/x86/sysret_ss_attrs_64' failed make: *** [.../tools/testing/selftests/x86/sysret_ss_attrs_64] Error 1 Suggested-by: Andy Lutomirski Signed-off-by: Shuah Khan --- tools/testing/selftests/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 97f187e2663f..0a74a20ca32b 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -20,7 +20,7 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -CFLAGS := -O2 -g -std=gnu99 -pthread -Wall +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie UNAME_M := $(shell uname -m) CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -- cgit v1.2.3 From ea344f6a507f8ce92bf1e10f044c6cfc67e4d22b Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 2 Oct 2017 17:02:22 -0600 Subject: selftests: mqueue: fix regression in silencing output from RUN_TESTS Fix fix regression in silencing output from RUN_TESTS introduced by commit <8230b905a6780c6> selftests: mqueue: Use full path to run tests from Makefile Signed-off-by: Shuah Khan --- tools/testing/selftests/mqueue/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 0f5e347b068d..152823b6cb21 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -5,8 +5,8 @@ TEST_GEN_PROGS := mq_open_tests mq_perf_tests include ../lib.mk override define RUN_TESTS - $(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" - $(OUTPUT)//mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" + @$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" + @$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" endef override define EMIT_TESTS -- cgit v1.2.3 From e9516c0813aeb89ebd19ec0ed39fbfcd78b6ef3a Mon Sep 17 00:00:00 2001 From: Mark Santaniello Date: Fri, 6 Oct 2017 01:07:22 -0700 Subject: perf script: Add missing separator for "-F ip,brstack" (and brstackoff) Prior to commit 55b9b50811ca ("perf script: Support -F brstack,dso and brstacksym,dso"), we were printing a space before the brstack data. It seems that this space was important. Without it, parsing is difficult. Very sorry for the mistake. Notice here how the "ip" and "brstack" run together: $ perf script -F ip,brstack | head -n 1 22e18c40x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 0x22e195d/0x22e1990/P/-/-/0 0x22e18e9/0x22e1943/P/-/-/0 0x22e1a69/0x22e18c0/P/-/-/0 0x22e19f7/0x22e1a20/P/-/-/0 0x22e1910/0x22e19ee/P/-/-/0 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 0x22e195d/0x22e1990/P/-/-/0 0x22e18e9/0x22e1943/P/-/-/0 0x22e1a69/0x22e18c0/P/-/-/0 0x22e19f7/0x22e1a20/P/-/-/0 0x22e1910/0x22e19ee/P/-/-/0 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 After this diff, sanity is restored: $ perf script -F ip,brstack | head -n 1 22e18c4 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 0x22e195d/0x22e1990/P/-/-/0 0x22e18e9/0x22e1943/P/-/-/0 0x22e1a69/0x22e18c0/P/-/-/0 0x22e19f7/0x22e1a20/P/-/-/0 0x22e1910/0x22e19ee/P/-/-/0 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 0x22e195d/0x22e1990/P/-/-/0 0x22e18e9/0x22e1943/P/-/-/0 0x22e1a69/0x22e18c0/P/-/-/0 0x22e19f7/0x22e1a20/P/-/-/0 0x22e1910/0x22e19ee/P/-/-/0 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 Signed-off-by: Mark Santaniello Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: 4.13+ Fixes: 55b9b50811ca ("perf script: Support -F brstack,dso and brstacksym,dso") Link: http://lkml.kernel.org/r/20171006080722.3442046-1-marksan@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3d4c3b5e1868..0c977b6e0f8b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -586,7 +586,7 @@ static void print_sample_brstack(struct perf_sample *sample, thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); } - printf("0x%"PRIx64, from); + printf(" 0x%"PRIx64, from); if (PRINT_FIELD(DSO)) { printf("("); map__fprintf_dsoname(alf.map, stdout); @@ -681,7 +681,7 @@ static void print_sample_brstackoff(struct perf_sample *sample, if (alt.map && !alt.map->dso->adjust_symbols) to = map__map_ip(alt.map, to); - printf("0x%"PRIx64, from); + printf(" 0x%"PRIx64, from); if (PRINT_FIELD(DSO)) { printf("("); map__fprintf_dsoname(alf.map, stdout); -- cgit v1.2.3 From 66ec11919a0f96e936bb731fdbc2851316077d26 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 6 Oct 2017 19:38:22 +0100 Subject: perf pmu: Unbreak perf record for arm/arm64 with events with explicit PMU Currently, perf record is broken on arm/arm64 systems when the PMU is specified explicitly as part of the event, e.g. $ ./perf record -e armv8_cortex_a53/cpu_cycles/u true In such cases, perf record fails to open events unless perf_event_paranoid is set to -1, even if the PMU in question supports mode exclusion. Further, even when perf_event_paranoid is toggled, no samples are recorded. This is an unintended side effect of commit: e3ba76deef23064f ("perf tools: Force uncore events to system wide monitoring) ... which assumes that if a PMU has an associated cpu_map, it is an uncore PMU, and forces events for such PMUs to be system-wide. This is not true for arm/arm64 systems, which can have heterogeneous CPUs. To account for this, multiple CPU PMUs are exposed, each with a "cpus" field under sysfs, which the perf tool parses into a cpu_map. ARM PMUs do not have a "cpumask" file, and only have a "cpus" file. For the gory details as to why, see commit: 7e3fcffe95544010 ("perf pmu: Support alternative sysfs cpumask") Given all of this, we can instead identify uncore PMUs by explicitly checking for a "cpumask" file, and restore arm/arm64 PMU support back to a working state. This patch does so, adding a new perf_pmu::is_uncore field, and splitting the existing cpumask parsing so that it can be reused. Signed-off-by: Mark Rutland Tested-by Will Deacon Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: 4.12+ Fixes: e3ba76deef23064f ("perf tools: Force uncore events to system wide monitoring) Link: http://lkml.kernel.org/r/1507315102-5942-1-git-send-email-mark.rutland@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 9 ++++--- tools/perf/util/pmu.c | 56 +++++++++++++++++++++++++++++++----------- tools/perf/util/pmu.h | 1 + 3 files changed, 47 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f6257fb4f08c..39b15968eab1 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -309,10 +309,11 @@ static char *get_config_name(struct list_head *head_terms) static struct perf_evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct cpu_map *cpus, + char *name, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats) { struct perf_evsel *evsel; + struct cpu_map *cpus = pmu ? pmu->cpus : NULL; event_attr_init(attr); @@ -323,7 +324,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->cpus = cpu_map__get(cpus); evsel->own_cpus = cpu_map__get(cpus); - evsel->system_wide = !!cpus; + evsel->system_wide = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) @@ -1233,7 +1234,7 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats); + evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); return evsel ? 0 : -ENOMEM; } @@ -1254,7 +1255,7 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; evsel = __add_event(list, &parse_state->idx, &attr, - get_config_name(head_config), pmu->cpus, + get_config_name(head_config), pmu, &config_terms, auto_merge_stats); if (evsel) { evsel->unit = info.unit; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ac16a9db1fb5..1c4d7b4e4fb5 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -470,17 +470,36 @@ static void pmu_read_sysfs(void) closedir(dir); } +static struct cpu_map *__pmu_cpumask(const char *path) +{ + FILE *file; + struct cpu_map *cpus; + + file = fopen(path, "r"); + if (!file) + return NULL; + + cpus = cpu_map__read(file); + fclose(file); + return cpus; +} + +/* + * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) + * may have a "cpus" file. + */ +#define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" +#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" + static struct cpu_map *pmu_cpumask(const char *name) { - struct stat st; char path[PATH_MAX]; - FILE *file; struct cpu_map *cpus; const char *sysfs = sysfs__mountpoint(); const char *templates[] = { - "%s/bus/event_source/devices/%s/cpumask", - "%s/bus/event_source/devices/%s/cpus", - NULL + CPUS_TEMPLATE_UNCORE, + CPUS_TEMPLATE_CPU, + NULL }; const char **template; @@ -489,20 +508,25 @@ static struct cpu_map *pmu_cpumask(const char *name) for (template = templates; *template; template++) { snprintf(path, PATH_MAX, *template, sysfs, name); - if (stat(path, &st) == 0) - break; + cpus = __pmu_cpumask(path); + if (cpus) + return cpus; } - if (!*template) - return NULL; + return NULL; +} - file = fopen(path, "r"); - if (!file) - return NULL; +static bool pmu_is_uncore(const char *name) +{ + char path[PATH_MAX]; + struct cpu_map *cpus; + const char *sysfs = sysfs__mountpoint(); - cpus = cpu_map__read(file); - fclose(file); - return cpus; + snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); + cpus = __pmu_cpumask(path); + cpu_map__put(cpus); + + return !!cpus; } /* @@ -617,6 +641,8 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->cpus = pmu_cpumask(name); + pmu->is_uncore = pmu_is_uncore(name); + INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); list_splice(&format, &pmu->format); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 389e9729331f..fe0de0502ce2 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -22,6 +22,7 @@ struct perf_pmu { char *name; __u32 type; bool selectable; + bool is_uncore; struct perf_event_attr *default_config; struct cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ -- cgit v1.2.3 From aa7b4e02b328f0589b6133e72aafb1289f614a79 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Oct 2017 15:55:45 -0300 Subject: tools include uapi bpf.h: Sync kernel ABI header with tooling header Silences the checker: Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h' The 90caccdd8cc0 ("bpf: fix bpf_tail_call() x64 JIT") cset only updated a comment in uapi/bpf.h. Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: David Ahern Cc: David S. Miller Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-rwx2cqbf0x1lwa1krsr6e6hd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 43ab5c402f98..f90860d1f897 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -312,7 +312,7 @@ union bpf_attr { * jump into another BPF program * @ctx: context pointer passed to next program * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run + * @index: 32-bit index inside array that selects specific program to run * Return: 0 on success or negative error * * int bpf_clone_redirect(skb, ifindex, flags) -- cgit v1.2.3 From 7ddd8faf4399ab4f4edad5604eab35f8a87caf02 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 13 Oct 2017 15:57:54 -0700 Subject: userfaultfd: selftest: exercise -EEXIST only in background transfer I was stress testing some backports and with high load, after some time, the latest version of the selftest showed some false positive in connection with the uffdio_copy_retry. This seems to fix it while still exercising -EEXIST in the background transfer once in a while. The fork child will quit after the last UFFDIO_COPY is run, so a repeated UFFDIO_COPY may not return -EEXIST. This change restricts the -EEXIST stress to the background transfer where the memory can't go away from under it. Also updated uffdio_zeropage, so the interface is consistent. Link: http://lkml.kernel.org/r/20171004171541.1495-2-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Cc: Pavel Emelyanov Cc: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index a2c53a3d223d..de2f9ec8a87f 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -397,7 +397,7 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, } } -static int copy_page(int ufd, unsigned long offset) +static int __copy_page(int ufd, unsigned long offset, bool retry) { struct uffdio_copy uffdio_copy; @@ -418,7 +418,7 @@ static int copy_page(int ufd, unsigned long offset) fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", uffdio_copy.copy), exit(1); } else { - if (test_uffdio_copy_eexist) { + if (test_uffdio_copy_eexist && retry) { test_uffdio_copy_eexist = false; retry_copy_page(ufd, &uffdio_copy, offset); } @@ -427,6 +427,16 @@ static int copy_page(int ufd, unsigned long offset) return 0; } +static int copy_page_retry(int ufd, unsigned long offset) +{ + return __copy_page(ufd, offset, true); +} + +static int copy_page(int ufd, unsigned long offset) +{ + return __copy_page(ufd, offset, false); +} + static void *uffd_poll_thread(void *arg) { unsigned long cpu = (unsigned long) arg; @@ -544,7 +554,7 @@ static void *background_thread(void *arg) for (page_nr = cpu * nr_pages_per_cpu; page_nr < (cpu+1) * nr_pages_per_cpu; page_nr++) - copy_page(uffd, page_nr * page_size); + copy_page_retry(uffd, page_nr * page_size); return NULL; } @@ -779,7 +789,7 @@ static void retry_uffdio_zeropage(int ufd, } } -static int uffdio_zeropage(int ufd, unsigned long offset) +static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) { struct uffdio_zeropage uffdio_zeropage; int ret; @@ -814,7 +824,7 @@ static int uffdio_zeropage(int ufd, unsigned long offset) fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", uffdio_zeropage.zeropage), exit(1); } else { - if (test_uffdio_zeropage_eexist) { + if (test_uffdio_zeropage_eexist && retry) { test_uffdio_zeropage_eexist = false; retry_uffdio_zeropage(ufd, &uffdio_zeropage, offset); @@ -830,6 +840,11 @@ static int uffdio_zeropage(int ufd, unsigned long offset) return 0; } +static int uffdio_zeropage(int ufd, unsigned long offset) +{ + return __uffdio_zeropage(ufd, offset, false); +} + /* exercise UFFDIO_ZEROPAGE */ static int userfaultfd_zeropage_test(void) { -- cgit v1.2.3 From c97cc7dbce2fe6f46e137f4b040f915a0181ee85 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 17 Oct 2017 16:00:02 -0400 Subject: Revert "tools/power turbostat: stop migrating, unless '-m'" This reverts commit c91fc8519d87715a3a173475ea3778794c139996. That change caused a C6 and PC6 residency regression on large idle systems. Users also complained about new output indicating jitter: turbostat: cpu6 jitter 3794 9142 Signed-off-by: Len Brown Cc: 4.13+ # v4.13+ Signed-off-by: Rafael J. Wysocki --- tools/power/x86/turbostat/turbostat.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dafba2c1e7d..bd9c6b31a504 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -92,7 +92,6 @@ unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; -int do_migrate; double discover_bclk(unsigned int family, unsigned int model); unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ @@ -303,9 +302,6 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int cpu_migrate(int cpu) { - if (!do_migrate) - return 0; - CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) @@ -5007,7 +5003,6 @@ void cmdline(int argc, char **argv) {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, {"list", no_argument, 0, 'l'}, - {"migrate", no_argument, 0, 'm'}, {"out", required_argument, 0, 'o'}, {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, @@ -5019,7 +5014,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -5062,9 +5057,6 @@ void cmdline(int argc, char **argv) list_header_only++; quiet++; break; - case 'm': - do_migrate = 1; - break; case 'o': outf = fopen_or_die(optarg, "w"); break; -- cgit v1.2.3 From 28e33f9d78eefe98ea86673ab31e988b37a9a738 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 11:16:55 -0700 Subject: bpf: disallow arithmetic operations on context pointer Commit f1174f77b50c ("bpf/verifier: rework value tracking") removed the crafty selection of which pointer types are allowed to be modified. This is OK for most pointer types since adjust_ptr_min_max_vals() will catch operations on immutable pointers. One exception is PTR_TO_CTX which is now allowed to be offseted freely. The intent of aforementioned commit was to allow context access via modified registers. The offset passed to ->is_valid_access() verifier callback has been adjusted by the value of the variable offset. What is missing, however, is taking the variable offset into account when the context register is used. Or in terms of the code adding the offset to the value passed to the ->convert_ctx_access() callback. This leads to the following eBPF user code: r1 += 68 r0 = *(u32 *)(r1 + 8) exit being translated to this in kernel space: 0: (07) r1 += 68 1: (61) r0 = *(u32 *)(r1 +180) 2: (95) exit Offset 8 is corresponding to 180 in the kernel, but offset 76 is valid too. Verifier will "accept" access to offset 68+8=76 but then "convert" access to offset 8 as 180. Effective access to offset 248 is beyond the kernel context. (This is a __sk_buff example on a debug-heavy kernel - packet mark is 8 -> 180, 76 would be data.) Dereferencing the modified context pointer is not as easy as dereferencing other types, because we have to translate the access to reading a field in kernel structures which is usually at a different offset and often of a different size. To allow modifying the pointer we would have to make sure that given eBPF instruction will always access the same field or the fields accessed are "compatible" in terms of offset and size... Disallow dereferencing modified context pointers and add to selftests the test case described here. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Edward Cree Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 8 ++++++-- tools/testing/selftests/bpf/test_verifier.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8b8d6ba39e23..20f3889c006e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn /* ctx accesses must be at a fixed offset, so that we can * determine what type of data were returned. */ - if (!tnum_is_const(reg->var_off)) { + if (reg->off) { + verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n", + regno, reg->off, off - reg->off); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); @@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn tn_buf, off, size); return -EACCES; } - off += reg->var_off.value; err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 26f3250bdcd2..3c7d3a45a3c5 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6645,6 +6645,20 @@ static struct bpf_test tests[] = { .errstr = "BPF_END uses reserved fields", .result = REJECT, }, + { + "arithmetic ops make PTR_TO_CTX unusable", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct __sk_buff, data) - + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 435bf0d3f99a164df7e8c30428cef266b91d1d3b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:15 -0700 Subject: bpf: enforce TCP only support for sockmap Only TCP sockets have been tested and at the moment the state change callback only handles TCP sockets. This adds a check to ensure that sockets actually being added are TCP sockets. For net-next we can consider UDP support. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/sockmap.c | 6 ++++++ tools/testing/selftests/bpf/test_maps.c | 12 +++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6424ce0e4969..c68899d5b246 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -840,6 +840,12 @@ static int sock_map_update_elem(struct bpf_map *map, return -EINVAL; } + if (skops.sk->sk_type != SOCK_STREAM || + skops.sk->sk_protocol != IPPROTO_TCP) { + fput(socket->file); + return -EOPNOTSUPP; + } + err = sock_map_ctx_update_elem(&skops, map, key, flags); fput(socket->file); return err; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index fe3a443a1102..50ce52d2013d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data) int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; int ports[] = {50200, 50201, 50202, 50204}; - int err, i, fd, sfd[6] = {0xdeadbeef}; + int err, i, fd, udp, sfd[6] = {0xdeadbeef}; u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0}; int parse_prog, verdict_prog; struct sockaddr_in addr; @@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } + /* Test update with unsupported UDP socket */ + udp = socket(AF_INET, SOCK_DGRAM, 0); + i = 0; + err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY); + if (!err) { + printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n", + i, udp); + goto out_sockmap; + } + /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); -- cgit v1.2.3 From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:36 -0700 Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region SK_SKB BPF programs are run from the socket/tcp context but early in the stack before much of the TCP metadata is needed in tcp_skb_cb. So we can use some unused fields to place BPF metadata needed for SK_SKB programs when implementing the redirect function. This allows us to drop the preempt disable logic. It does however require an API change so sk_redirect_map() has been updated to additionally provide ctx_ptr to skb. Note, we do however continue to disable/enable preemption around actual BPF program running to account for map updates. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- include/net/tcp.h | 5 ++++ kernel/bpf/sockmap.c | 19 +++++++------- net/core/filter.c | 29 +++++++++++----------- samples/sockmap/sockmap_kern.c | 2 +- tools/include/uapi/linux/bpf.h | 3 ++- tools/testing/selftests/bpf/bpf_helpers.h | 2 +- tools/testing/selftests/bpf/sockmap_verdict_prog.c | 4 +-- 8 files changed, 36 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/include/linux/filter.h b/include/linux/filter.h index d29e58fde364..818a0b26249e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -728,7 +728,7 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); -struct sock *do_sk_redirect_map(void); +struct sock *do_sk_redirect_map(struct sk_buff *skb); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/net/tcp.h b/include/net/tcp.h index 89974c5286d8..b1ef98ebce53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -840,6 +840,11 @@ struct tcp_skb_cb { struct inet6_skb_parm h6; #endif } header; /* For incoming skbs */ + struct { + __u32 key; + __u32 flags; + struct bpf_map *map; + } bpf; }; }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index c68899d5b246..beaabb21c3a3 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -39,6 +39,7 @@ #include #include #include +#include struct bpf_stab { struct bpf_map map; @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) return SK_DROP; skb_orphan(skb); + /* We need to ensure that BPF metadata for maps is also cleared + * when we orphan the skb so that we don't have the possibility + * to reference a stale map. + */ + TCP_SKB_CB(skb)->bpf.map = NULL; skb->sk = psock->sock; bpf_compute_data_end(skb); + preempt_disable(); rc = (*prog->bpf_func)(skb, prog->insnsi); + preempt_enable(); skb->sk = NULL; return rc; @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) struct sock *sk; int rc; - /* Because we use per cpu values to feed input from sock redirect - * in BPF program to do_sk_redirect_map() call we need to ensure we - * are not preempted. RCU read lock is not sufficient in this case - * with CONFIG_PREEMPT_RCU enabled so we must be explicit here. - */ - preempt_disable(); rc = smap_verdict_func(psock, skb); switch (rc) { case SK_REDIRECT: - sk = do_sk_redirect_map(); - preempt_enable(); + sk = do_sk_redirect_map(skb); if (likely(sk)) { struct smap_psock *peer = smap_psock_sk(sk); @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) /* Fall through and free skb otherwise */ case SK_DROP: default: - if (rc != SK_REDIRECT) - preempt_enable(); kfree_skb(skb); } } diff --git a/net/core/filter.c b/net/core/filter.c index 74b8c91fb5f4..ca1ba0bbfbc2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags) +BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, + struct bpf_map *, map, u32, key, u64, flags) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); if (unlikely(flags)) return SK_ABORTED; - ri->ifindex = key; - ri->flags = flags; - ri->map = map; + tcb->bpf.key = key; + tcb->bpf.flags = flags; + tcb->bpf.map = map; return SK_REDIRECT; } -struct sock *do_sk_redirect_map(void) +struct sock *do_sk_redirect_map(struct sk_buff *skb) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); struct sock *sk = NULL; - if (ri->map) { - sk = __sock_map_lookup_elem(ri->map, ri->ifindex); + if (tcb->bpf.map) { + sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key); - ri->ifindex = 0; - ri->map = NULL; - /* we do not clear flags for future lookup */ + tcb->bpf.key = 0; + tcb->bpf.map = NULL; } return sk; @@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { .func = bpf_sk_redirect_map, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_ANYTHING, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c index f9b38ef82dc2..52b0053274f4 100644 --- a/samples/sockmap/sockmap_kern.c +++ b/samples/sockmap/sockmap_kern.c @@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb) ret = 1; bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); - return bpf_sk_redirect_map(&sock_map, ret, 0); + return bpf_sk_redirect_map(skb, &sock_map, ret, 0); } SEC("sockops") diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 43ab5c402f98..be9a631a69f7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -569,9 +569,10 @@ union bpf_attr { * @flags: reserved for future use * Return: 0 on success or negative error code * - * int bpf_sk_redirect_map(map, key, flags) + * int bpf_sk_redirect_map(skb, map, key, flags) * Redirect skb to a sock in map using key as a lookup key for the * sock in map. + * @skb: pointer to skb * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 36fb9161b34a..b2e02bdcd098 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; -static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = +static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, unsigned long long flags) = diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c index 9b99bd10807d..2cd2d552938b 100644 --- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c @@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb) bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk); if (!map) - return bpf_sk_redirect_map(&sock_map_rx, sk, 0); - return bpf_sk_redirect_map(&sock_map_tx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0); } char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From f7e9cb1ecb6d922584abff16db07930162c57155 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:58 -0700 Subject: bpf: remove mark access for SK_SKB program types The skb->mark field is a union with reserved_tailroom which is used in the TCP code paths from stream memory allocation. Allowing SK_SKB programs to set this field creates a conflict with future code optimizations, such as "gifting" the skb to the egress path instead of creating a new skb and doing a memcpy. Because we do not have a released version of SK_SKB yet lets just remove it for now. A more appropriate scratch pad to use at the socket layer is dev_scratch, but lets add that in future kernels when needed. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 +- tools/testing/selftests/bpf/test_verifier.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/net/core/filter.c b/net/core/filter.c index ca1ba0bbfbc2..aa0265997f93 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3684,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size, { if (type == BPF_WRITE) { switch (off) { - case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, tc_index): case bpf_ctx_range(struct __sk_buff, priority): break; @@ -3694,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size, } switch (off) { + case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, tc_classid): return false; case bpf_ctx_range(struct __sk_buff, data): diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3c7d3a45a3c5..50e15cedbb7f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", }, { - "check skb->mark is writeable by SK_SKB", + "invalid access of skb->mark for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", + }, + { + "check skb->mark is not writeable by SK_SKB", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = ACCEPT, + .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", }, { "check skb->tc_index is writeable by SK_SKB", -- cgit v1.2.3 From b37242c773b21edcd566e3bf995fb91d06b9537a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:23 +0200 Subject: bpf: add test cases to bpf selftests to cover all access tests Lets add test cases to cover really all possible direct packet access tests for good/bad access cases so we keep tracking them. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 480 ++++++++++++++++++++++++++++ 1 file changed, 480 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 50e15cedbb7f..64ae21f64489 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6671,6 +6671,486 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "XDP pkt read, pkt_end mangling, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end mangling, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' > pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' > pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' > pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end > pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end > pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end > pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' < pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' < pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' < pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end < pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end < pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end < pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end >= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end >= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end >= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end <= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end <= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end <= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3