From cee271678d0e3177a25d0fcb2fa5e051d48e4262 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Mon, 8 Oct 2018 19:40:16 +0200 Subject: xsk: do not call synchronize_net() under RCU read lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XSKMAP update and delete functions called synchronize_net(), which can sleep. It is not allowed to sleep during an RCU read section. Instead we need to make sure that the sock sk_destruct (xsk_destruct) function is asynchronously called after an RCU grace period. Setting the SOCK_RCU_FREE flag for XDP sockets takes care of this. Fixes: fbfc504a24f5 ("bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP") Reported-by: Eric Dumazet Signed-off-by: Björn Töpel Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- kernel/bpf/xskmap.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 9f8463afda9c..47147c9e184d 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -192,11 +192,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, sock_hold(sock->sk); old_xs = xchg(&m->xsk_map[i], xs); - if (old_xs) { - /* Make sure we've flushed everything. */ - synchronize_net(); + if (old_xs) sock_put((struct sock *)old_xs); - } sockfd_put(sock); return 0; @@ -212,11 +209,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) return -EINVAL; old_xs = xchg(&m->xsk_map[k], NULL); - if (old_xs) { - /* Make sure we've flushed everything. */ - synchronize_net(); + if (old_xs) sock_put((struct sock *)old_xs); - } return 0; } -- cgit v1.2.3 From 9c0be3f6b5d776dfe3ed249862c244a4486414dc Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 13 Oct 2018 15:10:50 -0400 Subject: tracepoint: Fix tracepoint array element size mismatch commit 46e0c9be206f ("kernel: tracepoints: add support for relative references") changes the layout of the __tracepoint_ptrs section on architectures supporting relative references. However, it does so without turning struct tracepoint * const into const int elsewhere in the tracepoint code, which has the following side-effect: Setting mod->num_tracepoints is done in by module.c: mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", sizeof(*mod->tracepoints_ptrs), &mod->num_tracepoints); Basically, since sizeof(*mod->tracepoints_ptrs) is a pointer size (rather than sizeof(int)), num_tracepoints is erroneously set to half the size it should be on 64-bit arch. So a module with an odd number of tracepoints misses the last tracepoint due to effect of integer division. So in the module going notifier: for_each_tracepoint_range(mod->tracepoints_ptrs, mod->tracepoints_ptrs + mod->num_tracepoints, tp_module_going_check_quiescent, NULL); the expression (mod->tracepoints_ptrs + mod->num_tracepoints) actually evaluates to something within the bounds of the array, but miss the last tracepoint if the number of tracepoints is odd on 64-bit arch. Fix this by introducing a new typedef: tracepoint_ptr_t, which is either "const int" on architectures that have PREL32 relocations, or "struct tracepoint * const" on architectures that does not have this feature. Also provide a new tracepoint_ptr_defer() static inline to encapsulate deferencing this type rather than duplicate code and ugly idefs within the for_each_tracepoint_range() implementation. This issue appears in 4.19-rc kernels, and should ideally be fixed before the end of the rc cycle. Acked-by: Ard Biesheuvel Acked-by: Jessica Yu Link: http://lkml.kernel.org/r/20181013191050.22389-1-mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/20180704083651.24360-7-ard.biesheuvel@linaro.org Cc: Michael Ellerman Cc: Ingo Molnar Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: James Morris Cc: James Morris Cc: Josh Poimboeuf Cc: Kees Cook Cc: Nicolas Pitre Cc: Paul Mackerras Cc: Petr Mladek Cc: Russell King Cc: "Serge E. Hallyn" Cc: Sergey Senozhatsky Cc: Thomas Garnier Cc: Thomas Gleixner Cc: Will Deacon Cc: Andrew Morton Cc: Linus Torvalds Cc: Greg Kroah-Hartman Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (VMware) --- include/linux/module.h | 3 ++- include/linux/tracepoint-defs.h | 6 ++++++ include/linux/tracepoint.h | 36 +++++++++++++++++++++++------------- kernel/tracepoint.c | 24 ++++++++---------------- 4 files changed, 39 insertions(+), 30 deletions(-) (limited to 'kernel') diff --git a/include/linux/module.h b/include/linux/module.h index f807f15bebbe..e19ae08c7fb8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -430,7 +431,7 @@ struct module { #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; - struct tracepoint * const *tracepoints_ptrs; + tracepoint_ptr_t *tracepoints_ptrs; #endif #ifdef HAVE_JUMP_LABEL struct jump_entry *jump_entries; diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 22c5a46e9693..49ba9cde7e4b 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -35,6 +35,12 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +typedef const int tracepoint_ptr_t; +#else +typedef struct tracepoint * const tracepoint_ptr_t; +#endif + struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 041f7e56a289..538ba1a58f5b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -99,6 +99,29 @@ extern void syscall_unregfunc(void); #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return offset_to_ptr(p); +} + +#define __TRACEPOINT_ENTRY(name) \ + asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ + " .balign 4 \n" \ + " .long __tracepoint_" #name " - . \n" \ + " .previous \n") +#else +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return *p; +} + +#define __TRACEPOINT_ENTRY(name) \ + static tracepoint_ptr_t __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ + &__tracepoint_##name +#endif + #endif /* _LINUX_TRACEPOINT_H */ /* @@ -253,19 +276,6 @@ extern void syscall_unregfunc(void); return static_key_false(&__tracepoint_##name.key); \ } -#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS -#define __TRACEPOINT_ENTRY(name) \ - asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ - " .balign 4 \n" \ - " .long __tracepoint_" #name " - . \n" \ - " .previous \n") -#else -#define __TRACEPOINT_ENTRY(name) \ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ - &__tracepoint_##name -#endif - /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index bf2c06ef9afc..a3be42304485 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -28,8 +28,8 @@ #include #include -extern struct tracepoint * const __start___tracepoints_ptrs[]; -extern struct tracepoint * const __stop___tracepoints_ptrs[]; +extern tracepoint_ptr_t __start___tracepoints_ptrs[]; +extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; DEFINE_SRCU(tracepoint_srcu); EXPORT_SYMBOL_GPL(tracepoint_srcu); @@ -371,25 +371,17 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); -static void for_each_tracepoint_range(struct tracepoint * const *begin, - struct tracepoint * const *end, +static void for_each_tracepoint_range( + tracepoint_ptr_t *begin, tracepoint_ptr_t *end, void (*fct)(struct tracepoint *tp, void *priv), void *priv) { + tracepoint_ptr_t *iter; + if (!begin) return; - - if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) { - const int *iter; - - for (iter = (const int *)begin; iter < (const int *)end; iter++) - fct(offset_to_ptr(iter), priv); - } else { - struct tracepoint * const *iter; - - for (iter = begin; iter < end; iter++) - fct(*iter, priv); - } + for (iter = begin; iter < end; iter++) + fct(tracepoint_ptr_deref(iter), priv); } #ifdef CONFIG_MODULES -- cgit v1.2.3 From 12ad0cb2123aed30241a14792ef5bef9efcccbcd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 15 Oct 2018 23:31:42 -0400 Subject: tracing: Use trace_clock_local() for looping in preemptirq_delay_test.c The preemptirq_delay_test module is used for the ftrace selftest code that tests the latency tracers. The problem is that it uses ktime for the delay loop, and then checks the tracer to see if the delay loop is caught, but the tracer uses trace_clock_local() which uses various different other clocks to measure the latency. As ktime uses the clock cycles, and the code then converts that to nanoseconds, it causes rounding errors, and the preemptirq latency tests are failing due to being off by 1 (it expects to see a delay of 500000 us, but the delay is only 499999 us). This is happening due to a rounding error in the ktime (which is totally legit). The purpose of the test is to see if it can catch the delay, not to test the accuracy between trace_clock_local() and ktime_get(). Best to use apples to apples, and have the delay loop use the same clock as the latency tracer does. Cc: stable@vger.kernel.org Fixes: f96e8577da102 ("lib: Add module for testing preemptoff/irqsoff latency tracers") Acked-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/preemptirq_delay_test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index f704390db9fc..d8765c952fab 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -5,12 +5,12 @@ * Copyright (C) 2018 Joel Fernandes (Google) */ +#include #include #include #include #include #include -#include #include #include #include @@ -25,13 +25,13 @@ MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt or irq (default ir static void busy_wait(ulong time) { - ktime_t start, end; - start = ktime_get(); + u64 start, end; + start = trace_clock_local(); do { - end = ktime_get(); + end = trace_clock_local(); if (kthread_should_stop()) break; - } while (ktime_to_ns(ktime_sub(end, start)) < (time * 1000)); + } while ((end - start) < (time * 1000)); } static int preemptirq_delay_run(void *data) -- cgit v1.2.3