From 0372007f5a79d61d3cb48a507717b9afb5d6addd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Mar 2020 11:05:22 +0100 Subject: context_tracking: Ensure that the critical path cannot be instrumented context tracking lacks a few protection mechanisms against instrumentation: - While the core functions are marked NOKPROBE they lack protection against function tracing which is required as the function entry/exit points can be utilized by BPF. - static functions invoked from the protected functions need to be marked as well as they can be instrumented otherwise. - using plain inline allows the compiler to emit traceable and probable functions. Fix this by marking the functions noinstr and converting the plain inlines to __always_inline. The NOKPROBE_SYMBOL() annotations are removed as the .noinstr.text section is already excluded from being probed. Cures the following objtool warnings: vmlinux.o: warning: objtool: enter_from_user_mode()+0x34: call to __context_tracking_exit() leaves .noinstr.text section vmlinux.o: warning: objtool: prepare_exit_to_usermode()+0x29: call to __context_tracking_enter() leaves .noinstr.text section vmlinux.o: warning: objtool: syscall_return_slowpath()+0x29: call to __context_tracking_enter() leaves .noinstr.text section vmlinux.o: warning: objtool: do_syscall_64()+0x7f: call to __context_tracking_enter() leaves .noinstr.text section vmlinux.o: warning: objtool: do_int80_syscall_32()+0x3d: call to __context_tracking_enter() leaves .noinstr.text section vmlinux.o: warning: objtool: do_fast_syscall_32()+0x9c: call to __context_tracking_enter() leaves .noinstr.text section and generates new ones... Signed-off-by: Thomas Gleixner Reviewed-by: Masami Hiramatsu Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134340.811520478@linutronix.de --- include/linux/context_tracking.h | 6 +++--- include/linux/context_tracking_state.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 8cac62ee6add..981b880d5b60 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -33,13 +33,13 @@ static inline void user_exit(void) } /* Called with interrupts disabled. */ -static inline void user_enter_irqoff(void) +static __always_inline void user_enter_irqoff(void) { if (context_tracking_enabled()) __context_tracking_enter(CONTEXT_USER); } -static inline void user_exit_irqoff(void) +static __always_inline void user_exit_irqoff(void) { if (context_tracking_enabled()) __context_tracking_exit(CONTEXT_USER); @@ -75,7 +75,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) * is enabled. If context tracking is disabled, returns * CONTEXT_DISABLED. This should be used primarily for debugging. */ -static inline enum ctx_state ct_state(void) +static __always_inline enum ctx_state ct_state(void) { return context_tracking_enabled() ? 
this_cpu_read(context_tracking.state) : CONTEXT_DISABLED; diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index e7fe6678b7ad..65a60d3313b0 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -26,12 +26,12 @@ struct context_tracking { extern struct static_key_false context_tracking_key; DECLARE_PER_CPU(struct context_tracking, context_tracking); -static inline bool context_tracking_enabled(void) +static __always_inline bool context_tracking_enabled(void) { return static_branch_unlikely(&context_tracking_key); } -static inline bool context_tracking_enabled_cpu(int cpu) +static __always_inline bool context_tracking_enabled_cpu(int cpu) { return context_tracking_enabled() && per_cpu(context_tracking.active, cpu); } @@ -41,7 +41,7 @@ static inline bool context_tracking_enabled_this_cpu(void) return context_tracking_enabled() && __this_cpu_read(context_tracking.active); } -static inline bool context_tracking_in_user(void) +static __always_inline bool context_tracking_in_user(void) { return __this_cpu_read(context_tracking.state) == CONTEXT_USER; } -- cgit v1.2.3 From 5916d5f9b3347344a3d96ba6b54cf8e290eba96a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Mar 2020 13:49:51 +0100 Subject: bug: Annotate WARN/BUG/stackfail as noinstr safe Warnings, bugs and stack protection fails from noinstr sections, e.g. low level and early entry code, are likely to be fatal. Mark them as "safe" to be invoked from noinstr protected code to avoid annotating all usage sites. Getting the information out is important. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134100.376598577@linutronix.de --- arch/x86/include/asm/bug.h | 3 +++ include/asm-generic/bug.h | 9 +++++++-- kernel/panic.c | 4 +++- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index facba9bc30ca..fb34ff641e0a 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -70,14 +70,17 @@ do { \ #define HAVE_ARCH_BUG #define BUG() \ do { \ + instrumentation_begin(); \ _BUG_FLAGS(ASM_UD2, 0); \ unreachable(); \ } while (0) #define __WARN_FLAGS(flags) \ do { \ + instrumentation_begin(); \ _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \ annotate_reachable(); \ + instrumentation_end(); \ } while (0) #include diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 384b5c835ced..c94e33ae3e7b 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -83,14 +83,19 @@ extern __printf(4, 5) void warn_slowpath_fmt(const char *file, const int line, unsigned taint, const char *fmt, ...); #define __WARN() __WARN_printf(TAINT_WARN, NULL) -#define __WARN_printf(taint, arg...) \ - warn_slowpath_fmt(__FILE__, __LINE__, taint, arg) +#define __WARN_printf(taint, arg...) do { \ + instrumentation_begin(); \ + warn_slowpath_fmt(__FILE__, __LINE__, taint, arg); \ + instrumentation_end(); \ + } while (0) #else extern __printf(1, 2) void __warn_printk(const char *fmt, ...); #define __WARN() __WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN)) #define __WARN_printf(taint, arg...) 
do { \ + instrumentation_begin(); \ __warn_printk(arg); \ __WARN_FLAGS(BUGFLAG_NO_CUT_HERE | BUGFLAG_TAINT(taint));\ + instrumentation_end(); \ } while (0) #define WARN_ON_ONCE(condition) ({ \ int __ret_warn_on = !!(condition); \ diff --git a/kernel/panic.c b/kernel/panic.c index 85568bbfb12b..e2157ca387c8 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -680,10 +680,12 @@ device_initcall(register_warn_debugfs); * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value */ -__visible void __stack_chk_fail(void) +__visible noinstr void __stack_chk_fail(void) { + instrumentation_begin(); panic("stack-protector: Kernel stack is corrupted in: %pB", __builtin_return_address(0)); + instrumentation_end(); } EXPORT_SYMBOL(__stack_chk_fail); -- cgit v1.2.3 From df65bba1dcd8ffadd922a71196b78c6d7630c33b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Feb 2020 18:25:09 +0100 Subject: lib/bsearch: Provide __always_inline variant For code that needs the ultimate performance (it can inline the @cmp function too) or simply needs to avoid calling external functions for whatever reason, provide an __always_inline variant of bsearch(). [ tglx: Renamed to __inline_bsearch() as suggested by Andy ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200505135313.624443814@linutronix.de --- include/linux/bsearch.h | 26 ++++++++++++++++++++++++-- lib/bsearch.c | 22 ++-------------------- 2 files changed, 26 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h index 8ed53d7524ea..e66b711d091e 100644 --- a/include/linux/bsearch.h +++ b/include/linux/bsearch.h @@ -4,7 +4,29 @@ #include -void *bsearch(const void *key, const void *base, size_t num, size_t size, - cmp_func_t cmp); +static __always_inline +void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp) +{ + const char *pivot; + int result; + + while (num > 0) { + pivot = base + (num >> 1) * size; + result = cmp(key, pivot); + + if (result == 0) + return (void *)pivot; + + if (result > 0) { + base = pivot + size; + num--; + } + num >>= 1; + } + + return NULL; +} + +extern void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp); #endif /* _LINUX_BSEARCH_H */ diff --git a/lib/bsearch.c b/lib/bsearch.c index 8b3aae5ae77a..bf86aa66f2b2 100644 --- a/lib/bsearch.c +++ b/lib/bsearch.c @@ -28,27 +28,9 @@ * the key and elements in the array are of the same type, you can use * the same comparison function for both sort() and bsearch(). */ -void *bsearch(const void *key, const void *base, size_t num, size_t size, - cmp_func_t cmp) +void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp) { - const char *pivot; - int result; - - while (num > 0) { - pivot = base + (num >> 1) * size; - result = cmp(key, pivot); - - if (result == 0) - return (void *)pivot; - - if (result > 0) { - base = pivot + size; - num--; - } - num >>= 1; - } - - return NULL; + return __inline_bsearch(key, base, num, size, cmp); } EXPORT_SYMBOL(bsearch); NOKPROBE_SYMBOL(bsearch); -- cgit v1.2.3 From 2ab70319bc1f79228da4dce7b9d604740c9beeef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:14 +0200 Subject: nmi, tracing: Make hardware latency tracing noinstr safe The hardware latency tracer calls into instrumentable functions. 
Move the calls into the RCU watching sections and annotate them. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Cc: Paul E. McKenney Link: https://lore.kernel.org/r/20200521202116.904176298@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index e07cf853aa16..29b862aba740 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -87,20 +87,24 @@ extern void rcu_nmi_exit(void); arch_nmi_enter(); \ printk_nmi_enter(); \ lockdep_off(); \ - ftrace_nmi_enter(); \ BUG_ON(in_nmi() == NMI_MASK); \ __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ lockdep_hardirq_enter(); \ + instrumentation_begin(); \ + ftrace_nmi_enter(); \ + instrumentation_end(); \ } while (0) #define nmi_exit() \ do { \ + instrumentation_begin(); \ + ftrace_nmi_exit(); \ + instrumentation_end(); \ lockdep_hardirq_exit(); \ rcu_nmi_exit(); \ BUG_ON(!in_nmi()); \ __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ - ftrace_nmi_exit(); \ lockdep_on(); \ printk_nmi_exit(); \ arch_nmi_exit(); \ -- cgit v1.2.3 From 8a6bc4787f05d087fda8e11ead225c8830250703 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:21 +0200 Subject: genirq: Provide irq_enter/exit_rcu() irq_enter()/exit() currently include RCU handling. To properly separate the RCU handling code, provide variants which contain only the non-RCU related functionality. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202117.567023613@linutronix.de --- include/linux/hardirq.h | 13 +++++++++++-- kernel/softirq.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 29b862aba740..3dc9102d16cf 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -40,7 +40,11 @@ static __always_inline void rcu_irq_enter_check_tick(void) /* * Enter irq context (on NO_HZ, update jiffies): */ -extern void irq_enter(void); +void irq_enter(void); +/* + * Like irq_enter(), but RCU is already watching. + */ +void irq_enter_rcu(void); /* * Exit irq context without processing softirqs: @@ -55,7 +59,12 @@ extern void irq_enter(void); /* * Exit irq context and process softirqs if needed: */ -extern void irq_exit(void); +void irq_exit(void); + +/* + * Like irq_exit(), but return with RCU watching. + */ +void irq_exit_rcu(void); #ifndef arch_nmi_enter #define arch_nmi_enter() do { } while (0) diff --git a/kernel/softirq.c b/kernel/softirq.c index a47c6dd57452..beb8e3a66c7c 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -339,12 +339,11 @@ asmlinkage __visible void do_softirq(void) local_irq_restore(flags); } -/* - * Enter an interrupt context. 
+/** + * irq_enter_rcu - Enter an interrupt context with RCU watching */ -void irq_enter(void) +void irq_enter_rcu(void) { - rcu_irq_enter(); if (is_idle_task(current) && !in_interrupt()) { /* * Prevent raise_softirq from needlessly waking up ksoftirqd @@ -354,10 +353,18 @@ void irq_enter(void) tick_irq_enter(); _local_bh_enable(); } - __irq_enter(); } +/** + * irq_enter - Enter an interrupt context including RCU update + */ +void irq_enter(void) +{ + rcu_irq_enter(); + irq_enter_rcu(); +} + static inline void invoke_softirq(void) { if (ksoftirqd_running(local_softirq_pending())) @@ -397,10 +404,12 @@ static inline void tick_irq_exit(void) #endif } -/* - * Exit an interrupt context. Process softirqs if needed and possible: +/** + * irq_exit_rcu() - Exit an interrupt context without updating RCU + * + * Also processes softirqs if needed and possible. */ -void irq_exit(void) +void irq_exit_rcu(void) { #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED local_irq_disable(); @@ -413,6 +422,16 @@ void irq_exit(void) invoke_softirq(); tick_irq_exit(); +} + +/** + * irq_exit - Exit an interrupt context, update RCU and lockdep + * + * Also processes softirqs if needed and possible. + */ +void irq_exit(void) +{ + irq_exit_rcu(); rcu_irq_exit(); /* must be last! */ lockdep_hardirq_exit(); -- cgit v1.2.3 From 98a3bf195e1a14755da3d2b83e1dbb4a3158866d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:22 +0200 Subject: genirq: Provide __irq_enter/exit_raw() Like __irq_enter/exit() but without time accounting. To be used for "empty" system vectors like the scheduler IPI to avoid the overhead. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202117.671682341@linutronix.de --- include/linux/hardirq.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 3dc9102d16cf..03c9fece7d43 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -37,6 +37,17 @@ static __always_inline void rcu_irq_enter_check_tick(void) lockdep_hardirq_enter(); \ } while (0) +/* + * Like __irq_enter() without time accounting for fast + * interrupts, e.g. reschedule IPI where time accounting + * is more expensive than the actual interrupt. + */ +#define __irq_enter_raw() \ + do { \ + preempt_count_add(HARDIRQ_OFFSET); \ + lockdep_hardirq_enter(); \ + } while (0) + /* * Enter irq context (on NO_HZ, update jiffies): */ @@ -56,6 +67,15 @@ void irq_enter_rcu(void); preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) +/* + * Like __irq_exit() without time accounting + */ +#define __irq_exit_raw() \ + do { \ + lockdep_hardirq_exit(); \ + preempt_count_sub(HARDIRQ_OFFSET); \ + } while (0) + /* * Exit irq context and process softirqs if needed: */ -- cgit v1.2.3 From 2f6474e4636bcc68af6c44abb2703f12d7f083da Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:26 +0200 Subject: x86/entry: Switch XEN/PV hypercall entry to IDTENTRY Convert the XEN/PV hypercall to IDTENTRY: - Emit the ASM stub with DECLARE_IDTENTRY - Remove the ASM idtentry in 64-bit - Remove the open coded ASM entry code in 32-bit - Remove the old prototypes The handler stubs need to stay in ASM code as they need corner case handling and adjustment of the stack pointer. Provide a new C function which invokes the entry/exit handling and calls into the XEN handler on the interrupt stack if required. 
The exit code is slightly different from the regular idtentry_exit() on non-preemptible kernels. If the hypercall is preemptible and need_resched() is set then XEN provides a preempt hypercall scheduling function. Move this functionality into the entry code so it can use the existing idtentry functionality. [ mingo: Build fixes. ] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Acked-by: Juergen Gross Tested-by: Juergen Gross Link: https://lore.kernel.org/r/20200521202118.055270078@linutronix.de --- arch/x86/entry/common.c | 78 +++++++++++++++++++++++++++++++++++++++++ arch/x86/entry/entry_32.S | 20 ++++++----- arch/x86/entry/entry_64.S | 20 ++++------- arch/x86/include/asm/idtentry.h | 34 ++++++++++++++++++ arch/x86/xen/setup.c | 4 ++- arch/x86/xen/smp_pv.c | 3 +- arch/x86/xen/xen-asm_32.S | 12 ++++--- arch/x86/xen/xen-asm_64.S | 2 +- arch/x86/xen/xen-ops.h | 1 - drivers/xen/Makefile | 2 +- drivers/xen/preempt.c | 42 ---------------------- include/xen/xen-ops.h | 19 ++++------ 12 files changed, 151 insertions(+), 86 deletions(-) delete mode 100644 drivers/xen/preempt.c (limited to 'include') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 066215a243b4..a0f8c3cb130a 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -27,6 +27,11 @@ #include #include +#ifdef CONFIG_XEN_PV +#include +#include +#endif + #include #include #include @@ -35,6 +40,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -680,3 +686,75 @@ void noinstr idtentry_exit_user(struct pt_regs *regs) prepare_exit_to_usermode(regs); } + +#ifdef CONFIG_XEN_PV +#ifndef CONFIG_PREEMPTION +/* + * Some hypercalls issued by the toolstack can take many 10s of + * seconds. Allow tasks running hypercalls via the privcmd driver to + * be voluntarily preempted even if full kernel preemption is + * disabled. + * + * Such preemptible hypercalls are bracketed by + * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end() + * calls. + */ +DEFINE_PER_CPU(bool, xen_in_preemptible_hcall); +EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); + +/* + * In case of scheduling the flag must be cleared and restored after + * returning from schedule as the task might move to a different CPU. 
+ */ +static __always_inline bool get_and_clear_inhcall(void) +{ + bool inhcall = __this_cpu_read(xen_in_preemptible_hcall); + + __this_cpu_write(xen_in_preemptible_hcall, false); + return inhcall; +} + +static __always_inline void restore_inhcall(bool inhcall) +{ + __this_cpu_write(xen_in_preemptible_hcall, inhcall); +} +#else +static __always_inline bool get_and_clear_inhcall(void) { return false; } +static __always_inline void restore_inhcall(bool inhcall) { } +#endif + +static void __xen_pv_evtchn_do_upcall(void) +{ + irq_enter_rcu(); + inc_irq_stat(irq_hv_callback_count); + + xen_hvm_evtchn_do_upcall(); + + irq_exit_rcu(); +} + +__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + bool inhcall, rcu_exit; + + rcu_exit = idtentry_enter_cond_rcu(regs); + old_regs = set_irq_regs(regs); + + instrumentation_begin(); + run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, NULL, regs); + instrumentation_begin(); + + set_irq_regs(old_regs); + + inhcall = get_and_clear_inhcall(); + if (inhcall && !WARN_ON_ONCE(rcu_exit)) { + instrumentation_begin(); + idtentry_exit_cond_resched(regs, true); + instrumentation_end(); + restore_inhcall(inhcall); + } else { + idtentry_exit_cond_rcu(regs, rcu_exit); + } +} +#endif /* CONFIG_XEN_PV */ diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 28d13f07b84d..3ab04dca9aab 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1298,7 +1298,13 @@ SYM_CODE_END(native_iret) #endif #ifdef CONFIG_XEN_PV -SYM_FUNC_START(xen_hypervisor_callback) +/* + * See comment in entry_64.S for further explanation + * + * Note: This is not an actual IDT entry point. It's a XEN specific entry + * point and therefore named to match the 64-bit trampoline counterpart. + */ +SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback) /* * Check to see if we got the event in the critical * region in xen_iret_direct, after we've reenabled @@ -1315,14 +1321,11 @@ SYM_FUNC_START(xen_hypervisor_callback) pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL ENCODE_FRAME_POINTER - TRACE_IRQS_OFF + mov %esp, %eax - call xen_evtchn_do_upcall -#ifndef CONFIG_PREEMPTION - call xen_maybe_preempt_hcall -#endif - jmp ret_from_intr -SYM_FUNC_END(xen_hypervisor_callback) + call xen_pv_evtchn_do_upcall + jmp handle_exception_return +SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. @@ -1464,6 +1467,7 @@ SYM_CODE_START_LOCAL_NOALIGN(handle_exception) movl %esp, %eax # pt_regs pointer CALL_NOSPEC edi +handle_exception_return: #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS movb PT_CS(%esp), %al diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 6b518be4da0a..dadb37d07266 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1067,10 +1067,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt idtentry X86_TRAP_PF page_fault do_page_fault has_error_code=1 -#ifdef CONFIG_XEN_PV -idtentry 512 /* dummy */ hypervisor_callback xen_do_hypervisor_callback has_error_code=0 -#endif - /* * Reload gs selector with exception handling * edi: new selector @@ -1158,9 +1154,10 @@ SYM_FUNC_END(asm_call_on_stack) * So, on entry to the handler we detect whether we interrupted an * existing activation in its critical region -- if so, we pop the current * activation and restart the handler using the previous one. 
+ * + * C calling convention: exc_xen_hypervisor_callback(struct *pt_regs) */ -/* do_hypervisor_callback(struct *pt_regs) */ -SYM_CODE_START_LOCAL(xen_do_hypervisor_callback) +SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback) /* * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will @@ -1170,15 +1167,10 @@ SYM_CODE_START_LOCAL(xen_do_hypervisor_callback) movq %rdi, %rsp /* we don't return, adjust the stack frame */ UNWIND_HINT_REGS - ENTER_IRQ_STACK old_rsp=%r10 - call xen_evtchn_do_upcall - LEAVE_IRQ_STACK + call xen_pv_evtchn_do_upcall -#ifndef CONFIG_PREEMPTION - call xen_maybe_preempt_hcall -#endif - jmp error_exit -SYM_CODE_END(xen_do_hypervisor_callback) + jmp error_return +SYM_CODE_END(exc_xen_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index b05688973b92..b2a5fe02dcf0 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -165,6 +165,21 @@ __visible noinstr void func(struct pt_regs *regs) #define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \ __visible noinstr void func(struct pt_regs *regs, unsigned long error_code) +/** + * DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). Distinct entry point to handle the 32/64-bit + * difference + */ +#define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) #ifdef CONFIG_X86_64 /** @@ -307,6 +322,9 @@ __visible noinstr void func(struct pt_regs *regs, \ # define DECLARE_IDTENTRY_DF(vector, func) \ idtentry_df vector asm_##func func +# define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) + #else # define DECLARE_IDTENTRY_MCE(vector, func) \ DECLARE_IDTENTRY(vector, func) @@ -317,6 +335,9 @@ __visible noinstr void func(struct pt_regs *regs, \ /* No ASM emitted for DF as this goes through a C shim */ # define DECLARE_IDTENTRY_DF(vector, func) +/* No ASM emitted for XEN hypervisor callback */ +# define DECLARE_IDTENTRY_XENCB(vector, func) + #endif /* No ASM code emitted for NMI */ @@ -337,6 +358,13 @@ __visible noinstr void func(struct pt_regs *regs, \ * This avoids duplicate defines and ensures that everything is consistent. */ +/* + * Dummy trap number so the low level ASM macro vector number checks do not + * match which results in emitting plain IDTENTRY stubs without bells and + * whistels. + */ +#define X86_TRAP_OTHER 0xFFFF + /* Simple exception entry points. 
No hardware error code */ DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); @@ -376,4 +404,10 @@ DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug); /* #DF */ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback); +#endif + +#undef X86_TRAP_OTHER + #endif diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1a2d8a50dac4..3566e37241d7 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -993,7 +994,8 @@ static void __init xen_pvmmu_arch_setup(void) HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); - if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || + if (register_callback(CALLBACKTYPE_event, + xen_asm_exc_xen_hypervisor_callback) || register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) BUG(); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 8fa01c545460..171aff1b11f2 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -348,7 +349,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif ctxt->event_callback_eip = - (unsigned long)xen_hypervisor_callback; + (unsigned long)xen_asm_exc_xen_hypervisor_callback; ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 812ff01e4e34..4757cec33abe 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -93,7 +93,7 @@ xen_iret_start_crit: /* * If there's something pending, mask events again so we can - * jump back into xen_hypervisor_callback. Otherwise do not + * jump back into exc_xen_hypervisor_callback. Otherwise do not * touch XEN_vcpu_info_mask. */ jne 1f @@ -113,7 +113,7 @@ iret_restore_end: * Events are masked, so jumping out of the critical region is * OK. */ - je xen_hypervisor_callback + je xen_asm_exc_xen_hypervisor_callback 1: iret xen_iret_end_crit: @@ -127,7 +127,7 @@ SYM_CODE_END(xen_iret) .globl xen_iret_start_crit, xen_iret_end_crit /* - * This is called by xen_hypervisor_callback in entry_32.S when it sees + * This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees * that the EIP at the time of interrupt was between * xen_iret_start_crit and xen_iret_end_crit. * @@ -144,7 +144,7 @@ SYM_CODE_END(xen_iret) * eflags } * cs } nested exception info * eip } - * return address : (into xen_hypervisor_callback) + * return address : (into xen_asm_exc_xen_hypervisor_callback) * * In order to deliver the nested exception properly, we need to discard the * nested exception frame such that when we handle the exception, we do it @@ -152,7 +152,8 @@ SYM_CODE_END(xen_iret) * * The only caveat is that if the outer eax hasn't been restored yet (i.e. * it's still on stack), we need to restore its value here. - */ +*/ +.pushsection .noinstr.text, "ax" SYM_CODE_START(xen_iret_crit_fixup) /* * Paranoia: Make sure we're really coming from kernel space. 
@@ -181,3 +182,4 @@ SYM_CODE_START(xen_iret_crit_fixup) 2: ret SYM_CODE_END(xen_iret_crit_fixup) +.popsection diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index e46d863bcaa4..19fbbdbcbde9 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -54,7 +54,7 @@ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION xen_pv_trap entry_INT80_compat #endif -xen_pv_trap hypervisor_callback +xen_pv_trap asm_exc_xen_hypervisor_callback __INIT SYM_CODE_START(xen_early_idt_handler_array) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 1cc1568bfe04..ad05d0589381 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -8,7 +8,6 @@ #include /* These are code, but not functions. Defined in entry.S */ -extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; void xen_sysenter_target(void); diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 0c4efa6fe450..0d322f3d90cd 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o +obj-y += grant-table.o features.o balloon.o manage.o time.o obj-y += mem-reservation.o obj-y += events/ obj-y += xenbus/ diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c deleted file mode 100644 index 17240c5325a3..000000000000 --- a/drivers/xen/preempt.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Preemptible hypercalls - * - * Copyright (C) 2014 Citrix Systems R&D ltd. - */ - -#include -#include - -#ifndef CONFIG_PREEMPTION - -/* - * Some hypercalls issued by the toolstack can take many 10s of - * seconds. Allow tasks running hypercalls via the privcmd driver to - * be voluntarily preempted even if full kernel preemption is - * disabled. - * - * Such preemptible hypercalls are bracketed by - * xen_preemptible_hcall_begin() and xen_preemptible_hcall_end() - * calls. - */ - -DEFINE_PER_CPU(bool, xen_in_preemptible_hcall); -EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); - -asmlinkage __visible void xen_maybe_preempt_hcall(void) -{ - if (unlikely(__this_cpu_read(xen_in_preemptible_hcall) - && need_resched())) { - /* - * Clear flag as we may be rescheduled on a different - * cpu. 
- */ - __this_cpu_write(xen_in_preemptible_hcall, false); - local_irq_enable(); - cond_resched(); - local_irq_disable(); - __this_cpu_write(xen_in_preemptible_hcall, true); - } -} -#endif /* CONFIG_PREEMPTION */ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 095be1d66f31..39a5580f8feb 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -215,17 +215,7 @@ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); void xen_efi_runtime_setup(void); -#ifdef CONFIG_PREEMPTION - -static inline void xen_preemptible_hcall_begin(void) -{ -} - -static inline void xen_preemptible_hcall_end(void) -{ -} - -#else +#if defined(CONFIG_XEN_PV) && !defined(CONFIG_PREEMPTION) DECLARE_PER_CPU(bool, xen_in_preemptible_hcall); @@ -239,6 +229,11 @@ static inline void xen_preemptible_hcall_end(void) __this_cpu_write(xen_in_preemptible_hcall, false); } -#endif /* CONFIG_PREEMPTION */ +#else + +static inline void xen_preemptible_hcall_begin(void) { } +static inline void xen_preemptible_hcall_end(void) { } + +#endif /* CONFIG_XEN_PV && !CONFIG_PREEMPTION */ #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From cb09ea2924cbf1a42da59bd30a59cc1836240bcb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 May 2020 22:05:44 +0200 Subject: x86/entry: Convert XEN hypercall vector to IDTENTRY_SYSVEC Convert the last oldstyle defined vector to IDTENTRY_SYSVEC: - Implement the C entry point with DEFINE_IDTENTRY_SYSVEC - Emit the ASM stub with DECLARE_IDTENTRY_SYSVEC - Remove the ASM idtentries in 64-bit - Remove the BUILD_INTERRUPT entries in 32-bit - Remove the old prototypes Fixup the related XEN code by providing the primary C entry point in x86 to avoid cluttering the generic code with X86'isms. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20200521202119.741950104@linutronix.de --- arch/x86/entry/entry_32.S | 5 ----- arch/x86/entry/entry_64.S | 5 ----- arch/x86/include/asm/idtentry.h | 4 ++++ arch/x86/xen/enlighten_hvm.c | 12 ++++++++++++ drivers/xen/events/events_base.c | 6 ++---- include/xen/events.h | 7 ------- 6 files changed, 18 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bb9ebd2df05e..f8e8aeb10ba4 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1340,11 +1340,6 @@ SYM_FUNC_START(xen_failsafe_callback) SYM_FUNC_END(xen_failsafe_callback) #endif /* CONFIG_XEN_PV */ -#ifdef CONFIG_XEN_PVHVM -BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, - xen_evtchn_do_upcall) -#endif - SYM_CODE_START_LOCAL_NOALIGN(handle_exception) /* the function address is in %gs's slot on the stack */ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b97fcda28019..fd7efb8deded 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1111,11 +1111,6 @@ SYM_CODE_START(xen_failsafe_callback) SYM_CODE_END(xen_failsafe_callback) #endif /* CONFIG_XEN_PV */ -#ifdef CONFIG_XEN_PVHVM -apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ - xen_hvm_callback_vector xen_evtchn_do_upcall -#endif - /* * Save all registers in pt_regs, and switch gs if needed. * Use slow, but surefire "are we in kernel?" check. 
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 1b6d3ea1fc96..71cf82bf24ba 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -644,6 +644,10 @@ DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_STIMER0_VECTOR, sysvec_hyperv_stimer0); DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); #endif +#ifdef CONFIG_XEN_PVHVM +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); +#endif + #undef X86_TRAP_OTHER #endif diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index e138f7de52d2..3e89b0067ff0 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,17 @@ static void __init init_hvm_pv_info(void) this_cpu_write(xen_vcpu_id, smp_processor_id()); } +DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_callback) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + inc_irq_stat(irq_hv_callback_count); + + xen_hvm_evtchn_do_upcall(); + + set_irq_regs(old_regs); +} + #ifdef CONFIG_KEXEC_CORE static void xen_hvm_shutdown(void) { diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index eb35c3cda9a6..140c7bf33a98 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -37,6 +37,7 @@ #ifdef CONFIG_X86 #include #include +#include #include #include #include @@ -1236,9 +1237,6 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); -#ifdef CONFIG_X86 - inc_irq_stat(irq_hv_callback_count); -#endif __xen_evtchn_do_upcall(); @@ -1658,7 +1656,7 @@ static __init void xen_alloc_callback_vector(void) return; pr_info("Xen HVM callback vector for event delivery is enabled\n"); - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, xen_hvm_callback_vector); + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback); } #else void xen_setup_callback_vector(void) {} diff --git a/include/xen/events.h b/include/xen/events.h index 12b0dcb6a120..df1e6391f63f 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -90,13 +90,6 @@ unsigned int irq_from_evtchn(evtchn_port_t evtchn); int irq_from_virq(unsigned int cpu, unsigned int virq); evtchn_port_t evtchn_from_irq(unsigned irq); -#ifdef CONFIG_XEN_PVHVM -/* Xen HVM evtchn vector callback */ -void xen_hvm_callback_vector(void); -#ifdef CONFIG_TRACING -#define trace_xen_hvm_callback_vector xen_hvm_callback_vector -#endif -#endif int xen_set_callback_via(uint64_t via); void xen_evtchn_do_upcall(struct pt_regs *regs); void xen_hvm_evtchn_do_upcall(void); -- cgit v1.2.3 From 998c2034c6a36bd48284e8c8f945a6c6ffc0e3f0 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 20 May 2020 18:16:00 +0200 Subject: xen: Move xen_setup_callback_vector() definition to include/xen/hvm.h Kbuild test robot reports the following problem on ARM: for 'xen_setup_callback_vector' [-Wmissing-prototypes] 1664 | void xen_setup_callback_vector(void) {} | ^~~~~~~~~~~~~~~~~~~~~~~~~ The problem is that xen_setup_callback_vector is a x86 only thing, its definition is present in arch/x86/xen/xen-ops.h but not on ARM. In events_base.c there is a stub for !CONFIG_XEN_PVHVM but it is not declared as 'static'. On x86 the situation is hardly better: drivers/xen/events/events_base.c doesn't include 'xen-ops.h' from arch/x86/xen/, it includes its namesake from include/xen/ which also results in a 'no previous prototype' warning. 
Currently, xen_setup_callback_vector() has two call sites: one in drivers/xen/events_base.c and another in arch/x86/xen/suspend_hvm.c. The former is placed under #ifdef CONFIG_X86 and the later is only compiled in when CONFIG_XEN_PVHVM. Resolve the issue by moving xen_setup_callback_vector() declaration to arch neutral 'include/xen/hvm.h' as the implementation lives in arch neutral drivers/xen/events/events_base.c. Reported-by: kbuild test robot Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Link: https://lkml.kernel.org/r/20200520161600.361895-1-vkuznets@redhat.com --- arch/x86/xen/suspend_hvm.c | 1 + arch/x86/xen/xen-ops.h | 1 - include/xen/hvm.h | 2 ++ include/xen/interface/hvm/hvm_op.h | 2 ++ 4 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c index 5152afe16876..9d548b0c772f 100644 --- a/arch/x86/xen/suspend_hvm.c +++ b/arch/x86/xen/suspend_hvm.c @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index ad05d0589381..53b224fd6177 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -54,7 +54,6 @@ void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); -void xen_setup_callback_vector(void); void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); diff --git a/include/xen/hvm.h b/include/xen/hvm.h index 0b15f8cb17fc..b7fd7fc9ad41 100644 --- a/include/xen/hvm.h +++ b/include/xen/hvm.h @@ -58,4 +58,6 @@ static inline int hvm_get_parameter(int idx, uint64_t *value) #define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\ HVM_CALLBACK_VIA_TYPE_SHIFT | (x)) +void xen_setup_callback_vector(void); + #endif /* XEN_HVM_H__ */ diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h index 956a04682865..25d945ef17de 100644 --- a/include/xen/interface/hvm/hvm_op.h +++ b/include/xen/interface/hvm/hvm_op.h @@ -21,6 +21,8 @@ #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ #define __XEN_PUBLIC_HVM_HVM_OP_H__ +#include + /* Get/set subcommands: the second argument of the hypercall is a * pointer to a xen_hvm_param struct. */ #define HVMOP_set_param 0 -- cgit v1.2.3 From bf2b3008440072068580c609d79a079656af0588 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2020 23:27:40 +0200 Subject: x86/entry: Rename trace_hardirqs_off_prepare() The typical pattern for trace_hardirqs_off_prepare() is: ENTRY lockdep_hardirqs_off(); // because hardware ... do entry magic instrumentation_begin(); trace_hardirqs_off_prepare(); ... do actual work trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(); instrumentation_end(); ... do exit magic lockdep_hardirqs_on(); which shows that it's named wrong, rename it to trace_hardirqs_off_finish(), as it concludes the hardirq_off transition. Also, given that the above is the only correct order, make the traditional all-in-one trace_hardirqs_off() follow suit. 
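Spelled out as a C sketch (the entry/exit "magic" steps are placeholder comments, not code from this patch, and the handler name is made up purely for illustration), the required ordering reads:

    static noinstr void example_entry(struct pt_regs *regs)
    {
            lockdep_hardirqs_off(CALLER_ADDR0);     /* hardware already disabled IRQs */

            /* ... do entry magic (RCU, context tracking) ... */

            instrumentation_begin();
            trace_hardirqs_off_finish();            /* concludes the hardirq_off transition */

            /* ... do the actual work ... */

            trace_hardirqs_on_prepare();
            lockdep_hardirqs_on_prepare(CALLER_ADDR0);
            instrumentation_end();

            /* ... do exit magic ... */

            lockdep_hardirqs_on(CALLER_ADDR0);
    }
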
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200529213321.415774872@infradead.org --- arch/x86/entry/common.c | 6 +++--- arch/x86/kernel/cpu/mce/core.c | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/traps.c | 4 ++-- include/linux/irqflags.h | 4 ++-- kernel/trace/trace_preemptirq.c | 10 +++++----- 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index b0b1c3cf0e6e..f4d57782c14b 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -65,7 +65,7 @@ static noinstr void enter_from_user_mode(void) instrumentation_begin(); CT_WARN_ON(state != CONTEXT_USER); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); instrumentation_end(); } #else @@ -73,7 +73,7 @@ static __always_inline void enter_from_user_mode(void) { lockdep_hardirqs_off(CALLER_ADDR0); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); instrumentation_end(); } #endif @@ -569,7 +569,7 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) lockdep_hardirqs_off(CALLER_ADDR0); rcu_irq_enter(); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); instrumentation_end(); return true; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index be499267bbb4..b9cb381b4019 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1922,7 +1922,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) * that out because it's an indirect call. Annotate it. */ instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); machine_check_vector(regs); if (regs->flags & X86_EFLAGS_IF) trace_hardirqs_on_prepare(); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 873a8c040b86..3a98ff36f411 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -330,7 +330,7 @@ static noinstr void default_do_nmi(struct pt_regs *regs) __this_cpu_write(last_nmi_rip, regs->ip); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); handled = nmi_handle(NMI_LOCAL, regs); __this_cpu_add(nmi_stats.normal, handled); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6f887be1ac0c..79af913e78a3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -634,7 +634,7 @@ DEFINE_IDTENTRY_RAW(exc_int3) } else { nmi_enter(); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); if (!do_int3(regs)) die("int3", regs, 0); if (regs->flags & X86_EFLAGS_IF) @@ -833,7 +833,7 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, { nmi_enter(); instrumentation_begin(); - trace_hardirqs_off_prepare(); + trace_hardirqs_off_finish(); instrumentation_end(); /* diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index d7f7e436c3af..6384d2813ded 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -32,7 +32,7 @@ #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_hardirqs_on_prepare(void); - extern void trace_hardirqs_off_prepare(void); + extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); # define lockdep_hardirq_context(p) ((p)->hardirq_context) @@ -101,7 +101,7 @@ do { \ #else # define trace_hardirqs_on_prepare() do { } while (0) -# define trace_hardirqs_off_prepare() do { } while (0) +# define trace_hardirqs_off_finish() 
do { } while (0) # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) # define lockdep_hardirq_context(p) 0 diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index fb0691b8a88d..f10073e62603 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -58,7 +58,7 @@ NOKPROBE_SYMBOL(trace_hardirqs_on); * and lockdep uses a staged approach which splits the lockdep hardirq * tracking into a RCU on and a RCU off section. */ -void trace_hardirqs_off_prepare(void) +void trace_hardirqs_off_finish(void) { if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); @@ -68,19 +68,19 @@ void trace_hardirqs_off_prepare(void) } } -EXPORT_SYMBOL(trace_hardirqs_off_prepare); -NOKPROBE_SYMBOL(trace_hardirqs_off_prepare); +EXPORT_SYMBOL(trace_hardirqs_off_finish); +NOKPROBE_SYMBOL(trace_hardirqs_off_finish); void trace_hardirqs_off(void) { + lockdep_hardirqs_off(CALLER_ADDR0); + if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); if (!in_nmi()) trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); } - - lockdep_hardirqs_off(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_off); NOKPROBE_SYMBOL(trace_hardirqs_off); -- cgit v1.2.3 From 6eebad1ad303db360ebe3e51c2b9656c3d407157 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 13:40:21 +0200 Subject: lockdep: __always_inline more for noinstr vmlinux.o: warning: objtool: debug_locks_off()+0xd: call to __debug_locks_off() leaves .noinstr.text section vmlinux.o: warning: objtool: match_held_lock()+0x6a: call to look_up_lock_class.isra.0() leaves .noinstr.text section vmlinux.o: warning: objtool: lock_is_held_type()+0x90: call to lockdep_recursion_finish() leaves .noinstr.text section Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200603114052.185201076@infradead.org --- include/linux/debug_locks.h | 2 +- kernel/locking/lockdep.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 257ab3c92cb8..e7e45f0cc7da 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -12,7 +12,7 @@ extern int debug_locks __read_mostly; extern int debug_locks_silent __read_mostly; -static inline int __debug_locks_off(void) +static __always_inline int __debug_locks_off(void) { return xchg(&debug_locks, 0); } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 38cce34d03dc..29a8de4c50b9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -393,7 +393,7 @@ void lockdep_init_task(struct task_struct *task) task->lockdep_recursion = 0; } -static inline void lockdep_recursion_finish(void) +static __always_inline void lockdep_recursion_finish(void) { if (WARN_ON_ONCE(--current->lockdep_recursion)) current->lockdep_recursion = 0; @@ -801,7 +801,7 @@ static int count_matching_names(struct lock_class *new_class) } /* used from NMI context -- must be lockless */ -static inline struct lock_class * +static __always_inline struct lock_class * look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) { struct lockdep_subclass_key *key; -- cgit v1.2.3 From f0178fc01fe46bab6a95415f5647d1a74efcad1b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Jun 2020 08:37:01 +0200 Subject: x86/entry: Unbreak __irqentry_text_start/end magic The entry rework moved interrupt entry 
code from the irqentry to the noinstr section which made the irqentry section empty. This breaks boundary checks which rely on the __irqentry_text_start/end markers to find out whether a function in a stack trace is interrupt/exception entry code. This affects the function graph tracer and filter_irq_stacks(). As the IDT entry points are all sequentialy emitted this is rather simple to unbreak by injecting __irqentry_text_start/end as global labels. To make this work correctly: - Remove the IRQENTRY_TEXT section from the x86 linker script - Define __irqentry so it breaks the build if it's used - Adjust the entry mirroring in PTI - Remove the redundant kprobes and unwinder bound checks Reported-by: Qian Cai Signed-off-by: Thomas Gleixner --- arch/x86/entry/entry_32.S | 11 ++++++++++- arch/x86/entry/entry_64.S | 11 ++++++++++- arch/x86/include/asm/irq.h | 7 +++++++ arch/x86/kernel/kprobes/core.c | 7 ------- arch/x86/kernel/kprobes/opt.c | 4 +--- arch/x86/kernel/unwind_frame.c | 8 +------- arch/x86/kernel/vmlinux.lds.S | 1 - arch/x86/mm/pti.c | 4 ++-- include/linux/interrupt.h | 8 +++++--- 9 files changed, 36 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2d29f77a3601..024d7d276cd4 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -743,10 +743,19 @@ SYM_CODE_END(asm_\cfunc) /* * Include the defines which emit the idt entries which are shared - * shared between 32 and 64 bit. + * shared between 32 and 64 bit and emit the __irqentry_text_* markers + * so the stacktrace boundary checks work. */ + .align 16 + .globl __irqentry_text_start +__irqentry_text_start: + #include + .align 16 + .globl __irqentry_text_end +__irqentry_text_end: + /* * %eax: prev task * %edx: next task diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8ecaeee53653..d2a00c97e53f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -478,10 +478,19 @@ SYM_CODE_END(\asmsym) /* * Include the defines which emit the idt entries which are shared - * shared between 32 and 64 bit. + * shared between 32 and 64 bit and emit the __irqentry_text_* markers + * so the stacktrace boundary checks work. */ + .align 16 + .globl __irqentry_text_start +__irqentry_text_start: + #include + .align 16 + .globl __irqentry_text_end +__irqentry_text_end: + SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) #ifdef CONFIG_DEBUG_ENTRY diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index f73dd3f8b043..528c8a71fe7f 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -11,6 +11,13 @@ #include #include +/* + * The irq entry code is in the noinstr section and the start/end of + * __irqentry_text is emitted via labels. Make the build fail if + * something moves a C function into the __irq_entry section. + */ +#define __irq_entry __invalid_section + static inline int irq_canonicalize(int irq) { return ((irq == 2) ? 
9 : irq); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 85de8fa69b24..3bafe1bd4dc7 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1073,13 +1073,6 @@ NOKPROBE_SYMBOL(kprobe_fault_handler); int __init arch_populate_kprobe_blacklist(void) { - int ret; - - ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, - (unsigned long)__irqentry_text_end); - if (ret) - return ret; - return kprobe_add_area_blacklist((unsigned long)__entry_text_start, (unsigned long)__entry_text_end); } diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 234f58e0fe8c..321c19950285 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -286,9 +286,7 @@ static int can_optimize(unsigned long paddr) * stack handling and registers setup. */ if (((paddr >= (unsigned long)__entry_text_start) && - (paddr < (unsigned long)__entry_text_end)) || - ((paddr >= (unsigned long)__irqentry_text_start) && - (paddr < (unsigned long)__irqentry_text_end))) + (paddr < (unsigned long)__entry_text_end))) return 0; /* Check there is enough space for a relative jump. */ diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 54226110bc7f..722a85f3b2dd 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -74,13 +74,7 @@ static bool in_entry_code(unsigned long ip) { char *addr = (char *)ip; - if (addr >= __entry_text_start && addr < __entry_text_end) - return true; - - if (addr >= __irqentry_text_start && addr < __irqentry_text_end) - return true; - - return false; + return addr >= __entry_text_start && addr < __entry_text_end; } static inline unsigned long *last_frame(struct unwind_state *state) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1bf7e312361f..b4c6b6f35548 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -134,7 +134,6 @@ SECTIONS KPROBES_TEXT ALIGN_ENTRY_TEXT_BEGIN ENTRY_TEXT - IRQENTRY_TEXT ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT *(.fixup) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index a3c6757a65c7..a8a924b3c335 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -492,12 +492,12 @@ static void __init pti_setup_espfix64(void) } /* - * Clone the populated PMDs of the entry and irqentry text and force it RO. + * Clone the populated PMDs of the entry text and force it RO. */ static void pti_clone_entry_text(void) { pti_clone_pgtable((unsigned long) __entry_text_start, - (unsigned long) __irqentry_text_end, + (unsigned long) __entry_text_end, PTI_CLONE_PMD); } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 80f637c3a6f3..5db970b6615a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -760,8 +760,10 @@ extern int arch_early_irq_init(void); /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ -#define __irq_entry __attribute__((__section__(".irqentry.text"))) -#define __softirq_entry \ - __attribute__((__section__(".softirqentry.text"))) +#ifndef __irq_entry +# define __irq_entry __attribute__((__section__(".irqentry.text"))) +#endif + +#define __softirq_entry __attribute__((__section__(".softirqentry.text"))) #endif -- cgit v1.2.3
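
For reference, the consumer side that the last patch keeps working does a comparison of roughly the following shape (this is a sketch, not code from the diffs above; filter_irq_stacks() and the function graph tracer rely on such a check, and the helper name here is illustrative):

    extern char __irqentry_text_start[], __irqentry_text_end[];

    /* True if @addr lies inside the interrupt entry text bounded by the labels above. */
    static bool addr_in_irqentry_text(unsigned long addr)
    {
            return addr >= (unsigned long)__irqentry_text_start &&
                   addr <  (unsigned long)__irqentry_text_end;
    }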