diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-04 09:52:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-04 09:52:57 -0700 |
commit | b0c79f49c343cda8954b3322984c32f258ca4ccb (patch) | |
tree | dd823d13683b7e6b0caebcaf3964df6150aee294 /arch/x86/entry/entry_64.S | |
parent | f213a6c84c1b4b396a0713ee33cff0e02ba8235f (diff) | |
parent | dd88a0a0c8615417fe6b4285769b5b772de87279 (diff) | |
download | linux-b0c79f49c343cda8954b3322984c32f258ca4ccb.tar.bz2 |
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm updates from Ingo Molnar:
- Introduce the ORC unwinder, which can be enabled via
CONFIG_ORC_UNWINDER=y.
The ORC unwinder is a lightweight, Linux kernel specific debuginfo
implementation, which aims to be DWARF done right for unwinding.
Objtool is used to generate the ORC unwinder tables during build, so
the data format is flexible and kernel internal: there's no
dependency on debuginfo created by an external toolchain.
The ORC unwinder is almost two orders of magnitude faster than the
(out of tree) DWARF unwinder - which is important for perf call graph
profiling. It is also significantly simpler and is coded defensively:
there has not been a single ORC related kernel crash so far, even
with early versions. (knock on wood!)
But the main advantage is that enabling the ORC unwinder allows
CONFIG_FRAME_POINTERS to be turned off - which speeds up the kernel
measurably:
With frame pointers disabled, GCC does not have to add frame pointer
instrumentation code to every function in the kernel. The kernel's
.text size decreases by about 3.2%, resulting in better cache
utilization and fewer instructions executed, resulting in a broad
kernel-wide speedup. Average speedup of system calls should be
roughly in the 1-3% range - measurements by Mel Gorman [1] have shown
a speedup of 5-10% for some function execution intense workloads.
The main cost of the unwinder is that the unwinder data has to be
stored in RAM: the memory cost is 2-4MB of RAM, depending on kernel
config - which is a modest cost on modern x86 systems.
Given how young the ORC unwinder code is it's not enabled by default
- but given the performance advantages the plan is to eventually make
it the default unwinder on x86.
See Documentation/x86/orc-unwinder.txt for more details.
- Remove lguest support: its intended role was that of a temporary
proof of concept for virtualization, plus its removal will enable the
reduction (removal) of the paravirt API as well, so Rusty agreed to
its removal. (Juergen Gross)
- Clean up and fix FSGS related functionality (Andy Lutomirski)
- Clean up IO access APIs (Andy Shevchenko)
- Enhance the symbol namespace (Jiri Slaby)
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits)
objtool: Handle GCC stack pointer adjustment bug
x86/entry/64: Use ENTRY() instead of ALIGN+GLOBAL for stub32_clone()
x86/fpu/math-emu: Add ENDPROC to functions
x86/boot/64: Extract efi_pe_entry() from startup_64()
x86/boot/32: Extract efi_pe_entry() from startup_32()
x86/lguest: Remove lguest support
x86/paravirt/xen: Remove xen_patch()
objtool: Fix objtool fallthrough detection with function padding
x86/xen/64: Fix the reported SS and CS in SYSCALL
objtool: Track DRAP separately from callee-saved registers
objtool: Fix validate_branch() return codes
x86: Clarify/fix no-op barriers for text_poke_bp()
x86/switch_to/64: Rewrite FS/GS switching yet again to fix AMD CPUs
selftests/x86/fsgsbase: Test selectors 1, 2, and 3
x86/fsgsbase/64: Report FSBASE and GSBASE correctly in core dumps
x86/fsgsbase/64: Fully initialize FS and GS state in start_thread_common
x86/asm: Fix UNWIND_HINT_REGS macro for older binutils
x86/asm/32: Fix regs_get_register() on segment registers
x86/xen/64: Rearrange the SYSCALL entries
x86/asm/32: Remove a bunch of '& 0xffff' from pt_regs segment reads
...
Diffstat (limited to 'arch/x86/entry/entry_64.S')
-rw-r--r-- | arch/x86/entry/entry_64.S | 179 |
1 files changed, 142 insertions, 37 deletions
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ce8dc33dd640..ca0b250eefc4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,6 +36,7 @@ #include <asm/smap.h> #include <asm/pgtable_types.h> #include <asm/export.h> +#include <asm/frame.h> #include <linux/err.h> .code64 @@ -43,9 +44,10 @@ #ifdef CONFIG_PARAVIRT ENTRY(native_usergs_sysret64) + UNWIND_HINT_EMPTY swapgs sysretq -ENDPROC(native_usergs_sysret64) +END(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ .macro TRACE_IRQS_IRETQ @@ -134,19 +136,14 @@ ENDPROC(native_usergs_sysret64) */ ENTRY(entry_SYSCALL_64) + UNWIND_HINT_EMPTY /* * Interrupts are off on entry. * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, * it is too small to ever cause noticeable irq latency. */ - SWAPGS_UNSAFE_STACK - /* - * A hypervisor implementation might want to use a label - * after the swapgs, so that it can do the swapgs - * for the guest and jump here on syscall. - */ -GLOBAL(entry_SYSCALL_64_after_swapgs) + swapgs movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp @@ -158,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r11 /* pt_regs->flags */ pushq $__USER_CS /* pt_regs->cs */ pushq %rcx /* pt_regs->ip */ +GLOBAL(entry_SYSCALL_64_after_hwframe) pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ @@ -169,6 +167,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) pushq %r10 /* pt_regs->r10 */ pushq %r11 /* pt_regs->r11 */ sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ + UNWIND_HINT_REGS extra=0 /* * If we need to do entry work or if we guess we'll need to do @@ -223,6 +222,7 @@ entry_SYSCALL_64_fastpath: movq EFLAGS(%rsp), %r11 RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp), %rsp + UNWIND_HINT_EMPTY USERGS_SYSRET64 1: @@ -316,6 +316,7 @@ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ RESTORE_C_REGS_EXCEPT_RCX_R11 movq RSP(%rsp), %rsp + UNWIND_HINT_EMPTY USERGS_SYSRET64 opportunistic_sysret_failed: @@ -343,6 +344,7 @@ ENTRY(stub_ptregs_64) DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF popq %rax + UNWIND_HINT_REGS extra=0 jmp entry_SYSCALL64_slow_path 1: @@ -351,6 +353,7 @@ END(stub_ptregs_64) .macro ptregs_stub func ENTRY(ptregs_\func) + UNWIND_HINT_FUNC leaq \func(%rip), %rax jmp stub_ptregs_64 END(ptregs_\func) @@ -367,6 +370,7 @@ END(ptregs_\func) * %rsi: next task */ ENTRY(__switch_to_asm) + UNWIND_HINT_FUNC /* * Save callee-saved registers * This must match the order in inactive_task_frame @@ -406,6 +410,7 @@ END(__switch_to_asm) * r12: kernel thread arg */ ENTRY(ret_from_fork) + UNWIND_HINT_EMPTY movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -413,6 +418,7 @@ ENTRY(ret_from_fork) jnz 1f /* kernel threads are uncommon */ 2: + UNWIND_HINT_REGS movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ @@ -440,13 +446,102 @@ END(ret_from_fork) ENTRY(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) + UNWIND_HINT_IRET_REGS pushq $(~vector+0x80) /* Note: always in signed byte range */ - vector=vector+1 jmp common_interrupt .align 8 + vector=vector+1 .endr END(irq_entries_start) +.macro DEBUG_ENTRY_ASSERT_IRQS_OFF +#ifdef CONFIG_DEBUG_ENTRY + pushfq + testl $X86_EFLAGS_IF, (%rsp) + jz .Lokay_\@ + ud2 +.Lokay_\@: + addq $8, %rsp +#endif +.endm + +/* + * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers + * flags and puts old RSP into old_rsp, and leaves all other GPRs alone. + * Requires kernel GSBASE. + * + * The invariant is that, if irq_count != -1, then the IRQ stack is in use. + */ +.macro ENTER_IRQ_STACK regs=1 old_rsp + DEBUG_ENTRY_ASSERT_IRQS_OFF + movq %rsp, \old_rsp + + .if \regs + UNWIND_HINT_REGS base=\old_rsp + .endif + + incl PER_CPU_VAR(irq_count) + jnz .Lirq_stack_push_old_rsp_\@ + + /* + * Right now, if we just incremented irq_count to zero, we've + * claimed the IRQ stack but we haven't switched to it yet. + * + * If anything is added that can interrupt us here without using IST, + * it must be *extremely* careful to limit its stack usage. This + * could include kprobes and a hypothetical future IST-less #DB + * handler. + * + * The OOPS unwinder relies on the word at the top of the IRQ + * stack linking back to the previous RSP for the entire time we're + * on the IRQ stack. For this to work reliably, we need to write + * it before we actually move ourselves to the IRQ stack. + */ + + movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8) + movq PER_CPU_VAR(irq_stack_ptr), %rsp + +#ifdef CONFIG_DEBUG_ENTRY + /* + * If the first movq above becomes wrong due to IRQ stack layout + * changes, the only way we'll notice is if we try to unwind right + * here. Assert that we set up the stack right to catch this type + * of bug quickly. + */ + cmpq -8(%rsp), \old_rsp + je .Lirq_stack_okay\@ + ud2 + .Lirq_stack_okay\@: +#endif + +.Lirq_stack_push_old_rsp_\@: + pushq \old_rsp + + .if \regs + UNWIND_HINT_REGS indirect=1 + .endif +.endm + +/* + * Undoes ENTER_IRQ_STACK. + */ +.macro LEAVE_IRQ_STACK regs=1 + DEBUG_ENTRY_ASSERT_IRQS_OFF + /* We need to be off the IRQ stack before decrementing irq_count. */ + popq %rsp + + .if \regs + UNWIND_HINT_REGS + .endif + + /* + * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming + * the irq stack but we're not on it. + */ + + decl PER_CPU_VAR(irq_count) +.endm + /* * Interrupt entry/exit. * @@ -485,17 +580,7 @@ END(irq_entries_start) CALL_enter_from_user_mode 1: - /* - * Save previous stack pointer, optionally switch to interrupt stack. - * irq_count is used to check if a CPU is already on an interrupt stack - * or not. While this is essentially redundant with preempt_count it is - * a little cheaper to use a separate counter in the PDA (short of - * moving irq_enter into assembly, which would be too much work) - */ - movq %rsp, %rdi - incl PER_CPU_VAR(irq_count) - cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp - pushq %rdi + ENTER_IRQ_STACK old_rsp=%rdi /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF @@ -515,10 +600,8 @@ common_interrupt: ret_from_intr: DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - decl PER_CPU_VAR(irq_count) - /* Restore saved previous stack */ - popq %rsp + LEAVE_IRQ_STACK testb $3, CS(%rsp) jz retint_kernel @@ -561,6 +644,7 @@ restore_c_regs_and_iret: INTERRUPT_RETURN ENTRY(native_iret) + UNWIND_HINT_IRET_REGS /* * Are we returning to a stack segment from the LDT? Note: in * 64-bit mode SS:RSP on the exception stack is always valid. @@ -633,6 +717,7 @@ native_irq_return_ldt: orq PER_CPU_VAR(espfix_stack), %rax SWAPGS movq %rax, %rsp + UNWIND_HINT_IRET_REGS offset=8 /* * At this point, we cannot write to the stack any more, but we can @@ -654,6 +739,7 @@ END(common_interrupt) */ .macro apicinterrupt3 num sym do_sym ENTRY(\sym) + UNWIND_HINT_IRET_REGS ASM_CLAC pushq $~(\num) .Lcommon_\sym: @@ -735,6 +821,8 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) + UNWIND_HINT_IRET_REGS offset=8 + /* Sanity check */ .if \shift_ist != -1 && \paranoid == 0 .error "using shift_ist requires paranoid=1" @@ -758,6 +846,7 @@ ENTRY(\sym) .else call error_entry .endif + UNWIND_HINT_REGS /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ .if \paranoid @@ -855,6 +944,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 * edi: new selector */ ENTRY(native_load_gs_index) + FRAME_BEGIN pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS @@ -863,8 +953,9 @@ ENTRY(native_load_gs_index) 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS popfq + FRAME_END ret -END(native_load_gs_index) +ENDPROC(native_load_gs_index) EXPORT_SYMBOL(native_load_gs_index) _ASM_EXTABLE(.Lgs_change, bad_gs) @@ -887,14 +978,12 @@ bad_gs: ENTRY(do_softirq_own_stack) pushq %rbp mov %rsp, %rbp - incl PER_CPU_VAR(irq_count) - cmove PER_CPU_VAR(irq_stack_ptr), %rsp - push %rbp /* frame pointer backlink */ + ENTER_IRQ_STACK regs=0 old_rsp=%r11 call __do_softirq + LEAVE_IRQ_STACK regs=0 leaveq - decl PER_CPU_VAR(irq_count) ret -END(do_softirq_own_stack) +ENDPROC(do_softirq_own_stack) #ifdef CONFIG_XEN idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 @@ -918,14 +1007,14 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will * see the correct pointer to the pt_regs */ + UNWIND_HINT_FUNC movq %rdi, %rsp /* we don't return, adjust the stack frame */ -11: incl PER_CPU_VAR(irq_count) - movq %rsp, %rbp - cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp - pushq %rbp /* frame pointer backlink */ + UNWIND_HINT_REGS + + ENTER_IRQ_STACK old_rsp=%r10 call xen_evtchn_do_upcall - popq %rsp - decl PER_CPU_VAR(irq_count) + LEAVE_IRQ_STACK + #ifndef CONFIG_PREEMPT call xen_maybe_preempt_hcall #endif @@ -946,6 +1035,7 @@ END(xen_do_hypervisor_callback) * with its current contents: any discrepancy means we in category 1. */ ENTRY(xen_failsafe_callback) + UNWIND_HINT_EMPTY movl %ds, %ecx cmpw %cx, 0x10(%rsp) jne 1f @@ -965,11 +1055,13 @@ ENTRY(xen_failsafe_callback) pushq $0 /* RIP */ pushq %r11 pushq %rcx + UNWIND_HINT_IRET_REGS offset=8 jmp general_protection 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ movq (%rsp), %rcx movq 8(%rsp), %r11 addq $0x30, %rsp + UNWIND_HINT_IRET_REGS pushq $-1 /* orig_ax = -1 => not a system call */ ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS @@ -1015,6 +1107,7 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) + UNWIND_HINT_FUNC cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 @@ -1042,6 +1135,7 @@ END(paranoid_entry) * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(paranoid_exit) + UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF_DEBUG testl %ebx, %ebx /* swapgs needed? */ @@ -1063,6 +1157,7 @@ END(paranoid_exit) * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) + UNWIND_HINT_FUNC cld SAVE_C_REGS 8 SAVE_EXTRA_REGS 8 @@ -1147,6 +1242,7 @@ END(error_entry) * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode */ ENTRY(error_exit) + UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF testl %ebx, %ebx @@ -1156,6 +1252,7 @@ END(error_exit) /* Runs on exception stack */ ENTRY(nmi) + UNWIND_HINT_IRET_REGS /* * Fix up the exception frame if we're on Xen. * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most @@ -1229,11 +1326,13 @@ ENTRY(nmi) cld movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT_IRET_REGS base=%rdx offset=8 pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ pushq 3*8(%rdx) /* pt_regs->flags */ pushq 2*8(%rdx) /* pt_regs->cs */ pushq 1*8(%rdx) /* pt_regs->rip */ + UNWIND_HINT_IRET_REGS pushq $-1 /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ @@ -1250,6 +1349,7 @@ ENTRY(nmi) pushq %r13 /* pt_regs->r13 */ pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ + UNWIND_HINT_REGS ENCODE_FRAME_POINTER /* @@ -1404,6 +1504,7 @@ first_nmi: .rept 5 pushq 11*8(%rsp) .endr + UNWIND_HINT_IRET_REGS /* Everything up to here is safe from nested NMIs */ @@ -1419,6 +1520,7 @@ first_nmi: pushq $__KERNEL_CS /* CS */ pushq $1f /* RIP */ INTERRUPT_RETURN /* continues at repeat_nmi below */ + UNWIND_HINT_IRET_REGS 1: #endif @@ -1468,6 +1570,7 @@ end_repeat_nmi: * exceptions might do. */ call paranoid_entry + UNWIND_HINT_REGS /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp, %rdi @@ -1505,17 +1608,19 @@ nmi_restore: END(nmi) ENTRY(ignore_sysret) + UNWIND_HINT_EMPTY mov $-ENOSYS, %eax sysret END(ignore_sysret) ENTRY(rewind_stack_do_exit) + UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp movq PER_CPU_VAR(cpu_current_top_of_stack), %rax - leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp + leaq -PTREGS_SIZE(%rax), %rsp + UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE call do_exit -1: jmp 1b END(rewind_stack_do_exit) |