diff options
author | Andy Lutomirski <luto@kernel.org> | 2019-11-26 18:27:16 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2019-11-26 21:53:34 +0100 |
commit | dc4e0021b00b5a4ecba56fae509217776592b0aa (patch) | |
tree | f62d0813a0fe93bc4aa7612632ec7cd744ccf33f /arch/x86/kernel | |
parent | e99b6f46ee5c127d39d2f3a2682fdeef10386316 (diff) | |
download | linux-dc4e0021b00b5a4ecba56fae509217776592b0aa.tar.bz2 |
x86/doublefault/32: Move #DF stack and TSS to cpu_entry_area
There are three problems with the current layout of the doublefault
stack and TSS. First, the TSS is only cacheline-aligned, which is
not enough -- if the hardware portion of the TSS (struct x86_hw_tss)
crosses a page boundary, horrible things happen [0]. Second, the
stack and TSS are global, so simultaneous double faults on different
CPUs will cause massive corruption. Third, the whole mechanism
won't work if user CR3 is loaded, resulting in a triple fault [1].
Let the doublefault stack and TSS share a page (which prevents the
TSS from spanning a page boundary), make it percpu, and move it into
cpu_entry_area. Teach the stack dump code about the doublefault
stack.
[0] Real hardware will read past the end of the page onto the next
*physical* page if a task switch happens. Virtual machines may
have any number of bugs, and I would consider it reasonable for
a VM to summarily kill the guest if it tries to task-switch to
a page-spanning TSS.
[1] Real hardware triple faults. At least some VMs seem to hang.
I'm not sure what's going on.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/doublefault_32.c | 58 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack_32.c | 30 |
3 files changed, 71 insertions, 29 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index baa2fed8deb6..2e4d90294fe6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -24,6 +24,7 @@ #include <asm/stackprotector.h> #include <asm/perf_event.h> #include <asm/mmu_context.h> +#include <asm/doublefault.h> #include <asm/archrandom.h> #include <asm/hypervisor.h> #include <asm/processor.h> @@ -1814,8 +1815,6 @@ static inline void tss_setup_ist(struct tss_struct *tss) tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); } -static inline void gdt_setup_doublefault_tss(int cpu) { } - #else /* CONFIG_X86_64 */ static inline void setup_getcpu(int cpu) { } @@ -1827,13 +1826,6 @@ static inline void ucode_cpu_init(int cpu) static inline void tss_setup_ist(struct tss_struct *tss) { } -static inline void gdt_setup_doublefault_tss(int cpu) -{ -#ifdef CONFIG_DOUBLEFAULT - /* Set up the doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif -} #endif /* !CONFIG_X86_64 */ static inline void tss_setup_io_bitmap(struct tss_struct *tss) @@ -1923,7 +1915,7 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - gdt_setup_doublefault_tss(cpu); + doublefault_init_cpu_tss(); fpu__init_cpu(); diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 61c707ca8a09..4eecfe4825ed 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -10,10 +10,6 @@ #include <asm/processor.h> #include <asm/desc.h> -#define DOUBLEFAULT_STACKSIZE (1024) -static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) - #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) static void doublefault_fn(void) @@ -21,6 +17,8 @@ static void doublefault_fn(void) struct desc_ptr gdt_desc = {0, 0}; unsigned long gdt, tss; + BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE); + native_store_gdt(&gdt_desc); gdt = gdt_desc.address; @@ -48,24 +46,46 @@ static void doublefault_fn(void) cpu_relax(); } -struct x86_hw_tss doublefault_tss __cacheline_aligned = { - .sp0 = STACK_START, - .ss0 = __KERNEL_DS, - .ldt = 0, +DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = { + .tss = { + /* + * No sp0 or ss0 -- we never run CPL != 0 with this TSS + * active. sp is filled in later. + */ + .ldt = 0, .io_bitmap_base = IO_BITMAP_OFFSET_INVALID, - .ip = (unsigned long) doublefault_fn, - /* 0x2 bit is always set */ - .flags = X86_EFLAGS_SF | 0x2, - .sp = STACK_START, - .es = __USER_DS, - .cs = __KERNEL_CS, - .ss = __KERNEL_DS, - .ds = __USER_DS, - .fs = __KERNEL_PERCPU, + .ip = (unsigned long) doublefault_fn, + /* 0x2 bit is always set */ + .flags = X86_EFLAGS_SF | 0x2, + .es = __USER_DS, + .cs = __KERNEL_CS, + .ss = __KERNEL_DS, + .ds = __USER_DS, + .fs = __KERNEL_PERCPU, #ifndef CONFIG_X86_32_LAZY_GS - .gs = __KERNEL_STACK_CANARY, + .gs = __KERNEL_STACK_CANARY, #endif - .__cr3 = __pa_nodebug(swapper_pg_dir), + .__cr3 = __pa_nodebug(swapper_pg_dir), + }, }; + +void doublefault_init_cpu_tss(void) +{ + unsigned int cpu = smp_processor_id(); + struct cpu_entry_area *cea = get_cpu_entry_area(cpu); + + /* + * The linker isn't smart enough to initialize percpu variables that + * point to other places in percpu space. + */ + this_cpu_write(doublefault_stack.tss.sp, + (unsigned long)&cea->doublefault_stack.stack + + sizeof(doublefault_stack.stack)); + + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, + &get_cpu_entry_area(cpu)->doublefault_stack.tss); + +} diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 64a59d726639..8e3a8fedfa4d 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -29,6 +29,9 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_ENTRY) return "ENTRY_TRAMPOLINE"; + if (type == STACK_TYPE_EXCEPTION) + return "#DF"; + return NULL; } @@ -82,6 +85,30 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) return true; } +static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info) +{ +#ifdef CONFIG_DOUBLEFAULT + struct cpu_entry_area *cea = get_cpu_entry_area(raw_smp_processor_id()); + struct doublefault_stack *ss = &cea->doublefault_stack; + + void *begin = ss->stack; + void *end = begin + sizeof(ss->stack); + + if ((void *)stack < begin || (void *)stack >= end) + return false; + + info->type = STACK_TYPE_EXCEPTION; + info->begin = begin; + info->end = end; + info->next_sp = (unsigned long *)this_cpu_read(cpu_tss_rw.x86_tss.sp); + + return true; +#else + return false; +#endif +} + + int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask) { @@ -105,6 +132,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (in_softirq_stack(stack, info)) goto recursion_check; + if (in_doublefault_stack(stack, info)) + goto recursion_check; + goto unknown; recursion_check: |