From 0c3292ca8025c5aef44dc389ac3a6bf4a325e0be Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 24 Nov 2017 19:29:06 -0800 Subject: x86/tlb: Refactor CR4 setting and shadow write Refactor the write to CR4 and its shadow value. This is done in preparation for the addition of an assertion to check that IRQs are disabled during CR4 update. No functional change. Signed-off-by: Nadav Amit Signed-off-by: Thomas Gleixner Cc: nadav.amit@gmail.com Cc: Andy Lutomirski Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/20171125032907.2241-2-namit@vmware.com --- arch/x86/include/asm/tlbflush.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 509046cfa5ce..e736f7f0ba92 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -173,17 +173,20 @@ static inline void cr4_init_shadow(void) this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } +static inline void __cr4_set(unsigned long cr4) +{ + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); +} + /* Set in this cpu's CR4. */ static inline void cr4_set_bits(unsigned long mask) { unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 | mask) != cr4) { - cr4 |= mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); - } + if ((cr4 | mask) != cr4) + __cr4_set(cr4 | mask); } /* Clear in this cpu's CR4. */ @@ -192,11 +195,8 @@ static inline void cr4_clear_bits(unsigned long mask) unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 & ~mask) != cr4) { - cr4 &= ~mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); - } + if ((cr4 & ~mask) != cr4) + __cr4_set(cr4 & ~mask); } static inline void cr4_toggle_bits(unsigned long mask) @@ -204,9 +204,7 @@ static inline void cr4_toggle_bits(unsigned long mask) unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); - cr4 ^= mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); + __cr4_set(cr4 ^ mask); } /* Read the CR4 shadow. */ -- cgit v1.2.3 From 9d0b62328d34c7044114d4f4281981d4c537c4ba Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 24 Nov 2017 19:29:07 -0800 Subject: x86/tlb: Disable interrupts when changing CR4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CR4 modifications are implemented as RMW operations which update a shadow variable and write the result to CR4. The RMW operation is protected by preemption disable, but there is no enforcement or debugging mechanism. CR4 modifications happen also in interrupt context via __native_flush_tlb_global(). This implementation does not affect a interrupted thread context CR4 operation, because the CR4 toggle restores the original content and does not modify the shadow variable. So the current situation seems to be safe, but a recent patch tried to add an actual RMW operation in interrupt context, which will cause subtle corruptions. To prevent that and make the CR4 handling future proof: - Add a lockdep assertion to __cr4_set() which will catch interrupt enabled invocations - Disable interrupts in the cr4 manipulator inlines - Rename cr4_toggle_bits() to cr4_toggle_bits_irqsoff(). This is called from __switch_to_xtra() where interrupts are already disabled and performance matters. All other call sites are not performance critical, so the extra overhead of an additional local_irq_save/restore() pair is not a problem. If new call sites care about performance then the necessary _irqsoff() variants can be added. [ tglx: Condensed the patch by moving the irq protection inside the manipulator functions. Updated changelog ] Signed-off-by: Nadav Amit Signed-off-by: Thomas Gleixner Cc: Luck Cc: Radim Krčmář Cc: Andy Lutomirski Cc: Paolo Bonzini Cc: Borislav Petkov Cc: nadav.amit@gmail.com Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/20171125032907.2241-3-namit@vmware.com --- arch/x86/include/asm/tlbflush.h | 11 ++++++++--- arch/x86/kernel/process.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e736f7f0ba92..877b5c1a1b12 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -175,6 +175,7 @@ static inline void cr4_init_shadow(void) static inline void __cr4_set(unsigned long cr4) { + lockdep_assert_irqs_disabled(); this_cpu_write(cpu_tlbstate.cr4, cr4); __write_cr4(cr4); } @@ -182,24 +183,28 @@ static inline void __cr4_set(unsigned long cr4) /* Set in this cpu's CR4. */ static inline void cr4_set_bits(unsigned long mask) { - unsigned long cr4; + unsigned long cr4, flags; + local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); if ((cr4 | mask) != cr4) __cr4_set(cr4 | mask); + local_irq_restore(flags); } /* Clear in this cpu's CR4. */ static inline void cr4_clear_bits(unsigned long mask) { - unsigned long cr4; + unsigned long cr4, flags; + local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); if ((cr4 & ~mask) != cr4) __cr4_set(cr4 & ~mask); + local_irq_restore(flags); } -static inline void cr4_toggle_bits(unsigned long mask) +static inline void cr4_toggle_bits_irqsoff(unsigned long mask) { unsigned long cr4; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 97fb3e5737f5..bb988a24db92 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -299,7 +299,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, } if ((tifp ^ tifn) & _TIF_NOTSC) - cr4_toggle_bits(X86_CR4_TSD); + cr4_toggle_bits_irqsoff(X86_CR4_TSD); if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); -- cgit v1.2.3 From 42b3a4cb5609de757f5445fcad18945ba9239a07 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 24 Nov 2017 09:42:21 +0100 Subject: x86/xen: Support early interrupts in xen pv guests Add early interrupt handlers activated by idt_setup_early_handler() to the handlers supported by Xen pv guests. This will allow for early WARN() calls not crashing the guest. Suggested-by: Andy Lutomirski Signed-off-by: Juergen Gross Signed-off-by: Thomas Gleixner Cc: xen-devel@lists.xenproject.org Cc: boris.ostrovsky@oracle.com Link: https://lkml.kernel.org/r/20171124084221.30172-1-jgross@suse.com --- arch/x86/include/asm/segment.h | 12 ++++++++++++ arch/x86/mm/extable.c | 4 +++- arch/x86/xen/enlighten_pv.c | 37 ++++++++++++++++++++++++------------- arch/x86/xen/xen-asm_64.S | 14 ++++++++++++++ 4 files changed, 53 insertions(+), 14 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index b20f9d623f9c..8f09012b92e7 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -236,11 +236,23 @@ */ #define EARLY_IDT_HANDLER_SIZE 9 +/* + * xen_early_idt_handler_array is for Xen pv guests: for each entry in + * early_idt_handler_array it contains a prequel in the form of + * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to + * max 8 bytes. + */ +#define XEN_EARLY_IDT_HANDLER_SIZE 8 + #ifndef __ASSEMBLY__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); +#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; +#endif + /* * Load a segment. Fall back on loading the zero segment if something goes * wrong. This variant assumes that loading zero fully clears the segment. diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 3321b446b66c..88754bfd425f 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) * Old CPUs leave the high bits of CS on the stack * undefined. I'm not sure which CPUs do this, but at least * the 486 DX works this way. + * Xen pv domains are not using the default __KERNEL_CS. */ - if (regs->cs != __KERNEL_CS) + if (!xen_pv_domain() && regs->cs != __KERNEL_CS) goto fail; /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5b2b3f3f6531..f2414c6c5e7c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -622,7 +622,7 @@ static struct trap_array_entry trap_array[] = { { simd_coprocessor_error, xen_simd_coprocessor_error, false }, }; -static bool get_trap_addr(void **addr, unsigned int ist) +static bool __ref get_trap_addr(void **addr, unsigned int ist) { unsigned int nr; bool ist_okay = false; @@ -644,6 +644,14 @@ static bool get_trap_addr(void **addr, unsigned int ist) } } + if (nr == ARRAY_SIZE(trap_array) && + *addr >= (void *)early_idt_handler_array[0] && + *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) { + nr = (*addr - (void *)early_idt_handler_array[0]) / + EARLY_IDT_HANDLER_SIZE; + *addr = (void *)xen_early_idt_handler_array[nr]; + } + if (WARN_ON(ist != 0 && !ist_okay)) return false; @@ -1262,6 +1270,21 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_setup_gdt(0); xen_init_irq_ops(); + + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + + /* + * Setup xen_vcpu early because idt_setup_early_handler needs it for + * local_irq_disable(), irqs_disabled(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); + + idt_setup_early_handler(); + xen_init_capabilities(); #ifdef CONFIG_X86_LOCAL_APIC @@ -1295,18 +1318,6 @@ asmlinkage __visible void __init xen_start_kernel(void) */ acpi_numa = -1; #endif - /* Let's presume PV guests always boot on vCPU with id 0. */ - per_cpu(xen_vcpu_id, 0) = 0; - - /* - * Setup xen_vcpu early because start_kernel needs it for - * local_irq_disable(), irqs_disabled(). - * - * Don't do the full vcpu_info placement stuff until we have - * the cpu_possible_mask and a non-dummy shared_info. - */ - xen_vcpu_info_reset(0); - WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); local_irq_disable(); diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 8a10c9a9e2b5..417b339e5c8e 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,7 @@ #include +#include #include .macro xen_pv_trap name @@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat #endif xen_pv_trap hypervisor_callback + __INIT +ENTRY(xen_early_idt_handler_array) + i = 0 + .rept NUM_EXCEPTION_VECTORS + pop %rcx + pop %r11 + jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE + i = i + 1 + .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc + .endr +END(xen_early_idt_handler_array) + __FINIT + hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* * Xen64 iret frame: -- cgit v1.2.3 From e3811a3f74bd1ad773667b78323f396166891f3a Mon Sep 17 00:00:00 2001 From: Rudolf Marek Date: Tue, 28 Nov 2017 22:01:06 +0100 Subject: x86/cpufeatures: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD The latest AMD AMD64 Architecture Programmer's Manual adds a CPUID feature XSaveErPtr (CPUID_Fn80000008_EBX[2]). If this feature is set, the FXSAVE, XSAVE, FXSAVEOPT, XSAVEC, XSAVES / FXRSTOR, XRSTOR, XRSTORS always save/restore error pointers, thus making the X86_BUG_FXSAVE_LEAK workaround obsolete on such CPUs. Signed-off-by: Rudolf Marek Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/bdcebe90-62c5-1f05-083c-eba7f08b2540@assembler.cz Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/amd.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c0b0e9e8aa66..800104c8a3ed 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -266,6 +266,7 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d58184b7cd44..bcb75dc97d44 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x17: init_amd_zn(c); break; } - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) + /* + * Enable workaround for FXSAVE leak on CPUs + * without a XSaveErPtr feature + */ + if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR))) set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); cpu_detect_cache_sizes(c); -- cgit v1.2.3