From 3f1f576a195aa266813cbd4ca70291deb61e0129 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:38 +0100 Subject: x86/microcode: Propagate return value from updating functions ... so that callers can know when microcode was updated and act accordingly. Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/microcode/amd.c | 10 +++++----- arch/x86/kernel/cpu/microcode/core.c | 33 +++++++++++++++++---------------- arch/x86/kernel/cpu/microcode/intel.c | 10 +++++----- 3 files changed, 27 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 330b8462d426..a998e1a7d46f 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } -static int apply_microcode_amd(int cpu) +static enum ucode_state apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; @@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu) p = find_patch(cpu); if (!p) - return 0; + return UCODE_NFOUND; mc_amd = p->data; uci->mc = p->data; @@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu) if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; uci->cpu_sig.rev = rev; - return 0; + return UCODE_OK; } if (__apply_microcode_amd(mc_amd)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); - return -1; + return UCODE_ERROR; } pr_info("CPU%d: new patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); @@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu) uci->cpu_sig.rev = mc_amd->hdr.patch_id; c->microcode = mc_amd->hdr.patch_id; - return 0; + return UCODE_UPDATED; } static int install_equiv_cpu_table(const u8 *buf) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 319dd65f98a2..6fdaf7cf3182 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -374,7 +374,7 @@ static int collect_cpu_info(int cpu) } struct apply_microcode_ctx { - int err; + enum ucode_state err; }; static void apply_microcode_local(void *arg) @@ -489,31 +489,29 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; -static int reload_for_cpu(int cpu) +static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; enum ucode_state ustate; - int err = 0; if (!uci->valid) - return err; + return UCODE_OK; ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - return err; + if (ustate != UCODE_OK) + return ustate; + + return apply_microcode_on_target(cpu); } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + enum ucode_state tmp_ret = UCODE_OK; unsigned long val; + ssize_t ret = 0; int cpu; - ssize_t ret = 0, tmp_ret; ret = kstrtoul(buf, 0, &val); if (ret) @@ -526,15 +524,18 @@ static ssize_t reload_store(struct device *dev, mutex_lock(µcode_mutex); for_each_online_cpu(cpu) { tmp_ret = reload_for_cpu(cpu); - if (tmp_ret != 0) + if (tmp_ret > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - /* save retval of the first encountered reload error */ - if (!ret) - ret = tmp_ret; + /* set retval for the first encountered reload error */ + if (!ret) + ret = -EINVAL; + } } - if (!ret) + + if (!ret && tmp_ret == UCODE_UPDATED) perf_check_microcode(); + mutex_unlock(µcode_mutex); put_online_cpus(); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index a15db2b4e0d6..923054a6b760 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -772,7 +772,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static int apply_microcode_intel(int cpu) +static enum ucode_state apply_microcode_intel(int cpu) { struct microcode_intel *mc; struct ucode_cpu_info *uci; @@ -782,7 +782,7 @@ static int apply_microcode_intel(int cpu) /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) - return -1; + return UCODE_ERROR; uci = ucode_cpu_info + cpu; mc = uci->mc; @@ -790,7 +790,7 @@ static int apply_microcode_intel(int cpu) /* Look for a newer patch in our cache: */ mc = find_patch(uci); if (!mc) - return 0; + return UCODE_NFOUND; } /* write microcode via MSR 0x79 */ @@ -801,7 +801,7 @@ static int apply_microcode_intel(int cpu) if (rev != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", cpu, mc->hdr.rev); - return -1; + return UCODE_ERROR; } if (rev != prev_rev) { @@ -818,7 +818,7 @@ static int apply_microcode_intel(int cpu) uci->cpu_sig.rev = rev; c->microcode = rev; - return 0; + return UCODE_UPDATED; } static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, -- cgit v1.2.3 From 1008c52c09dcb23d93f8e0ea83a6246265d2cce0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:39 +0100 Subject: x86/CPU: Add a microcode loader callback Add a callback function which the microcode loader calls when microcode has been updated to a newer revision. Do the callback only when no error was encountered during loading. Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-3-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 10 ++++++++++ arch/x86/kernel/cpu/microcode/core.c | 8 ++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1bd9ed87606f..b0ccd4847a58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -977,4 +977,5 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); +void microcode_check(void); #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 824aee0117bb..84f1cd88608b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1749,3 +1749,13 @@ static int __init init_cpu_syscore(void) return 0; } core_initcall(init_cpu_syscore); + +/* + * The microcode loader calls this upon late microcode load to recheck features, + * only when microcode has been updated. Caller holds microcode_mutex and CPU + * hotplug lock. + */ +void microcode_check(void) +{ + perf_check_microcode(); +} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6fdaf7cf3182..aa1b9a422f2b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -509,6 +509,7 @@ static ssize_t reload_store(struct device *dev, const char *buf, size_t size) { enum ucode_state tmp_ret = UCODE_OK; + bool do_callback = false; unsigned long val; ssize_t ret = 0; int cpu; @@ -531,10 +532,13 @@ static ssize_t reload_store(struct device *dev, if (!ret) ret = -EINVAL; } + + if (tmp_ret == UCODE_UPDATED) + do_callback = true; } - if (!ret && tmp_ret == UCODE_UPDATED) - perf_check_microcode(); + if (!ret && do_callback) + microcode_check(); mutex_unlock(µcode_mutex); put_online_cpus(); -- cgit v1.2.3 From 42ca8082e260dcfd8afa2afa6ec1940b9d41724c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:40 +0100 Subject: x86/CPU: Check CPU feature bits after microcode upgrade With some microcode upgrades, new CPUID features can become visible on the CPU. Check what the kernel has mirrored now and issue a warning hinting at possible things the user/admin can do to make use of the newly visible features. Originally-by: Ashok Raj Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-4-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 84f1cd88608b..348cf4821240 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1757,5 +1757,25 @@ core_initcall(init_cpu_syscore); */ void microcode_check(void) { + struct cpuinfo_x86 info; + perf_check_microcode(); + + /* Reload CPUID max function as it might've changed. */ + info.cpuid_level = cpuid_eax(0); + + /* + * Copy all capability leafs to pick up the synthetic ones so that + * memcmp() below doesn't fail on that. The ones coming from CPUID will + * get overwritten in get_cpu_cap(). + */ + memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); + + get_cpu_cap(&info); + + if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) + return; + + pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); + pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); } -- cgit v1.2.3 From 6262b6e78ce5ba62be47774ca80f5b0a6f0eb428 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 07:50:23 -0700 Subject: x86/IO-APIC: Avoid warning in 32-bit builds Constants wider than 32 bits should be tagged with ULL. Signed-off-by: Jan Beulich Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5A8AF23F02000078001A91E5@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8ad2e410974f..7c5538769f7e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void) do { rep_nop(); now = rdtsc(); - } while ((now - start) < 40000000000UL / HZ && + } while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end)); } -- cgit v1.2.3 From dd84441a797150dcc49298ec95c459a8891d8bb1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 19 Feb 2018 10:50:54 +0000 Subject: x86/speculation: Use IBRS if available before calling into firmware Retpoline means the kernel is safe because it has no indirect branches. But firmware isn't, so use IBRS for firmware calls if it's available. Block preemption while IBRS is set, although in practice the call sites already had to be doing that. Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware code, from an NMI handler. I don't want to touch that with a bargepole. Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apm.h | 6 ++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/efi.h | 17 ++++++++++++++-- arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++--------- arch/x86/kernel/cpu/bugs.c | 12 ++++++++++- 5 files changed, 63 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 4d4015ddcf26..c356098b6fb9 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,6 +7,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H +#include + #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0dfe4d3f74e2..f41079da38c5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -213,6 +213,7 @@ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 85f6ccb80b91..a399c1ebf6f0 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, @@ -36,8 +37,18 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); -#define arch_efi_call_virt_setup() kernel_fpu_begin() -#define arch_efi_call_virt_teardown() kernel_fpu_end() +#define arch_efi_call_virt_setup() \ +({ \ + kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ + firmware_restrict_branch_speculation_end(); \ + kernel_fpu_end(); \ +}) + /* * Wrap all the virtual calls in a way that forces the parameters on the stack. @@ -73,6 +84,7 @@ struct efi_scratch { efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ \ if (efi_scratch.use_pgd) { \ efi_scratch.prev_cr3 = __read_cr3(); \ @@ -91,6 +103,7 @@ struct efi_scratch { __flush_tlb_all(); \ } \ \ + firmware_restrict_branch_speculation_end(); \ __kernel_fpu_end(); \ preempt_enable(); \ }) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index af34b1e8069a..ec90c3228991 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -219,17 +219,38 @@ static inline void vmexit_fill_RSB(void) #endif } +#define alternative_msr_write(_msr, _val, _feature) \ + asm volatile(ALTERNATIVE("", \ + "movl %[msr], %%ecx\n\t" \ + "movl %[val], %%eax\n\t" \ + "movl $0, %%edx\n\t" \ + "wrmsr", \ + _feature) \ + : : [msr] "i" (_msr), [val] "i" (_val) \ + : "eax", "ecx", "edx", "memory") + static inline void indirect_branch_prediction_barrier(void) { - asm volatile(ALTERNATIVE("", - "movl %[msr], %%ecx\n\t" - "movl %[val], %%eax\n\t" - "movl $0, %%edx\n\t" - "wrmsr", - X86_FEATURE_USE_IBPB) - : : [msr] "i" (MSR_IA32_PRED_CMD), - [val] "i" (PRED_CMD_IBPB) - : "eax", "ecx", "edx", "memory"); + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, + X86_FEATURE_USE_IBPB); +} + +/* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. + */ +static inline void firmware_restrict_branch_speculation_start(void) +{ + preempt_disable(); + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, + X86_FEATURE_USE_IBRS_FW); +} + +static inline void firmware_restrict_branch_speculation_end(void) +{ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, + X86_FEATURE_USE_IBRS_FW); + preempt_enable(); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d71c8b54b696..bfca937bdcc3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -300,6 +300,15 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } + + /* + * Retpoline means the kernel is safe because it has no indirect + * branches. But firmware isn't, so use IBRS to protect that. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } } #undef pr_fmt @@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", spectre_v2_module_string()); } #endif -- cgit v1.2.3 From bd89004f6305cbf7352238f61da093207ee518d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jan 2018 10:38:09 +0100 Subject: x86/boot, objtool: Annotate indirect jump in secondary_startup_64() The objtool retpoline validation found this indirect jump. Seeing how it's on CPU bringup before we run userspace it should be safe, annotate it. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Woodhouse Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 04a625f0fcda..0f545b3cf926 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -23,6 +23,7 @@ #include #include "../entry/calling.h" #include +#include #ifdef CONFIG_PARAVIRT #include @@ -134,6 +135,7 @@ ENTRY(secondary_startup_64) /* Ensure I am executing from virtual addresses */ movq $1f, %rax + ANNOTATE_RETPOLINE_SAFE jmp *%rax 1: UNWIND_HINT_EMPTY -- cgit v1.2.3 From 9fbcc57aa16424ef84cb54e0d9db3221763de88a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Feb 2018 11:37:53 -0600 Subject: extable: Make init_kernel_text() global Convert init_kernel_text() to a global function and use it in a few places instead of manually comparing _sinittext and _einittext. Note that kallsyms.h has a very similar function called is_kernel_inittext(), but its end check is inclusive. I'm not sure whether that's intentional behavior, so I didn't touch it. Suggested-by: Jason Baron Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra Acked-by: Steven Rostedt (VMware) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4335d02be8d45ca7d265d2f174251d0b7ee6c5fd.1519051220.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_orc.c | 3 +-- include/linux/kernel.h | 1 + kernel/extable.c | 2 +- kernel/jump_label.c | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1f9188f5357c..feb28fee6cea 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -5,7 +5,6 @@ #include #include #include -#include #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) @@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip) } /* vmlinux .init slow lookup: */ - if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) + if (init_kernel_text(ip)) return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ce51455e2adf..3fd291503576 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option); extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); +extern int init_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); diff --git a/kernel/extable.c b/kernel/extable.c index a17fdb63dc3e..6a5b61ebc66c 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -static inline int init_kernel_text(unsigned long addr) +int init_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_sinittext && addr < (unsigned long)_einittext) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index b2f0b479191b..52a0a7af8640 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -16,7 +16,6 @@ #include #include #include -#include #ifdef HAVE_JUMP_LABEL @@ -429,8 +428,7 @@ void __init jump_label_invalidate_init(void) struct jump_entry *iter; for (iter = iter_start; iter < iter_stop; iter++) { - if (iter->code >= (unsigned long)_sinittext && - iter->code < (unsigned long)_einittext) + if (init_kernel_text(iter->code)) iter->code = 0; } } -- cgit v1.2.3 From 945fd17ab6bab8a4d05da6c3170519fbcfe62ddb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Feb 2018 21:14:26 +0100 Subject: x86/cpu_entry_area: Sync cpu_entry_area to initial_page_table The separation of the cpu_entry_area from the fixmap missed the fact that on 32bit non-PAE kernels the cpu_entry_area mapping might not be covered in initial_page_table by the previous synchronizations. This results in suspend/resume failures because 32bit utilizes initial page table for resume. The absence of the cpu_entry_area mapping results in a triple fault, aka. insta reboot. With PAE enabled this works by chance because the PGD entry which covers the fixmap and other parts incindentally provides the cpu_entry_area mapping as well. Synchronize the initial page table after setting up the cpu entry area. Instead of adding yet another copy of the same code, move it to a function and invoke it from the various places. It needs to be investigated if the existing calls in setup_arch() and setup_per_cpu_areas() can be replaced by the later invocation from setup_cpu_entry_areas(), but that's beyond the scope of this fix. Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") Reported-by: Woody Suwalski Signed-off-by: Thomas Gleixner Tested-by: Woody Suwalski Cc: William Grant Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1802282137290.1392@nanos.tec.linutronix.de --- arch/x86/include/asm/pgtable_32.h | 1 + arch/x86/include/asm/pgtable_64.h | 1 + arch/x86/kernel/setup.c | 17 +++++------------ arch/x86/kernel/setup_percpu.c | 17 ++++------------- arch/x86/mm/cpu_entry_area.c | 6 ++++++ arch/x86/mm/init_32.c | 15 +++++++++++++++ 6 files changed, 32 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e55466760ff8..b3ec519e3982 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +void sync_initial_page_table(void); /* * Define this if things work differently on an i386 and an i486: diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81462e9a34f6..1149d2112b2e 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt extern void paging_init(void); +static inline void sync_initial_page_table(void) { } #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %p(%016lx)\n", \ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1ae67e982af7..4c616be28506 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1204,20 +1204,13 @@ void __init setup_arch(char **cmdline_p) kasan_init(); -#ifdef CONFIG_X86_32 - /* sync back kernel address range */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. + * Sync back kernel address range. + * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); tboot_probe(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 497aa766fab3..ea554f812ee1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void) /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); -#ifdef CONFIG_X86_32 /* * Sync back kernel address range again. We already did this in * setup_arch(), but percpu data also needs to be available in * the smpboot asm. We can't reliably pick up percpu mappings * using vmalloc_fault(), because exception dispatch needs * percpu data. + * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. - */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); } diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index b9283cc27622..476d810639a8 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void) for_each_possible_cpu(cpu) setup_cpu_entry_area(cpu); + + /* + * This is the last essential update to swapper_pgdir which needs + * to be synchronized to initial_page_table on 32bit. + */ + sync_initial_page_table(); } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 79cb066f40c0..396e1f0151ac 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ +void __init sync_initial_page_table(void) +{ + clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); +} + void __init native_pagetable_init(void) { unsigned long pfn, va; -- cgit v1.2.3 From 7c2178c1ff482679fb0ca0b628f720a888814548 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 6 Mar 2018 22:18:05 +0100 Subject: x86/syscalls: Use proper syscall definition for sys_ioperm() Using SYSCALL_DEFINEx() is recommended, so use it also here. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: luto@amacapital.net Cc: viro@zeniv.linux.org.uk Signed-off-by: Ingo Molnar --- arch/x86/kernel/ioport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 2f723301eb58..38deafebb21b 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -23,7 +23,7 @@ /* * this changes the io permissions bitmap in the current task. */ -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) +SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) { struct thread_struct *t = ¤t->thread; struct tss_struct *tss; -- cgit v1.2.3 From 36268223c1e9981d6cfc33aff8520b3bde4b8114 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 26 Feb 2018 09:35:01 -0500 Subject: x86/spectre_v2: Don't check microcode versions when running under hypervisors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As: 1) It's known that hypervisors lie about the environment anyhow (host mismatch) 2) Even if the hypervisor (Xen, KVM, VMWare, etc) provided a valid "correct" value, it all gets to be very murky when migration happens (do you provide the "new" microcode of the machine?). And in reality the cloud vendors are the ones that should make sure that the microcode that is running is correct and we should just sing lalalala and trust them. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Paolo Bonzini Cc: Wanpeng Li Cc: kvm Cc: Krčmář Cc: Borislav Petkov CC: "H. Peter Anvin" CC: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180226213019.GE9497@char.us.oracle.com --- arch/x86/kernel/cpu/intel.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d19e903214b4..4aa9fd379390 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -144,6 +144,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) { int i; + /* + * We know that the hypervisor lie to us on the microcode version so + * we may as well hope that it is running the correct version. + */ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return false; + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && c->x86_stepping == spectre_bad_microcodes[i].stepping) -- cgit v1.2.3 From 854857f5944c59a881ff607b37ed9ed41d031a3b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 28 Feb 2018 11:28:40 +0100 Subject: x86/microcode: Get rid of struct apply_microcode_ctx It is a useless remnant from earlier times. Use the ucode_state enum directly. No functional change. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Cc: Arjan Van De Ven Link: https://lkml.kernel.org/r/20180228102846.13447-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index aa1b9a422f2b..63370651e376 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -373,26 +373,23 @@ static int collect_cpu_info(int cpu) return ret; } -struct apply_microcode_ctx { - enum ucode_state err; -}; - static void apply_microcode_local(void *arg) { - struct apply_microcode_ctx *ctx = arg; + enum ucode_state *err = arg; - ctx->err = microcode_ops->apply_microcode(smp_processor_id()); + *err = microcode_ops->apply_microcode(smp_processor_id()); } static int apply_microcode_on_target(int cpu) { - struct apply_microcode_ctx ctx = { .err = 0 }; + enum ucode_state err; int ret; - ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); - if (!ret) - ret = ctx.err; - + ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1); + if (!ret) { + if (err == UCODE_ERROR) + ret = 1; + } return ret; } -- cgit v1.2.3 From c182d2b7d0ca48e0d6ff16f7d883161238c447ed Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 28 Feb 2018 11:28:41 +0100 Subject: x86/microcode/intel: Check microcode revision before updating sibling threads After updating microcode on one of the threads of a core, the other thread sibling automatically gets the update since the microcode resources on a hyperthreaded core are shared between the two threads. Check the microcode revision on the CPU before performing a microcode update and thus save us the WRMSR 0x79 because it is a particularly expensive operation. [ Borislav: Massage changelog and coding style. ] Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Cc: Arjan Van De Ven Link: http://lkml.kernel.org/r/1519352533-15992-2-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/20180228102846.13447-3-bp@alien8.de --- arch/x86/kernel/cpu/microcode/intel.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 923054a6b760..87bd6dc94081 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -589,6 +589,17 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (!mc) return 0; + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. + */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + return UCODE_OK; + } + /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -776,7 +787,7 @@ static enum ucode_state apply_microcode_intel(int cpu) { struct microcode_intel *mc; struct ucode_cpu_info *uci; - struct cpuinfo_x86 *c; + struct cpuinfo_x86 *c = &cpu_data(cpu); static int prev_rev; u32 rev; @@ -793,6 +804,18 @@ static enum ucode_state apply_microcode_intel(int cpu) return UCODE_NFOUND; } + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. + */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + c->microcode = rev; + return UCODE_OK; + } + /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -813,8 +836,6 @@ static enum ucode_state apply_microcode_intel(int cpu) prev_rev = rev; } - c = &cpu_data(cpu); - uci->cpu_sig.rev = rev; c->microcode = rev; -- cgit v1.2.3 From 91df9fdf51492aec9fed6b4cbd33160886740f47 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 28 Feb 2018 11:28:42 +0100 Subject: x86/microcode/intel: Writeback and invalidate caches before updating microcode Updating microcode is less error prone when caches have been flushed and depending on what exactly the microcode is updating. For example, some of the issues around certain Broadwell parts can be addressed by doing a full cache flush. [ Borislav: Massage it and use native_wbinvd() in both cases. ] Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Cc: Arjan Van De Ven Link: http://lkml.kernel.org/r/1519352533-15992-3-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/20180228102846.13447-4-bp@alien8.de --- arch/x86/kernel/cpu/microcode/intel.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 87bd6dc94081..e2864bc2d575 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -600,6 +600,12 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) return UCODE_OK; } + /* + * Writeback and invalidate caches before updating microcode to avoid + * internal issues depending on what the microcode is updating. + */ + native_wbinvd(); + /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -816,6 +822,12 @@ static enum ucode_state apply_microcode_intel(int cpu) return UCODE_OK; } + /* + * Writeback and invalidate caches before updating microcode to avoid + * internal issues depending on what the microcode is updating. + */ + native_wbinvd(); + /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); -- cgit v1.2.3 From 30ec26da9967d0d785abc24073129a34c3211777 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 28 Feb 2018 11:28:43 +0100 Subject: x86/microcode: Do not upload microcode if CPUs are offline Avoid loading microcode if any of the CPUs are offline, and issue a warning. Having different microcode revisions on the system at any time is outright dangerous. [ Borislav: Massage changelog. ] Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Reviewed-by: Tom Lendacky Cc: Arjan Van De Ven Link: http://lkml.kernel.org/r/1519352533-15992-4-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/20180228102846.13447-5-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 63370651e376..fa32cb3dcca5 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -486,6 +486,16 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; +static int check_online_cpus(void) +{ + if (num_online_cpus() == num_present_cpus()) + return 0; + + pr_err("Not all CPUs online, aborting microcode update.\n"); + + return -EINVAL; +} + static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -519,7 +529,13 @@ static ssize_t reload_store(struct device *dev, return size; get_online_cpus(); + + ret = check_online_cpus(); + if (ret) + goto put; + mutex_lock(µcode_mutex); + for_each_online_cpu(cpu) { tmp_ret = reload_for_cpu(cpu); if (tmp_ret > UCODE_NFOUND) { @@ -538,6 +554,8 @@ static ssize_t reload_store(struct device *dev, microcode_check(); mutex_unlock(µcode_mutex); + +put: put_online_cpus(); if (!ret) -- cgit v1.2.3 From d8c3b52c00a05036e0a6b315b4b17921a7b67997 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 28 Feb 2018 11:28:44 +0100 Subject: x86/microcode/intel: Look into the patch cache first The cache might contain a newer patch - look in there first. A follow-on change will make sure newest patches are loaded into the cache of microcode patches. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Cc: Arjan Van De Ven Link: https://lkml.kernel.org/r/20180228102846.13447-6-bp@alien8.de --- arch/x86/kernel/cpu/microcode/intel.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index e2864bc2d575..2aded9db1d42 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -791,9 +791,9 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) static enum ucode_state apply_microcode_intel(int cpu) { - struct microcode_intel *mc; - struct ucode_cpu_info *uci; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_intel *mc; static int prev_rev; u32 rev; @@ -801,11 +801,10 @@ static enum ucode_state apply_microcode_intel(int cpu) if (WARN_ON(raw_smp_processor_id() != cpu)) return UCODE_ERROR; - uci = ucode_cpu_info + cpu; - mc = uci->mc; + /* Look for a newer patch in our cache: */ + mc = find_patch(uci); if (!mc) { - /* Look for a newer patch in our cache: */ - mc = find_patch(uci); + mc = uci->mc; if (!mc) return UCODE_NFOUND; } -- cgit v1.2.3 From cfb52a5a09c8ae3a1dafb44ce549fde5b69e8117 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 28 Feb 2018 11:28:45 +0100 Subject: x86/microcode: Request microcode on the BSP ... so that any newer version can land in the cache and can later be fished out by the application functions. Do that before grabbing the hotplug lock. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Reviewed-by: Tom Lendacky Cc: Arjan Van De Ven Link: https://lkml.kernel.org/r/20180228102846.13447-7-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index fa32cb3dcca5..5dd157d48606 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -499,15 +499,10 @@ static int check_online_cpus(void) static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; if (!uci->valid) return UCODE_OK; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate != UCODE_OK) - return ustate; - return apply_microcode_on_target(cpu); } @@ -515,11 +510,11 @@ static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + int cpu, bsp = boot_cpu_data.cpu_index; enum ucode_state tmp_ret = UCODE_OK; bool do_callback = false; unsigned long val; ssize_t ret = 0; - int cpu; ret = kstrtoul(buf, 0, &val); if (ret) @@ -528,6 +523,10 @@ static ssize_t reload_store(struct device *dev, if (val != 1) return size; + tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); + if (tmp_ret != UCODE_OK) + return size; + get_online_cpus(); ret = check_online_cpus(); -- cgit v1.2.3 From a5321aec6412b20b5ad15db2d6b916c05349dbff Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 28 Feb 2018 11:28:46 +0100 Subject: x86/microcode: Synchronize late microcode loading Original idea by Ashok, completely rewritten by Borislav. Before you read any further: the early loading method is still the preferred one and you should always do that. The following patch is improving the late loading mechanism for long running jobs and cloud use cases. Gather all cores and serialize the microcode update on them by doing it one-by-one to make the late update process as reliable as possible and avoid potential issues caused by the microcode update. [ Borislav: Rewrite completely. ] Co-developed-by: Borislav Petkov Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Reviewed-by: Tom Lendacky Cc: Arjan Van De Ven Link: https://lkml.kernel.org/r/20180228102846.13447-8-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 118 +++++++++++++++++++++++++++-------- 1 file changed, 92 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5dd157d48606..70ecbc8099c9 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -22,13 +22,16 @@ #define pr_fmt(fmt) "microcode: " fmt #include +#include #include #include #include #include #include +#include #include #include +#include #include #include @@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache); */ static DEFINE_MUTEX(microcode_mutex); +/* + * Serialize late loading so that CPUs get updated one-by-one. + */ +static DEFINE_SPINLOCK(update_lock); + struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; struct cpu_info_ctx { @@ -486,6 +494,19 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; +/* + * Late loading dance. Why the heavy-handed stomp_machine effort? + * + * - HT siblings must be idle and not execute other code while the other sibling + * is loading microcode in order to avoid any negative interactions caused by + * the loading. + * + * - In addition, microcode update on the cores must be serialized until this + * requirement can be relaxed in the future. Right now, this is conservative + * and good. + */ +#define SPINUNIT 100 /* 100 nsec */ + static int check_online_cpus(void) { if (num_online_cpus() == num_present_cpus()) @@ -496,23 +517,85 @@ static int check_online_cpus(void) return -EINVAL; } -static enum ucode_state reload_for_cpu(int cpu) +static atomic_t late_cpus; + +/* + * Returns: + * < 0 - on error + * 0 - no update done + * 1 - microcode was updated + */ +static int __reload_late(void *info) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + unsigned int timeout = NSEC_PER_SEC; + int all_cpus = num_online_cpus(); + int cpu = smp_processor_id(); + enum ucode_state err; + int ret = 0; - if (!uci->valid) - return UCODE_OK; + atomic_dec(&late_cpus); + + /* + * Wait for all CPUs to arrive. A load will not be attempted unless all + * CPUs show up. + * */ + while (atomic_read(&late_cpus)) { + if (timeout < SPINUNIT) { + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", + atomic_read(&late_cpus)); + return -1; + } + + ndelay(SPINUNIT); + timeout -= SPINUNIT; + + touch_nmi_watchdog(); + } + + spin_lock(&update_lock); + apply_microcode_local(&err); + spin_unlock(&update_lock); + + if (err > UCODE_NFOUND) { + pr_warn("Error reloading microcode on CPU %d\n", cpu); + ret = -1; + } else if (err == UCODE_UPDATED) { + ret = 1; + } - return apply_microcode_on_target(cpu); + atomic_inc(&late_cpus); + + while (atomic_read(&late_cpus) != all_cpus) + cpu_relax(); + + return ret; +} + +/* + * Reload microcode late on all CPUs. Wait for a sec until they + * all gather together. + */ +static int microcode_reload_late(void) +{ + int ret; + + atomic_set(&late_cpus, num_online_cpus()); + + ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); + if (ret < 0) + return ret; + else if (ret > 0) + microcode_check(); + + return ret; } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { - int cpu, bsp = boot_cpu_data.cpu_index; enum ucode_state tmp_ret = UCODE_OK; - bool do_callback = false; + int bsp = boot_cpu_data.cpu_index; unsigned long val; ssize_t ret = 0; @@ -534,30 +617,13 @@ static ssize_t reload_store(struct device *dev, goto put; mutex_lock(µcode_mutex); - - for_each_online_cpu(cpu) { - tmp_ret = reload_for_cpu(cpu); - if (tmp_ret > UCODE_NFOUND) { - pr_warn("Error reloading microcode on CPU %d\n", cpu); - - /* set retval for the first encountered reload error */ - if (!ret) - ret = -EINVAL; - } - - if (tmp_ret == UCODE_UPDATED) - do_callback = true; - } - - if (!ret && do_callback) - microcode_check(); - + ret = microcode_reload_late(); mutex_unlock(µcode_mutex); put: put_online_cpus(); - if (!ret) + if (ret >= 0) ret = size; return ret; -- cgit v1.2.3 From c07a8f8b08ba683ea24f3ac9159f37ae94daf47f Mon Sep 17 00:00:00 2001 From: Francis Deslauriers Date: Thu, 8 Mar 2018 22:18:12 -0500 Subject: x86/kprobes: Fix kernel crash when probing .entry_trampoline code Disable the kprobe probing of the entry trampoline: .entry_trampoline is a code area that is used to ensure page table isolation between userspace and kernelspace. At the beginning of the execution of the trampoline, we load the kernel's CR3 register. This has the effect of enabling the translation of the kernel virtual addresses to physical addresses. Before this happens most kernel addresses can not be translated because the running process' CR3 is still used. If a kprobe is placed on the trampoline code before that change of the CR3 register happens the kernel crashes because int3 handling pages are not accessible. To fix this, add the .entry_trampoline section to the kprobe blacklist to prohibit the probing of code before all the kernel pages are accessible. Signed-off-by: Francis Deslauriers Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: mathieu.desnoyers@efficios.com Cc: mhiramat@kernel.org Link: http://lkml.kernel.org/r/1520565492-4637-2-git-send-email-francis.deslauriers@efficios.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/sections.h | 1 + arch/x86/kernel/kprobes/core.c | 10 +++++++++- arch/x86/kernel/vmlinux.lds.S | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index d6baf23782bc..5c019d23d06b 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; +extern char __entry_trampoline_start[], __entry_trampoline_end[]; #endif #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bd36f3c33cd0..0715f827607c 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler); bool arch_within_kprobe_blacklist(unsigned long addr) { + bool is_in_entry_trampoline_section = false; + +#ifdef CONFIG_X86_64 + is_in_entry_trampoline_section = + (addr >= (unsigned long)__entry_trampoline_start && + addr < (unsigned long)__entry_trampoline_end); +#endif return (addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end) || (addr >= (unsigned long)__entry_text_start && - addr < (unsigned long)__entry_text_end); + addr < (unsigned long)__entry_text_end) || + is_in_entry_trampoline_section; } int __init arch_init_kprobes(void) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9b138a06c1a4..b854ebf5851b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -118,9 +118,11 @@ SECTIONS #ifdef CONFIG_X86_64 . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_start) = .; _entry_trampoline = .; *(.entry_trampoline) . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_end) = .; ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); #endif -- cgit v1.2.3