From 1d91c1d2c80cb70e2e553845e278b87a960c04da Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Feb 2018 13:20:42 -0800 Subject: nospec: Kill array_index_nospec_mask_check() There are multiple problems with the dynamic sanity checking in array_index_nospec_mask_check(): * It causes unnecessary overhead in the 32-bit case since integer sized @index values will no longer cause the check to be compiled away like in the 64-bit case. * In the 32-bit case it may trigger with user controllable input when the expectation is that should only trigger during development of new kernel enabling. * The macro reuses the input parameter in multiple locations which is broken if someone passes an expression like 'index++' to array_index_nospec(). Reported-by: Linus Torvalds Signed-off-by: Dan Williams Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/151881604278.17395.6605847763178076520.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- include/linux/nospec.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index fbc98e2c8228..d6701e34424f 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -29,26 +29,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, } #endif -/* - * Warn developers about inappropriate array_index_nospec() usage. - * - * Even if the CPU speculates past the WARN_ONCE branch, the - * sign bit of @index is taken into account when generating the - * mask. - * - * This warning is compiled out when the compiler can infer that - * @index and @size are less than LONG_MAX. - */ -#define array_index_mask_nospec_check(index, size) \ -({ \ - if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ - "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ - _mask = 0; \ - else \ - _mask = array_index_mask_nospec(index, size); \ - _mask; \ -}) - /* * array_index_nospec - sanitize an array index after a bounds check * @@ -67,7 +47,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, ({ \ typeof(index) _i = (index); \ typeof(size) _s = (size); \ - unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ + unsigned long _mask = array_index_mask_nospec(_i, _s); \ \ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ -- cgit v1.2.3 From b98c6a160a057d5686a8c54c79cc6c8c94a7d0c8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 16 Feb 2018 13:20:48 -0800 Subject: nospec: Allow index argument to have const-qualified type The last expression in a statement expression need not be a bare variable, quoting gcc docs The last thing in the compound statement should be an expression followed by a semicolon; the value of this subexpression serves as the value of the entire construct. and we already use that in e.g. the min/max macros which end with a ternary expression. This way, we can allow index to have const-qualified type, which will in some cases avoid the need for introducing a local copy of index of non-const qualified type. That, in turn, can prevent readers not familiar with the internals of array_index_nospec from wondering about the seemingly redundant extra variable, and I think that's worthwhile considering how confusing the whole _nospec business is. 
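For readers less used to GCC statement expressions, here is a minimal user-space sketch of the pattern (clamp_index() and its comparison-based mask are invented for illustration; the real array_index_nospec() gets its mask from array_index_mask_nospec()):

#include <stdio.h>

/* Toy model: the cast expression on the last line is the value of the
 * whole ({ ... }) construct, so _i is never assigned to after its
 * initialization and may therefore carry a const-qualified type. */
#define clamp_index(index, size)					\
({									\
	typeof(index) _i = (index);					\
	unsigned long _s = (size);					\
	unsigned long _mask = (_i < _s) ? ~0UL : 0UL;			\
									\
	(typeof(_i))(_i & _mask);					\
})

int main(void)
{
	const unsigned int idx = 3;	/* const-qualified, as the patch allows */

	printf("%u\n", clamp_index(idx, 8));	/* prints 3 */
	printf("%u\n", clamp_index(idx, 2));	/* prints 0 */
	return 0;
}

The point is that the final cast expression is the value of the whole construct, so no writable local copy of index is ever needed.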
The expression _i&_mask has type unsigned long (since that is the type of _mask, and the BUILD_BUG_ONs guarantee that _i will get promoted to that), so in order not to change the type of the whole expression, add a cast back to typeof(_i). Signed-off-by: Rasmus Villemoes Signed-off-by: Dan Williams Acked-by: Linus Torvalds Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/151881604837.17395.10812767547837568328.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- include/linux/nospec.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index d6701e34424f..172a19dc35ab 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -52,7 +52,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ \ - _i &= _mask; \ - _i; \ + (typeof(_i)) (_i & _mask); \ }) #endif /* _LINUX_NOSPEC_H */ -- cgit v1.2.3 From eb6174f6d1be16b19cfa43dac296bfed003ce1a6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Feb 2018 13:20:54 -0800 Subject: nospec: Include dependency The nospec.h header expects the per-architecture header file to optionally define array_index_mask_nospec(). Include that dependency to prevent inadvertent fallback to the default array_index_mask_nospec() implementation. The default implementation may not provide a full mitigation on architectures that perform data value speculation. Reported-by: Christian Borntraeger Signed-off-by: Dan Williams Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/151881605404.17395.1341935530792574707.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- include/linux/nospec.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index 172a19dc35ab..e791ebc65c9c 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -5,6 +5,7 @@ #ifndef _LINUX_NOSPEC_H #define _LINUX_NOSPEC_H +#include /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise -- cgit v1.2.3 From 3f1f576a195aa266813cbd4ca70291deb61e0129 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:38 +0100 Subject: x86/microcode: Propagate return value from updating functions ... so that callers can know when microcode was updated and act accordingly. 
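With a plain int return, "already up to date" and "just updated" both came back as 0, so the caller could not tell whether anything actually happened. A small user-space sketch of why the richer return type matters (apply() and the revision numbers are invented; the real functions are the per-vendor apply_microcode() implementations):

#include <stdio.h>

/* Same ordering as the patch: values up to UCODE_NFOUND are non-fatal,
 * anything above it is an error. */
enum ucode_state { UCODE_OK = 0, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR };

/* Made-up stand-in for an apply function: revision 0x20 is "newest". */
static enum ucode_state apply(unsigned int *cur_rev)
{
	const unsigned int new_rev = 0x20;

	if (*cur_rev >= new_rev)
		return UCODE_OK;	/* already current: not an error */

	*cur_rev = new_rev;
	return UCODE_UPDATED;		/* something changed: caller may act */
}

int main(void)
{
	unsigned int rev = 0x10;

	printf("first apply:  %d\n", apply(&rev));	/* 1 == UCODE_UPDATED */
	printf("second apply: %d\n", apply(&rev));	/* 0 == UCODE_OK */
	return 0;
}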
Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/microcode.h | 9 +++++++-- arch/x86/kernel/cpu/microcode/amd.c | 10 +++++----- arch/x86/kernel/cpu/microcode/core.c | 33 +++++++++++++++++---------------- arch/x86/kernel/cpu/microcode/intel.c | 10 +++++----- 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 55520cec8b27..7fb1047d61c7 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -37,7 +37,12 @@ struct cpu_signature { struct device; -enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; +enum ucode_state { + UCODE_OK = 0, + UCODE_UPDATED, + UCODE_NFOUND, + UCODE_ERROR, +}; struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, @@ -54,7 +59,7 @@ struct microcode_ops { * are being called. * See also the "Synchronization" section in microcode_core.c. */ - int (*apply_microcode) (int cpu); + enum ucode_state (*apply_microcode) (int cpu); int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); }; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 330b8462d426..a998e1a7d46f 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } -static int apply_microcode_amd(int cpu) +static enum ucode_state apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; @@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu) p = find_patch(cpu); if (!p) - return 0; + return UCODE_NFOUND; mc_amd = p->data; uci->mc = p->data; @@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu) if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; uci->cpu_sig.rev = rev; - return 0; + return UCODE_OK; } if (__apply_microcode_amd(mc_amd)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); - return -1; + return UCODE_ERROR; } pr_info("CPU%d: new patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); @@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu) uci->cpu_sig.rev = mc_amd->hdr.patch_id; c->microcode = mc_amd->hdr.patch_id; - return 0; + return UCODE_UPDATED; } static int install_equiv_cpu_table(const u8 *buf) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 319dd65f98a2..6fdaf7cf3182 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -374,7 +374,7 @@ static int collect_cpu_info(int cpu) } struct apply_microcode_ctx { - int err; + enum ucode_state err; }; static void apply_microcode_local(void *arg) @@ -489,31 +489,29 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; -static int reload_for_cpu(int cpu) +static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; enum ucode_state ustate; - int err = 0; if (!uci->valid) - return err; + return UCODE_OK; ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate == 
UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - return err; + if (ustate != UCODE_OK) + return ustate; + + return apply_microcode_on_target(cpu); } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + enum ucode_state tmp_ret = UCODE_OK; unsigned long val; + ssize_t ret = 0; int cpu; - ssize_t ret = 0, tmp_ret; ret = kstrtoul(buf, 0, &val); if (ret) @@ -526,15 +524,18 @@ static ssize_t reload_store(struct device *dev, mutex_lock(µcode_mutex); for_each_online_cpu(cpu) { tmp_ret = reload_for_cpu(cpu); - if (tmp_ret != 0) + if (tmp_ret > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - /* save retval of the first encountered reload error */ - if (!ret) - ret = tmp_ret; + /* set retval for the first encountered reload error */ + if (!ret) + ret = -EINVAL; + } } - if (!ret) + + if (!ret && tmp_ret == UCODE_UPDATED) perf_check_microcode(); + mutex_unlock(µcode_mutex); put_online_cpus(); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index a15db2b4e0d6..923054a6b760 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -772,7 +772,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static int apply_microcode_intel(int cpu) +static enum ucode_state apply_microcode_intel(int cpu) { struct microcode_intel *mc; struct ucode_cpu_info *uci; @@ -782,7 +782,7 @@ static int apply_microcode_intel(int cpu) /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) - return -1; + return UCODE_ERROR; uci = ucode_cpu_info + cpu; mc = uci->mc; @@ -790,7 +790,7 @@ static int apply_microcode_intel(int cpu) /* Look for a newer patch in our cache: */ mc = find_patch(uci); if (!mc) - return 0; + return UCODE_NFOUND; } /* write microcode via MSR 0x79 */ @@ -801,7 +801,7 @@ static int apply_microcode_intel(int cpu) if (rev != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", cpu, mc->hdr.rev); - return -1; + return UCODE_ERROR; } if (rev != prev_rev) { @@ -818,7 +818,7 @@ static int apply_microcode_intel(int cpu) uci->cpu_sig.rev = rev; c->microcode = rev; - return 0; + return UCODE_UPDATED; } static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, -- cgit v1.2.3 From 1008c52c09dcb23d93f8e0ea83a6246265d2cce0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:39 +0100 Subject: x86/CPU: Add a microcode loader callback Add a callback function which the microcode loader calls when microcode has been updated to a newer revision. Do the callback only when no error was encountered during loading. 
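A compressed user-space sketch of the calling convention this establishes (every function here is a stand-in; the real loop is reload_store(), which also holds microcode_mutex and the CPU hotplug lock):

#include <stdio.h>

enum ucode_state { UCODE_OK = 0, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR };

/* Stand-in for the arch callback that lives in cpu/common.c. */
static void microcode_check(void)
{
	printf("re-checking CPU state after a successful update\n");
}

/* Stand-in for reload_for_cpu(); pretend only CPU 0 had new microcode. */
static enum ucode_state reload_for_cpu(int cpu)
{
	return (cpu == 0) ? UCODE_UPDATED : UCODE_OK;
}

int main(void)
{
	int err = 0, do_callback = 0;

	for (int cpu = 0; cpu < 4; cpu++) {
		enum ucode_state ret = reload_for_cpu(cpu);

		if (ret > UCODE_NFOUND)		/* any error ... */
			err = 1;
		if (ret == UCODE_UPDATED)	/* ... or any real update */
			do_callback = 1;
	}

	if (!err && do_callback)	/* the callback runs once, on success only */
		microcode_check();

	return 0;
}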
Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-3-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 10 ++++++++++ arch/x86/kernel/cpu/microcode/core.c | 8 ++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1bd9ed87606f..b0ccd4847a58 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -977,4 +977,5 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); +void microcode_check(void); #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 824aee0117bb..84f1cd88608b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1749,3 +1749,13 @@ static int __init init_cpu_syscore(void) return 0; } core_initcall(init_cpu_syscore); + +/* + * The microcode loader calls this upon late microcode load to recheck features, + * only when microcode has been updated. Caller holds microcode_mutex and CPU + * hotplug lock. + */ +void microcode_check(void) +{ + perf_check_microcode(); +} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6fdaf7cf3182..aa1b9a422f2b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -509,6 +509,7 @@ static ssize_t reload_store(struct device *dev, const char *buf, size_t size) { enum ucode_state tmp_ret = UCODE_OK; + bool do_callback = false; unsigned long val; ssize_t ret = 0; int cpu; @@ -531,10 +532,13 @@ static ssize_t reload_store(struct device *dev, if (!ret) ret = -EINVAL; } + + if (tmp_ret == UCODE_UPDATED) + do_callback = true; } - if (!ret && tmp_ret == UCODE_UPDATED) - perf_check_microcode(); + if (!ret && do_callback) + microcode_check(); mutex_unlock(µcode_mutex); put_online_cpus(); -- cgit v1.2.3 From 42ca8082e260dcfd8afa2afa6ec1940b9d41724c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:40 +0100 Subject: x86/CPU: Check CPU feature bits after microcode upgrade With some microcode upgrades, new CPUID features can become visible on the CPU. Check what the kernel has mirrored now and issue a warning hinting at possible things the user/admin can do to make use of the newly visible features. 
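In effect the check boils down to "snapshot the capability words, re-read them after the update, memcmp the two". A stand-alone sketch of that comparison (the word count and the flipped bit are arbitrary, not the kernel's actual layout):

#include <stdio.h>
#include <string.h>

#define NCAP_WORDS 19	/* illustrative size only */

/* Toy model of microcode_check(): compare freshly read capability
 * words against the snapshot taken at boot and warn on any change. */
static void check_features(const unsigned int boot_caps[NCAP_WORDS],
			   const unsigned int new_caps[NCAP_WORDS])
{
	if (!memcmp(boot_caps, new_caps, NCAP_WORDS * sizeof(boot_caps[0])))
		return;		/* nothing new became visible */

	printf("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
	printf("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
}

int main(void)
{
	unsigned int boot_caps[NCAP_WORDS] = { 0 };
	unsigned int new_caps[NCAP_WORDS] = { 0 };

	new_caps[7] |= 1u << 26;	/* pretend the update exposed a new bit */
	check_features(boot_caps, new_caps);
	return 0;
}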
Originally-by: Ashok Raj Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-4-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 84f1cd88608b..348cf4821240 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1757,5 +1757,25 @@ core_initcall(init_cpu_syscore); */ void microcode_check(void) { + struct cpuinfo_x86 info; + perf_check_microcode(); + + /* Reload CPUID max function as it might've changed. */ + info.cpuid_level = cpuid_eax(0); + + /* + * Copy all capability leafs to pick up the synthetic ones so that + * memcmp() below doesn't fail on that. The ones coming from CPUID will + * get overwritten in get_cpu_cap(). + */ + memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); + + get_cpu_cap(&info); + + if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) + return; + + pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); + pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); } -- cgit v1.2.3 From 9e809d15d6b692fa061d74be7aaab1c79f6784b8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 14 Feb 2018 18:59:23 +0100 Subject: x86/entry: Reduce the code footprint of the 'idtentry' macro Play a little trick in the generic PUSH_AND_CLEAR_REGS macro to insert the GP registers "above" the original return address. This allows us to (re-)insert the macro in error_entry() and paranoid_entry() and to remove it from the idtentry macro. This reduces the static footprint significantly: text data bss dec hex filename 24307 0 0 24307 5ef3 entry_64.o-orig 20987 0 0 20987 51fb entry_64.o Co-developed-by: Linus Torvalds Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180214175924.23065-2-linux@dominikbrodowski.net [ Small tweaks to comments. ] Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 11 ++++++++++- arch/x86/entry/entry_64.S | 18 ++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index dce7092ab24a..196b6103edf6 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 /* * Push registers and sanitize registers of values that a * speculation attack might otherwise want to exploit. The @@ -105,8 +105,14 @@ For 32-bit we have the following conventions - kernel is built with * could be put to use in a speculative execution gadget. 
* Interleave XOR with PUSH for better uop scheduling: */ + .if \save_ret + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ + movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ + .else pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ + .endif pushq \rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq \rax /* pt_regs->ax */ @@ -131,6 +137,9 @@ For 32-bit we have the following conventions - kernel is built with pushq %r15 /* pt_regs->r15 */ xorq %r15, %r15 /* nospec r15*/ UNWIND_HINT_REGS + .if \save_ret + pushq %rsi /* return address on top of stack */ + .endif .endm .macro POP_REGS pop_rdi=1 skip_r11rcx=0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8971bd64d515..77edc2390868 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -875,12 +875,8 @@ ENTRY(\sym) pushq $-1 /* ORIG_RAX: no syscall to restart */ .endif - /* Save all registers in pt_regs */ - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER - .if \paranoid < 2 - testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ + testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */ jnz .Lfrom_usermode_switch_stack_\@ .endif @@ -1130,13 +1126,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* - * Switch gs if needed. + * Save all registers in pt_regs, and switch gs if needed. * Use slow, but surefire "are we in kernel?" check. * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) UNWIND_HINT_FUNC cld + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 movl $1, %ebx movl $MSR_GS_BASE, %ecx rdmsr @@ -1181,12 +1179,14 @@ ENTRY(paranoid_exit) END(paranoid_exit) /* - * Switch gs if needed. + * Save all registers in pt_regs, and switch GS if needed. * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) - UNWIND_HINT_REGS offset=8 + UNWIND_HINT_FUNC cld + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1577,8 +1577,6 @@ end_repeat_nmi: * frame to point back to repeat_nmi. */ pushq $-1 /* ORIG_RAX: no syscall to restart */ - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER /* * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit -- cgit v1.2.3 From ced5d0bf603fa0baee8ea889e1d70971fd210894 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 14 Feb 2018 18:59:24 +0100 Subject: x86/entry/64: Use 'xorl' for faster register clearing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some x86 CPU microarchitectures using 'xorq' to clear general-purpose registers is slower than 'xorl'. As 'xorl' is sufficient to clear all 64 bits of these registers due to zero-extension [*], switch the x86 64-bit entry code to use 'xorl'. No change in functionality and no change in code size. [*] According to Intel 64 and IA-32 Architecture Software Developer's Manual, section 3.4.1.1, the result of 32-bit operands are "zero- extended to a 64-bit result in the destination general-purpose register." The AMD64 Architecture Programmer’s Manual Volume 3, Appendix B.1, describes the same behaviour. 
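The zero-extension rule is easy to verify from user space. A small probe (x86-64 with GNU inline asm assumed; not kernel code) shows a 32-bit xorl wiping the whole 64-bit register:

#include <stdio.h>

int main(void)
{
	unsigned long val = 0xdeadbeefcafef00dUL;

	/* A 32-bit XOR of a register with itself clears the full 64-bit
	 * register, because the 32-bit result is zero-extended. */
	asm volatile("movq	%1, %%rax\n\t"
		     "xorl	%%eax, %%eax\n\t"	/* same effect as xorq %rax, %rax */
		     "movq	%%rax, %0\n\t"
		     : "=r" (val)
		     : "r" (val)
		     : "rax");

	printf("%#lx\n", val);	/* prints 0 */
	return 0;
}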
Suggested-by: Denys Vlasenko Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180214175924.23065-3-linux@dominikbrodowski.net [ Improved on the changelog a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 16 ++++++------ arch/x86/entry/entry_64_compat.S | 54 ++++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 196b6103edf6..5d10b7a85cad 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with pushq %rcx /* pt_regs->cx */ pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq %r10 /* pt_regs->r10 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ - xorq %r11, %r11 /* nospec r11*/ + xorl %r11d, %r11d /* nospec r11*/ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx*/ pushq %rbp /* pt_regs->rbp */ xorl %ebp, %ebp /* nospec rbp*/ pushq %r12 /* pt_regs->r12 */ - xorq %r12, %r12 /* nospec r12*/ + xorl %r12d, %r12d /* nospec r12*/ pushq %r13 /* pt_regs->r13 */ - xorq %r13, %r13 /* nospec r13*/ + xorl %r13d, %r13d /* nospec r13*/ pushq %r14 /* pt_regs->r14 */ - xorq %r14, %r14 /* nospec r14*/ + xorl %r14d, %r14d /* nospec r14*/ pushq %r15 /* pt_regs->r15 */ - xorq %r15, %r15 /* nospec r15*/ + xorl %r15d, %r15d /* nospec r15*/ UNWIND_HINT_REGS .if \save_ret pushq %rsi /* return address on top of stack */ diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index fd65e016e413..364ea4a207be 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq %r11, %r11 /* nospec r11 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ - xorq %r12, %r12 /* nospec r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ - xorq %r13, %r13 /* nospec r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ - xorq %r14, %r14 /* nospec r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ - xorq %r15, %r15 /* nospec r15 */ + xorl %r15d, %r15d /* nospec r15 */ cld /* @@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 
*/ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq %r11, %r11 /* nospec r11 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ - xorq %r12, %r12 /* nospec r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ - xorq %r13, %r13 /* nospec r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ - xorq %r14, %r14 /* nospec r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ - xorq %r15, %r15 /* nospec r15 */ + xorl %r15d, %r15d /* nospec r15 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -298,9 +298,9 @@ sysret32_from_system_call: */ SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r10 + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d swapgs sysretl END(entry_SYSCALL_compat) @@ -358,25 +358,25 @@ ENTRY(entry_INT80_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq %r11, %r11 /* nospec r11 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp */ xorl %ebp, %ebp /* nospec rbp */ pushq %r12 /* pt_regs->r12 */ - xorq %r12, %r12 /* nospec r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq %r13 /* pt_regs->r13 */ - xorq %r13, %r13 /* nospec r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq %r14 /* pt_regs->r14 */ - xorq %r14, %r14 /* nospec r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq %r15 /* pt_regs->r15 */ - xorq %r15, %r15 /* nospec r15 */ + xorl %r15d, %r15d /* nospec r15 */ cld /* -- cgit v1.2.3 From 842cef9113c2120f74f645111ded1e020193d84c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 07:48:11 -0700 Subject: x86/mm: Fix {pmd,pud}_{set,clear}_flags() Just like pte_{set,clear}_flags() their PMD and PUD counterparts should not do any address translation. This was outright wrong under Xen (causing a dead boot with no useful output on "suitable" systems), and produced needlessly more complicated code (even if just slightly) when paravirt was enabled. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/5A8AF1BB02000078001A91C3@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 8 ++++---- arch/x86/include/asm/pgtable_types.h | 10 ++++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 63c2552b6b65..b444d83cfc95 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); - return __pmd(v | set); + return native_make_pmd(v | set); } static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) { pmdval_t v = native_pmd_val(pmd); - return __pmd(v & ~clear); + return native_make_pmd(v & ~clear); } static inline pmd_t pmd_mkold(pmd_t pmd) @@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); - return __pud(v | set); + return native_make_pud(v | set); } static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) { pudval_t v = native_pud_val(pud); - return __pud(v & ~clear); + return native_make_pud(v & ~clear); } static inline pud_t pud_mkold(pud_t pud) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 3696398a9475..246f15b4e64c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -323,6 +323,11 @@ static inline pudval_t native_pud_val(pud_t pud) #else #include +static inline pud_t native_make_pud(pudval_t val) +{ + return (pud_t) { .p4d.pgd = native_make_pgd(val) }; +} + static inline pudval_t native_pud_val(pud_t pud) { return native_pgd_val(pud.p4d.pgd); @@ -344,6 +349,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) #else #include +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) }; +} + static inline pmdval_t native_pmd_val(pmd_t pmd) { return native_pgd_val(pmd.pud.p4d.pgd); -- cgit v1.2.3 From 3b3a9268bba62b35a29bafe0931715b1725fdf26 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 19 Feb 2018 18:50:39 +0100 Subject: x86/mm: Remove stale comment about KMEMCHECK This comment referred to a conditional call to kmemcheck_hide() that was here until commit 4950276672fc ("kmemcheck: remove annotations"). Now that kmemcheck has been removed, it doesn't make sense anymore. Signed-off-by: Jann Horn Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180219175039.253089-1-jannh@google.com Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 800de815519c..c88573d90f3e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, tsk = current; mm = tsk->mm; - /* - * Detect and handle instructions that would cause a page fault for - * both a tracked kernel page and a userspace page. 
- */ prefetchw(&mm->mmap_sem); if (unlikely(kmmio_fault(regs, address))) -- cgit v1.2.3 From 700b7c5409c3e9da279fbea78cf28a78fbc176cd Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 07:49:12 -0700 Subject: x86/asm: Improve how GEN_*_SUFFIXED_RMWcc() specify clobbers Commit: df3405245a ("x86/asm: Add suffix macro for GEN_*_RMWcc()") ... introduced "suffix" RMWcc operations, adding bogus clobber specifiers: For one, on x86 there's no point explicitly clobbering "cc". In fact, with GCC properly fixed, this results in an overlap being detected by the compiler between outputs and clobbers. Furthermore it seems bad practice to me to have clobber specification and use of the clobbered register(s) disconnected - it should rather be at the invocation place of that GEN_{UN,BIN}ARY_SUFFIXED_RMWcc() macros that the clobber is specified which this particular invocation needs. Drop the "cc" clobber altogether and move the "cx" one to refcount.h. Signed-off-by: Jan Beulich Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5A8AF1F802000078001A91E1@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/refcount.h | 4 ++-- arch/x86/include/asm/rmwcc.h | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index 4e44250e7d0d..d65171120e90 100644 --- a/arch/x86/include/asm/refcount.h +++ b/arch/x86/include/asm/refcount.h @@ -67,13 +67,13 @@ static __always_inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) { GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "er", i, "%0", e); + r->refs.counter, "er", i, "%0", e, "cx"); } static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) { GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "%0", e); + r->refs.counter, "%0", e, "cx"); } static __always_inline __must_check diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index f91c365e57c3..4914a3e7c803 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,8 +2,7 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -#define __CLOBBERS_MEM "memory" -#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx" +#define __CLOBBERS_MEM(clb...) "memory", ## clb #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) @@ -40,18 +39,19 @@ do { \ #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ - __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM) + __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) -#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \ +#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\ __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM_CC_CX) + __CLOBBERS_MEM(clobbers)) #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \ - __CLOBBERS_MEM, vcon (val)) + __CLOBBERS_MEM(), vcon (val)) -#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \ +#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \ + clobbers...) 
\ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM_CC_CX, vcon (val)) + __CLOBBERS_MEM(clobbers), vcon (val)) #endif /* _ASM_X86_RMWcc */ -- cgit v1.2.3 From 6262b6e78ce5ba62be47774ca80f5b0a6f0eb428 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 07:50:23 -0700 Subject: x86/IO-APIC: Avoid warning in 32-bit builds Constants wider than 32 bits should be tagged with ULL. Signed-off-by: Jan Beulich Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5A8AF23F02000078001A91E5@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8ad2e410974f..7c5538769f7e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void) do { rep_nop(); now = rdtsc(); - } while ((now - start) < 40000000000UL / HZ && + } while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end)); } -- cgit v1.2.3 From f2f18b16c779978ece4a04f304a92ff9ac8fbce5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 07:52:10 -0700 Subject: x86/LDT: Avoid warning in 32-bit builds with older gcc BUG() doesn't always imply "no return", and hence should be followed by a return statement even if that's obviously (to a human) unreachable. Signed-off-by: Jan Beulich Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5A8AF2AA02000078001A91E9@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index c931b88982a0..1de72ce514cd 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot) return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); #else BUG(); + return (void *)fix_to_virt(FIX_HOLE); #endif } -- cgit v1.2.3 From 8554004a0231dedf44d4d62147fb3d6a6db489aa Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 08:06:14 -0700 Subject: x86-64/realmode: Add instruction suffix Omitting suffixes from instructions in AT&T mode is bad practice when operand size cannot be determined by the assembler from register operands, and is likely going to be warned about by upstream GAS in the future (mine does already). Add the single missing suffix here. Signed-off-by: Jan Beulich Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5A8AF5F602000078001A9230@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/realmode/rm/trampoline_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index de53bd15df5a..24bb7598774e 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -102,7 +102,7 @@ ENTRY(startup_32) * don't we'll eventually crash trying to execute encrypted * instructions. 
*/ - bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags + btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags jnc .Ldone movl $MSR_K8_SYSCFG, %ecx rdmsr -- cgit v1.2.3 From d1c99108af3c5992640aa2afa7d2e88c3775c06e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 19 Feb 2018 10:50:56 +0000 Subject: Revert "x86/retpoline: Simplify vmexit_fill_RSB()" This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting the RSB filling out of line and calling it, we waste one RSB slot for returning from the function itself, which means one fewer actual function call we can make if we're doing the Skylake abomination of call-depth counting. It also changed the number of RSB stuffings we do on vmexit from 32, which was correct, to 16. Let's just stop with the bikeshedding; it didn't actually *fix* anything anyway. Signed-off-by: David Woodhouse Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 3 +- arch/x86/entry/entry_64.S | 3 +- arch/x86/include/asm/asm-prototypes.h | 3 -- arch/x86/include/asm/nospec-branch.h | 70 +++++++++++++++++++++++++++++++---- arch/x86/lib/Makefile | 1 - arch/x86/lib/retpoline.S | 56 ---------------------------- 6 files changed, 65 insertions(+), 71 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 16c2c022540d..6ad064c8cf35 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,8 +252,7 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - /* Clobbers %ebx */ - FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 77edc2390868..7a53879ec689 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -364,8 +364,7 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - /* Clobbers %rbx */ - FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 4d111616524b..1908214b9125 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,7 +38,4 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) -asmlinkage void __fill_rsb(void); -asmlinkage void __clear_rsb(void); - #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 76b058533e47..af34b1e8069a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -8,6 +8,50 @@ #include #include +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. 
Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include ") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr, sp) \ + mov $(nr/2), reg; \ +771: \ + call 772f; \ +773: /* speculation trap */ \ + pause; \ + lfence; \ + jmp 773b; \ +772: \ + call 774f; \ +775: /* speculation trap */ \ + pause; \ + lfence; \ + jmp 775b; \ +774: \ + dec reg; \ + jnz 771b; \ + add $(BITS_PER_LONG/8) * nr, sp; + #ifdef __ASSEMBLY__ /* @@ -78,10 +122,17 @@ #endif .endm -/* This clobbers the BX register */ -.macro FILL_RETURN_BUFFER nr:req ftr:req + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req #ifdef CONFIG_RETPOLINE - ALTERNATIVE "", "call __clear_rsb", \ftr + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ + \ftr +.Lskip_rsb_\@: #endif .endm @@ -156,10 +207,15 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - alternative_input("", - "call __fill_rsb", - X86_FEATURE_RETPOLINE, - ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); + unsigned long loops; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" + : "=r" (loops), ASM_CALL_CONSTRAINT + : : "memory" ); #endif } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 91e9700cc6dc..25a972c61b0a 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RETPOLINE) += retpoline.o -OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 480edc3a5e03..c909961e678a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,7 +7,6 @@ #include #include #include -#include .macro THUNK reg .section .text.__x86.indirect_thunk @@ -47,58 +46,3 @@ GENERATE_THUNK(r13) GENERATE_THUNK(r14) GENERATE_THUNK(r15) #endif - -/* - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. 
- * - * Google experimented with loop-unrolling and this turned out to be - * the optimal version - two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. - */ -.macro STUFF_RSB nr:req sp:req - mov $(\nr / 2), %_ASM_BX - .align 16 -771: - call 772f -773: /* speculation trap */ - pause - lfence - jmp 773b - .align 16 -772: - call 774f -775: /* speculation trap */ - pause - lfence - jmp 775b - .align 16 -774: - dec %_ASM_BX - jnz 771b - add $((BITS_PER_LONG/8) * \nr), \sp -.endm - -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ - -ENTRY(__fill_rsb) - STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP - ret -END(__fill_rsb) -EXPORT_SYMBOL_GPL(__fill_rsb) - -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ - -ENTRY(__clear_rsb) - STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP - ret -END(__clear_rsb) -EXPORT_SYMBOL_GPL(__clear_rsb) -- cgit v1.2.3 From dd84441a797150dcc49298ec95c459a8891d8bb1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 19 Feb 2018 10:50:54 +0000 Subject: x86/speculation: Use IBRS if available before calling into firmware Retpoline means the kernel is safe because it has no indirect branches. But firmware isn't, so use IBRS for firmware calls if it's available. Block preemption while IBRS is set, although in practice the call sites already had to be doing that. Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware code, from an NMI handler. I don't want to touch that with a bargepole. Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apm.h | 6 ++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/efi.h | 17 ++++++++++++++-- arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++--------- arch/x86/kernel/cpu/bugs.c | 12 ++++++++++- 5 files changed, 63 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 4d4015ddcf26..c356098b6fb9 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,6 +7,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H +#include + #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. 
*/ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0dfe4d3f74e2..f41079da38c5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -213,6 +213,7 @@ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 85f6ccb80b91..a399c1ebf6f0 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, @@ -36,8 +37,18 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); -#define arch_efi_call_virt_setup() kernel_fpu_begin() -#define arch_efi_call_virt_teardown() kernel_fpu_end() +#define arch_efi_call_virt_setup() \ +({ \ + kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ + firmware_restrict_branch_speculation_end(); \ + kernel_fpu_end(); \ +}) + /* * Wrap all the virtual calls in a way that forces the parameters on the stack. @@ -73,6 +84,7 @@ struct efi_scratch { efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ \ if (efi_scratch.use_pgd) { \ efi_scratch.prev_cr3 = __read_cr3(); \ @@ -91,6 +103,7 @@ struct efi_scratch { __flush_tlb_all(); \ } \ \ + firmware_restrict_branch_speculation_end(); \ __kernel_fpu_end(); \ preempt_enable(); \ }) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index af34b1e8069a..ec90c3228991 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -219,17 +219,38 @@ static inline void vmexit_fill_RSB(void) #endif } +#define alternative_msr_write(_msr, _val, _feature) \ + asm volatile(ALTERNATIVE("", \ + "movl %[msr], %%ecx\n\t" \ + "movl %[val], %%eax\n\t" \ + "movl $0, %%edx\n\t" \ + "wrmsr", \ + _feature) \ + : : [msr] "i" (_msr), [val] "i" (_val) \ + : "eax", "ecx", "edx", "memory") + static inline void indirect_branch_prediction_barrier(void) { - asm volatile(ALTERNATIVE("", - "movl %[msr], %%ecx\n\t" - "movl %[val], %%eax\n\t" - "movl $0, %%edx\n\t" - "wrmsr", - X86_FEATURE_USE_IBPB) - : : [msr] "i" (MSR_IA32_PRED_CMD), - [val] "i" (PRED_CMD_IBPB) - : "eax", "ecx", "edx", "memory"); + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, + X86_FEATURE_USE_IBPB); +} + +/* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. 
+ */ +static inline void firmware_restrict_branch_speculation_start(void) +{ + preempt_disable(); + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, + X86_FEATURE_USE_IBRS_FW); +} + +static inline void firmware_restrict_branch_speculation_end(void) +{ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, + X86_FEATURE_USE_IBRS_FW); + preempt_enable(); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d71c8b54b696..bfca937bdcc3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -300,6 +300,15 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } + + /* + * Retpoline means the kernel is safe because it has no indirect + * branches. But firmware isn't, so use IBRS to protect that. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } } #undef pr_fmt @@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", spectre_v2_module_string()); } #endif -- cgit v1.2.3 From 87358710c1fb4f1bf96bbe2349975ff9953fc9b2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 19 Feb 2018 10:50:57 +0000 Subject: x86/retpoline: Support retpoline builds with Clang Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 5 ++++- include/linux/compiler-clang.h | 5 +++++ include/linux/compiler-gcc.h | 4 ++++ include/linux/init.h | 8 ++++---- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index fad55160dcb9..dbc7d0ed2eaa 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -232,7 +232,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE - RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register + RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk + + RETPOLINE_CFLAGS += $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) ifneq ($(RETPOLINE_CFLAGS),) KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE endif diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index d02a4df3f473..d3f264a5b04d 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -27,3 +27,8 @@ #if __has_feature(address_sanitizer) #define __SANITIZE_ADDRESS__ #endif + +/* Clang doesn't have a way to turn it off per-function, yet. 
*/ +#ifdef __noretpoline +#undef __noretpoline +#endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 73bc63e0a1c4..673fbf904fe5 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -93,6 +93,10 @@ #define __weak __attribute__((weak)) #define __alias(symbol) __attribute__((alias(#symbol))) +#ifdef RETPOLINE +#define __noretpoline __attribute__((indirect_branch("keep"))) +#endif + /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without diff --git a/include/linux/init.h b/include/linux/init.h index 506a98151131..bc27cf03c41e 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -6,10 +6,10 @@ #include /* Built-in __init functions needn't be compiled with retpoline */ -#if defined(RETPOLINE) && !defined(MODULE) -#define __noretpoline __attribute__((indirect_branch("keep"))) +#if defined(__noretpoline) && !defined(MODULE) +#define __noinitretpoline __noretpoline #else -#define __noretpoline +#define __noinitretpoline #endif /* These macros are used to mark some functions or @@ -47,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __latent_entropy __noretpoline +#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) -- cgit v1.2.3 From 9e0e3c5130e949c389caabc8033e9799b129e429 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jan 2018 22:34:34 +0100 Subject: x86/speculation, objtool: Annotate indirect calls/jumps for objtool Annotate the indirect calls/jumps in the CALL_NOSPEC/JUMP_NOSPEC alternatives. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Woodhouse Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ec90c3228991..1aad6c79a597 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -67,6 +67,18 @@ .popsection .endm +/* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +.macro ANNOTATE_RETPOLINE_SAFE + .Lannotate_\@: + .pushsection .discard.retpoline_safe + _ASM_PTR .Lannotate_\@ + .popsection +.endm + /* * These are the bare retpoline primitives for indirect jmp and call. 
* Do not use these directly; they only exist to make the ALTERNATIVE @@ -103,9 +115,9 @@ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(jmp *\reg), \ + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD #else jmp *\reg #endif @@ -114,9 +126,9 @@ .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(call *\reg), \ + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD #else call *\reg #endif @@ -144,6 +156,12 @@ ".long 999b - .\n\t" \ ".popsection\n\t" +#define ANNOTATE_RETPOLINE_SAFE \ + "999:\n\t" \ + ".pushsection .discard.retpoline_safe\n\t" \ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + #if defined(CONFIG_X86_64) && defined(RETPOLINE) /* @@ -153,6 +171,7 @@ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE( \ + ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ "call __x86_indirect_thunk_%V[thunk_target]\n", \ X86_FEATURE_RETPOLINE) -- cgit v1.2.3 From 3010a0663fd949d122eca0561b06b0a9453f7866 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jan 2018 16:58:11 +0100 Subject: x86/paravirt, objtool: Annotate indirect calls Paravirt emits indirect calls which get flagged by objtool retpoline checks, annotate it away because all these indirect calls will be patched out before we start userspace. This patching happens through alternative_instructions() -> apply_paravirt() -> pv_init_ops.patch() which will eventually end up in paravirt_patch_default(). This function _will_ write direct alternatives. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Woodhouse Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 17 +++++++++++++---- arch/x86/include/asm/paravirt_types.h | 5 ++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 554841fab717..c83a2f418cea 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -7,6 +7,7 @@ #ifdef CONFIG_PARAVIRT #include #include +#include #include @@ -879,23 +880,27 @@ extern void default_banner(void); #define INTERRUPT_RETURN \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #ifdef CONFIG_X86_32 #define GET_CR0_INTO_EAX \ push %ecx; push %edx; \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ pop %edx; pop %ecx #else /* !CONFIG_X86_32 */ @@ -917,21 +922,25 @@ extern void default_banner(void); */ #define SWAPGS \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ ) #define GET_CR2_INTO_RAX \ - call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);) #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index f624f1f10316..180bc0bff0fb 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -43,6 +43,7 @@ #include #include #include +#include struct page; struct thread_struct; @@ -392,7 +393,9 @@ int paravirt_disable_iospace(void); * offset into the paravirt_patch_template structure, and can therefore be * freely converted back into a structure offset. 
*/ -#define PARAVIRT_CALL "call *%c[paravirt_opptr];" +#define PARAVIRT_CALL \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%c[paravirt_opptr];" /* * These macros are intended to wrap calls through one of the paravirt -- cgit v1.2.3 From bd89004f6305cbf7352238f61da093207ee518d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jan 2018 10:38:09 +0100 Subject: x86/boot, objtool: Annotate indirect jump in secondary_startup_64() The objtool retpoline validation found this indirect jump. Seeing how it's on CPU bringup before we run userspace it should be safe, annotate it. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Woodhouse Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 04a625f0fcda..0f545b3cf926 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -23,6 +23,7 @@ #include #include "../entry/calling.h" #include +#include #ifdef CONFIG_PARAVIRT #include @@ -134,6 +135,7 @@ ENTRY(secondary_startup_64) /* Ensure I am executing from virtual addresses */ movq $1f, %rax + ANNOTATE_RETPOLINE_SAFE jmp *%rax 1: UNWIND_HINT_EMPTY -- cgit v1.2.3 From 531bb52a869a9c6e08c8d17ba955fcbfc18037ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jan 2018 16:18:50 +0100 Subject: x86/mm/sme, objtool: Annotate indirect call in sme_encrypt_execute() This is boot code and thus Spectre-safe: we run this _way_ before userspace comes along to have a chance to poison our branch predictor. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tom Lendacky Signed-off-by: Ingo Molnar --- arch/x86/mm/mem_encrypt_boot.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 01f682cf77a8..40a6085063d6 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -15,6 +15,7 @@ #include #include #include +#include .text .code64 @@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute) movq %rax, %r8 /* Workarea encryption routine */ addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ + ANNOTATE_RETPOLINE_SAFE call *%rax /* Call the encryption routine */ pop %r12 -- cgit v1.2.3 From 43a4525f80534530077683f6472d8971646b0ace Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jan 2018 17:16:32 +0100 Subject: objtool: Use existing global variables for options Use the existing global variables instead of passing them around and creating duplicate global variables. 
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/objtool/builtin-check.c | 2 +- tools/objtool/builtin-orc.c | 6 +----- tools/objtool/builtin.h | 5 +++++ tools/objtool/check.c | 5 ++--- tools/objtool/check.h | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 57254f5b2779..8d0986d2a803 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -53,5 +53,5 @@ int cmd_check(int argc, const char **argv) objname = argv[0]; - return check(objname, no_fp, no_unreachable, false); + return check(objname, false); } diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 91e8e19ff5e0..77ea2b97117d 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -25,7 +25,6 @@ */ #include -#include #include "builtin.h" #include "check.h" @@ -36,9 +35,6 @@ static const char *orc_usage[] = { NULL, }; -extern const struct option check_options[]; -extern bool no_fp, no_unreachable; - int cmd_orc(int argc, const char **argv) { const char *objname; @@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv) objname = argv[0]; - return check(objname, no_fp, no_unreachable, true); + return check(objname, true); } if (!strcmp(argv[0], "dump")) { diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index dd526067fed5..f166ea1b1da2 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -17,6 +17,11 @@ #ifndef _BUILTIN_H #define _BUILTIN_H +#include + +extern const struct option check_options[]; +extern bool no_fp, no_unreachable; + extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a8cb69a26576..ab6f0de7f90d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -18,6 +18,7 @@ #include #include +#include "builtin.h" #include "check.h" #include "elf.h" #include "special.h" @@ -33,7 +34,6 @@ struct alternative { }; const char *objname; -static bool no_fp; struct cfi_state initial_func_cfi; struct instruction *find_insn(struct objtool_file *file, @@ -2022,13 +2022,12 @@ static void cleanup(struct objtool_file *file) elf_close(file->elf); } -int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc) +int check(const char *_objname, bool orc) { struct objtool_file file; int ret, warnings = 0; objname = _objname; - no_fp = _no_fp; file.elf = elf_open(objname, orc ? 
O_RDWR : O_RDONLY); if (!file.elf) diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 23a1d065cae1..936255ba23db 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -63,7 +63,7 @@ struct objtool_file { bool ignore_unreachables, c_file, hints; }; -int check(const char *objname, bool no_fp, bool no_unreachable, bool orc); +int check(const char *objname, bool orc); struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); -- cgit v1.2.3 From b5bc2231b8ad4387c9641f235ca0ad8cd300b6df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jan 2018 10:24:06 +0100 Subject: objtool: Add retpoline validation David requested a objtool validation pass for CONFIG_RETPOLINE=y enabled builds, where it validates no unannotated indirect jumps or calls are left. Add an additional .discard.retpoline_safe section to allow annotating the few indirect sites that are required and safe. Requested-by: David Woodhouse Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Woodhouse Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- scripts/Makefile.build | 4 ++ tools/objtool/builtin-check.c | 3 +- tools/objtool/builtin.h | 2 +- tools/objtool/check.c | 86 ++++++++++++++++++++++++++++++++++++++++++- tools/objtool/check.h | 1 + 5 files changed, 93 insertions(+), 3 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 47cddf32aeba..53d862aee335 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -264,6 +264,10 @@ objtool_args += --no-unreachable else objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) endif +ifdef CONFIG_RETPOLINE + objtool_args += --retpoline +endif + ifdef CONFIG_MODVERSIONS objtool_o = $(@D)/.tmp_$(@F) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 8d0986d2a803..dd6bcd6097f5 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -29,7 +29,7 @@ #include "builtin.h" #include "check.h" -bool no_fp, no_unreachable; +bool no_fp, no_unreachable, retpoline; static const char * const check_usage[] = { "objtool check [] file.o", @@ -39,6 +39,7 @@ static const char * const check_usage[] = { const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), + OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), OPT_END(), }; diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index f166ea1b1da2..7b6addfce045 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -20,7 +20,7 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable; +extern bool no_fp, no_unreachable, retpoline; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ab6f0de7f90d..5e5db7b4d77b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file) * disguise, so convert them accordingly. 
*/ insn->type = INSN_JUMP_DYNAMIC; + insn->retpoline_safe = true; continue; } else { /* sibling call */ @@ -548,7 +549,8 @@ static int add_call_destinations(struct objtool_file *file) if (!insn->call_dest && !insn->ignore) { WARN_FUNC("unsupported intra-function call", insn->sec, insn->offset); - WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); + if (retpoline) + WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); return -1; } @@ -1108,6 +1110,54 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } +static int read_retpoline_hints(struct objtool_file *file) +{ + struct section *sec, *relasec; + struct instruction *insn; + struct rela *rela; + int i; + + sec = find_section_by_name(file->elf, ".discard.retpoline_safe"); + if (!sec) + return 0; + + relasec = sec->rela; + if (!relasec) { + WARN("missing .rela.discard.retpoline_safe section"); + return -1; + } + + if (sec->len % sizeof(unsigned long)) { + WARN("retpoline_safe size mismatch: %d %ld", sec->len, sizeof(unsigned long)); + return -1; + } + + for (i = 0; i < sec->len / sizeof(unsigned long); i++) { + rela = find_rela_by_dest(sec, i * sizeof(unsigned long)); + if (!rela) { + WARN("can't find rela for retpoline_safe[%d]", i); + return -1; + } + + insn = find_insn(file, rela->sym->sec, rela->addend); + if (!insn) { + WARN("can't find insn for retpoline_safe[%d]", i); + return -1; + } + + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC) { + WARN_FUNC("retpoline_safe hint not a indirect jump/call", + insn->sec, insn->offset); + return -1; + } + + insn->retpoline_safe = true; + } + + return 0; +} + static int decode_sections(struct objtool_file *file) { int ret; @@ -1146,6 +1196,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_retpoline_hints(file); + if (ret) + return ret; + return 0; } @@ -1891,6 +1945,29 @@ static int validate_unwind_hints(struct objtool_file *file) return warnings; } +static int validate_retpoline(struct objtool_file *file) +{ + struct instruction *insn; + int warnings = 0; + + for_each_insn(file, insn) { + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC) + continue; + + if (insn->retpoline_safe) + continue; + + WARN_FUNC("indirect %s found in RETPOLINE build", + insn->sec, insn->offset, + insn->type == INSN_JUMP_DYNAMIC ? 
"jump" : "call"); + + warnings++; + } + + return warnings; +} + static bool is_kasan_insn(struct instruction *insn) { return (insn->type == INSN_CALL && @@ -2051,6 +2128,13 @@ int check(const char *_objname, bool orc) if (list_empty(&file.insn_list)) goto out; + if (retpoline) { + ret = validate_retpoline(&file); + if (ret < 0) + return ret; + warnings += ret; + } + ret = validate_functions(&file); if (ret < 0) goto out; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 936255ba23db..c6b68fcb926f 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -45,6 +45,7 @@ struct instruction { unsigned char type; unsigned long immediate; bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; + bool retpoline_safe; struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; -- cgit v1.2.3 From ca41b97ed9124fd62323a162de5852f6e28f94b8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 31 Jan 2018 10:18:28 +0100 Subject: objtool: Add module specific retpoline rules David allowed retpolines in .init.text, except for modules, which will trip up objtool retpoline validation, fix that. Requested-by: David Woodhouse Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- scripts/Makefile.build | 2 ++ tools/objtool/builtin-check.c | 3 ++- tools/objtool/builtin.h | 2 +- tools/objtool/check.c | 9 +++++++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 53d862aee335..ce0fc4dd68c6 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check) +objtool_args += $(if $(part-of-module), --module,) + ifndef CONFIG_FRAME_POINTER objtool_args += --no-fp endif diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index dd6bcd6097f5..694abc628e9b 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -29,7 +29,7 @@ #include "builtin.h" #include "check.h" -bool no_fp, no_unreachable, retpoline; +bool no_fp, no_unreachable, retpoline, module; static const char * const check_usage[] = { "objtool check [] file.o", @@ -40,6 +40,7 @@ const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), + OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), OPT_END(), }; diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index 7b6addfce045..28ff40e19a14 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -20,7 +20,7 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline; +extern bool no_fp, no_unreachable, retpoline, module; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5e5db7b4d77b..472e64e95891 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1958,6 +1958,15 @@ static int validate_retpoline(struct 
objtool_file *file) if (insn->retpoline_safe) continue; + /* + * .init.text code is ran before userspace and thus doesn't + * strictly need retpolines, except for modules which are + * loaded late, they very much do need retpoline in their + * .init.text + */ + if (!strcmp(insn->sec->name, ".init.text") && !module) + continue; + WARN_FUNC("indirect %s found in RETPOLINE build", insn->sec, insn->offset, insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); -- cgit v1.2.3 From d72f4e29e6d84b7ec02ae93088aa459ac70e733b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Feb 2018 09:20:37 +0100 Subject: x86/speculation: Move firmware_restrict_branch_speculation_*() from C to CPP firmware_restrict_branch_speculation_*() recently started using preempt_enable()/disable(), but those are relatively high level primitives and cause build failures on some 32-bit builds. Since we want to keep low level, convert them to macros to avoid header hell... Cc: David Woodhouse Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1aad6c79a597..b7063cfa19f9 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -257,20 +257,22 @@ static inline void indirect_branch_prediction_barrier(void) /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. + * + * (Implemented as CPP macros due to header hell.) */ -static inline void firmware_restrict_branch_speculation_start(void) -{ - preempt_disable(); - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, - X86_FEATURE_USE_IBRS_FW); -} +#define firmware_restrict_branch_speculation_start() \ +do { \ + preempt_disable(); \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ +} while (0) -static inline void firmware_restrict_branch_speculation_end(void) -{ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, - X86_FEATURE_USE_IBRS_FW); - preempt_enable(); -} +#define firmware_restrict_branch_speculation_end() \ +do { \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ +} while (0) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ -- cgit v1.2.3 From 0e34d226342c27c4f96138b211547d423e4be8a1 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:08 +0100 Subject: x86/entry/64: Move PUSH_AND_CLEAR_REGS from interrupt macro to helper function The PUSH_AND_CLEAR_REGS macro is able to insert the GP registers "above" the original return address. This allows us to move a sizeable part of the interrupt entry macro to an interrupt entry helper function: text data bss dec hex filename 21088 0 0 21088 5260 entry_64.o-orig 18006 0 0 18006 4656 entry_64.o Suggested-by: Linus Torvalds Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-2-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 7a53879ec689..b0ae0c3e3815 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -525,6 +525,14 @@ END(irq_entries_start) * * Entry runs with interrupts off. */ +ENTRY(interrupt_entry) + UNWIND_HINT_FUNC + + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + + ret +END(interrupt_entry) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func @@ -536,8 +544,7 @@ END(irq_entries_start) call switch_to_thread_stack 1: - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER + call interrupt_entry testb $3, CS(%rsp) jz 1f -- cgit v1.2.3 From 2ba6474104a1132c4af9f6dc42c6bfe3ca71f8c7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:09 +0100 Subject: x86/entry/64: Move ENTER_IRQ_STACK from interrupt macro to interrupt_entry Moving the switch to IRQ stack from the interrupt macro to the helper function requires some trickery: All ENTER_IRQ_STACK really cares about is where the "original" stack -- meaning the GP registers etc. -- is stored. Therefore, we need to offset the stored RSP value by 8 whenever ENTER_IRQ_STACK is called from within a function. In such cases, and after switching to the IRQ stack, we need to push the "original" return address (i.e. the return address from the call to the interrupt entry function) to the IRQ stack. This trickery allows us to carve another .85k from the text size (it would be more except for the additional unwind hints): text data bss dec hex filename 18006 0 0 18006 4656 entry_64.o-orig 17158 0 0 17158 4306 entry_64.o Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-3-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 56 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b0ae0c3e3815..7a6ae19962ec 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -448,9 +448,19 @@ END(irq_entries_start) * * The invariant is that, if irq_count != -1, then the IRQ stack is in use. */ -.macro ENTER_IRQ_STACK regs=1 old_rsp +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0 DEBUG_ENTRY_ASSERT_IRQS_OFF + + .if \save_ret + /* + * If save_ret is set, the original stack contains one additional + * entry -- the return address. Therefore, move the address one + * entry below %rsp to \old_rsp. + */ + leaq 8(%rsp), \old_rsp + .else movq %rsp, \old_rsp + .endif .if \regs UNWIND_HINT_REGS base=\old_rsp @@ -496,6 +506,15 @@ END(irq_entries_start) .if \regs UNWIND_HINT_REGS indirect=1 .endif + + .if \save_ret + /* + * Push the return address to the stack. This return address can + * be found at the "real" original RSP, which was offset by 8 at + * the beginning of this macro. 
+ */ + pushq -8(\old_rsp) + .endif .endm /* @@ -531,22 +550,7 @@ ENTRY(interrupt_entry) PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 - ret -END(interrupt_entry) - -/* 0(%rsp): ~(interrupt number) */ - .macro interrupt func - cld - - testb $3, CS-ORIG_RAX(%rsp) - jz 1f - SWAPGS - call switch_to_thread_stack -1: - - call interrupt_entry - - testb $3, CS(%rsp) + testb $3, CS+8(%rsp) jz 1f /* @@ -564,10 +568,26 @@ END(interrupt_entry) CALL_enter_from_user_mode 1: - ENTER_IRQ_STACK old_rsp=%rdi + ENTER_IRQ_STACK old_rsp=%rdi save_ret=1 /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF + ret +END(interrupt_entry) + +/* 0(%rsp): ~(interrupt number) */ + .macro interrupt func + cld + + testb $3, CS-ORIG_RAX(%rsp) + jz 1f + SWAPGS + call switch_to_thread_stack +1: + + call interrupt_entry + + UNWIND_HINT_REGS indirect=1 call \func /* rdi points to pt_regs */ .endm -- cgit v1.2.3 From 90a6acc4e7ebafa8672a7a1a5b23fbad3dd04130 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:10 +0100 Subject: x86/entry/64: Move the switch_to_thread_stack() call to interrupt_entry() We can also move the CLD, SWAPGS, and the switch_to_thread_stack() call to the interrupt_entry() helper function. As we do not want call depths of two, convert switch_to_thread_stack() to a macro. However, switch_to_thread_stack() has another user in entry_64_compat.S, which currently expects it to be a function. To keep the code changes in this patch minimal, create a wrapper function. The switch to a macro means that there is some binary code duplication if CONFIG_IA32_EMULATION=y is enabled. Therefore, the size reduction differs whether CONFIG_IA32_EMULATION is enabled or not: CONFIG_IA32_EMULATION=y (-0.13k): text data bss dec hex filename 17158 0 0 17158 4306 entry_64.o-orig 17028 0 0 17028 4284 entry_64.o CONFIG_IA32_EMULATION=n (-0.27k): text data bss dec hex filename 17158 0 0 17158 4306 entry_64.o-orig 16882 0 0 16882 41f2 entry_64.o Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 66 ++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 7a6ae19962ec..b45d76649eff 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -537,6 +537,31 @@ END(irq_entries_start) decl PER_CPU_VAR(irq_count) .endm +/* + * Switch to the thread stack. This is called with the IRET frame and + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and + * space has not been allocated for them.) + */ +.macro DO_SWITCH_TO_THREAD_STACK + pushq %rdi + /* Need to switch before accessing the thread stack. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI + + pushq 7*8(%rdi) /* regs->ss */ + pushq 6*8(%rdi) /* regs->rsp */ + pushq 5*8(%rdi) /* regs->eflags */ + pushq 4*8(%rdi) /* regs->cs */ + pushq 3*8(%rdi) /* regs->ip */ + pushq 2*8(%rdi) /* regs->orig_ax */ + pushq 8(%rdi) /* return address */ + UNWIND_HINT_FUNC + + movq (%rdi), %rdi +.endm + /* * Interrupt entry/exit. 
* @@ -544,8 +569,16 @@ END(irq_entries_start) * * Entry runs with interrupts off. */ +/* 8(%rsp): ~(interrupt number) */ ENTRY(interrupt_entry) UNWIND_HINT_FUNC + cld + + testb $3, CS-ORIG_RAX+8(%rsp) + jz 1f + SWAPGS + DO_SWITCH_TO_THREAD_STACK +1: PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 @@ -577,14 +610,6 @@ END(interrupt_entry) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func - cld - - testb $3, CS-ORIG_RAX(%rsp) - jz 1f - SWAPGS - call switch_to_thread_stack -1: - call interrupt_entry UNWIND_HINT_REGS indirect=1 @@ -858,33 +883,16 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) -/* - * Switch to the thread stack. This is called with the IRET frame and - * orig_ax on the stack. (That is, RDI..R12 are not on the stack and - * space has not been allocated for them.) - */ +#if defined(CONFIG_IA32_EMULATION) +/* entry_64_compat.S::entry_INT80_compat expects this to be an ASM function */ ENTRY(switch_to_thread_stack) UNWIND_HINT_FUNC - pushq %rdi - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi - movq %rsp, %rdi - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI - - pushq 7*8(%rdi) /* regs->ss */ - pushq 6*8(%rdi) /* regs->rsp */ - pushq 5*8(%rdi) /* regs->eflags */ - pushq 4*8(%rdi) /* regs->cs */ - pushq 3*8(%rdi) /* regs->ip */ - pushq 2*8(%rdi) /* regs->orig_ax */ - pushq 8(%rdi) /* return address */ - UNWIND_HINT_FUNC + DO_SWITCH_TO_THREAD_STACK - movq (%rdi), %rdi ret END(switch_to_thread_stack) +#endif .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) -- cgit v1.2.3 From 3aa99fc3e708b9cd9b4cfe2df0b7a66cf293e3cf Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:11 +0100 Subject: x86/entry/64: Remove 'interrupt' macro It is now trivial to call interrupt_entry() and then the actual worker. Therefore, remove the interrupt macro and open code it all. Suggested-by: Linus Torvalds Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-5-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b45d76649eff..8ea03cf94a2d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -608,14 +608,6 @@ ENTRY(interrupt_entry) ret END(interrupt_entry) -/* 0(%rsp): ~(interrupt number) */ - .macro interrupt func - call interrupt_entry - - UNWIND_HINT_REGS indirect=1 - call \func /* rdi points to pt_regs */ - .endm - /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. 
@@ -624,7 +616,9 @@ END(interrupt_entry) common_interrupt: ASM_CLAC addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ - interrupt do_IRQ + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call do_IRQ /* rdi points to pt_regs */ /* 0(%rsp): old RSP */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_ANY) @@ -820,7 +814,9 @@ ENTRY(\sym) ASM_CLAC pushq $~(\num) .Lcommon_\sym: - interrupt \do_sym + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call \do_sym /* rdi points to pt_regs */ jmp ret_from_intr END(\sym) .endm -- cgit v1.2.3 From b2855d8d2de0fa15c1ff30c69ed7756b00c48b22 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:12 +0100 Subject: x86/entry/64: Move ASM_CLAC to interrupt_entry() Moving ASM_CLAC to interrupt_entry means two instructions (addq / pushq and call interrupt_entry) are not covered by it. However, it offers a noticeable size reduction (-.2k): text data bss dec hex filename 16882 0 0 16882 41f2 entry_64.o-orig 16623 0 0 16623 40ef entry_64.o Suggested-by: Brian Gerst Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-6-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8ea03cf94a2d..42a4b652469d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -572,6 +572,7 @@ END(irq_entries_start) /* 8(%rsp): ~(interrupt number) */ ENTRY(interrupt_entry) UNWIND_HINT_FUNC + ASM_CLAC cld testb $3, CS-ORIG_RAX+8(%rsp) @@ -614,7 +615,6 @@ END(interrupt_entry) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: - ASM_CLAC addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ call interrupt_entry UNWIND_HINT_REGS indirect=1 @@ -811,7 +811,6 @@ END(common_interrupt) .macro apicinterrupt3 num sym do_sym ENTRY(\sym) UNWIND_HINT_IRET_REGS - ASM_CLAC pushq $~(\num) .Lcommon_\sym: call interrupt_entry -- cgit v1.2.3 From f3d415ea46968ae1f9cbb9e201601d7207ce74c7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:13 +0100 Subject: x86/entry/64: Open-code switch_to_thread_stack() Open-code the two instances which called switch_to_thread_stack(). This allows us to remove the wrapper around DO_SWITCH_TO_THREAD_STACK. While at it, update the UNWIND hint to reflect where the IRET frame is, and update the commentary to reflect what we are actually doing here. Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-7-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 76 +++++++++++++++++++++------------------- arch/x86/entry/entry_64_compat.S | 17 +++++++-- 2 files changed, 55 insertions(+), 38 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 42a4b652469d..d5c7f18f79ac 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -538,17 +538,48 @@ END(irq_entries_start) .endm /* - * Switch to the thread stack. This is called with the IRET frame and - * orig_ax on the stack. 
(That is, RDI..R12 are not on the stack and - * space has not been allocated for them.) + * Interrupt entry helper function. + * + * Entry runs with interrupts off. Stack layout at entry: + * +----------------------------------------------------+ + * | regs->ss | + * | regs->rsp | + * | regs->eflags | + * | regs->cs | + * | regs->ip | + * +----------------------------------------------------+ + * | regs->orig_ax = ~(interrupt number) | + * +----------------------------------------------------+ + * | return address | + * +----------------------------------------------------+ */ -.macro DO_SWITCH_TO_THREAD_STACK +ENTRY(interrupt_entry) + UNWIND_HINT_FUNC + ASM_CLAC + cld + + testb $3, CS-ORIG_RAX+8(%rsp) + jz 1f + SWAPGS + + /* + * Switch to the thread stack. The IRET frame and orig_ax are + * on the stack, as well as the return address. RDI..R12 are + * not (yet) on the stack and space has not (yet) been + * allocated for them. + */ pushq %rdi + /* Need to switch before accessing the thread stack. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi movq %rsp, %rdi movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI + + /* + * We have RDI, return address, and orig_ax on the stack on + * top of the IRET frame. That means offset=24 + */ + UNWIND_HINT_IRET_REGS base=%rdi offset=24 pushq 7*8(%rdi) /* regs->ss */ pushq 6*8(%rdi) /* regs->rsp */ @@ -560,25 +591,6 @@ END(irq_entries_start) UNWIND_HINT_FUNC movq (%rdi), %rdi -.endm - -/* - * Interrupt entry/exit. - * - * Interrupt entry points save only callee clobbered registers in fast path. - * - * Entry runs with interrupts off. - */ -/* 8(%rsp): ~(interrupt number) */ -ENTRY(interrupt_entry) - UNWIND_HINT_FUNC - ASM_CLAC - cld - - testb $3, CS-ORIG_RAX+8(%rsp) - jz 1f - SWAPGS - DO_SWITCH_TO_THREAD_STACK 1: PUSH_AND_CLEAR_REGS save_ret=1 @@ -592,7 +604,7 @@ ENTRY(interrupt_entry) * * We need to tell lockdep that IRQs are off. We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode - * (which can take locks). Since TRACE_IRQS_OFF idempotent, + * (which can take locks). Since TRACE_IRQS_OFF is idempotent, * the simplest way to handle it is to just call it twice if * we enter from user mode. There's no reason to optimize this since * TRACE_IRQS_OFF is a no-op if lockdep is off. @@ -609,6 +621,9 @@ ENTRY(interrupt_entry) ret END(interrupt_entry) + +/* Interrupt entry/exit. */ + /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -878,17 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) -#if defined(CONFIG_IA32_EMULATION) -/* entry_64_compat.S::entry_INT80_compat expects this to be an ASM function */ -ENTRY(switch_to_thread_stack) - UNWIND_HINT_FUNC - - DO_SWITCH_TO_THREAD_STACK - - ret -END(switch_to_thread_stack) -#endif - .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 364ea4a207be..e811dd9c5e99 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -347,10 +347,23 @@ ENTRY(entry_INT80_compat) */ movl %eax, %eax + /* switch to thread stack expects orig_ax and rdi to be pushed */ pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + + /* Need to switch before accessing the thread stack. 
*/ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + pushq 6*8(%rdi) /* regs->ss */ + pushq 5*8(%rdi) /* regs->rsp */ + pushq 4*8(%rdi) /* regs->eflags */ + pushq 3*8(%rdi) /* regs->cs */ + pushq 2*8(%rdi) /* regs->ip */ + pushq 1*8(%rdi) /* regs->orig_ax */ - /* switch to thread stack expects orig_ax to be pushed */ - call switch_to_thread_stack + movq (%rdi), %rdi /* restore %rdi */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ -- cgit v1.2.3 From 33352244706369ea6736781ae41fe41692eb69bb Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Feb 2018 11:37:51 -0600 Subject: jump_label: Explicitly disable jump labels in __init code After initmem has been freed, any jump labels in __init code are prevented from being written to by the kernel_text_address() check in __jump_label_update(). However, this check is quite broad. If kernel_text_address() were to return false for any other reason, the jump label write would fail silently with no warning. For jump labels in module init code, entry->code is set to zero to indicate that the entry is disabled. Do the same thing for core kernel init code. This makes the behavior more consistent, and will also make it more straightforward to detect non-init jump label write failures in the next patch. Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra Cc: Borislav Petkov Cc: Jason Baron Cc: Linus Torvalds Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c52825c73f3a174e8398b6898284ec20d4deb126.1519051220.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 3 +++ init/main.c | 2 ++ kernel/jump_label.c | 16 ++++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index b6a29c126cc4..2168cc6b8b30 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -151,6 +151,7 @@ extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); +extern void jump_label_invalidate_init(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, @@ -198,6 +199,8 @@ static __always_inline void jump_label_init(void) static_key_initialized = true; } +static inline void jump_label_invalidate_init(void) {} + static __always_inline bool static_key_false(struct static_key *key) { if (unlikely(static_key_count(key) > 0)) diff --git a/init/main.c b/init/main.c index a8100b954839..969eaf140ef0 100644 --- a/init/main.c +++ b/init/main.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -1000,6 +1001,7 @@ static int __ref kernel_init(void *unused) /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); ftrace_free_init_mem(); + jump_label_invalidate_init(); free_initmem(); mark_readonly(); system_state = SYSTEM_RUNNING; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index b4517095db6a..b71776576a66 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef HAVE_JUMP_LABEL @@ -417,6 +418,20 @@ void __init jump_label_init(void) cpus_read_unlock(); } +/* Disable any jump label entries in __init code */ +void __init jump_label_invalidate_init(void) +{ + struct jump_entry *iter_start = __start___jump_table; + struct jump_entry *iter_stop = __stop___jump_table; + struct jump_entry *iter; + + for 
(iter = iter_start; iter < iter_stop; iter++) { + if (iter->code >= (unsigned long)_sinittext && + iter->code < (unsigned long)_einittext) + iter->code = 0; + } +} + #ifdef CONFIG_MODULES static enum jump_label_type jump_label_init_type(struct jump_entry *entry) @@ -633,6 +648,7 @@ static void jump_label_del_module(struct module *mod) } } +/* Disable any jump label entries in module init code */ static void jump_label_invalidate_module_init(struct module *mod) { struct jump_entry *iter_start = mod->jump_entries; -- cgit v1.2.3 From dc1dd184c2f0016bec35c0d7a48c057e0ad763d3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Feb 2018 11:37:52 -0600 Subject: jump_label: Warn on failed jump_label patching attempt Currently when the jump label code encounters an address which isn't recognized by kernel_text_address(), it just silently fails. This can be dangerous because jump labels are used in a variety of places, and are generally expected to work. Convert the silent failure to a warning. This won't warn about attempted writes to tracepoints in __init code after initmem has been freed, as those are already guarded by the entry->code check. Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra Cc: Borislav Petkov Cc: Jason Baron Cc: Linus Torvalds Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/de3a271c93807adb7ed48f4e946b4f9156617680.1519051220.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- kernel/jump_label.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index b71776576a66..b2f0b479191b 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -367,12 +367,15 @@ static void __jump_label_update(struct static_key *key, { for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { /* - * entry->code set to 0 invalidates module init text sections - * kernel_text_address() verifies we are not in core kernel - * init code, see jump_label_invalidate_module_init(). + * An entry->code of 0 indicates an entry which has been + * disabled because it was in an init text area. */ - if (entry->code && kernel_text_address(entry->code)) - arch_jump_label_transform(entry, jump_label_type(entry)); + if (entry->code) { + if (kernel_text_address(entry->code)) + arch_jump_label_transform(entry, jump_label_type(entry)); + else + WARN_ONCE(1, "can't patch jump_label at %pS", (void *)entry->code); + } } } -- cgit v1.2.3 From 9fbcc57aa16424ef84cb54e0d9db3221763de88a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Feb 2018 11:37:53 -0600 Subject: extable: Make init_kernel_text() global Convert init_kernel_text() to a global function and use it in a few places instead of manually comparing _sinittext and _einittext. Note that kallsyms.h has a very similar function called is_kernel_inittext(), but its end check is inclusive. I'm not sure whether that's intentional behavior, so I didn't touch it. 
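For readers comparing the two helpers, the difference noted above comes down to a single comparison operator; a simplified sketch, not copied from this patch:

	/* init_kernel_text(), as in the extable.c hunk below: exclusive end check */
	addr >= (unsigned long)_sinittext && addr <  (unsigned long)_einittext

	/* kallsyms.h is_kernel_inittext(): end symbol counted as init text (inclusive) */
	addr >= (unsigned long)_sinittext && addr <= (unsigned long)_einittext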
Suggested-by: Jason Baron Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra Acked-by: Steven Rostedt (VMware) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4335d02be8d45ca7d265d2f174251d0b7ee6c5fd.1519051220.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_orc.c | 3 +-- include/linux/kernel.h | 1 + kernel/extable.c | 2 +- kernel/jump_label.c | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1f9188f5357c..feb28fee6cea 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -5,7 +5,6 @@ #include #include #include -#include #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) @@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip) } /* vmlinux .init slow lookup: */ - if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) + if (init_kernel_text(ip)) return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ce51455e2adf..3fd291503576 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option); extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); +extern int init_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); diff --git a/kernel/extable.c b/kernel/extable.c index a17fdb63dc3e..6a5b61ebc66c 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -static inline int init_kernel_text(unsigned long addr) +int init_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_sinittext && addr < (unsigned long)_einittext) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index b2f0b479191b..52a0a7af8640 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -16,7 +16,6 @@ #include #include #include -#include #ifdef HAVE_JUMP_LABEL @@ -429,8 +428,7 @@ void __init jump_label_invalidate_init(void) struct jump_entry *iter; for (iter = iter_start; iter < iter_stop; iter++) { - if (iter->code >= (unsigned long)_sinittext && - iter->code < (unsigned long)_einittext) + if (init_kernel_text(iter->code)) iter->code = 0; } } -- cgit v1.2.3 From 0ca7d5baa1787e5f2a7abd6bfca3303b1bbb48ac Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Feb 2018 20:42:14 -0600 Subject: x86/entry/64: Simplify ENCODE_FRAME_POINTER On 64-bit, the stack pointer is always aligned on interrupt, so instead of setting the LSB of the pt_regs address, we can just add 1 to it. 
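The equivalence holds because the interrupt-time stack pointer is aligned, so its low bit is already zero and adding 1 sets exactly the bit the old orq $0x1 did; a minimal worked example with a hypothetical RSP value (illustration only, not part of the patch):

	unsigned long rsp     = 0xffffc90000003f00UL;	/* aligned %rsp at interrupt      */
	unsigned long old_rbp = rsp | 0x1UL;		/* old: lea/mov %rsp,%rbp; orq $1 */
	unsigned long new_rbp = rsp + 1;		/* new: leaq 1+offset(%rsp),%rbp  */
	/* old_rbp == new_rbp == 0xffffc90000003f01UL */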
Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Andrew Lutomirski Cc: Brian Gerst Cc: Dan Williams Cc: Dominik Brodowski Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180221024214.lhl5jfgw33c4vz3m@treble Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 5d10b7a85cad..be63330c5511 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -181,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with */ .macro ENCODE_FRAME_POINTER ptregs_offset=0 #ifdef CONFIG_FRAME_POINTER - .if \ptregs_offset - leaq \ptregs_offset(%rsp), %rbp - .else - mov %rsp, %rbp - .endif - orq $0x1, %rbp + leaq 1+\ptregs_offset(%rsp), %rbp #endif .endm -- cgit v1.2.3 From d5028ba8ee5a18c9d0bb926d883c28b370f89009 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 6 Feb 2018 09:46:13 +0100 Subject: objtool, retpolines: Integrate objtool with retpoline support more closely Disable retpoline validation in objtool if your compiler sucks, and otherwise select the validation stuff for CONFIG_RETPOLINE=y (most builds would already have it set due to ORC). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- Makefile | 5 +++++ arch/x86/Kconfig | 1 + arch/x86/Makefile | 10 +++------- scripts/Makefile.build | 2 ++ 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 79ad2bfa24b6..3dfce4d2f25d 100644 --- a/Makefile +++ b/Makefile @@ -489,6 +489,11 @@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) endif +RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register +RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk +RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) +export RETPOLINE_CFLAGS + ifeq ($(config-targets),1) # =========================================================================== # *config targets only - make sure prerequisites are updated, and descend diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 63bf349b2b24..c1aed6c0e413 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -436,6 +436,7 @@ config GOLDFISH config RETPOLINE bool "Avoid speculative indirect branches in kernel" default y + select STACK_VALIDATION if HAVE_STACK_VALIDATION help Compile kernel with the retpoline compiler options to guard against kernel-to-user data leaks by avoiding speculative indirect diff --git a/arch/x86/Makefile b/arch/x86/Makefile index dbc7d0ed2eaa..498c1b812300 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -232,13 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE - RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register - RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk - - RETPOLINE_CFLAGS += $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) - ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE - endif +ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE +endif endif archscripts: 
scripts_basic diff --git a/scripts/Makefile.build b/scripts/Makefile.build index ce0fc4dd68c6..4f2b25d43ec9 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -267,8 +267,10 @@ else objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) endif ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_CFLAGS),) objtool_args += --retpoline endif +endif ifdef CONFIG_MODVERSIONS -- cgit v1.2.3 From ecb586bd29c99fb4de599dec388658e74388daad Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 22 Feb 2018 16:43:17 +0100 Subject: KVM/x86: Remove indirect MSR op calls from SPEC_CTRL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl() on the vmentry path too. Signed-off-by: Paolo Bonzini Reviewed-by: Jim Mattson Cc: David Woodhouse Cc: KarimAllah Ahmed Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d Link: http://lkml.kernel.org/r/20180222154318.20361-2-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kvm/svm.c | 7 ++++--- arch/x86/kvm/vmx.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3e488a74828..1598beeda11c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -5355,7 +5356,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5465,10 +5466,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * save it. */ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3dec126aa302..0927be315965 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include "trace.h" @@ -9452,7 +9453,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); vmx->__launched = vmx->loaded_vmcs->launched; asm( @@ -9588,10 +9589,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * save it. 
*/ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); -- cgit v1.2.3 From 946fbbc13dce68902f64515b610eeb2a6c3d7a64 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 22 Feb 2018 16:43:18 +0100 Subject: KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by marking the RDMSR path as unlikely() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmx_vcpu_run() and svm_vcpu_run() are large functions, and giving branch hints to the compiler can actually make a substantial cycle difference by keeping the fast path contiguous in memory. With this optimization, the retpoline-guest/retpoline-host case is about 50 cycles faster. Signed-off-by: Paolo Bonzini Reviewed-by: Jim Mattson Cc: David Woodhouse Cc: KarimAllah Ahmed Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180222154318.20361-3-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1598beeda11c..24c9521ebc24 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5465,7 +5465,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (svm->spec_ctrl) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0927be315965..7f8401d05939 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9588,7 +9588,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (vmx->spec_ctrl) -- cgit v1.2.3
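For reference, the unlikely() hint used in the hunks above is a thin wrapper around the compiler's branch-expectation builtin (see include/linux/compiler.h); a simplified sketch of the idea, not part of these patches:

	#define unlikely(x)	__builtin_expect(!!(x), 0)

	/* The compiler is told this condition is expected to be false, so it
	 * can lay out the common vmexit path as straight-line code and keep
	 * the rarely-taken RDMSR handling off the hot path: */
	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);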