summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/common.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 15:03:00 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 15:03:00 -0800
commit94a855111ed9106971ca2617c5d075269e6aefde (patch)
tree330c762a403cf70c2cdbf12e7394e5e7c2a69a79 /arch/x86/kernel/cpu/common.c
parent93761c93e9da28d8a020777cee2a84133082b477 (diff)
parentf1a033cc6b9eb6d80322008422df3c87aa5d47a0 (diff)
downloadlinux-94a855111ed9106971ca2617c5d075269e6aefde.tar.bz2
Merge tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 core updates from Borislav Petkov: - Add the call depth tracking mitigation for Retbleed which has been long in the making. It is a lighterweight software-only fix for Skylake-based cores where enabling IBRS is a big hammer and causes a significant performance impact. What it basically does is, it aligns all kernel functions to 16 bytes boundary and adds a 16-byte padding before the function, objtool collects all functions' locations and when the mitigation gets applied, it patches a call accounting thunk which is used to track the call depth of the stack at any time. When that call depth reaches a magical, microarchitecture-specific value for the Return Stack Buffer, the code stuffs that RSB and avoids its underflow which could otherwise lead to the Intel variant of Retbleed. This software-only solution brings a lot of the lost performance back, as benchmarks suggest: https://lore.kernel.org/all/20220915111039.092790446@infradead.org/ That page above also contains a lot more detailed explanation of the whole mechanism - Implement a new control flow integrity scheme called FineIBT which is based on the software kCFI implementation and uses hardware IBT support where present to annotate and track indirect branches using a hash to validate them - Other misc fixes and cleanups * tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (80 commits) x86/paravirt: Use common macro for creating simple asm paravirt functions x86/paravirt: Remove clobber bitmask from .parainstructions x86/debug: Include percpu.h in debugreg.h to get DECLARE_PER_CPU() et al x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from bit 18 to bit 19 of word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA bit x86/Kconfig: Enable kernel IBT by default x86,pm: Force out-of-line memcpy() objtool: Fix weak hole vs prefix symbol objtool: Optimize elf_dirty_reloc_sym() x86/cfi: Add boot time hash randomization x86/cfi: Boot time selection of CFI scheme x86/ibt: Implement FineIBT objtool: Add --cfi to generate the .cfi_sites section x86: Add prefix symbols for function padding objtool: Add option to generate prefix symbols objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf objtool: Slice up elf_create_section_symbol() kallsyms: Revert "Take callthunks into account" x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces x86/retpoline: Fix crash printing warning x86/paravirt: Fix a !PARAVIRT build warning ...
Diffstat (limited to 'arch/x86/kernel/cpu/common.c')
-rw-r--r--arch/x86/kernel/cpu/common.c97
1 files changed, 44 insertions, 53 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 62b83bc5b4b9..9cfca3d7d0e2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -610,6 +610,7 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c)
if (!ibt_selftest()) {
pr_err("IBT selftest: Failed!\n");
+ wrmsrl(MSR_IA32_S_CET, 0);
setup_clear_cpu_cap(X86_FEATURE_IBT);
return;
}
@@ -702,16 +703,6 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
-void load_percpu_segment(int cpu)
-{
-#ifdef CONFIG_X86_32
- loadsegment(fs, __KERNEL_PERCPU);
-#else
- __loadsegment_simple(gs, 0);
- wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
-#endif
-}
-
#ifdef CONFIG_X86_32
/* The 32-bit entry code needs to find cpu_entry_area. */
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
@@ -739,16 +730,45 @@ void load_fixmap_gdt(int cpu)
}
EXPORT_SYMBOL_GPL(load_fixmap_gdt);
-/*
- * Current gdt points %fs at the "master" per-cpu area: after this,
- * it's on the real one.
+/**
+ * switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base
+ * @cpu: The CPU number for which this is invoked
+ *
+ * Invoked during early boot to switch from early GDT and early per CPU to
+ * the direct GDT and the runtime per CPU area. On 32-bit the percpu base
+ * switch is implicit by loading the direct GDT. On 64bit this requires
+ * to update GSBASE.
*/
-void switch_to_new_gdt(int cpu)
+void __init switch_gdt_and_percpu_base(int cpu)
{
- /* Load the original GDT */
load_direct_gdt(cpu);
- /* Reload the per-cpu base */
- load_percpu_segment(cpu);
+
+#ifdef CONFIG_X86_64
+ /*
+ * No need to load %gs. It is already correct.
+ *
+ * Writing %gs on 64bit would zero GSBASE which would make any per
+ * CPU operation up to the point of the wrmsrl() fault.
+ *
+ * Set GSBASE to the new offset. Until the wrmsrl() happens the
+ * early mapping is still valid. That means the GSBASE update will
+ * lose any prior per CPU data which was not copied over in
+ * setup_per_cpu_areas().
+ *
+ * This works even with stackprotector enabled because the
+ * per CPU stack canary is 0 in both per CPU areas.
+ */
+ wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
+#else
+ /*
+ * %fs is already set to __KERNEL_PERCPU, but after switching GDT
+ * it is required to load FS again so that the 'hidden' part is
+ * updated from the new GDT. Up to this point the early per CPU
+ * translation is active. Any content of the early per CPU data
+ * which was not copied over in setup_per_cpu_areas() is lost.
+ */
+ loadsegment(fs, __KERNEL_PERCPU);
+#endif
}
static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@@ -1993,27 +2013,18 @@ static __init int setup_clearcpuid(char *arg)
}
__setup("clearcpuid=", setup_clearcpuid);
+DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
+ .current_task = &init_task,
+ .preempt_count = INIT_PREEMPT_COUNT,
+ .top_of_stack = TOP_OF_INIT_STACK,
+};
+EXPORT_PER_CPU_SYMBOL(pcpu_hot);
+
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
-/*
- * The following percpu variables are hot. Align current_task to
- * cacheline size such that they fall in the same cacheline.
- */
-DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
- &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
-DEFINE_PER_CPU(void *, hardirq_stack_ptr);
-DEFINE_PER_CPU(bool, hardirq_stack_inuse);
-
-DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
-EXPORT_PER_CPU_SYMBOL(__preempt_count);
-
-DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
-
static void wrmsrl_cstar(unsigned long val)
{
/*
@@ -2064,20 +2075,6 @@ void syscall_init(void)
#else /* CONFIG_X86_64 */
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
-EXPORT_PER_CPU_SYMBOL(__preempt_count);
-
-/*
- * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
- * the top of the kernel stack. Use an extra percpu variable to track the
- * top of the kernel stack directly.
- */
-DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
- (unsigned long)&init_thread_union + THREAD_SIZE;
-EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
-
#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
@@ -2248,12 +2245,6 @@ void cpu_init(void)
boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- /*
- * Initialize the per-CPU GDT with the boot GDT,
- * and set up the GDT descriptor:
- */
- switch_to_new_gdt(cpu);
-
if (IS_ENABLED(CONFIG_X86_64)) {
loadsegment(fs, 0);
memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);