author     Linus Torvalds <torvalds@linux-foundation.org>  2022-01-10 09:51:38 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2022-01-10 09:51:38 -0800
commit     4a692ae360615026b25d64c29fc7c12c0ef63c5f (patch)
tree       df529537228674a78078c5537878b28956100c12
parent     bfed6efb8e136c0e0a4cc854f2aaa2ed6ac1b9e0 (diff)
parent     b64dfcde1ca9cb82e38e573753f0c0db8fb841c2 (diff)
download   linux-4a692ae360615026b25d64c29fc7c12c0ef63c5f.tar.bz2
Merge tag 'x86_mm_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Borislav Petkov:

 - Flush *all* mappings from the TLB after switching to the trampoline pagetable to prevent any stale entries' presence

 - Flush global mappings from the TLB, in addition to the CR3-write, after switching off of the trampoline_pgd during boot to clear the identity mappings

 - Prevent instrumentation issues resulting from the above changes

* tag 'x86_mm_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Prevent early boot triple-faults with instrumentation
  x86/mm: Include spinlock_t definition in pgtable.h
  x86/mm: Flush global TLB when switching to trampoline page-table
  x86/mm/64: Flush global TLB on boot and AP bringup
  x86/realmode: Add comment for Global bit usage in trampoline_pgd
  x86/mm: Add missing <asm/cpufeatures.h> dependency to <asm/page_64.h>
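All of these changes rely on the same architectural detail: a CR3 write invalidates only non-global TLB entries, so translations created from PTEs with the Global bit set survive the page-table switch and have to be flushed explicitly, either with INVPCID or by toggling CR4.PGE. Below is a minimal sketch of the PGE toggle, the same operation the new __native_tlb_flush_global() helper in the tlbflush.h hunk performs; it is illustrative only, and write_cr4() here stands in for the privileged CR4 write.

    /*
     * Illustrative sketch, not part of the patch: clearing CR4.PGE
     * invalidates the whole TLB, including global entries; restoring
     * the old value re-enables global pages.
     */
    static inline void flush_global_tlb_sketch(unsigned long cr4)
    {
            write_cr4(cr4 ^ X86_CR4_PGE);   /* PGE 1->0: full TLB flush */
            write_cr4(cr4);                 /* restore the original CR4 */
    }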
-rw-r--r--  arch/x86/include/asm/page_64.h  |  1
-rw-r--r--  arch/x86/include/asm/pgtable.h  |  1
-rw-r--r--  arch/x86/include/asm/realmode.h |  1
-rw-r--r--  arch/x86/include/asm/tlbflush.h |  5
-rw-r--r--  arch/x86/kernel/cpu/common.c    |  2
-rw-r--r--  arch/x86/kernel/head64.c        | 14
-rw-r--r--  arch/x86/kernel/head_64.S       | 19
-rw-r--r--  arch/x86/kernel/reboot.c        | 12
-rw-r--r--  arch/x86/mm/init.c              |  5
-rw-r--r--  arch/x86/mm/tlb.c               |  8
-rw-r--r--  arch/x86/realmode/init.c        | 26
11 files changed, 76 insertions, 18 deletions
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4bde0dc66100..e9c86299b835 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -5,6 +5,7 @@
#include <asm/page_64_types.h>
#ifndef __ASSEMBLY__
+#include <asm/cpufeatures.h>
#include <asm/alternative.h>
/* duplicated to the one in bootmem.h */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 448cd01eb3ec..a34430b7af4a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -22,6 +22,7 @@
#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot)))
#ifndef __ASSEMBLY__
+#include <linux/spinlock.h>
#include <asm/x86_init.h>
#include <asm/pkru.h>
#include <asm/fpu/api.h>
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 5db5d083c873..331474b150f1 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -89,6 +89,7 @@ static inline void set_real_mode_mem(phys_addr_t mem)
}
void reserve_real_mode(void);
+void load_trampoline_pgtable(void);
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index b587a9ee9cb2..98fa0a114074 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -261,4 +261,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
#endif /* !MODULE */
+static inline void __native_tlb_flush_global(unsigned long cr4)
+{
+ native_write_cr4(cr4 ^ X86_CR4_PGE);
+ native_write_cr4(cr4);
+}
#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0083464de5e3..79b3d67addcc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -384,7 +384,7 @@ set_register:
}
EXPORT_SYMBOL(native_write_cr0);
-void native_write_cr4(unsigned long val)
+void __no_profile native_write_cr4(unsigned long val)
{
unsigned long bits_changed = 0;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 3be9dd213dad..de563db9cdcd 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -487,6 +487,10 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
clear_bss();
+ /*
+ * This needs to happen *before* kasan_early_init(), because the latter maps
+ * stuff into that page.
+ */
clear_page(init_top_pgt);
/*
@@ -498,6 +502,16 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
kasan_early_init();
+ /*
+ * Flush global TLB entries which could be left over from the trampoline page
+ * table.
+ *
+ * This needs to happen *after* kasan_early_init() as KASAN-enabled .configs
+ * instrument native_write_cr4() so KASAN must be initialized for that
+ * instrumentation to work.
+ */
+ __native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));
+
idt_setup_early_handler();
copy_bootdata(__va(real_mode_data));
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d8b3ebd2bb85..9c63fc5988cd 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -166,9 +166,26 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
call sev_verify_cbit
popq %rsi
- /* Switch to new page-table */
+ /*
+ * Switch to new page-table
+ *
+ * For the boot CPU this switches to early_top_pgt which still has the
+ * identity mappings present. The secondary CPUs will switch to the
+ * init_top_pgt here, away from the trampoline_pgd and unmap the
+ * identity-mapped ranges.
+ */
movq %rax, %cr3
+ /*
+ * Do a global TLB flush after the CR3 switch to make sure the TLB
+ * entries from the identity mapping are flushed.
+ */
+ movq %cr4, %rcx
+ movq %rcx, %rax
+ xorq $X86_CR4_PGE, %rcx
+ movq %rcx, %cr4
+ movq %rax, %cr4
+
/* Ensure I am executing from virtual addresses */
movq $1f, %rax
ANNOTATE_RETPOLINE_SAFE
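The movq/xorq sequence added above is the open-coded counterpart of the __native_tlb_flush_global() helper from the tlbflush.h hunk; it stays in assembly because it runs before the jump that moves execution to the kernel's virtual addresses, so the C helper cannot be called here. Expressed in C it would read roughly as follows (illustrative paraphrase only):

    unsigned long cr4 = native_read_cr4();

    native_write_cr4(cr4 ^ X86_CR4_PGE);    /* clear PGE: drop global TLB entries */
    native_write_cr4(cr4);                  /* restore CR4, re-enable global pages */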
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 0a40df66a40d..fa700b46588e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -113,17 +113,9 @@ void __noreturn machine_real_restart(unsigned int type)
spin_unlock(&rtc_lock);
/*
- * Switch back to the initial page table.
+ * Switch to the trampoline page table.
*/
-#ifdef CONFIG_X86_32
- load_cr3(initial_page_table);
-#else
- write_cr3(real_mode_header->trampoline_pgd);
-
- /* Exiting long mode will fail if CR4.PCIDE is set. */
- if (boot_cpu_has(X86_FEATURE_PCID))
- cr4_clear_bits(X86_CR4_PCIDE);
-#endif
+ load_trampoline_pgtable();
/* Jump to the identity-mapped low memory code */
#ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1895986842b9..4ba024d5b63a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -714,6 +714,11 @@ static void __init memory_map_bottom_up(unsigned long map_start,
static void __init init_trampoline(void)
{
#ifdef CONFIG_X86_64
+ /*
+ * The code below will alias kernel page-tables in the user-range of the
+ * address space, including the Global bit. So global TLB entries will
+ * be created when using the trampoline page-table.
+ */
if (!kaslr_memory_enabled())
trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
else
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 59ba2968af1b..1e6513f95133 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1148,7 +1148,7 @@ void flush_tlb_one_user(unsigned long addr)
*/
STATIC_NOPV void native_flush_tlb_global(void)
{
- unsigned long cr4, flags;
+ unsigned long flags;
if (static_cpu_has(X86_FEATURE_INVPCID)) {
/*
@@ -1168,11 +1168,7 @@ STATIC_NOPV void native_flush_tlb_global(void)
*/
raw_local_irq_save(flags);
- cr4 = this_cpu_read(cpu_tlbstate.cr4);
- /* toggle PGE */
- native_write_cr4(cr4 ^ X86_CR4_PGE);
- /* write old PGE again and flush TLBs */
- native_write_cr4(cr4);
+ __native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));
raw_local_irq_restore(flags);
}
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 38d24d2ab38b..c5e29db02a46 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -17,6 +17,32 @@ u32 *trampoline_cr4_features;
/* Hold the pgd entry used on booting additional CPUs */
pgd_t trampoline_pgd_entry;
+void load_trampoline_pgtable(void)
+{
+#ifdef CONFIG_X86_32
+ load_cr3(initial_page_table);
+#else
+ /*
+ * This function is called before exiting to real-mode and that will
+ * fail with CR4.PCIDE still set.
+ */
+ if (boot_cpu_has(X86_FEATURE_PCID))
+ cr4_clear_bits(X86_CR4_PCIDE);
+
+ write_cr3(real_mode_header->trampoline_pgd);
+#endif
+
+ /*
+ * The CR3 write above will not flush global TLB entries.
+ * Stale, global entries from previous page tables may still be
+ * present. Flush those stale entries.
+ *
+ * This ensures that memory accessed while running with
+ * trampoline_pgd is *actually* mapped into trampoline_pgd.
+ */
+ __flush_tlb_all();
+}
+
void __init reserve_real_mode(void)
{
phys_addr_t mem;