author     Linus Torvalds <torvalds@linux-foundation.org>  2018-04-15 13:35:29 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-04-15 13:35:29 -0700
commit     6b0a02e86c293c32a50d49b33a1f04420585d40b
tree       048e4f6f19548cd0052395867327b4dcf2d19546  /arch/x86/mm
parent     71b8ebbf3d7bee88427eb207ef643f2f6447c625
parent     e3e288121408c3abeed5af60b87b95c847143845
download   linux-6b0a02e86c293c32a50d49b33a1f04420585d40b.tar.bz2
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 pti updates from Thomas Gleixner:
"Another series of PTI related changes:
- Remove the manual stack switch for user entries from the idtentry
code. This debloats entry by 5k+ bytes of text.
- Use the proper types for the asm/bootparam.h defines to prevent
user space compile errors.
- Use PAGE_GLOBAL for !PCID systems to gain back performance.
- Prevent setting huge PUD/PMD entries when the entries are not leaf
entries; otherwise the already-populated page-table pages they point
to are lost (sketched below)."
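The huge-entry hazard in the last item can be pictured with a small stand-alone model. This is a hypothetical sketch, not kernel code: the flag values mirror x86 bit positions, and pmd_set_huge() here is reduced to just the guard this series adds.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define _PAGE_PRESENT (1ULL << 0)   /* bit positions match x86 */
    #define _PAGE_PSE     (1ULL << 7)   /* set only on leaf (huge) entries */

    typedef uint64_t pmdval_t;

    static bool pmd_present(pmdval_t pmd) { return pmd & _PAGE_PRESENT; }
    static bool pmd_huge(pmdval_t pmd)    { return pmd & _PAGE_PSE; }

    /* Reduced pmd_set_huge(): refuse to clobber a populated non-leaf
     * entry, i.e. an entry that points at a lower-level page table. */
    static int pmd_set_huge(pmdval_t *pmd, uint64_t phys)
    {
            if (pmd_present(*pmd) && !pmd_huge(*pmd))
                    return 0;   /* would orphan the page table it points to */
            *pmd = phys | _PAGE_PRESENT | _PAGE_PSE;
            return 1;
    }

    int main(void)
    {
            pmdval_t pmd = 0x1000 | _PAGE_PRESENT; /* non-leaf: points at a PTE page */

            printf("%d\n", pmd_set_huge(&pmd, 0x200000)); /* prints 0: refused */
            return 0;
    }

Without the present-and-not-leaf check, an entry pointing at a page table full of live PTEs would be silently replaced by the huge mapping, orphaning everything that table mapped.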
* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/pgtable: Don't set huge PUD/PMD on non-leaf entries
x86/pti: Leave kernel text global for !PCID
x86/pti: Never implicitly clear _PAGE_GLOBAL for kernel image
x86/pti: Enable global pages for shared areas
x86/mm: Do not forbid _PAGE_RW before init for __ro_after_init
x86/mm: Comment _PAGE_GLOBAL mystery
x86/mm: Remove extra filtering in pageattr code
x86/mm: Do not auto-massage page protections
x86/espfix: Document use of _PAGE_GLOBAL
x86/mm: Introduce "default" kernel PTE mask
x86/mm: Undo double _PAGE_PSE clearing
x86/mm: Factor out pageattr _PAGE_GLOBAL setting
x86/entry/64: Drop idtentry's manual stack switch for user entries
x86/uapi: Fix asm/bootparam.h userspace compilation errors
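Several commits in this shortlog ("Do not auto-massage page protections", "Factor out pageattr _PAGE_GLOBAL setting", "Enable global pages for shared areas") revolve around one invariant: _PAGE_GLOBAL is only meaningful on present PTEs, because the same bit encodes _PAGE_PROTNONE on non-present ones. A minimal user-space sketch of that rule — the helper name mirrors pgprot_clear_protnone_bits() from the diff below; the types and everything else are simplified:

    #include <stdint.h>
    #include <stdio.h>

    /* Bit positions match x86; everything else here is simplified. */
    #define _PAGE_PRESENT  (1ULL << 0)
    #define _PAGE_GLOBAL   (1ULL << 8)
    #define _PAGE_PROTNONE _PAGE_GLOBAL /* same bit, valid only when non-present */

    typedef uint64_t pteval_t;

    /* Strip _PAGE_GLOBAL from non-present protections so the bit cannot
     * later be misread as _PAGE_PROTNONE: */
    static pteval_t pgprot_clear_protnone_bits(pteval_t prot)
    {
            if (!(prot & _PAGE_PRESENT))
                    prot &= ~_PAGE_GLOBAL;
            return prot;
    }

    int main(void)
    {
            pteval_t present     = _PAGE_PRESENT | _PAGE_GLOBAL;
            pteval_t not_present = _PAGE_GLOBAL; /* would alias _PAGE_PROTNONE */

            printf("%llx\n", (unsigned long long)pgprot_clear_protnone_bits(present));     /* 101 */
            printf("%llx\n", (unsigned long long)pgprot_clear_protnone_bits(not_present)); /* 0 */
            return 0;
    }

Setting the bit only when _PAGE_PRESENT is set, and clearing it when it is not, is what keeps a PTE that goes from present to non-present from being misread as PROT_NONE.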
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--   arch/x86/mm/cpu_entry_area.c |  14
-rw-r--r--   arch/x86/mm/ident_map.c      |   3
-rw-r--r--   arch/x86/mm/init.c           |  14
-rw-r--r--   arch/x86/mm/init_32.c        |   8
-rw-r--r--   arch/x86/mm/init_64.c        |  11
-rw-r--r--   arch/x86/mm/iomap_32.c       |   6
-rw-r--r--   arch/x86/mm/ioremap.c        |   3
-rw-r--r--   arch/x86/mm/kasan_init_64.c  |  14
-rw-r--r--   arch/x86/mm/pageattr.c       |  97
-rw-r--r--   arch/x86/mm/pgtable.c        |  12
-rw-r--r--   arch/x86/mm/pti.c            | 126
11 files changed, 239 insertions(+), 69 deletions(-)
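A second pattern repeats across nearly every file in the diff that follows: requested protections are masked with a new __default_kernel_pte_mask before being installed. Here is a self-contained sketch of the idea, with simplified types and only the _PAGE_GLOBAL bit modeled (illustrative, not the kernel's definitions):

    #include <stdint.h>
    #include <stdio.h>

    #define _PAGE_GLOBAL (1ULL << 8)

    typedef uint64_t pteval_t;

    /* Bits supported by the hardware: */
    static pteval_t __supported_pte_mask = ~_PAGE_GLOBAL;
    /* Bits allowed in normal kernel mappings: */
    static pteval_t __default_kernel_pte_mask = ~_PAGE_GLOBAL;

    static void probe_masks(int cpu_has_pge, int pti_enabled)
    {
            if (cpu_has_pge)
                    __supported_pte_mask |= _PAGE_GLOBAL;

            /* By default everything the hardware supports is allowed... */
            __default_kernel_pte_mask = __supported_pte_mask;
            /* ...except that PTI keeps kernel mappings mostly non-Global: */
            if (pti_enabled)
                    __default_kernel_pte_mask &= ~_PAGE_GLOBAL;
    }

    /* The pattern each call site in the diff applies to requested bits: */
    static pteval_t sanitize_prot(pteval_t prot)
    {
            return prot & __default_kernel_pte_mask;
    }

    int main(void)
    {
            probe_masks(/* cpu_has_pge = */ 1, /* pti_enabled = */ 1);
            /* A request for a Global mapping is silently filtered under PTI: */
            printf("%llx\n", (unsigned long long)sanitize_prot(_PAGE_GLOBAL)); /* 0 */
            return 0;
    }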
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 476d810639a8..b45f5aaefd74 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -27,8 +27,20 @@ EXPORT_SYMBOL(get_cpu_entry_area);
 void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
 {
 	unsigned long va = (unsigned long) cea_vaddr;
+	pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags);
 
-	set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+	/*
+	 * The cpu_entry_area is shared between the user and kernel
+	 * page tables.  All of its ptes can safely be global.
+	 * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for
+	 * non-present PTEs, so be careful not to set it in that
+	 * case to avoid confusion.
+	 */
+	if (boot_cpu_has(X86_FEATURE_PGE) &&
+	    (pgprot_val(flags) & _PAGE_PRESENT))
+		pte = pte_set_flags(pte, _PAGE_GLOBAL);
+
+	set_pte_vaddr(va, pte);
 }
 
 static void __init
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 9aa22be8331e..a2f0c7e20fb0 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -98,6 +98,9 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	if (!info->kernpg_flag)
 		info->kernpg_flag = _KERNPG_TABLE;
 
+	/* Filter out unsupported __PAGE_KERNEL_* bits: */
+	info->kernpg_flag &= __default_kernel_pte_mask;
+
 	for (; addr < end; addr = next) {
 		pgd_t *pgd = pgd_page + pgd_index(addr);
 		p4d_t *p4d;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 82f5252c723a..fec82b577c18 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -161,12 +161,6 @@ struct map_range {
 
 static int page_size_mask;
 
-static void enable_global_pages(void)
-{
-	if (!static_cpu_has(X86_FEATURE_PTI))
-		__supported_pte_mask |= _PAGE_GLOBAL;
-}
-
 static void __init probe_page_size_mask(void)
 {
 	/*
@@ -187,9 +181,15 @@ static void __init probe_page_size_mask(void)
 		__supported_pte_mask &= ~_PAGE_GLOBAL;
 	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
-		enable_global_pages();
+		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
 
+	/* By default, everything is supported: */
+	__default_kernel_pte_mask = __supported_pte_mask;
+	/* Except when with PTI where the kernel is mostly non-Global: */
+	if (cpu_feature_enabled(X86_FEATURE_PTI))
+		__default_kernel_pte_mask &= ~_PAGE_GLOBAL;
+
 	/* Enable 1 GB linear kernel mappings if available: */
 	if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
 		printk(KERN_INFO "Using GB pages for direct mapping\n");
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8008db2bddb3..c893c6a3d707 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -558,8 +558,14 @@ static void __init pagetable_init(void)
 	permanent_kmaps_init(pgd_base);
 }
 
-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
+#define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL)
+/* Bits supported by the hardware: */
+pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK;
+/* Bits allowed in normal kernel mappings: */
+pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
+/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
+EXPORT_SYMBOL(__default_kernel_pte_mask);
 
 /* user-defined highmem size */
 static unsigned int highmem_pages = -1;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 66de40e45f58..0a400606dea0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -65,8 +65,13 @@
  * around without checking the pgd every time.
  */
 
+/* Bits supported by the hardware: */
 pteval_t __supported_pte_mask __read_mostly = ~0;
+/* Bits allowed in normal kernel mappings: */
+pteval_t __default_kernel_pte_mask __read_mostly = ~0;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
+/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
+EXPORT_SYMBOL(__default_kernel_pte_mask);
 
 int force_personality32;
 
@@ -1286,6 +1291,12 @@ void mark_rodata_ro(void)
 			(unsigned long) __va(__pa_symbol(_sdata)));
 
 	debug_checkwx();
+
+	/*
+	 * Do this after all of the manipulation of the
+	 * kernel text page tables is complete.
+	 */
+	pti_clone_kernel_text();
 }
 
 int kern_addr_valid(unsigned long addr)
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index ada98b39b8ad..b3294d36769d 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -44,6 +44,9 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
 		return ret;
 
 	*prot = __pgprot(__PAGE_KERNEL | cachemode2protval(pcm));
+	/* Filter out unsupported __PAGE_KERNEL* bits: */
+	pgprot_val(*prot) &= __default_kernel_pte_mask;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(iomap_create_wc);
@@ -88,6 +91,9 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
 		prot = __pgprot(__PAGE_KERNEL |
 				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
 
+	/* Filter out unsupported __PAGE_KERNEL* bits: */
+	pgprot_val(prot) &= __default_kernel_pte_mask;
+
 	return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot);
 }
 EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index e2db83bebc3b..c63a545ec199 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -816,6 +816,9 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
 	}
 	pte = early_ioremap_pte(addr);
 
+	/* Sanitize 'flags' against any unsupported bits: */
+	pgprot_val(flags) &= __default_kernel_pte_mask;
+
 	if (pgprot_val(flags))
 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 	else
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d8ff013ea9d0..980dbebd0ca7 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -269,6 +269,12 @@ void __init kasan_early_init(void)
 	pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
 	p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE;
 
+	/* Mask out unsupported __PAGE_KERNEL bits: */
+	pte_val &= __default_kernel_pte_mask;
+	pmd_val &= __default_kernel_pte_mask;
+	pud_val &= __default_kernel_pte_mask;
+	p4d_val &= __default_kernel_pte_mask;
+
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		kasan_zero_pte[i] = __pte(pte_val);
 
@@ -371,7 +377,13 @@ void __init kasan_init(void)
 	 */
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 	for (i = 0; i < PTRS_PER_PTE; i++) {
-		pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO | _PAGE_ENC);
+		pte_t pte;
+		pgprot_t prot;
+
+		prot = __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC);
+		pgprot_val(prot) &= __default_kernel_pte_mask;
+
+		pte = __pte(__pa(kasan_zero_page) | pgprot_val(prot));
 		set_pte(&kasan_zero_pte[i], pte);
 	}
 	/* Flush TLBs again to be sure that write protection applied. */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 85cf12219dea..0f3d50f4c48c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -298,9 +298,11 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 
 	/*
 	 * The .rodata section needs to be read-only.  Using the pfn
-	 * catches all aliases.
+	 * catches all aliases.  This also includes __ro_after_init,
+	 * so do not enforce until kernel_set_to_readonly is true.
 	 */
-	if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+	if (kernel_set_to_readonly &&
+	    within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
 		   __pa_symbol(__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
 
@@ -512,6 +514,23 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 #endif
 }
 
+static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
+{
+	/*
+	 * _PAGE_GLOBAL means "global page" for present PTEs.
+	 * But, it is also used to indicate _PAGE_PROTNONE
+	 * for non-present PTEs.
+	 *
+	 * This ensures that a _PAGE_GLOBAL PTE going from
+	 * present to non-present is not confused as
+	 * _PAGE_PROTNONE.
+	 */
+	if (!(pgprot_val(prot) & _PAGE_PRESENT))
+		pgprot_val(prot) &= ~_PAGE_GLOBAL;
+
+	return prot;
+}
+
 static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
@@ -566,6 +585,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * up accordingly.
 	 */
 	old_pte = *kpte;
+	/* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
 	req_prot = pgprot_large_2_4k(old_prot);
 
 	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
@@ -577,19 +597,9 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * different bit positions in the two formats.
 	 */
 	req_prot = pgprot_4k_2_large(req_prot);
-
-	/*
-	 * Set the PSE and GLOBAL flags only if the PRESENT flag is
-	 * set otherwise pmd_present/pmd_huge will return true even on
-	 * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL
-	 * for the ancient hardware that doesn't support it.
-	 */
+	req_prot = pgprot_clear_protnone_bits(req_prot);
 	if (pgprot_val(req_prot) & _PAGE_PRESENT)
-		pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
-	else
-		pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
-
-	req_prot = canon_pgprot(req_prot);
+		pgprot_val(req_prot) |= _PAGE_PSE;
 
 	/*
 	 * old_pfn points to the large page base pfn. So we need
@@ -674,8 +684,12 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 	switch (level) {
 	case PG_LEVEL_2M:
 		ref_prot = pmd_pgprot(*(pmd_t *)kpte);
-		/* clear PSE and promote PAT bit to correct position */
+		/*
+		 * Clear PSE (aka _PAGE_PAT) and move
+		 * PAT bit to correct position.
+		 */
 		ref_prot = pgprot_large_2_4k(ref_prot);
+
 		ref_pfn = pmd_pfn(*(pmd_t *)kpte);
 		break;
 
@@ -698,23 +712,14 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 		return 1;
 	}
 
-	/*
-	 * Set the GLOBAL flags only if the PRESENT flag is set
-	 * otherwise pmd/pte_present will return true even on a non
-	 * present pmd/pte. The canon_pgprot will clear _PAGE_GLOBAL
-	 * for the ancient hardware that doesn't support it.
-	 */
-	if (pgprot_val(ref_prot) & _PAGE_PRESENT)
-		pgprot_val(ref_prot) |= _PAGE_GLOBAL;
-	else
-		pgprot_val(ref_prot) &= ~_PAGE_GLOBAL;
+	ref_prot = pgprot_clear_protnone_bits(ref_prot);
 
 	/*
 	 * Get the target pfn from the original entry:
 	 */
 	pfn = ref_pfn;
 	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
-		set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
+		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	if (virt_addr_valid(address)) {
 		unsigned long pfn = PFN_DOWN(__pa(address));
@@ -930,19 +935,7 @@ static void populate_pte(struct cpa_data *cpa,
 
 	pte = pte_offset_kernel(pmd, start);
 
-	/*
-	 * Set the GLOBAL flags only if the PRESENT flag is
-	 * set otherwise pte_present will return true even on
-	 * a non present pte.  The canon_pgprot will clear
-	 * _PAGE_GLOBAL for the ancient hardware that doesn't
-	 * support it.
-	 */
-	if (pgprot_val(pgprot) & _PAGE_PRESENT)
-		pgprot_val(pgprot) |= _PAGE_GLOBAL;
-	else
-		pgprot_val(pgprot) &= ~_PAGE_GLOBAL;
-
-	pgprot = canon_pgprot(pgprot);
+	pgprot = pgprot_clear_protnone_bits(pgprot);
 
 	while (num_pages-- && start < end) {
 		set_pte(pte, pfn_pte(cpa->pfn, pgprot));
@@ -1234,24 +1227,14 @@ repeat:
 
 		new_prot = static_protections(new_prot, address, pfn);
 
-		/*
-		 * Set the GLOBAL flags only if the PRESENT flag is
-		 * set otherwise pte_present will return true even on
-		 * a non present pte. The canon_pgprot will clear
-		 * _PAGE_GLOBAL for the ancient hardware that doesn't
-		 * support it.
-		 */
-		if (pgprot_val(new_prot) & _PAGE_PRESENT)
-			pgprot_val(new_prot) |= _PAGE_GLOBAL;
-		else
-			pgprot_val(new_prot) &= ~_PAGE_GLOBAL;
+		new_prot = pgprot_clear_protnone_bits(new_prot);
 
 		/*
 		 * We need to keep the pfn from the existing PTE,
 		 * after all we're only going to change its attributes,
 		 * not the memory it points to.
 		 */
-		new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
+		new_pte = pfn_pte(pfn, new_prot);
 		cpa->pfn = pfn;
 		/*
 		 * Do we really change anything ?
@@ -1428,11 +1411,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	memset(&cpa, 0, sizeof(cpa));
 
 	/*
-	 * Check, if we are requested to change a not supported
-	 * feature:
+	 * Check if we are requested to set an unsupported
+	 * feature.  Clearing unsupported features is OK.
 	 */
 	mask_set = canon_pgprot(mask_set);
-	mask_clr = canon_pgprot(mask_clr);
+
 	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
 		return 0;
 
@@ -1775,6 +1758,12 @@ int set_memory_4k(unsigned long addr, int numpages)
 					__pgprot(0), 1, 0, NULL);
 }
 
+int set_memory_nonglobal(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(_PAGE_GLOBAL), 0);
+}
+
 static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 {
 	struct cpa_data cpa;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 34cda7e0551b..ffc8c13c50e4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/mm.h>
 #include <linux/gfp.h>
+#include <linux/hugetlb.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
@@ -583,6 +584,9 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
 void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
 		       pgprot_t flags)
 {
+	/* Sanitize 'flags' against any unsupported bits: */
+	pgprot_val(flags) &= __default_kernel_pte_mask;
+
 	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
 }
 
@@ -636,6 +640,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 	    (mtrr != MTRR_TYPE_WRBACK))
 		return 0;
 
+	/* Bail out if we are on a populated non-leaf entry: */
+	if (pud_present(*pud) && !pud_huge(*pud))
+		return 0;
+
 	prot = pgprot_4k_2_large(prot);
 
 	set_pte((pte_t *)pud, pfn_pte(
@@ -664,6 +672,10 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 		return 0;
 	}
 
+	/* Bail out if we are on a populated non-leaf entry: */
+	if (pmd_present(*pmd) && !pmd_huge(*pmd))
+		return 0;
+
 	prot = pgprot_4k_2_large(prot);
 
 	set_pte((pte_t *)pmd, pfn_pte(
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 631507f0c198..f1fd52f449e0 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -66,12 +66,22 @@ static void __init pti_print_if_secure(const char *reason)
 		pr_info("%s\n", reason);
 }
 
+enum pti_mode {
+	PTI_AUTO = 0,
+	PTI_FORCE_OFF,
+	PTI_FORCE_ON
+} pti_mode;
+
 void __init pti_check_boottime_disable(void)
 {
 	char arg[5];
 	int ret;
 
+	/* Assume mode is auto unless overridden. */
+	pti_mode = PTI_AUTO;
+
 	if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+		pti_mode = PTI_FORCE_OFF;
 		pti_print_if_insecure("disabled on XEN PV.");
 		return;
 	}
@@ -79,18 +89,23 @@ void __init pti_check_boottime_disable(void)
 	ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
 	if (ret > 0)  {
 		if (ret == 3 && !strncmp(arg, "off", 3)) {
+			pti_mode = PTI_FORCE_OFF;
 			pti_print_if_insecure("disabled on command line.");
 			return;
 		}
 		if (ret == 2 && !strncmp(arg, "on", 2)) {
+			pti_mode = PTI_FORCE_ON;
 			pti_print_if_secure("force enabled on command line.");
 			goto enable;
 		}
-		if (ret == 4 && !strncmp(arg, "auto", 4))
+		if (ret == 4 && !strncmp(arg, "auto", 4)) {
+			pti_mode = PTI_AUTO;
 			goto autosel;
+		}
 	}
 
 	if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+		pti_mode = PTI_FORCE_OFF;
 		pti_print_if_insecure("disabled on command line.");
 		return;
 	}
@@ -149,7 +164,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
  *
  * Returns a pointer to a P4D on success, or NULL on failure.
  */
-static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 {
 	pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -177,7 +192,7 @@ static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
  *
  * Returns a pointer to a PMD on success, or NULL on failure.
  */
-static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 {
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -267,7 +282,7 @@ static void __init pti_setup_vsyscall(void)
 static void __init pti_setup_vsyscall(void) { }
 #endif
 
-static void __init
+static void
 pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
 {
 	unsigned long addr;
@@ -300,6 +315,27 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
 			return;
 
 		/*
+		 * Only clone present PMDs.  This ensures only setting
+		 * _PAGE_GLOBAL on present PMDs.  This should only be
+		 * called on well-known addresses anyway, so a non-
+		 * present PMD would be a surprise.
+		 */
+		if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
+			return;
+
+		/*
+		 * Setting 'target_pmd' below creates a mapping in both
+		 * the user and kernel page tables.  It is effectively
+		 * global, so set it as global in both copies.  Note:
+		 * the X86_FEATURE_PGE check is not _required_ because
+		 * the CPU ignores _PAGE_GLOBAL when PGE is not
+		 * supported.  The check keeps consistency with
+		 * code that only sets this bit when supported.
+		 */
+		if (boot_cpu_has(X86_FEATURE_PGE))
+			*pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
+
+		/*
 		 * Copy the PMD.  That is, the kernelmode and usermode
 		 * tables will share the last-level page tables of this
 		 * address range
@@ -348,7 +384,83 @@ static void __init pti_clone_entry_text(void)
 {
 	pti_clone_pmds((unsigned long) __entry_text_start,
 			(unsigned long) __irqentry_text_end,
-		       _PAGE_RW | _PAGE_GLOBAL);
+		       _PAGE_RW);
+}
+
+/*
+ * Global pages and PCIDs are both ways to make kernel TLB entries
+ * live longer, reduce TLB misses and improve kernel performance.
+ * But, leaving all kernel text Global makes it potentially accessible
+ * to Meltdown-style attacks which make it trivial to find gadgets or
+ * defeat KASLR.
+ *
+ * Only use global pages when it is really worth it.
+ */
+static inline bool pti_kernel_image_global_ok(void)
+{
+	/*
+	 * Systems with PCIDs get little benefit from global
+	 * kernel text and are not worth the downsides.
+	 */
+	if (cpu_feature_enabled(X86_FEATURE_PCID))
+		return false;
+
+	/*
+	 * Only do global kernel image for pti=auto.  Do the most
+	 * secure thing (not global) if pti=on specified.
+	 */
+	if (pti_mode != PTI_AUTO)
+		return false;
+
+	/*
+	 * K8 may not tolerate the cleared _PAGE_RW on the userspace
+	 * global kernel image pages.  Do the safe thing (disable
+	 * global kernel image).  This is unlikely to ever be
+	 * noticed because PTI is disabled by default on AMD CPUs.
+	 */
+	if (boot_cpu_has(X86_FEATURE_K8))
+		return false;
+
+	return true;
+}
+
+/*
+ * For some configurations, map all of kernel text into the user page
+ * tables.  This reduces TLB misses, especially on non-PCID systems.
+ */
+void pti_clone_kernel_text(void)
+{
+	unsigned long start = PFN_ALIGN(_text);
+	unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+
+	if (!pti_kernel_image_global_ok())
+		return;
+
+	pti_clone_pmds(start, end, _PAGE_RW);
+}
+
+/*
+ * This is the only user for it and it is not arch-generic like
+ * the other set_memory.h functions.  Just extern it.
+ */
+extern int set_memory_nonglobal(unsigned long addr, int numpages);
+void pti_set_kernel_image_nonglobal(void)
+{
+	/*
+	 * The identity map is created with PMDs, regardless of the
+	 * actual length of the kernel.  We need to clear
+	 * _PAGE_GLOBAL up to a PMD boundary, not just to the end
+	 * of the image.
+	 */
+	unsigned long start = PFN_ALIGN(_text);
+	unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+
+	if (pti_kernel_image_global_ok())
+		return;
+
+	pr_debug("set kernel image non-global\n");
+
+	set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
 }
 
 /*
@@ -362,6 +474,10 @@ void __init pti_init(void)
 	pr_info("enabled\n");
 
 	pti_clone_user_shared();
+
+	/* Undo all global bits from the init pagetables in head_64.S: */
+	pti_set_kernel_image_nonglobal();
+	/* Replace some of the global bits just for shared entry text: */
 	pti_clone_entry_text();
 	pti_setup_espfix64();
 	pti_setup_vsyscall();
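As a closing summary of the policy pti.c now implements, here is a condensed, runnable restatement of pti_kernel_image_global_ok() with the kernel's feature tests reduced to plain booleans (a sketch, not the kernel function):

    #include <stdbool.h>
    #include <stdio.h>

    enum pti_mode { PTI_AUTO, PTI_FORCE_OFF, PTI_FORCE_ON };

    static bool kernel_image_global_ok(bool has_pcid, enum pti_mode mode,
                                       bool is_k8)
    {
            if (has_pcid)           /* PCID already keeps kernel TLB entries alive */
                    return false;
            if (mode != PTI_AUTO)   /* pti=on asks for the most secure setting */
                    return false;
            if (is_k8)              /* K8 may not tolerate RW-cleared global pages */
                    return false;
            return true;
    }

    int main(void)
    {
            /* A non-PCID machine booted with pti=auto keeps kernel text Global: */
            printf("%d\n", kernel_image_global_ok(false, PTI_AUTO, false));     /* 1 */
            /* The same machine with pti=on does not: */
            printf("%d\n", kernel_image_global_ok(false, PTI_FORCE_ON, false)); /* 0 */
            return 0;
    }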