From eedb92abb9bb03ef21442614a6f5867eaac6e77f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:50 +0300 Subject: x86/mm: Make virtual memory layout dynamic for CONFIG_X86_5LEVEL=y To be able to switch between 4- and 5-level paging at boot-time, we need to adjust the virtual memory layout at runtime. KASLR already has movable __VMALLOC_BASE, __VMEMMAP_BASE and __PAGE_OFFSET. Let's re-use it. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7ba5d819ebe3..bf5c9ba63ba1 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -39,6 +39,15 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT +unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE; +EXPORT_SYMBOL(page_offset_base); +unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE; +EXPORT_SYMBOL(vmalloc_base); +unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE; +EXPORT_SYMBOL(vmemmap_base); +#endif + #define __head __section(.head.text) static void __head *fixup_pointer(void *ptr, unsigned long physaddr) -- cgit v1.2.3
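The payoff of turning the bases into variables is that everything defined in terms of them follows the boot-time choice automatically. A minimal sketch of the idea, simplified from the real <asm/page_64_types.h> definitions (not the verbatim kernel code):

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
extern unsigned long page_offset_base;
#define __PAGE_OFFSET	page_offset_base	/* read a variable at runtime */
#else
#define __PAGE_OFFSET	__PAGE_OFFSET_BASE	/* compile-time constant */
#endif

/* Direct-mapping helpers are built on __PAGE_OFFSET, so they
 * transparently follow whichever base was chosen at boot: */
#define __va(x) ((void *)((unsigned long)(x) + __PAGE_OFFSET))

Because every user goes through the macro, updating page_offset_base once during early boot retargets the whole direct mapping.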
From e626e6bb0dfaca41487241d49ce0ae827716101a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:51 +0300 Subject: x86/mm: Introduce 'pgtable_l5_enabled' The new flag indicates which paging mode we are in. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 4 ++++ arch/x86/include/asm/pgtable_32_types.h | 2 ++ arch/x86/include/asm/pgtable_64_types.h | 6 ++++++ arch/x86/kernel/head64.c | 5 +++++ 4 files changed, 17 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 8199a6187251..bd69e1830fbe 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -46,6 +46,10 @@ #define STATIC #include +#ifdef CONFIG_X86_5LEVEL +unsigned int pgtable_l5_enabled __ro_after_init = 1; +#endif + extern unsigned long get_cmd_line_ptr(void); /* Simplified build-specific string for starting entropy. */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 0777e18a1d23..e3225e83db7d 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -15,6 +15,8 @@ # include #endif +#define pgtable_l5_enabled 0 + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index a0db91ab63b8..5e2d724f8f47 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -20,6 +20,12 @@ typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; +#ifdef CONFIG_X86_5LEVEL +extern unsigned int pgtable_l5_enabled; +#else +#define pgtable_l5_enabled 0 +#endif + #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index bf5c9ba63ba1..17d00d1886de 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -39,6 +39,11 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); +#ifdef CONFIG_X86_5LEVEL +unsigned int pgtable_l5_enabled __ro_after_init = 1; +EXPORT_SYMBOL(pgtable_l5_enabled); +#endif + #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE; EXPORT_SYMBOL(page_offset_base); -- cgit v1.2.3 From c65e774fb3f6af212641538694b9778ff9ab4300 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:53 +0300 Subject: x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable For boot-time switching between 4- and 5-level paging we need to be able to fold the p4d page table level at runtime. It requires variable PGDIR_SHIFT and PTRS_PER_P4D. The change doesn't affect the kernel image size much: text data bss dec hex filename 8628091 4734304 1368064 14730459 e0c4db vmlinux.before 8628393 4734340 1368064 14730797 e0c62d vmlinux.after Signed-off-by: Kirill A.
Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-7-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 2 ++ arch/x86/include/asm/pgtable_32.h | 2 ++ arch/x86/include/asm/pgtable_64_types.h | 19 ++++++++++++------- arch/x86/kernel/cpu/mcheck/mce.c | 18 ++++++------------ arch/x86/kernel/head64.c | 6 +++++- arch/x86/mm/dump_pagetables.c | 12 +++++------- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/kasan_init_64.c | 2 +- arch/x86/platform/efi/efi_64.c | 4 ++-- include/asm-generic/5level-fixup.h | 1 + include/asm-generic/pgtable-nop4d.h | 9 +++++---- include/linux/kasan.h | 2 +- mm/kasan/kasan_init.c | 2 +- 13 files changed, 44 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index bd69e1830fbe..b18e8f9512de 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -48,6 +48,8 @@ #ifdef CONFIG_X86_5LEVEL unsigned int pgtable_l5_enabled __ro_after_init = 1; +unsigned int pgdir_shift __ro_after_init = 48; +unsigned int ptrs_per_p4d __ro_after_init = 512; #endif extern unsigned long get_cmd_line_ptr(void); diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e67c0620aec2..d829360e26bd 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -33,6 +33,8 @@ static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +static inline int pgd_large(pgd_t pgd) { return 0; } + /* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 903e4d054bcb..0c48d80e11d4 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -26,6 +26,9 @@ extern unsigned int pgtable_l5_enabled; #define pgtable_l5_enabled 0 #endif +extern unsigned int pgdir_shift; +extern unsigned int ptrs_per_p4d; + #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 @@ -35,16 +38,17 @@ extern unsigned int pgtable_l5_enabled; /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 48 +#define PGDIR_SHIFT pgdir_shift #define PTRS_PER_PGD 512 /* * 4th level page in 5-level paging case */ -#define P4D_SHIFT 39 -#define PTRS_PER_P4D 512 -#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) -#define P4D_MASK (~(P4D_SIZE - 1)) +#define P4D_SHIFT 39 +#define MAX_PTRS_PER_P4D 512 +#define PTRS_PER_P4D ptrs_per_p4d +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE - 1)) #define MAX_POSSIBLE_PHYSMEM_BITS 52 @@ -53,8 +57,9 @@ extern unsigned int pgtable_l5_enabled; /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 39 -#define PTRS_PER_PGD 512 +#define PGDIR_SHIFT 39 +#define PTRS_PER_PGD 512 +#define MAX_PTRS_PER_P4D 1 #endif /* CONFIG_X86_5LEVEL */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3a8e88a611eb..cbb3af721291 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1082,19 +1082,7 @@ void arch_unmap_kpfn(unsigned long pfn) * a legal address. */ -/* - * Build time check to see if we have a spare virtual bit. 
Don't want - * to leave this until run time because most developers don't have a - * system that can exercise this code path. This will only become a - * problem if/when we move beyond 5-level page tables. - * - * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD) - */ -#if PGDIR_SHIFT + 9 < 63 decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); -#else -#error "no unused virtual bit available" -#endif if (set_memory_np(decoy_addr, 1)) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); @@ -2328,6 +2316,12 @@ static __init int mcheck_init_device(void) { int err; + /* + * Check if we have a spare virtual bit. This will only become + * a problem if/when we move beyond 5-level page tables. + */ + MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63); + if (!mce_available(&boot_cpu_data)) { err = -EIO; goto err_out; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 17d00d1886de..98b0ff49b220 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -42,6 +42,10 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); #ifdef CONFIG_X86_5LEVEL unsigned int pgtable_l5_enabled __ro_after_init = 1; EXPORT_SYMBOL(pgtable_l5_enabled); +unsigned int pgdir_shift __ro_after_init = 48; +EXPORT_SYMBOL(pgdir_shift); +unsigned int ptrs_per_p4d __ro_after_init = 512; +EXPORT_SYMBOL(ptrs_per_p4d); #endif #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT @@ -336,7 +340,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0); BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0); BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); - BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == + MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index a89f2dbc3531..420058b05d39 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -428,14 +428,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, #define p4d_none(a) pud_none(__pud(p4d_val(a))) #endif -#if PTRS_PER_P4D > 1 - static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) { int i; p4d_t *start, *p4d_start; pgprotval_t prot; + if (PTRS_PER_P4D == 1) + return walk_pud_level(m, st, __p4d(pgd_val(addr)), P); + p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); for (i = 0; i < PTRS_PER_P4D; i++) { @@ -455,11 +456,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, } } -#else -#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p) -#define pgd_large(a) p4d_large(__p4d(pgd_val(a))) -#define pgd_none(a) p4d_none(__p4d(pgd_val(a))) -#endif +#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a)))) +#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a)))) static inline bool is_hypervisor_range(int idx) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1ab42c852069..6a4b20bc7527 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -143,7 +143,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) * With folded p4d, pgd_none() is always false, we need to * handle synchonization on p4d level. 
*/ - BUILD_BUG_ON(pgd_none(*pgd_ref)); + MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref)); p4d_ref = p4d_offset(pgd_ref, addr); if (p4d_none(*p4d_ref)) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index af6f2f9c6a26..12ec90f62457 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -19,7 +19,7 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; -static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); +static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); static __init void *early_alloc(size_t size, int nid, bool panic) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 780460aa5ea5..d52aaa7dc088 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -257,8 +257,8 @@ void efi_sync_low_kernel_mappings(void) * only span a single PGD entry and that the entry also maps * other important kernel regions. */ - BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); - BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != + MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); + MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != (EFI_VA_END & PGDIR_MASK)); pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h index dfbd9d990637..9c2e0708eb82 100644 --- a/include/asm-generic/5level-fixup.h +++ b/include/asm-generic/5level-fixup.h @@ -8,6 +8,7 @@ #define P4D_SHIFT PGDIR_SHIFT #define P4D_SIZE PGDIR_SIZE #define P4D_MASK PGDIR_MASK +#define MAX_PTRS_PER_P4D 1 #define PTRS_PER_P4D 1 #define p4d_t pgd_t diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h index 8f22f55de17a..1a29b2a0282b 100644 --- a/include/asm-generic/pgtable-nop4d.h +++ b/include/asm-generic/pgtable-nop4d.h @@ -8,10 +8,11 @@ typedef struct { pgd_t pgd; } p4d_t; -#define P4D_SHIFT PGDIR_SHIFT -#define PTRS_PER_P4D 1 -#define P4D_SIZE (1UL << P4D_SHIFT) -#define P4D_MASK (~(P4D_SIZE-1)) +#define P4D_SHIFT PGDIR_SHIFT +#define MAX_PTRS_PER_P4D 1 +#define PTRS_PER_P4D 1 +#define P4D_SIZE (1UL << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) /* * The "pgd_xxx()" functions here are trivial for a folded two-level diff --git a/include/linux/kasan.h b/include/linux/kasan.h index adc13474a53b..d6459bd1376d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD]; -extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; +extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D]; void kasan_populate_zero_shadow(const void *shadow_start, const void *shadow_end); diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 554e4c0f23a2..f436246ccc79 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -31,7 +31,7 @@ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; #if CONFIG_PGTABLE_LEVELS > 4 -p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss; +p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss; #endif #if CONFIG_PGTABLE_LEVELS > 3 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; -- cgit v1.2.3
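The two values made variable by that patch fully encode the difference between the paging modes: a top-level entry covers 1 << PGDIR_SHIFT bytes, and PTRS_PER_P4D says whether the p4d level really exists. A quick stand-alone check in C (illustrative only; the constants mirror the ones used in the patch):

#include <stdio.h>

int main(void)
{
	/* 4-level: p4d folded, 48-bit virtual addresses */
	unsigned int pgdir_shift_l4 = 39, ptrs_per_p4d_l4 = 1;
	/* 5-level: 512-entry p4d, 57-bit virtual addresses */
	unsigned int pgdir_shift_l5 = 48, ptrs_per_p4d_l5 = 512;

	/* 512 PGD entries contribute another 9 bits on top of PGDIR_SHIFT. */
	printf("4-level VA bits: %u\n", pgdir_shift_l4 + 9);	/* 48 */
	printf("5-level VA bits: %u\n", pgdir_shift_l5 + 9);	/* 57 */
	printf("p4d entries: %u vs %u\n", ptrs_per_p4d_l4, ptrs_per_p4d_l5);
	return 0;
}

With PGDIR_SHIFT == 39 and PTRS_PER_P4D == 1 the p4d level degenerates into a pass-through of the pgd, which is exactly the folded 4-level layout the following patches select at boot.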
Shutemov" Date: Wed, 14 Feb 2018 14:16:54 +0300 Subject: x86/mm: Make MAX_PHYSADDR_BITS and MAX_PHYSMEM_BITS dynamic For boot-time switching between paging modes, we need to be able to adjust size of physical address space at runtime. As part of making physical address space size variable, we have to make X86_5LEVEL dependent on SPARSEMEM_VMEMMAP. !SPARSEMEM_VMEMMAP configuration doesn't build with variable MAX_PHYSMEM_BITS. For !SPARSEMEM_VMEMMAP SECTIONS_WIDTH depends on MAX_PHYSMEM_BITS: SECTIONS_WIDTH SECTIONS_SHIFT MAX_PHYSMEM_BITS And SECTIONS_WIDTH is used on pre-processor stage, it doesn't work if it's dyncamic. See include/linux/page-flags-layout.h. Effect on kernel image size: text data bss dec hex filename 8628393 4734340 1368064 14730797 e0c62d vmlinux.before 8628892 4734340 1368064 14731296 e0c820 vmlinux.after Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-8-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/include/asm/pgtable_64_types.h | 2 +- arch/x86/include/asm/sparsemem.h | 9 ++------- arch/x86/kernel/setup.c | 5 ++--- 4 files changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 92256489b8a4..fcc3f88996b3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1431,6 +1431,7 @@ config X86_PAE config X86_5LEVEL bool "Enable 5-level page tables support" select DYNAMIC_MEMORY_LAYOUT + select SPARSEMEM_VMEMMAP depends on X86_64 ---help--- 5-level paging enables access to larger address space: diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 0c48d80e11d4..59d971c85de5 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -95,7 +95,7 @@ extern unsigned int ptrs_per_p4d; * range must not overlap with anything except the KASAN shadow area, which * is correct as KASAN disables KASLR. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM (1UL << MAX_PHYSMEM_BITS) #define LDT_PGD_ENTRY_L4 -3UL #define LDT_PGD_ENTRY_L5 -112UL diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 4fc1e9d3c43e..4617a2bf123c 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,13 +27,8 @@ # endif #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -# ifdef CONFIG_X86_5LEVEL -# define MAX_PHYSADDR_BITS 52 -# define MAX_PHYSMEM_BITS 52 -# else -# define MAX_PHYSADDR_BITS 44 -# define MAX_PHYSMEM_BITS 46 -# endif +# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44) +# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 
52 : 46) #endif #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1ae67e982af7..399d0f7fa8f1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -189,9 +189,7 @@ struct ist_info ist_info; #endif #else -struct cpuinfo_x86 boot_cpu_data __read_mostly = { - .x86_phys_bits = MAX_PHYSMEM_BITS, -}; +struct cpuinfo_x86 boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); #endif @@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p) __flush_tlb_all(); #else printk(KERN_INFO "Command line: %s\n", boot_command_line); + boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS; #endif /* -- cgit v1.2.3 From 4c2b4058ab32581931c2caf760b689fd4b019a87 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:34 +0300 Subject: x86/mm: Initialize 'pgtable_l5_enabled' at boot-time 'pgtable_l5_enabled' indicates which paging mode we are using. We need to initialize it at boot-time according to machine capability. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-2-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 8 +++++++- arch/x86/kernel/head64.c | 24 +++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index b18e8f9512de..d02a838c0ce4 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -47,7 +47,7 @@ #include #ifdef CONFIG_X86_5LEVEL -unsigned int pgtable_l5_enabled __ro_after_init = 1; +unsigned int pgtable_l5_enabled __ro_after_init; unsigned int pgdir_shift __ro_after_init = 48; unsigned int ptrs_per_p4d __ro_after_init = 512; #endif @@ -729,6 +729,12 @@ void choose_random_location(unsigned long input, return; } +#ifdef CONFIG_X86_5LEVEL + if (__read_cr4() & X86_CR4_LA57) { + pgtable_l5_enabled = 1; + } +#endif + boot_params->hdr.loadflags |= KASLR_FLAG; /* Prepare to add new identity pagetables on demand. 
*/ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 98b0ff49b220..ffb31c50d515 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -40,7 +40,7 @@ static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); #ifdef CONFIG_X86_5LEVEL -unsigned int pgtable_l5_enabled __ro_after_init = 1; +unsigned int pgtable_l5_enabled __ro_after_init; EXPORT_SYMBOL(pgtable_l5_enabled); unsigned int pgdir_shift __ro_after_init = 48; EXPORT_SYMBOL(pgdir_shift); @@ -64,6 +64,26 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr) return ptr - (void *)_text + (void *)physaddr; } +#ifdef CONFIG_X86_5LEVEL +static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr) +{ + return fixup_pointer(ptr, physaddr); +} + +static void __head check_la57_support(unsigned long physaddr) +{ + if (native_cpuid_eax(0) < 7) + return; + + if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + return; + + *fixup_int(&pgtable_l5_enabled, physaddr) = 1; +} +#else +static void __head check_la57_support(unsigned long physaddr) {} +#endif + unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { @@ -76,6 +96,8 @@ unsigned long __head __startup_64(unsigned long physaddr, int i; unsigned int *next_pgt_ptr; + check_la57_support(physaddr); + /* Is the address too large? */ if (physaddr >> MAX_PHYSMEM_BITS) for (;;); -- cgit v1.2.3
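The CPUID sequence in check_la57_support() above is compact; the same probe written as a stand-alone user-space sketch may be easier to follow (assumes GCC/Clang on x86-64; LA57 is reported in CPUID leaf 7, sub-leaf 0, ECX bit 16, which is what X86_FEATURE_LA57 & 31 evaluates to):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 7 must exist before it can be queried. */
	if (__get_cpuid_max(0, NULL) < 7)
		return 0;

	__cpuid_count(7, 0, eax, ebx, ecx, edx);
	printf("LA57 %ssupported\n", (ecx & (1u << 16)) ? "" : "not ");
	return 0;
}

Note that "supported" is not the same as "enabled": the decompressor checks CR4.LA57 instead, because by the time it runs the boot code has already committed to a paging mode.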
From b16e770bfa5344f1cd4f7b4ecd7bbae25001e120 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:35 +0300 Subject: x86/mm: Initialize 'pgdir_shift' and 'ptrs_per_p4d' at boot-time Switching between paging modes requires the folding of the p4d page table level when we only have 4 paging levels, which means we need to adjust 'pgdir_shift' and 'ptrs_per_p4d' during early boot according to paging mode. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-3-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 6 ++++-- arch/x86/kernel/head64.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index d02a838c0ce4..66e42a098d70 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -48,8 +48,8 @@ #ifdef CONFIG_X86_5LEVEL unsigned int pgtable_l5_enabled __ro_after_init; -unsigned int pgdir_shift __ro_after_init = 48; -unsigned int ptrs_per_p4d __ro_after_init = 512; +unsigned int pgdir_shift __ro_after_init = 39; +unsigned int ptrs_per_p4d __ro_after_init = 1; #endif extern unsigned long get_cmd_line_ptr(void); @@ -732,6 +732,8 @@ void choose_random_location(unsigned long input, #ifdef CONFIG_X86_5LEVEL if (__read_cr4() & X86_CR4_LA57) { pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index ffb31c50d515..8a0a485524da 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -42,9 +42,9 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); #ifdef CONFIG_X86_5LEVEL unsigned int pgtable_l5_enabled __ro_after_init; EXPORT_SYMBOL(pgtable_l5_enabled); -unsigned int pgdir_shift __ro_after_init = 48; +unsigned int pgdir_shift __ro_after_init = 39; EXPORT_SYMBOL(pgdir_shift); -unsigned int ptrs_per_p4d __ro_after_init = 512; +unsigned int ptrs_per_p4d __ro_after_init = 1; EXPORT_SYMBOL(ptrs_per_p4d); #endif @@ -79,6 +79,8 @@ static void __head check_la57_support(unsigned long physaddr) return; *fixup_int(&pgtable_l5_enabled, physaddr) = 1; + *fixup_int(&pgdir_shift, physaddr) = 48; + *fixup_int(&ptrs_per_p4d, physaddr) = 512; } #else static void __head check_la57_support(unsigned long physaddr) {} -- cgit v1.2.3 From 4fa5662b6b49611f11856db8be346710217473ef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:36 +0300 Subject: x86/mm: Initialize 'page_offset_base' at boot-time For 4- and 5-level paging we have different 'page_offset_base'. Let's initialize it at boot-time according to machine capability. We also have to split __PAGE_OFFSET_BASE into two constants -- for 4- and 5-level paging. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_64_types.h | 9 +++------ arch/x86/kernel/head64.c | 13 +++++++++---- arch/x86/kernel/head_64.S | 2 +- arch/x86/mm/kaslr.c | 8 ++++---- 4 files changed, 17 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index fa7dc7cd8c19..2c5a966dc222 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -37,16 +37,13 @@ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's * what Xen requires.
*/ -#ifdef CONFIG_X86_5LEVEL -#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL) -#else -#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) -#endif +#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL) +#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL) #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base #else -#define __PAGE_OFFSET __PAGE_OFFSET_BASE +#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8a0a485524da..876d3bf2b23a 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -49,7 +49,7 @@ EXPORT_SYMBOL(ptrs_per_p4d); #endif #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT -unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE; +unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4; EXPORT_SYMBOL(page_offset_base); unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE; EXPORT_SYMBOL(vmalloc_base); @@ -64,6 +64,11 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr) return ptr - (void *)_text + (void *)physaddr; } +static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr) +{ + return fixup_pointer(ptr, physaddr); +} + #ifdef CONFIG_X86_5LEVEL static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr) { @@ -81,6 +86,7 @@ static void __head check_la57_support(unsigned long physaddr) *fixup_int(&pgtable_l5_enabled, physaddr) = 1; *fixup_int(&pgdir_shift, physaddr) = 48; *fixup_int(&ptrs_per_p4d, physaddr) = 512; + *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5; } #else static void __head check_la57_support(unsigned long physaddr) {} @@ -89,7 +95,7 @@ static void __head check_la57_support(unsigned long physaddr) {} unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { - unsigned long load_delta, *p; + unsigned long load_delta; unsigned long pgtable_flags; pgdval_t *pgd; p4dval_t *p4d; @@ -196,8 +202,7 @@ unsigned long __head __startup_64(unsigned long physaddr, * Fixup phys_base - remove the memory encryption mask to obtain * the true physical address. */ - p = fixup_pointer(&phys_base, physaddr); - *p += load_delta - sme_get_me_mask(); + *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask(); /* Encrypt the kernel and related (if SME is active) */ sme_encrypt_kernel(bp); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 04a625f0fcda..d3f8b43d541a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -41,7 +41,7 @@ #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) -PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) +PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE_L4) PGD_START_KERNEL = pgd_index(__START_KERNEL_map) #endif L3_START_KERNEL = pud_index(__START_KERNEL_map) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index d079878c6cbc..7828a7ca3bba 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -34,13 +34,10 @@ #define TB_SHIFT 40 /* - * Virtual address start and end range for randomization. - * * The end address could depend on more configuration options to make the * highest amount of space for randomization available, but that's too hard * to keep straight and caused issues already. 
*/ -static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; /* @@ -76,11 +73,14 @@ static inline bool kaslr_memory_enabled(void) void __init kernel_randomize_memory(void) { size_t i; - unsigned long vaddr = vaddr_start; + unsigned long vaddr_start, vaddr; unsigned long rand, memory_tb; struct rnd_state rand_state; unsigned long remain_entropy; + vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4; + vaddr = vaddr_start; + /* * These BUILD_BUG_ON checks ensure the memory layout is consistent * with the vaddr_start/vaddr_end variables. These checks are very -- cgit v1.2.3 From a7412546d8cb5ad578805060b4006f2a021b5868 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:37 +0300 Subject: x86/mm: Adjust vmalloc base and size at boot-time vmalloc area has different placement and size depending on paging mode. Let's adjust it during early boot according to machine capability. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 16 ++++++++++------ arch/x86/kernel/head64.c | 3 ++- arch/x86/mm/kaslr.c | 3 ++- 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 59d971c85de5..686329994ade 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -102,25 +102,29 @@ extern unsigned int ptrs_per_p4d; #define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) +#define __VMALLOC_BASE_L4 0xffffc90000000000 +#define __VMALLOC_BASE_L5 0xffa0000000000000 + +#define VMALLOC_SIZE_TB_L4 32UL +#define VMALLOC_SIZE_TB_L5 12800UL + #ifdef CONFIG_X86_5LEVEL -# define VMALLOC_SIZE_TB _AC(12800, UL) -# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) #else -# define VMALLOC_SIZE_TB _AC(32, UL) -# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #endif #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base +# define VMALLOC_SIZE_TB (pgtable_l5_enabled ?
VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) # define VMEMMAP_START vmemmap_base #else -# define VMALLOC_START __VMALLOC_BASE +# define VMALLOC_START __VMALLOC_BASE_L4 +# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 # define VMEMMAP_START __VMEMMAP_BASE #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) +#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 876d3bf2b23a..22bf2015254c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(ptrs_per_p4d); #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4; EXPORT_SYMBOL(page_offset_base); -unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE; +unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4; EXPORT_SYMBOL(vmalloc_base); unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE; EXPORT_SYMBOL(vmemmap_base); @@ -87,6 +87,7 @@ static void __head check_la57_support(unsigned long physaddr) *fixup_int(&pgdir_shift, physaddr) = 48; *fixup_int(&ptrs_per_p4d, physaddr) = 512; *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5; + *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5; } #else static void __head check_la57_support(unsigned long physaddr) {} diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 7828a7ca3bba..641169d38184 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -50,7 +50,7 @@ static __initdata struct kaslr_memory_region { unsigned long size_tb; } kaslr_regions[] = { { &page_offset_base, 0 }, - { &vmalloc_base, VMALLOC_SIZE_TB }, + { &vmalloc_base, 0 }, { &vmemmap_base, 1 }, }; @@ -94,6 +94,7 @@ void __init kernel_randomize_memory(void) return; kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT); + kaslr_regions[1].size_tb = VMALLOC_SIZE_TB; /* * Update Physical memory mapping to available and -- cgit v1.2.3 From 9b46a051e43461a9afda2bdd50e0e0ae349341df Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:38 +0300 Subject: x86/mm: Initialize vmemmap_base at boot-time vmemmap area has different placement depending on paging mode. Let's adjust it during early boot according to machine capability. Signed-off-by: Kirill A.
Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 9 +++------ arch/x86/kernel/head64.c | 3 ++- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 686329994ade..68909a68e5b9 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -108,11 +108,8 @@ extern unsigned int ptrs_per_p4d; #define VMALLOC_SIZE_TB_L4 32UL #define VMALLOC_SIZE_TB_L5 12800UL -#ifdef CONFIG_X86_5LEVEL -# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) -#else -# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -#endif +#define __VMEMMAP_BASE_L4 0xffffea0000000000 +#define __VMEMMAP_BASE_L5 0xffd4000000000000 #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base @@ -121,7 +118,7 @@ extern unsigned int ptrs_per_p4d; #else # define VMALLOC_START __VMALLOC_BASE_L4 # define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 -# define VMEMMAP_START __VMEMMAP_BASE +# define VMEMMAP_START __VMEMMAP_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 22bf2015254c..795e762f3c66 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -53,7 +53,7 @@ unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4; EXPORT_SYMBOL(page_offset_base); unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4; EXPORT_SYMBOL(vmalloc_base); -unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE; +unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4; EXPORT_SYMBOL(vmemmap_base); #endif @@ -88,6 +88,7 @@ static void __head check_la57_support(unsigned long physaddr) *fixup_int(&ptrs_per_p4d, physaddr) = 512; *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5; *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5; + *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5; } #else static void __head check_la57_support(unsigned long physaddr) {} -- cgit v1.2.3 From 6f9dd329717f696f578347c0781a0247db957596 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:39 +0300 Subject: x86/mm: Support boot-time switching of paging modes in the early boot code Early boot code should be able to initialize page tables for both 4- and 5-level paging modes. Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-7-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 33 ++++++++++++++++++++++----------- arch/x86/kernel/head_64.S | 10 ++++------ 2 files changed, 26 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 795e762f3c66..8161e719a20f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -75,13 +75,13 @@ static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr) return fixup_pointer(ptr, physaddr); } -static void __head check_la57_support(unsigned long physaddr) +static bool __head check_la57_support(unsigned long physaddr) { if (native_cpuid_eax(0) < 7) - return; + return false; if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) - return; + return false; *fixup_int(&pgtable_l5_enabled, physaddr) = 1; *fixup_int(&pgdir_shift, physaddr) = 48; @@ -89,24 +89,30 @@ static void __head check_la57_support(unsigned long physaddr) *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5; *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5; *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5; + + return true; } #else -static void __head check_la57_support(unsigned long physaddr) {} +static bool __head check_la57_support(unsigned long physaddr) +{ + return false; +} #endif unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { - unsigned long load_delta; + unsigned long load_delta, *p; unsigned long pgtable_flags; pgdval_t *pgd; p4dval_t *p4d; pudval_t *pud; pmdval_t *pmd, pmd_entry; + bool la57; int i; unsigned int *next_pgt_ptr; - check_la57_support(physaddr); + la57 = check_la57_support(physaddr); /* Is the address too large? */ if (physaddr >> MAX_PHYSMEM_BITS) @@ -131,9 +137,14 @@ unsigned long __head __startup_64(unsigned long physaddr, /* Fixup the physical addresses in the page table */ pgd = fixup_pointer(&early_top_pgt, physaddr); - pgd[pgd_index(__START_KERNEL_map)] += load_delta; - - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + p = pgd + pgd_index(__START_KERNEL_map); + if (la57) + *p = (unsigned long)level4_kernel_pgt; + else + *p = (unsigned long)level3_kernel_pgt; + *p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta; + + if (la57) { p4d = fixup_pointer(&level4_kernel_pgt, physaddr); p4d[511] += load_delta; } @@ -158,7 +169,7 @@ unsigned long __head __startup_64(unsigned long physaddr, pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + if (la57) { p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; @@ -255,7 +266,7 @@ again: * critical -- __PAGE_OFFSET would point us back into the dynamic * range and we might end up looping forever... 
*/ - if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + if (!pgtable_l5_enabled) p4d_p = pgd_p; else if (pgd) p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d3f8b43d541a..145d7b95ae29 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -124,7 +124,10 @@ ENTRY(secondary_startup_64) /* Enable PAE mode, PGE and LA57 */ movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx #ifdef CONFIG_X86_5LEVEL + testl $1, pgtable_l5_enabled(%rip) + jz 1f orl $X86_CR4_LA57, %ecx +1: #endif movq %rcx, %cr4 @@ -372,12 +375,7 @@ GLOBAL(name) __INITDATA NEXT_PGD_PAGE(early_top_pgt) - .fill 511,8,0 -#ifdef CONFIG_X86_5LEVEL - .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC -#else - .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC -#endif + .fill 512,8,0 .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(early_dynamic_pgts) -- cgit v1.2.3 From b9952ec78778aa7ae5b8df672668aece6fc93d2a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 16 Feb 2018 14:49:46 +0300 Subject: x86/xen: Allow XEN_PV and XEN_PVH to be enabled with X86_5LEVEL With boot-time switching between paging modes, XEN_PV and XEN_PVH can be booted in 4-level paging mode. Tested-by: Juergen Gross Signed-off-by: Kirill A. Shutemov Reviewed-by: Juergen Gross Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180216114948.68868-2-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 12 ++++++------ arch/x86/xen/Kconfig | 5 ----- arch/x86/xen/mmu_pv.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 145d7b95ae29..3e9de0fc97de 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -38,12 +38,12 @@ * */ +#define l4_index(x) (((x) >> 39) & 511) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) -PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE_L4) -PGD_START_KERNEL = pgd_index(__START_KERNEL_map) -#endif +L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) +L4_START_KERNEL = l4_index(__START_KERNEL_map) + L3_START_KERNEL = pud_index(__START_KERNEL_map) .text @@ -386,9 +386,9 @@ NEXT_PAGE(early_dynamic_pgts) #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) NEXT_PGD_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 + .org init_top_pgt + L4_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .org init_top_pgt + PGD_START_KERNEL*8, 0 + .org init_top_pgt + L4_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC .fill PTI_USER_PGD_FILL,8,0 diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index f605825a04ab..c1f98f32c45f 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -18,9 +18,6 @@ config XEN_PV bool "Xen PV guest support" default y depends on XEN - # XEN_PV is not ready to work with 5-level paging. # Changes to hypervisor are also required.
- depends on !X86_5LEVEL select XEN_HAVE_PVMMU select XEN_HAVE_VPMU help @@ -79,6 +76,4 @@ config XEN_DEBUG_FS config XEN_PVH bool "Support for running as a PVH guest" depends on XEN && XEN_PVHVM && ACPI - # Pre-built page tables are not ready to handle 5-level paging. - depends on !X86_5LEVEL def_bool n diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index d85076223a69..3f4fec59af09 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -538,6 +538,22 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) xen_mc_issue(PARAVIRT_LAZY_MMU); } + +#if CONFIG_PGTABLE_LEVELS >= 5 +__visible p4dval_t xen_p4d_val(p4d_t p4d) +{ + return pte_mfn_to_pfn(p4d.p4d); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val); + +__visible p4d_t xen_make_p4d(p4dval_t p4d) +{ + p4d = pte_pfn_to_mfn(p4d); + + return native_make_p4d(p4d); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); +#endif /* CONFIG_PGTABLE_LEVELS >= 5 */ #endif /* CONFIG_X86_64 */ static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, @@ -2411,6 +2427,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .alloc_pud = xen_alloc_pmd_init, .release_pud = xen_release_pmd_init, + +#if CONFIG_PGTABLE_LEVELS >= 5 + .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), + .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), +#endif #endif /* CONFIG_X86_64 */ .activate_mm = xen_activate_mm, -- cgit v1.2.3 From 39b9552281abfcdfc54162897018890dafe7ffef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 16 Feb 2018 14:49:48 +0300 Subject: x86/mm: Optimize boot-time paging mode switching cost By this point we have functioning boot-time switching between 4- and 5-level paging mode. But the naive approach comes with a cost. Numbers below are for kernel build, allmodconfig, 5 times. CONFIG_X86_5LEVEL=n: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17308719.892691 task-clock:u (msec) # 26.772 CPUs utilized ( +- 0.11% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,993,164 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,614,978,867,455 cycles:u # 2.520 GHz ( +- 0.01% ) 39,371,534,575,126 stalled-cycles-frontend:u # 90.27% frontend cycles idle ( +- 0.09% ) 28,363,350,152,428 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,784,066,413 branches:u # 364.948 M/sec ( +- 0.00% ) 250,808,144,781 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 646.531974142 seconds time elapsed ( +- 1.15% ) CONFIG_X86_5LEVEL=y: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17411536.780625 task-clock:u (msec) # 26.426 CPUs utilized ( +- 0.10% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,868,663 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,865,909,056,301 cycles:u # 2.519 GHz ( +- 0.01% ) 39,740,130,365,581 stalled-cycles-frontend:u # 90.59% frontend cycles idle ( +- 0.05% ) 28,363,358,997,959 instructions:u # 0.65 insn per cycle # 1.40 stalled cycles per insn ( +- 0.00% ) 6,316,784,937,460 branches:u # 362.793 M/sec ( +- 0.00% ) 251,531,919,485 branch-misses:u # 3.98% of all branches ( +- 0.00% ) 658.886307752 seconds time elapsed ( +- 0.92% ) The patch tries to fix the performance regression by using cpu_feature_enabled(X86_FEATURE_LA57) instead of pgtable_l5_enabled in all hot code paths. These will statically patch the target code for additional performance.
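To make the static patching concrete: both variants of the ALTERNATIVE in the entry_64.S hunk below canonicalize an address by sign-extending from the highest implemented virtual bit (bit 47 with 4-level paging, bit 56 with 5-level). A rough C equivalent of what the patched shl/sar pairs compute (illustrative sketch, not the kernel's code):

#include <stdint.h>

static inline uint64_t canonicalize(uint64_t vaddr, int va_bits)
{
	int shift = 64 - va_bits;	/* 16 for 48-bit VA, 7 for 57-bit VA */

	/* The arithmetic right shift replicates the top virtual bit
	 * into the unused high bits, as the hardware expects. */
	return (uint64_t)(((int64_t)(vaddr << shift)) >> shift);
}

Alternatives patching selects the 48-bit or the 57-bit shift pair once at boot, based on X86_FEATURE_LA57, so the syscall hot path pays no runtime branch.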
CONFIG_X86_5LEVEL=y + the patch: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17381990.268506 task-clock:u (msec) # 26.907 CPUs utilized ( +- 0.19% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,862,625 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,697,726,320,051 cycles:u # 2.514 GHz ( +- 0.03% ) 39,480,408,690,401 stalled-cycles-frontend:u # 90.35% frontend cycles idle ( +- 0.05% ) 28,363,394,221,388 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,794,985,573 branches:u # 363.410 M/sec ( +- 0.00% ) 251,013,232,547 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 645.991174661 seconds time elapsed ( +- 1.19% ) Unfortunately, this approach doesn't help with text size: vmlinux.before .text size: 8190319 vmlinux.after .text size: 8200623 The .text section is increased by about 4k. Not sure if we can do anything about this. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180216114948.68868-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/misc.h | 5 +++++ arch/x86/entry/entry_64.S | 11 ++--------- arch/x86/include/asm/pgtable_64_types.h | 5 ++++- arch/x86/kernel/head64.c | 9 +++++++-- arch/x86/kernel/head_64.S | 2 +- arch/x86/mm/kasan_init_64.c | 6 ++++++ 6 files changed, 25 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9d323dc6b159..4d369c308ed7 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -12,6 +12,11 @@ #undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_KASAN +#ifdef CONFIG_X86_5LEVEL +/* cpu_feature_enabled() cannot be used that early */ +#define pgtable_l5_enabled __pgtable_l5_enabled +#endif + #include #include #include diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2c06348b7807..b18acdff9c3f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -275,15 +275,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) * depending on paging mode) in the address.
*/ #ifdef CONFIG_X86_5LEVEL - testl $1, pgtable_l5_enabled(%rip) - jz 1f - shl $(64 - 57), %rcx - sar $(64 - 57), %rcx - jmp 2f -1: - shl $(64 - 48), %rcx - sar $(64 - 48), %rcx -2: + ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \ + "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57 #else shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 68909a68e5b9..d5c21a382475 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -21,7 +21,10 @@ typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; #ifdef CONFIG_X86_5LEVEL -extern unsigned int pgtable_l5_enabled; +extern unsigned int __pgtable_l5_enabled; +#ifndef pgtable_l5_enabled +#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57) +#endif #else #define pgtable_l5_enabled 0 #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8161e719a20f..0c855deee165 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -32,6 +32,11 @@ #include #include +#ifdef CONFIG_X86_5LEVEL +#undef pgtable_l5_enabled +#define pgtable_l5_enabled __pgtable_l5_enabled +#endif + /* * Manage page tables very early on. */ @@ -40,8 +45,8 @@ static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); #ifdef CONFIG_X86_5LEVEL -unsigned int pgtable_l5_enabled __ro_after_init; -EXPORT_SYMBOL(pgtable_l5_enabled); +unsigned int __pgtable_l5_enabled __ro_after_init; +EXPORT_SYMBOL(__pgtable_l5_enabled); unsigned int pgdir_shift __ro_after_init = 39; EXPORT_SYMBOL(pgdir_shift); unsigned int ptrs_per_p4d __ro_after_init = 1; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3e9de0fc97de..326c63129417 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -124,7 +124,7 @@ ENTRY(secondary_startup_64) /* Enable PAE mode, PGE and LA57 */ movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx #ifdef CONFIG_X86_5LEVEL - testl $1, pgtable_l5_enabled(%rip) + testl $1, __pgtable_l5_enabled(%rip) jz 1f orl $X86_CR4_LA57, %ecx 1: diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0df0dd13a71d..d8ff013ea9d0 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,6 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt + +#ifdef CONFIG_X86_5LEVEL +/* Too early to use cpu_feature_enabled() */ +#define pgtable_l5_enabled __pgtable_l5_enabled +#endif + #include #include #include -- cgit v1.2.3 From 038bac2b02989acf1fc938cedcb7944c02672b9f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 19 Feb 2018 11:09:05 +0100 Subject: x86/acpi: Add a new x86_init_acpi structure to x86_init_ops Add a new struct x86_init_acpi to x86_init_ops. For now it contains only one init function to get the RSDP table address. Signed-off-by: Juergen Gross Reviewed-by: Andy Shevchenko Acked-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Cc: Borislav Petkov Cc: Eric Biederman Cc: H. Peter Anvin Cc: Kees Cook Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: boris.ostrovsky@oracle.com Cc: lenb@kernel.org Cc: linux-acpi@vger.kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20180219100906.14265-3-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 7 +++++++ arch/x86/include/asm/x86_init.h | 9 +++++++++ arch/x86/kernel/x86_init.c | 5 +++++ 3 files changed, 21 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 11881726ed37..6609dd7289b5 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI_APEI # include @@ -133,6 +134,12 @@ static inline bool acpi_has_cpu_in_madt(void) return !!acpi_lapic; } +#define ACPI_HAVE_ARCH_GET_ROOT_POINTER +static inline u64 acpi_arch_get_root_pointer(void) +{ + return x86_init.acpi.get_root_pointer(); +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index fc2f082ac635..2e2c34d2bb00 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -130,6 +130,14 @@ struct x86_hyper_init { void (*init_mem_mapping)(void); }; +/** + * struct x86_init_acpi - x86 ACPI init functions + * @get_root_pointer: get RSDP address + */ +struct x86_init_acpi { + u64 (*get_root_pointer)(void); +}; + /** * struct x86_init_ops - functions for platform specific setup * @@ -144,6 +152,7 @@ struct x86_init_ops { struct x86_init_iommu iommu; struct x86_init_pci pci; struct x86_hyper_init hyper; + struct x86_init_acpi acpi; }; /** diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 1151ccd72ce9..9e4e994a4836 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -30,6 +30,7 @@ int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } bool __init bool_x86_init_noop(void) { return false; } void x86_op_int_noop(int cpu) { } +u64 u64_x86_init_noop(void) { return 0; } /* * The platform setup functions are preset with the default functions @@ -91,6 +92,10 @@ struct x86_init_ops x86_init __initdata = { .x2apic_available = bool_x86_init_noop, .init_mem_mapping = x86_init_noop, }, + + .acpi = { + .get_root_pointer = u64_x86_init_noop, + }, }; struct x86_cpuinit_ops x86_cpuinit = { -- cgit v1.2.3 From c46dacb75cd59a50a2380dcba5e7edf4fde86845 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 21 Feb 2018 10:42:32 +0100 Subject: x86/boot: Make the x86_init noop functions static Make the noop functions in x86_init.c static in case they are used locally only. 
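Both of these x86_init changes lean on the same default-ops pattern: a struct of function pointers pre-filled with safe no-op defaults that a platform overrides selectively. A self-contained sketch of the pattern (names invented for illustration; not the kernel's actual code):

#include <stdio.h>

struct acpi_init_ops {
	unsigned long long (*get_root_pointer)(void);
};

static unsigned long long u64_noop(void) { return 0; }	/* safe default */

static struct acpi_init_ops acpi_ops = {
	.get_root_pointer = u64_noop,
};

/* A hypervisor platform could install its own hook during setup: */
static unsigned long long hyper_get_root_pointer(void)
{
	return 0xe0000;	/* made-up RSDP address, for illustration only */
}

int main(void)
{
	printf("default: %llx\n", acpi_ops.get_root_pointer());
	acpi_ops.get_root_pointer = hyper_get_root_pointer;
	printf("override: %llx\n", acpi_ops.get_root_pointer());
	return 0;
}

Callers always go through the ops struct, so acpi_arch_get_root_pointer() works the same whether or not anything overrode the default.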
Reported-by: kbuild test robot Signed-off-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180221094232.23462-1-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/x86_init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 9e4e994a4836..b8cff22a8785 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -26,11 +26,11 @@ void x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } -int __init iommu_init_noop(void) { return 0; } -void iommu_shutdown_noop(void) { } -bool __init bool_x86_init_noop(void) { return false; } -void x86_op_int_noop(int cpu) { } -u64 u64_x86_init_noop(void) { return 0; } +static int __init iommu_init_noop(void) { return 0; } +static void iommu_shutdown_noop(void) { } +static bool __init bool_x86_init_noop(void) { return false; } +static void x86_op_int_noop(int cpu) { } +static u64 u64_x86_init_noop(void) { return 0; } /* * The platform setup functions are preset with the default functions -- cgit v1.2.3 From ef61f8a340fd6d49df6b367785743febc47320c1 Mon Sep 17 00:00:00 2001 From: "Jan H. Schönherr" Date: Sat, 3 Feb 2018 00:10:20 +0100 Subject: x86/boot/e820: Implement a range manipulation operator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a more versatile memmap= operator, which -- in addition to all the things that were possible before -- allows you to: - redeclare existing ranges -- before, you were limited to adding ranges; - drop any range -- like a mem= for any location; - use any e820 memory type -- not just some predefined ones. The syntax is: memmap=<size>%<offset>-<oldtype>+<newtype> Size and offset work as usual. The "-<oldtype>" and "+<newtype>" are optional and their existence determine the behavior: The command works on the specified range of memory limited to type <oldtype> (if specified). This memory is then configured to show up as <newtype>. If <newtype> is not specified, the memory is removed from the e820 map. Signed-off-by: Jan H. Schönherr Cc: Andy Lutomirski Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180202231020.15608-1-jschoenh@amazon.de Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++ arch/x86/kernel/e820.c | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'arch/x86/kernel') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1d1d53f85ddd..5529fa82700b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2237,6 +2237,15 @@ The memory region may be marked as e820 type 12 (0xc) and is NVDIMM or ADR memory. + memmap=<size>%<offset>-<oldtype>+<newtype> [KNL,ACPI] Convert memory within the specified region + from <oldtype> to <newtype>. If "-<oldtype>" is left + out, the whole region will be marked as <newtype>, + even if previously unavailable. If "+<newtype>" is left + out, matching memory will be removed. Types are + specified as e820 types, e.g., 1 = RAM, 2 = reserved, + 3 = ACPI, 12 = PRAM. + memory_corruption_check=0/1 [X86] Some BIOSes seem to corrupt the first 64k of memory when doing things like suspend/resume.
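A few hypothetical command lines using the new operator may help (addresses and sizes are made up; types are the e820 codes listed above, 1 = RAM, 12 = PRAM): memmap=512M%0x100000000-1+12 converts 512M of RAM at 4G into PRAM; memmap=256M%0x180000000+12 marks 256M at 6G as PRAM, even if previously unavailable; memmap=64M%0x200000000-1 removes 64M of RAM at 8G from the e820 map. These correspond to the three branches of the parser below: both types given, only "+<newtype>" given, and only "-<oldtype>" given.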
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 71c11ad5643e..6a2cb1442e05 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -924,6 +924,24 @@ static int __init parse_memmap_one(char *p) } else if (*p == '!') { start_at = memparse(p+1, &p); e820__range_add(start_at, mem_size, E820_TYPE_PRAM); + } else if (*p == '%') { + enum e820_type from = 0, to = 0; + + start_at = memparse(p + 1, &p); + if (*p == '-') + from = simple_strtoull(p + 1, &p, 0); + if (*p == '+') + to = simple_strtoull(p + 1, &p, 0); + if (*p != '\0') + return -EINVAL; + if (from && to) + e820__range_update(start_at, mem_size, from, to); + else if (to) + e820__range_add(start_at, mem_size, to); + else if (from) + e820__range_remove(start_at, mem_size, from, 1); + else + e820__range_remove(start_at, mem_size, 0, 0); } else { e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); } -- cgit v1.2.3 From c100a583601d357f923c41af5434dc1f8d07890f Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 2 Mar 2018 13:18:01 +0800 Subject: kdump, vmcoreinfo: Export pgtable_l5_enabled value User-space utilities examining crash-kernels need to know if the crashed kernel was in 5-level paging mode or not. So write 'pgtable_l5_enabled' to vmcoreinfo, which covers these three cases: pgtable_l5_enabled == 0 when: - Compiled with !CONFIG_X86_5LEVEL - Compiled with CONFIG_X86_5LEVEL=y while CPU has no 'la57' flag pgtable_l5_enabled != 0 when: - Compiled with CONFIG_X86_5LEVEL=y and CPU has 'la57' flag Signed-off-by: Baoquan He Acked-by: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: douly.fnst@cn.fujitsu.com Cc: dyoung@redhat.com Cc: ebiederm@xmission.com Cc: kirill.shutemov@linux.intel.com Cc: vgoyal@redhat.com Link: http://lkml.kernel.org/r/20180302051801.19594-1-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/machine_kexec_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 3b7427aa7d85..02f913cb27b5 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -350,6 +350,7 @@ void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_NUMBER(phys_base); VMCOREINFO_SYMBOL(init_top_pgt); + VMCOREINFO_NUMBER(pgtable_l5_enabled); #ifdef CONFIG_NUMA VMCOREINFO_SYMBOL(node_data); -- cgit v1.2.3 From cb06d8e3d020c30fe10ae711c925a5319ab82c88 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:50 +0300 Subject: x86/tme: Detect if TME and MKTME is activated by BIOS IA32_TME_ACTIVATE MSR (0x982) can be used to check if BIOS has enabled TME and MKTME. It includes which encryption policy/algorithm is selected for TME or available for MKTME. For MKTME, the MSR also enumerates how many KeyIDs are available. We would need to exclude KeyID bits from physical address bits. detect_tme() would adjust cpuinfo_x86::x86_phys_bits accordingly. We have to do this even if we are not going to use KeyID bits ourself. VM guests still have to know that these bits are not usable for physical address. Signed-off-by: Kirill A. 
Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-3-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 90 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4aa9fd379390..b862067bb33c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -510,6 +510,93 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c) } } +#define MSR_IA32_TME_ACTIVATE 0x982 + +/* Helpers to access TME_ACTIVATE MSR */ +#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +#define TME_ACTIVATE_ENABLED(x) (x & 0x2) + +#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +#define TME_ACTIVATE_POLICY_AES_XTS_128 0 + +#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ + +#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 + +/* Values for mktme_status (SW only construct) */ +#define MKTME_ENABLED 0 +#define MKTME_DISABLED 1 +#define MKTME_UNINITIALIZED 2 +static int mktme_status = MKTME_UNINITIALIZED; + +static void detect_tme(struct cpuinfo_x86 *c) +{ + u64 tme_activate, tme_policy, tme_crypto_algs; + int keyid_bits = 0, nr_keyids = 0; + static u64 tme_activate_cpu0 = 0; + + rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); + + if (mktme_status != MKTME_UNINITIALIZED) { + if (tme_activate != tme_activate_cpu0) { + /* Broken BIOS? */ + pr_err_once("x86/tme: configuation is inconsistent between CPUs\n"); + pr_err_once("x86/tme: MKTME is not usable\n"); + mktme_status = MKTME_DISABLED; + + /* Proceed. We may need to exclude bits from x86_phys_bits. */ + } + } else { + tme_activate_cpu0 = tme_activate; + } + + if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { + pr_info_once("x86/tme: not enabled by BIOS\n"); + mktme_status = MKTME_DISABLED; + return; + } + + if (mktme_status != MKTME_UNINITIALIZED) + goto detect_keyid_bits; + + pr_info("x86/tme: enabled by BIOS\n"); + + tme_policy = TME_ACTIVATE_POLICY(tme_activate); + if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) + pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); + + tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); + if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { + pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", + tme_crypto_algs); + mktme_status = MKTME_DISABLED; + } +detect_keyid_bits: + keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); + nr_keyids = (1UL << keyid_bits) - 1; + if (nr_keyids) { + pr_info_once("x86/mktme: enabled by BIOS\n"); + pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); + } else { + pr_info_once("x86/mktme: disabled by BIOS\n"); + } + + if (mktme_status == MKTME_UNINITIALIZED) { + /* MKTME is usable */ + mktme_status = MKTME_ENABLED; + } + + /* + * Exclude KeyID bits from physical address bits. + * + * We have to do this even if we are not going to use KeyID bits + * ourself. VM guests still have to know that these bits are not usable + * for physical address. 
+ */ + c->x86_phys_bits -= keyid_bits; +} + static void init_intel_energy_perf(struct cpuinfo_x86 *c) { u64 epb; @@ -680,6 +767,9 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_VMX)) detect_vmx_virtcap(c); + if (cpu_has(c, X86_FEATURE_TME)) + detect_tme(c); + init_intel_energy_perf(c); init_intel_misc_features(c); -- cgit v1.2.3 From be7825c19b4866ddc7b1431740b69ede2eeb93c1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:52 +0300 Subject: x86/pconfig: Detect PCONFIG targets Intel PCONFIG targets are enumerated via new CPUID leaf 0x1b. This patch detects all supported targets of PCONFIG and implements helper to check if the target is supported. Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel_pconfig.h | 15 +++++++ arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/intel_pconfig.c | 82 ++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/intel_pconfig.h create mode 100644 arch/x86/kernel/cpu/intel_pconfig.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h new file mode 100644 index 000000000000..fb7a37c3798b --- /dev/null +++ b/arch/x86/include/asm/intel_pconfig.h @@ -0,0 +1,15 @@ +#ifndef _ASM_X86_INTEL_PCONFIG_H +#define _ASM_X86_INTEL_PCONFIG_H + +#include +#include + +enum pconfig_target { + INVALID_TARGET = 0, + MKTME_TARGET = 1, + PCONFIG_TARGET_NR +}; + +int pconfig_target_supported(enum pconfig_target target); + +#endif /* _ASM_X86_INTEL_PCONFIG_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 570e8bb1f386..a66229f51b12 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o diff --git a/arch/x86/kernel/cpu/intel_pconfig.c b/arch/x86/kernel/cpu/intel_pconfig.c new file mode 100644 index 000000000000..0771a905b286 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_pconfig.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel PCONFIG instruction support. + * + * Copyright (C) 2017 Intel Corporation + * + * Author: + * Kirill A. Shutemov + */ + +#include +#include + +#define PCONFIG_CPUID 0x1b + +#define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1) + +/* Subleaf type (EAX) for PCONFIG CPUID leaf (0x1B) */ +enum { + PCONFIG_CPUID_SUBLEAF_INVALID = 0, + PCONFIG_CPUID_SUBLEAF_TARGETID = 1, +}; + +/* Bitmask of supported targets */ +static u64 targets_supported __read_mostly; + +int pconfig_target_supported(enum pconfig_target target) +{ + /* + * We would need to re-think the implementation once we get > 64 + * PCONFIG targets. Spec allows up to 2^32 targets. 
+ */ + BUILD_BUG_ON(PCONFIG_TARGET_NR >= 64); + + if (WARN_ON_ONCE(target >= 64)) + return 0; + return targets_supported & (1ULL << target); +} + +static int __init intel_pconfig_init(void) +{ + int subleaf; + + if (!boot_cpu_has(X86_FEATURE_PCONFIG)) + return 0; + + /* + * Scan subleafs of PCONFIG CPUID leaf. + * + * Subleafs of the same type need not to be consecutive. + * + * Stop on the first invalid subleaf type. All subleafs after the first + * invalid are invalid too. + */ + for (subleaf = 0; subleaf < INT_MAX; subleaf++) { + struct cpuid_regs regs; + + cpuid_count(PCONFIG_CPUID, subleaf, + ®s.eax, ®s.ebx, ®s.ecx, ®s.edx); + + switch (regs.eax & PCONFIG_CPUID_SUBLEAF_MASK) { + case PCONFIG_CPUID_SUBLEAF_INVALID: + /* Stop on the first invalid subleaf */ + goto out; + case PCONFIG_CPUID_SUBLEAF_TARGETID: + /* Mark supported PCONFIG targets */ + if (regs.ebx < 64) + targets_supported |= (1ULL << regs.ebx); + if (regs.ecx < 64) + targets_supported |= (1ULL << regs.ecx); + if (regs.edx < 64) + targets_supported |= (1ULL << regs.edx); + break; + default: + /* Unknown CPUID.PCONFIG subleaf: ignore */ + break; + } + } +out: + return 0; +} +arch_initcall(intel_pconfig_init); -- cgit v1.2.3 From 50beba07a0e42ebd4454adc97515a2a2a969645b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Feb 2018 20:05:04 +0200 Subject: ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export This is a preparation patch to allow override the hardware reduced initialization on ACPI enabled platforms. No functional change intended. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Cc: Eric Biederman Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Link: http://lkml.kernel.org/r/20180220180506.65523-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 4 ++++ arch/x86/kernel/acpi/boot.c | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 6609dd7289b5..a303d7b7d763 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -140,6 +140,8 @@ static inline u64 acpi_arch_get_root_pointer(void) return x86_init.acpi.get_root_pointer(); } +void acpi_generic_reduced_hw_init(void); + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 @@ -149,6 +151,8 @@ static inline void acpi_noirq_set(void) { } static inline void acpi_disable_pci(void) { } static inline void disable_acpi(void) { } +static inline void acpi_generic_reduced_hw_init(void) { } + #endif /* !CONFIG_ACPI */ #define ARCH_HAS_POWER_INIT 1 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2aa92094b59d..baa084ecffdb 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1376,17 +1376,21 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) * * We initialize the Hardware-reduced ACPI model here: */ +void __init acpi_generic_reduced_hw_init(void) +{ + /* + * Override x86_init functions and bypass legacy PIC in + * hardware reduced ACPI mode. 
+ */ + x86_init.timers.timer_init = x86_init_noop; + x86_init.irqs.pre_vector_init = x86_init_noop; + legacy_pic = &null_legacy_pic; +} + static void __init acpi_reduced_hw_init(void) { - if (acpi_gbl_reduced_hardware) { - /* - * Override x86_init functions and bypass legacy pic - * in Hardware-reduced ACPI mode - */ - x86_init.timers.timer_init = x86_init_noop; - x86_init.irqs.pre_vector_init = x86_init_noop; - legacy_pic = &null_legacy_pic; - } + if (acpi_gbl_reduced_hardware) + acpi_generic_reduced_hw_init(); } /* -- cgit v1.2.3 From 81b53e5ff21e09b42525cfa08f2b0af2b8c5f465 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Feb 2018 20:05:05 +0200 Subject: ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback Some ACPI hardware reduced platforms need to initialize certain devices defined by the ACPI hardware specification even though in principle those devices should not be present in an ACPI hardware reduced platform. To allow that to happen, make it possible to override the generic x86_init callbacks and provide a custom legacy_pic value, add a new ->reduced_hw_early_init() callback to struct x86_init_acpi and make acpi_reduced_hw_init() use it. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Cc: Eric Biederman Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Link: http://lkml.kernel.org/r/20180220180506.65523-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/x86_init.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2e2c34d2bb00..5bd45a8f5ae3 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -133,9 +133,11 @@ struct x86_hyper_init { /** * struct x86_init_acpi - x86 ACPI init functions * @get_root_pointer: get RSDP address + * @reduced_hw_early_init: hardware reduced platform early init */ struct x86_init_acpi { u64 (*get_root_pointer)(void); + void (*reduced_hw_early_init)(void); }; /** diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index baa084ecffdb..7a37d9357bc4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1390,7 +1390,7 @@ void __init acpi_generic_reduced_hw_init(void) static void __init acpi_reduced_hw_init(void) { if (acpi_gbl_reduced_hardware) - acpi_generic_reduced_hw_init(); + x86_init.acpi.reduced_hw_early_init(); } /* diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index b8cff22a8785..ac67ccffeef0 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -8,6 +8,7 @@ #include #include +#include <asm/acpi.h> #include #include #include @@ -95,6 +96,7 @@ struct x86_init_ops x86_init __initdata = { .acpi = { .get_root_pointer = u64_x86_init_noop, + .reduced_hw_early_init = acpi_generic_reduced_hw_init, }, }; -- cgit v1.2.3 From 547edaca247abf910e32f0cd883ba83b8fc6d0ed Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 15 Mar 2018 16:49:06 +0300 Subject: x86/mm: Update comment in detect_tme() regarding x86_phys_bits As Kai pointed out, the primary reason for adjusting x86_phys_bits is to reflect that the address space is reduced and not the ability to communicate the available physical address space to virtual machines. Suggested-by: Kai Huang Signed-off-by: Kirill A.
Shutemov Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Dave Hansen Cc: linux-mm@kvack.org Link: https://lkml.kernel.org/r/20180315134907.9311-2-kirill.shutemov@linux.intel.com --- arch/x86/kernel/cpu/intel.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3f8d7a3b6447..6106d11ceb6b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -587,11 +587,8 @@ detect_keyid_bits: } /* - * Exclude KeyID bits from physical address bits. - * - * We have to do this even if we are not going to use KeyID bits - * ourself. VM guests still have to know that these bits are not usable - * for physical address. + * KeyID bits effectively lower the number of physical address + * bits. Update cpuinfo_x86::x86_phys_bits accordingly. */ c->x86_phys_bits -= keyid_bits; } -- cgit v1.2.3 From eaeb8e76cd5751e805f6e4a3fcec91d283e3b0c2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 13 Mar 2018 15:47:09 +0000 Subject: x86/cpu/tme: Fix spelling: "configuation" -> "configuration" Trivial fix to spelling mistake in the pr_err_once() error message text. Signed-off-by: Colin Ian King Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20180313154709.1015-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 6106d11ceb6b..b9693b80fc21 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -540,7 +540,7 @@ static void detect_tme(struct cpuinfo_x86 *c) if (mktme_status != MKTME_UNINITIALIZED) { if (tme_activate != tme_activate_cpu0) { /* Broken BIOS? */ - pr_err_once("x86/tme: configuation is inconsistent between CPUs\n"); + pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); pr_err_once("x86/tme: MKTME is not usable\n"); mktme_status = MKTME_DISABLED; -- cgit v1.2.3
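As a closing illustration of the PCONFIG detection above: the exported pconfig_target_supported() helper is the gate a future MKTME key-programming path would use. A minimal sketch of such a consumer (hypothetical; mktme_program_keys() and the key-programming step are assumed names, not code from this series):

    #include <asm/intel_pconfig.h>

    /*
     * Hypothetical consumer: only attempt MKTME key programming when
     * CPUID leaf 0x1b enumerated the MKTME target at boot.
     */
    static int __init mktme_program_keys(void)
    {
            if (!pconfig_target_supported(MKTME_TARGET))
                    return -ENODEV;

            /* ... program KeyIDs with the PCONFIG instruction here ... */
            return 0;
    }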