summaryrefslogtreecommitdiffstats
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/desc.h147
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/elf.h28
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/kasan.h9
-rw-r--r--arch/x86/include/asm/kexec.h1
-rw-r--r--arch/x86/include/asm/mmu_context.h4
-rw-r--r--arch/x86/include/asm/page_64_types.h10
-rw-r--r--arch/x86/include/asm/paravirt.h54
-rw-r--r--arch/x86/include/asm/paravirt_types.h17
-rw-r--r--arch/x86/include/asm/pgalloc.h37
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h1
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h1
-rw-r--r--arch/x86/include/asm/pgtable.h84
-rw-r--r--arch/x86/include/asm/pgtable_32.h1
-rw-r--r--arch/x86/include/asm/pgtable_64.h23
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h32
-rw-r--r--arch/x86/include/asm/pgtable_types.h46
-rw-r--r--arch/x86/include/asm/processor.h6
-rw-r--r--arch/x86/include/asm/required-features.h8
-rw-r--r--arch/x86/include/asm/sparsemem.h9
-rw-r--r--arch/x86/include/asm/stackprotector.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h9
-rw-r--r--arch/x86/include/asm/xen/page.h8
24 files changed, 440 insertions, 109 deletions
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 1548ca92ad3f..d0a21b12dd58 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -4,6 +4,7 @@
#include <asm/desc_defs.h>
#include <asm/ldt.h>
#include <asm/mmu.h>
+#include <asm/fixmap.h>
#include <linux/smp.h>
#include <linux/percpu.h>
@@ -45,11 +46,43 @@ struct gdt_page {
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
{
return per_cpu(gdt_page, cpu).gdt;
}
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_gdt_rw(void)
+{
+ return this_cpu_ptr(&gdt_page)->gdt;
+}
+
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+{
+ return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
+{
+ unsigned int idx = get_cpu_gdt_ro_index(cpu);
+ return (struct desc_struct *)__fix_to_virt(idx);
+}
+
+/* Provide the current read-only GDT */
+static inline struct desc_struct *get_current_gdt_ro(void)
+{
+ return get_cpu_gdt_ro(smp_processor_id());
+}
+
+/* Provide the physical address of the GDT page. */
+static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
+{
+ return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
+}
+
#ifdef CONFIG_X86_64
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +207,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
{
- struct desc_struct *d = get_cpu_gdt_table(cpu);
+ struct desc_struct *d = get_cpu_gdt_rw(cpu);
tss_desc tss;
set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
@@ -194,22 +227,90 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
entries * LDT_ENTRY_SIZE - 1);
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
&ldt, DESC_LDT);
asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
}
}
+static inline void native_load_gdt(const struct desc_ptr *dtr)
+{
+ asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct desc_ptr *dtr)
+{
+ asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct desc_ptr *dtr)
+{
+ asm volatile("sidt %0":"=m" (*dtr));
+}
+
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+ struct desc_ptr gdt;
+ int cpu = raw_smp_processor_id();
+ bool restore = 0;
+ struct desc_struct *fixmap_gdt;
+
+ native_store_gdt(&gdt);
+ fixmap_gdt = get_cpu_gdt_ro(cpu);
+
+ /*
+ * If the current GDT is the read-only fixmap, swap to the original
+ * writeable version. Swap back at the end.
+ */
+ if (gdt.address == (unsigned long)fixmap_gdt) {
+ load_direct_gdt(cpu);
+ restore = 1;
+ }
+ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+ if (restore)
+ load_fixmap_gdt(cpu);
+}
+#else
static inline void native_load_tr_desc(void)
{
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}
+#endif
+
+static inline unsigned long native_store_tr(void)
+{
+ unsigned long tr;
+
+ asm volatile("str %0":"=r" (tr));
+
+ return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+ struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
+ unsigned int i;
+
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+}
DECLARE_PER_CPU(bool, __tss_limit_invalid);
static inline void force_reload_TR(void)
{
- struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
+ struct desc_struct *d = get_current_gdt_rw();
tss_desc tss;
memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -257,44 +358,6 @@ static inline void invalidate_tss_limit(void)
this_cpu_write(__tss_limit_invalid, true);
}
-static inline void native_load_gdt(const struct desc_ptr *dtr)
-{
- asm volatile("lgdt %0"::"m" (*dtr));
-}
-
-static inline void native_load_idt(const struct desc_ptr *dtr)
-{
- asm volatile("lidt %0"::"m" (*dtr));
-}
-
-static inline void native_store_gdt(struct desc_ptr *dtr)
-{
- asm volatile("sgdt %0":"=m" (*dtr));
-}
-
-static inline void native_store_idt(struct desc_ptr *dtr)
-{
- asm volatile("sidt %0":"=m" (*dtr));
-}
-
-static inline unsigned long native_store_tr(void)
-{
- unsigned long tr;
-
- asm volatile("str %0":"=r" (tr));
-
- return tr;
-}
-
-static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- unsigned int i;
-
- for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
- gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
-}
-
/* This intentionally ignores lm, since 32-bit apps don't have that field. */
#define LDT_empty(info) \
((info)->base_addr == 0 && \
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 85599ad4d024..5dff775af7cd 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -36,6 +36,12 @@
# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
+#ifdef CONFIG_X86_5LEVEL
+# define DISABLE_LA57 0
+#else
+# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -55,7 +61,7 @@
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
#define DISABLED_MASK17 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 3762536619f8..e8ab9a46bc68 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -293,8 +293,23 @@ do { \
} \
} while (0)
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+ return IS_ENABLED(CONFIG_X86_32) ||
+ (IS_ENABLED(CONFIG_COMPAT) &&
+ test_thread_flag(TIF_ADDR32));
+}
+
+extern unsigned long tasksize_32bit(void);
+extern unsigned long tasksize_64bit(void);
+extern unsigned long get_mmap_base(int is_legacy);
+
#ifdef CONFIG_X86_32
+#define __STACK_RND_MASK(is32bit) (0x7ff)
#define STACK_RND_MASK (0x7ff)
#define ARCH_DLINFO ARCH_DLINFO_IA32
@@ -304,7 +319,8 @@ do { \
#else /* CONFIG_X86_32 */
/* 1GB for 64bit, 8MB for 32bit */
-#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
+#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff)
+#define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32())
#define ARCH_DLINFO \
do { \
@@ -348,16 +364,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static inline int mmap_is_ia32(void)
-{
- return IS_ENABLED(CONFIG_X86_32) ||
- (IS_ENABLED(CONFIG_COMPAT) &&
- test_thread_flag(TIF_ADDR32));
-}
-
/* Do not change the values. See get_align_mask() */
enum align_flags {
ALIGN_VA_32 = BIT(0),
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8554f960e21b..b65155cc3760 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -100,6 +100,10 @@ enum fixed_addresses {
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
+ /* Fixmap entries to remap the GDTs, one per processor. */
+ FIX_GDT_REMAP_BEGIN,
+ FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
__end_of_permanent_fixed_addresses,
/*
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 1410b567ecde..f527b02a0ee3 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -11,9 +11,12 @@
* 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT
*/
#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
- (0xffff800000000000ULL >> 3))
-/* 47 bits for kernel address -> (47 - 3) bits for shadow */
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (47 - 3)))
+ ((-1UL << __VIRTUAL_MASK_SHIFT) >> 3))
+/*
+ * 47 bits for kernel address -> (47 - 3) bits for shadow
+ * 56 bits for kernel address -> (56 - 3) bits for shadow
+ */
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (__VIRTUAL_MASK_SHIFT - 3)))
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 282630e4c6ea..70ef205489f0 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -164,6 +164,7 @@ struct kimage_arch {
};
#else
struct kimage_arch {
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 306c7e12af55..68b329d77b3a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -268,8 +268,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
- return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
-}
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 9215e0527647..3f5f08b010d0 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -36,7 +36,12 @@
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires.
*/
+#ifdef CONFIG_X86_5LEVEL
+#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL)
+#else
#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
+#endif
+
#ifdef CONFIG_RANDOMIZE_MEMORY
#define __PAGE_OFFSET page_offset_base
#else
@@ -46,8 +51,13 @@
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+#ifdef CONFIG_X86_5LEVEL
+#define __PHYSICAL_MASK_SHIFT 52
+#define __VIRTUAL_MASK_SHIFT 56
+#else
#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 47
+#endif
/*
* Kernel image size is limited to 1GiB due to the fixmap living in the
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 0489884fdc44..55fa56fe4e45 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -357,6 +357,16 @@ static inline void paravirt_release_pud(unsigned long pfn)
PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
}
+static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn)
+{
+ PVOP_VCALL2(pv_mmu_ops.alloc_p4d, mm, pfn);
+}
+
+static inline void paravirt_release_p4d(unsigned long pfn)
+{
+ PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
+}
+
static inline void pte_update(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
@@ -536,7 +546,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
val);
}
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
static inline pud_t __pud(pudval_t val)
{
pudval_t ret;
@@ -565,26 +575,54 @@ static inline pudval_t pud_val(pud_t pud)
return ret;
}
-static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+static inline void pud_clear(pud_t *pudp)
{
- pgdval_t val = native_pgd_val(pgd);
+ set_pud(pudp, __pud(0));
+}
- if (sizeof(pgdval_t) > sizeof(long))
- PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ p4dval_t val = native_p4d_val(p4d);
+
+ if (sizeof(p4dval_t) > sizeof(long))
+ PVOP_VCALL3(pv_mmu_ops.set_p4d, p4dp,
val, (u64)val >> 32);
else
- PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
+ PVOP_VCALL2(pv_mmu_ops.set_p4d, p4dp,
val);
}
+#if CONFIG_PGTABLE_LEVELS >= 5
+
+static inline p4d_t __p4d(p4dval_t val)
+{
+ p4dval_t ret = PVOP_CALLEE1(p4dval_t, pv_mmu_ops.make_p4d, val);
+
+ return (p4d_t) { ret };
+}
+
+static inline p4dval_t p4d_val(p4d_t p4d)
+{
+ return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d);
+}
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ pgdval_t val = native_pgd_val(pgd);
+
+ PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val);
+}
+
static inline void pgd_clear(pgd_t *pgdp)
{
set_pgd(pgdp, __pgd(0));
}
-static inline void pud_clear(pud_t *pudp)
+#endif /* CONFIG_PGTABLE_LEVELS == 5 */
+
+static inline void p4d_clear(p4d_t *p4dp)
{
- set_pud(pudp, __pud(0));
+ set_p4d(p4dp, __p4d(0));
}
#endif /* CONFIG_PGTABLE_LEVELS == 4 */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b060f962d581..7465d6fe336f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -238,9 +238,11 @@ struct pv_mmu_ops {
void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
+ void (*alloc_p4d)(struct mm_struct *mm, unsigned long pfn);
void (*release_pte)(unsigned long pfn);
void (*release_pmd)(unsigned long pfn);
void (*release_pud)(unsigned long pfn);
+ void (*release_p4d)(unsigned long pfn);
/* Pagetable manipulation functions */
void (*set_pte)(pte_t *ptep, pte_t pteval);
@@ -279,12 +281,21 @@ struct pv_mmu_ops {
struct paravirt_callee_save pmd_val;
struct paravirt_callee_save make_pmd;
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
struct paravirt_callee_save pud_val;
struct paravirt_callee_save make_pud;
- void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+ void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval);
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+ struct paravirt_callee_save p4d_val;
+ struct paravirt_callee_save make_p4d;
+
+ void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval);
+#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
+
+#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
+
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
struct pv_lazy_ops lazy_mode;
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index b6d425999f99..b2d0cd8288aa 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -17,9 +17,11 @@ static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {
static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
unsigned long start, unsigned long count) {}
static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
+static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) {}
static inline void paravirt_release_pte(unsigned long pfn) {}
static inline void paravirt_release_pmd(unsigned long pfn) {}
static inline void paravirt_release_pud(unsigned long pfn) {}
+static inline void paravirt_release_p4d(unsigned long pfn) {}
#endif
/*
@@ -121,10 +123,10 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
#endif /* CONFIG_X86_PAE */
#if CONFIG_PGTABLE_LEVELS > 3
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
- set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+ set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -150,6 +152,37 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
___pud_free_tlb(tlb, pud);
}
+#if CONFIG_PGTABLE_LEVELS > 4
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+ paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
+ set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+}
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ gfp_t gfp = GFP_KERNEL_ACCOUNT;
+
+ if (mm == &init_mm)
+ gfp &= ~__GFP_ACCOUNT;
+ return (p4d_t *)get_zeroed_page(gfp);
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+ free_page((unsigned long)p4d);
+}
+
+extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
+
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+ unsigned long address)
+{
+ ___p4d_free_tlb(tlb, p4d);
+}
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 392576433e77..373ab1de909f 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -7,6 +7,7 @@
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index bcc89625ebe5..b8a4341faafa 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -7,6 +7,7 @@
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pudval_t;
+typedef u64 p4dval_t;
typedef u64 pgdval_t;
typedef u64 pgprotval_t;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2197e5322df9..f5af95a0c6b8 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -51,11 +51,19 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
-#ifndef __PAGETABLE_PUD_FOLDED
+#ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
#define pgd_clear(pgd) native_pgd_clear(pgd)
#endif
+#ifndef set_p4d
+# define set_p4d(p4dp, p4d) native_set_p4d(p4dp, p4d)
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define p4d_clear(p4d) native_p4d_clear(p4d)
+#endif
+
#ifndef set_pud
# define set_pud(pudp, pud) native_set_pud(pudp, pud)
#endif
@@ -72,6 +80,11 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define pgd_val(x) native_pgd_val(x)
#define __pgd(x) native_make_pgd(x)
+#ifndef __PAGETABLE_P4D_FOLDED
+#define p4d_val(x) native_p4d_val(x)
+#define __p4d(x) native_make_p4d(x)
+#endif
+
#ifndef __PAGETABLE_PUD_FOLDED
#define pud_val(x) native_pud_val(x)
#define __pud(x) native_make_pud(x)
@@ -177,6 +190,17 @@ static inline unsigned long pud_pfn(pud_t pud)
return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+ return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
+}
+
+static inline int p4d_large(p4d_t p4d)
+{
+ /* No 512 GiB pages yet */
+ return 0;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
@@ -536,6 +560,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#define pte_pgprot(x) __pgprot(pte_flags(x))
#define pmd_pgprot(x) __pgprot(pmd_flags(x))
#define pud_pgprot(x) __pgprot(pud_flags(x))
+#define p4d_pgprot(x) __pgprot(p4d_flags(x))
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
@@ -585,6 +610,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/log2.h>
+#include <asm/fixmap.h>
static inline int pte_none(pte_t pte)
{
@@ -768,7 +794,52 @@ static inline int pud_large(pud_t pud)
}
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+static inline unsigned long pud_index(unsigned long address)
+{
+ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
#if CONFIG_PGTABLE_LEVELS > 3
+static inline int p4d_none(p4d_t p4d)
+{
+ return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ return p4d_flags(p4d) & _PAGE_PRESENT;
+}
+
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+{
+ return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
+}
+
+/*
+ * Currently stuck as a macro due to indirect forward reference to
+ * linux/mmzone.h's __section_mem_map_addr() definition:
+ */
+#define p4d_page(p4d) \
+ pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT)
+
+/* Find an entry in the third-level page table.. */
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+}
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+
+static inline unsigned long p4d_index(unsigned long address)
+{
+ return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
+}
+
+#if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd)
{
return pgd_flags(pgd) & _PAGE_PRESENT;
@@ -786,14 +857,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
/* to find an entry in a page-table-directory. */
-static inline unsigned long pud_index(unsigned long address)
-{
- return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
-}
-
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
- return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+ return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}
static inline int pgd_bad(pgd_t pgd)
@@ -811,7 +877,7 @@ static inline int pgd_none(pgd_t pgd)
*/
return !native_pgd_val(pgd);
}
-#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index fbc73360aea0..bfab55675c16 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -14,7 +14,6 @@
*/
#ifndef __ASSEMBLY__
#include <asm/processor.h>
-#include <asm/fixmap.h>
#include <linux/threads.h>
#include <asm/paravirt.h>
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 73c7ccc38912..9991224f6238 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -35,15 +35,22 @@ extern void paging_init(void);
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pud_val(e))
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+#define p4d_ERROR(e) \
+ pr_err("%s:%d: bad p4d %p(%016lx)\n", \
+ __FILE__, __LINE__, &(e), p4d_val(e))
+#endif
+
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pgd_val(e))
struct mm_struct;
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
-
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
@@ -121,6 +128,20 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
#endif
}
+static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ *p4dp = p4d;
+}
+
+static inline void native_p4d_clear(p4d_t *p4d)
+{
+#ifdef CONFIG_X86_5LEVEL
+ native_set_p4d(p4d, native_make_p4d(0));
+#else
+ native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
+#endif
+}
+
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
*pgdp = pgd;
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..06470da156ba 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -13,6 +13,7 @@
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
@@ -22,12 +23,32 @@ typedef struct { pteval_t pte; } pte_t;
#define SHARED_KERNEL_PMD 0
+#ifdef CONFIG_X86_5LEVEL
+
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
+#define PGDIR_SHIFT 48
+#define PTRS_PER_PGD 512
+
+/*
+ * 4th level page in 5-level paging case
+ */
+#define P4D_SHIFT 39
+#define PTRS_PER_P4D 512
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE - 1))
+
+#else /* CONFIG_X86_5LEVEL */
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
#define PGDIR_SHIFT 39
#define PTRS_PER_PGD 512
+#endif /* CONFIG_X86_5LEVEL */
+
/*
* 3rd level page
*/
@@ -55,9 +76,15 @@ typedef struct { pteval_t pte; } pte_t;
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#ifdef CONFIG_X86_5LEVEL
+#define VMALLOC_SIZE_TB _AC(16384, UL)
+#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
+#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+#else
#define VMALLOC_SIZE_TB _AC(32, UL)
#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+#endif
#ifdef CONFIG_RANDOMIZE_MEMORY
#define VMALLOC_START vmalloc_base
#define VMEMMAP_START vmemmap_base
@@ -67,10 +94,11 @@ typedef struct { pteval_t pte; } pte_t;
#endif /* CONFIG_RANDOMIZE_MEMORY */
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END _AC(0xffffffffff000000, UL)
+/* The module sections ends with the start of the fixmap */
+#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
+#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 62484333673d..bf9638e1ee42 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -272,9 +272,28 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
return native_pgd_val(pgd) & PTE_FLAGS_MASK;
}
-#if CONFIG_PGTABLE_LEVELS > 3
-#include <asm-generic/5level-fixup.h>
+#if CONFIG_PGTABLE_LEVELS > 4
+typedef struct { p4dval_t p4d; } p4d_t;
+
+static inline p4d_t native_make_p4d(pudval_t val)
+{
+ return (p4d_t) { val };
+}
+
+static inline p4dval_t native_p4d_val(p4d_t p4d)
+{
+ return p4d.p4d;
+}
+#else
+#include <asm-generic/pgtable-nop4d.h>
+
+static inline p4dval_t native_p4d_val(p4d_t p4d)
+{
+ return native_pgd_val(p4d.pgd);
+}
+#endif
+#if CONFIG_PGTABLE_LEVELS > 3
typedef struct { pudval_t pud; } pud_t;
static inline pud_t native_make_pud(pmdval_t val)
@@ -287,12 +306,11 @@ static inline pudval_t native_pud_val(pud_t pud)
return pud.pud;
}
#else
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
static inline pudval_t native_pud_val(pud_t pud)
{
- return native_pgd_val(pud.pgd);
+ return native_pgd_val(pud.p4d.pgd);
}
#endif
@@ -309,15 +327,30 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
return pmd.pmd;
}
#else
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
static inline pmdval_t native_pmd_val(pmd_t pmd)
{
- return native_pgd_val(pmd.pud.pgd);
+ return native_pgd_val(pmd.pud.p4d.pgd);
}
#endif
+static inline p4dval_t p4d_pfn_mask(p4d_t p4d)
+{
+ /* No 512 GiB huge pages yet */
+ return PTE_PFN_MASK;
+}
+
+static inline p4dval_t p4d_flags_mask(p4d_t p4d)
+{
+ return ~p4d_pfn_mask(p4d);
+}
+
+static inline p4dval_t p4d_flags(p4d_t p4d)
+{
+ return native_p4d_val(p4d) & p4d_flags_mask(p4d);
+}
+
static inline pudval_t pud_pfn_mask(pud_t pud)
{
if (native_pud_val(pud) & _PAGE_PSE)
@@ -461,6 +494,7 @@ enum pg_level {
PG_LEVEL_4K,
PG_LEVEL_2M,
PG_LEVEL_1G,
+ PG_LEVEL_512G,
PG_LEVEL_NUM
};
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 78defd0aa220..3cada998a402 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -709,6 +709,8 @@ extern struct desc_ptr early_gdt_descr;
extern void cpu_set_gdt(int);
extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
+extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
@@ -790,6 +792,7 @@ static inline void spin_lock_prefetch(const void *x)
/*
* User space process size: 3GB (default).
*/
+#define IA32_PAGE_OFFSET PAGE_OFFSET
#define TASK_SIZE PAGE_OFFSET
#define TASK_SIZE_MAX TASK_SIZE
#define STACK_TOP TASK_SIZE
@@ -866,7 +869,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
-#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
+#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fac9a5c0abe9..d91ba04dd007 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -53,6 +53,12 @@
# define NEED_MOVBE 0
#endif
+#ifdef CONFIG_X86_5LEVEL
+# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31))
+#else
+# define NEED_LA57 0
+#endif
+
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
@@ -98,7 +104,7 @@
#define REQUIRED_MASK13 0
#define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 0
+#define REQUIRED_MASK16 (NEED_LA57)
#define REQUIRED_MASK17 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 4517d6b93188..1f5bee2c202f 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -26,8 +26,13 @@
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-# define MAX_PHYSADDR_BITS 44
-# define MAX_PHYSMEM_BITS 46
+# ifdef CONFIG_X86_5LEVEL
+# define MAX_PHYSADDR_BITS 52
+# define MAX_PHYSMEM_BITS 52
+# else
+# define MAX_PHYSADDR_BITS 44
+# define MAX_PHYSMEM_BITS 46
+# endif
#endif
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..dcbd9bcce714 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
- struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+ struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
struct desc_struct desc;
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 75d002bdb3f3..6ed9ea469b48 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -215,7 +215,6 @@ static inline void __flush_tlb_one(unsigned long addr)
/*
* TLB flushing:
*
- * - flush_tlb() flushes the current mm struct TLBs
* - flush_tlb_all() flushes all processes TLBs
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
* - flush_tlb_page(vma, vmaddr) flushes one page
@@ -247,11 +246,6 @@ static inline void flush_tlb_all(void)
__flush_tlb_all();
}
-static inline void flush_tlb(void)
-{
- __flush_tlb_up();
-}
-
static inline void local_flush_tlb(void)
{
__flush_tlb_up();
@@ -313,14 +307,11 @@ static inline void flush_tlb_kernel_range(unsigned long start,
flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-#define flush_tlb() flush_tlb_current_task()
-
void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long start, unsigned long end);
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 64c5e745ebad..8a5a02b1dfba 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -280,13 +280,17 @@ static inline pte_t __pte_ma(pteval_t x)
#define pmd_val_ma(v) ((v).pmd)
#ifdef __PAGETABLE_PUD_FOLDED
-#define pud_val_ma(v) ((v).pgd.pgd)
+#define pud_val_ma(v) ((v).p4d.pgd.pgd)
#else
#define pud_val_ma(v) ((v).pud)
#endif
#define __pmd_ma(x) ((pmd_t) { (x) } )
-#define pgd_val_ma(x) ((x).pgd)
+#ifdef __PAGETABLE_P4D_FOLDED
+#define p4d_val_ma(x) ((x).pgd.pgd)
+#else
+#define p4d_val_ma(x) ((x).p4d)
+#endif
void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);