summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/cmpxchg.h6
-rw-r--r--arch/alpha/include/asm/xchg.h38
-rw-r--r--arch/arc/include/asm/bug.h3
-rw-r--r--arch/arc/kernel/setup.c2
-rw-r--r--arch/arc/kernel/unwind.c2
-rw-r--r--arch/arm/kernel/time.c2
-rw-r--r--arch/arm/mach-ux500/cpu-db8500.c35
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/hugetlb.h2
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h26
-rw-r--r--arch/arm64/include/asm/mmu_context.h4
-rw-r--r--arch/arm64/include/asm/pgalloc.h44
-rw-r--r--arch/arm64/include/asm/pgtable.h23
-rw-r--r--arch/arm64/include/asm/stacktrace.h2
-rw-r--r--arch/arm64/include/asm/uaccess.h12
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c4
-rw-r--r--arch/arm64/kernel/cpu_errata.c9
-rw-r--r--arch/arm64/kernel/cpufeature.c6
-rw-r--r--arch/arm64/kernel/efi.c2
-rw-r--r--arch/arm64/kernel/hibernate.c148
-rw-r--r--arch/arm64/kernel/perf_event.c4
-rw-r--r--arch/arm64/kernel/process.c11
-rw-r--r--arch/arm64/kernel/ptrace.c2
-rw-r--r--arch/arm64/kernel/stacktrace.c5
-rw-r--r--arch/arm64/kernel/sys_compat.c2
-rw-r--r--arch/arm64/kernel/time.c2
-rw-r--r--arch/arm64/kernel/traps.c10
-rw-r--r--arch/arm64/kvm/hyp/switch.c4
-rw-r--r--arch/arm64/mm/dump.c54
-rw-r--r--arch/arm64/mm/fault.c44
-rw-r--r--arch/arm64/mm/hugetlbpage.c94
-rw-r--r--arch/arm64/mm/kasan_init.c70
-rw-r--r--arch/arm64/mm/mmu.c292
-rw-r--r--arch/arm64/mm/pageattr.c32
-rw-r--r--arch/arm64/mm/proc.S14
-rw-r--r--arch/arm64/net/bpf_jit_comp.c5
-rw-r--r--arch/cris/include/arch-v10/arch/bug.h11
-rw-r--r--arch/ia64/include/asm/bug.h6
-rw-r--r--arch/ia64/kernel/Makefile1
-rw-r--r--arch/m68k/include/asm/bug.h3
-rw-r--r--arch/mips/boot/Makefile1
-rw-r--r--arch/mips/include/asm/compat.h1
-rw-r--r--arch/mips/kernel/setup.c16
-rw-r--r--arch/mips/kernel/smp-bmips.c2
-rw-r--r--arch/powerpc/include/asm/firmware.h2
-rw-r--r--arch/powerpc/include/asm/topology.h3
-rw-r--r--arch/powerpc/kernel/eeh_driver.c3
-rw-r--r--arch/powerpc/kernel/prom_init.c2
-rw-r--r--arch/powerpc/kernel/sysfs.c6
-rw-r--r--arch/powerpc/kvm/book3s_xive.c2
-rw-r--r--arch/powerpc/mm/drmem.c14
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c3
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c6
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c4
-rw-r--r--arch/powerpc/platforms/pseries/setup.c3
-rw-r--r--arch/powerpc/sysdev/xive/spapr.c16
-rw-r--r--arch/riscv/Kconfig3
-rw-r--r--arch/riscv/kernel/entry.S5
-rw-r--r--arch/riscv/kernel/head.S2
-rw-r--r--arch/riscv/kernel/setup.c2
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/sparc/include/asm/bug.h6
-rw-r--r--arch/x86/.gitignore1
-rw-r--r--arch/x86/Kconfig90
-rw-r--r--arch/x86/Kconfig.cpu4
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/boot/compressed/eboot.c4
-rw-r--r--arch/x86/boot/compressed/head_64.S168
-rw-r--r--arch/x86/boot/compressed/kaslr.c14
-rw-r--r--arch/x86/boot/compressed/kaslr_64.c (renamed from arch/x86/boot/compressed/pagetable.c)0
-rw-r--r--arch/x86/boot/compressed/misc.c22
-rw-r--r--arch/x86/boot/compressed/misc.h5
-rw-r--r--arch/x86/boot/compressed/pgtable.h20
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c148
-rw-r--r--arch/x86/entry/entry_64.S5
-rw-r--r--arch/x86/include/asm/acpi.h7
-rw-r--r--arch/x86/include/asm/cpufeature.h79
-rw-r--r--arch/x86/include/asm/kaslr.h4
-rw-r--r--arch/x86/include/asm/mem_encrypt.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h37
-rw-r--r--arch/x86/include/asm/page_64.h8
-rw-r--r--arch/x86/include/asm/page_64_types.h20
-rw-r--r--arch/x86/include/asm/paravirt.h21
-rw-r--r--arch/x86/include/asm/pgalloc.h5
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h1
-rw-r--r--arch/x86/include/asm/pgtable.h11
-rw-r--r--arch/x86/include/asm/pgtable_32.h2
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_64.h23
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h70
-rw-r--r--arch/x86/include/asm/required-features.h8
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/sparsemem.h9
-rw-r--r--arch/x86/include/asm/x86_init.h9
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h18
-rw-r--r--arch/x86/kernel/apic/vector.c25
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c15
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c37
-rw-r--r--arch/x86/kernel/e820.c18
-rw-r--r--arch/x86/kernel/head64.c81
-rw-r--r--arch/x86/kernel/head_64.S22
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/module.c1
-rw-r--r--arch/x86/kernel/setup.c5
-rw-r--r--arch/x86/kernel/smpboot.c12
-rw-r--r--arch/x86/kernel/x86_init.c13
-rw-r--r--arch/x86/lib/error-inject.c1
-rw-r--r--arch/x86/mm/Makefile15
-rw-r--r--arch/x86/mm/debug_pagetables.c32
-rw-r--r--arch/x86/mm/dump_pagetables.c125
-rw-r--r--arch/x86/mm/fault.c4
-rw-r--r--arch/x86/mm/ident_map.c2
-rw-r--r--arch/x86/mm/init_64.c36
-rw-r--r--arch/x86/mm/kasan_init_64.c20
-rw-r--r--arch/x86/mm/kaslr.c29
-rw-r--r--arch/x86/mm/mem_encrypt.c578
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c564
-rw-r--r--arch/x86/mm/tlb.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c9
-rw-r--r--arch/x86/oprofile/nmi_int.c2
-rw-r--r--arch/x86/platform/efi/efi_64.c10
-rw-r--r--arch/x86/power/hibernate_64.c6
-rw-r--r--arch/x86/tools/relocs.c3
-rw-r--r--arch/x86/xen/Kconfig5
-rw-r--r--arch/x86/xen/enlighten_pvh.c14
-rw-r--r--arch/x86/xen/mmu_pv.c21
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/xtensa/kernel/pci-dma.c40
-rw-r--r--arch/xtensa/mm/init.c70
132 files changed, 2272 insertions, 1530 deletions
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 46ebf14aed4e..8a2b331e43fe 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -6,7 +6,6 @@
* Atomic exchange routines.
*/
-#define __ASM__MB
#define ____xchg(type, args...) __xchg ## type ## _local(args)
#define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args)
#include <asm/xchg.h>
@@ -33,10 +32,6 @@
cmpxchg_local((ptr), (o), (n)); \
})
-#ifdef CONFIG_SMP
-#undef __ASM__MB
-#define __ASM__MB "\tmb\n"
-#endif
#undef ____xchg
#undef ____cmpxchg
#define ____xchg(type, args...) __xchg ##type(args)
@@ -64,7 +59,6 @@
cmpxchg((ptr), (o), (n)); \
})
-#undef __ASM__MB
#undef ____cmpxchg
#endif /* _ALPHA_CMPXCHG_H */
diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index 68dfb3cb7145..e2b59fac5257 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -12,6 +12,10 @@
* Atomic exchange.
* Since it can be used to implement critical sections
* it must clobber "memory" (also for interrupts in UP).
+ *
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
*/
static inline unsigned long
@@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val)
{
unsigned long ret, tmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" insbl %1,%4,%1\n"
@@ -28,12 +33,12 @@ ____xchg(_u8, volatile char *m, unsigned long val)
" or %1,%2,%2\n"
" stq_c %2,0(%3)\n"
" beq %2,2f\n"
- __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
: "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
: "r" ((long)m), "1" (val) : "memory");
+ smp_mb();
return ret;
}
@@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val)
{
unsigned long ret, tmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" inswl %1,%4,%1\n"
@@ -52,12 +58,12 @@ ____xchg(_u16, volatile short *m, unsigned long val)
" or %1,%2,%2\n"
" stq_c %2,0(%3)\n"
" beq %2,2f\n"
- __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
: "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
: "r" ((long)m), "1" (val) : "memory");
+ smp_mb();
return ret;
}
@@ -67,17 +73,18 @@ ____xchg(_u32, volatile int *m, unsigned long val)
{
unsigned long dummy;
+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%4\n"
" bis $31,%3,%1\n"
" stl_c %1,%2\n"
" beq %1,2f\n"
- __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
: "=&r" (val), "=&r" (dummy), "=m" (*m)
: "rI" (val), "m" (*m) : "memory");
+ smp_mb();
return val;
}
@@ -87,17 +94,18 @@ ____xchg(_u64, volatile long *m, unsigned long val)
{
unsigned long dummy;
+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%4\n"
" bis $31,%3,%1\n"
" stq_c %1,%2\n"
" beq %1,2f\n"
- __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
: "=&r" (val), "=&r" (dummy), "=m" (*m)
: "rI" (val), "m" (*m) : "memory");
+ smp_mb();
return val;
}
@@ -128,10 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*
- * The memory barrier should be placed in SMP only when we actually
- * make the change. If we don't change anything (so if the returned
- * prev is equal to old) then we aren't acquiring anything new and
- * we don't need any memory barrier as far I can tell.
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
+ * The trailing memory barrier is placed in SMP unconditionally, in
+ * order to guarantee that dependency ordering is preserved when a
+ * dependency is headed by an unsuccessful operation.
*/
static inline unsigned long
@@ -139,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
{
unsigned long prev, tmp, cmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" insbl %1,%5,%1\n"
@@ -150,13 +161,13 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
" or %1,%2,%2\n"
" stq_c %2,0(%4)\n"
" beq %2,3f\n"
- __ASM__MB
"2:\n"
".subsection 2\n"
"3: br 1b\n"
".previous"
: "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
: "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+ smp_mb();
return prev;
}
@@ -166,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
{
unsigned long prev, tmp, cmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" inswl %1,%5,%1\n"
@@ -177,13 +189,13 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
" or %1,%2,%2\n"
" stq_c %2,0(%4)\n"
" beq %2,3f\n"
- __ASM__MB
"2:\n"
".subsection 2\n"
"3: br 1b\n"
".previous"
: "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
: "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+ smp_mb();
return prev;
}
@@ -193,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
{
unsigned long prev, cmp;
+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%5\n"
" cmpeq %0,%3,%1\n"
@@ -200,13 +213,13 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
" mov %4,%1\n"
" stl_c %1,%2\n"
" beq %1,3f\n"
- __ASM__MB
"2:\n"
".subsection 2\n"
"3: br 1b\n"
".previous"
: "=&r"(prev), "=&r"(cmp), "=m"(*m)
: "r"((long) old), "r"(new), "m"(*m) : "memory");
+ smp_mb();
return prev;
}
@@ -216,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
{
unsigned long prev, cmp;
+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%5\n"
" cmpeq %0,%3,%1\n"
@@ -223,13 +237,13 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
" mov %4,%1\n"
" stq_c %1,%2\n"
" beq %1,3f\n"
- __ASM__MB
"2:\n"
".subsection 2\n"
"3: br 1b\n"
".previous"
: "=&r"(prev), "=&r"(cmp), "=m"(*m)
: "r"((long) old), "r"(new), "m"(*m) : "memory");
+ smp_mb();
return prev;
}
diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h
index ea022d47896c..21ec82466d62 100644
--- a/arch/arc/include/asm/bug.h
+++ b/arch/arc/include/asm/bug.h
@@ -23,7 +23,8 @@ void die(const char *str, struct pt_regs *regs, unsigned long address);
#define BUG() do { \
pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
- dump_stack(); \
+ barrier_before_unreachable(); \
+ __builtin_trap(); \
} while (0)
#define HAVE_ARCH_BUG
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 9d27331fe69a..ec12fe1c2f07 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -373,7 +373,7 @@ static void arc_chk_core_config(void)
{
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
int saved = 0, present = 0;
- char *opt_nm = NULL;;
+ char *opt_nm = NULL;
if (!cpu->extn.timer0)
panic("Timer0 is not present!\n");
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 333daab7def0..183391d4d33a 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -366,7 +366,7 @@ static void init_unwind_hdr(struct unwind_table *table,
return;
ret_err:
- panic("Attention !!! Dwarf FDE parsing errors\n");;
+ panic("Attention !!! Dwarf FDE parsing errors\n");
}
#ifdef CONFIG_MODULES
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 629f8e9981f1..cf2701cb0de8 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -83,7 +83,7 @@ static void dummy_clock_access(struct timespec64 *ts)
}
static clock_access_fn __read_persistent_clock = dummy_clock_access;
-static clock_access_fn __read_boot_clock = dummy_clock_access;;
+static clock_access_fn __read_boot_clock = dummy_clock_access;
void read_persistent_clock64(struct timespec64 *ts)
{
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index 57058ac46f49..7e5d7a083707 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -23,7 +23,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
-#include <linux/perf/arm_pmu.h>
#include <linux/regulator/machine.h>
#include <asm/outercache.h>
@@ -112,37 +111,6 @@ static void ux500_restart(enum reboot_mode mode, const char *cmd)
prcmu_system_reset(0);
}
-/*
- * The PMU IRQ lines of two cores are wired together into a single interrupt.
- * Bounce the interrupt to the other core if it's not ours.
- */
-static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler)
-{
- irqreturn_t ret = handler(irq, dev);
- int other = !smp_processor_id();
-
- if (ret == IRQ_NONE && cpu_online(other))
- irq_set_affinity(irq, cpumask_of(other));
-
- /*
- * We should be able to get away with the amount of IRQ_NONEs we give,
- * while still having the spurious IRQ detection code kick in if the
- * interrupt really starts hitting spuriously.
- */
- return ret;
-}
-
-static struct arm_pmu_platdata db8500_pmu_platdata = {
- .handle_irq = db8500_pmu_handler,
- .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD,
-};
-
-static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = {
- /* Requires call-back bindings. */
- OF_DEV_AUXDATA("arm,cortex-a9-pmu", 0, "arm-pmu", &db8500_pmu_platdata),
- {},
-};
-
static struct of_dev_auxdata u8540_auxdata_lookup[] __initdata = {
OF_DEV_AUXDATA("stericsson,db8500-prcmu", 0x80157000, "db8500-prcmu", NULL),
{},
@@ -165,9 +133,6 @@ static void __init u8500_init_machine(void)
if (of_machine_is_compatible("st-ericsson,u8540"))
of_platform_populate(NULL, u8500_local_bus_nodes,
u8540_auxdata_lookup, NULL);
- else
- of_platform_populate(NULL, u8500_local_bus_nodes,
- u8500_auxdata_lookup, NULL);
}
static const char * stericsson_dt_platform_compat[] = {
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index be7bd19c87ec..350c76a1d15b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -20,7 +20,7 @@
#define MPIDR_UP_BITMASK (0x1 << 30)
#define MPIDR_MT_BITMASK (0x1 << 24)
-#define MPIDR_HWID_BITMASK 0xff00ffffff
+#define MPIDR_HWID_BITMASK UL(0xff00ffffff)
#define MPIDR_LEVEL_BITS_SHIFT 3
#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 1dca41bea16a..e73f68569624 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -22,7 +22,7 @@
static inline pte_t huge_ptep_get(pte_t *ptep)
{
- return *ptep;
+ return READ_ONCE(*ptep);
}
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 9679067a1574..7faed6e48b46 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -185,42 +185,42 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
return pmd;
}
-static inline void kvm_set_s2pte_readonly(pte_t *pte)
+static inline void kvm_set_s2pte_readonly(pte_t *ptep)
{
pteval_t old_pteval, pteval;
- pteval = READ_ONCE(pte_val(*pte));
+ pteval = READ_ONCE(pte_val(*ptep));
do {
old_pteval = pteval;
pteval &= ~PTE_S2_RDWR;
pteval |= PTE_S2_RDONLY;
- pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
+ pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
} while (pteval != old_pteval);
}
-static inline bool kvm_s2pte_readonly(pte_t *pte)
+static inline bool kvm_s2pte_readonly(pte_t *ptep)
{
- return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
+ return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
}
-static inline bool kvm_s2pte_exec(pte_t *pte)
+static inline bool kvm_s2pte_exec(pte_t *ptep)
{
- return !(pte_val(*pte) & PTE_S2_XN);
+ return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
}
-static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
{
- kvm_set_s2pte_readonly((pte_t *)pmd);
+ kvm_set_s2pte_readonly((pte_t *)pmdp);
}
-static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
{
- return kvm_s2pte_readonly((pte_t *)pmd);
+ return kvm_s2pte_readonly((pte_t *)pmdp);
}
-static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
{
- return !(pmd_val(*pmd) & PMD_S2_XN);
+ return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}
static inline bool kvm_page_empty(void *ptr)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 8d3331985d2e..39ec0b8a689e 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void)
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
-static inline void cpu_replace_ttbr1(pgd_t *pgd)
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
- phys_addr_t pgd_phys = virt_to_phys(pgd);
+ phys_addr_t pgd_phys = virt_to_phys(pgdp);
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index e9d9f1b006ef..2e05bcd944c8 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
return (pmd_t *)__get_free_page(PGALLOC_GFP);
}
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
{
- BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
- free_page((unsigned long)pmd);
+ BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
+ free_page((unsigned long)pmdp);
}
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
- set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
+ set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
}
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
- __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+ __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
}
#else
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
BUILD_BUG();
}
@@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
return (pud_t *)__get_free_page(PGALLOC_GFP);
}
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
{
- BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
- free_page((unsigned long)pud);
+ BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
+ free_page((unsigned long)pudp);
}
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
{
- set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
+ set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
}
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
{
- __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+ __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
}
#else
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
{
BUILD_BUG();
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
@@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
/*
* Free a PTE table.
*/
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
{
- if (pte)
- free_page((unsigned long)pte);
+ if (ptep)
+ free_page((unsigned long)ptep);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
__free_page(pte);
}
-static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
pmdval_t prot)
{
- set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
+ set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
}
/*
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 094374c82db0..7e2c27e63cd8 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
static inline void set_pte(pte_t *ptep, pte_t pte)
{
- *ptep = pte;
+ WRITE_ONCE(*ptep, pte);
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
@@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
+ pte_t old_pte;
+
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte, addr);
@@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
- if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+ old_pte = READ_ONCE(*ptep);
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
(mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(*ptep), pte_val(pte));
- VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
+ __func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(*ptep), pte_val(pte));
+ __func__, pte_val(old_pte), pte_val(pte));
}
set_pte(ptep, pte);
@@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
dsb(ishst);
isb();
}
@@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
static inline void set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ WRITE_ONCE(*pudp, pud);
dsb(ishst);
isb();
}
@@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
/* Find an entry in the second-level page table. */
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
@@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- *pgdp = pgd;
+ WRITE_ONCE(*pgdp, pgd);
dsb(ishst);
}
@@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
/* Find an entry in the frst-level page table. */
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr))))
#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 472ef944e932..902f9edacbea 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -28,7 +28,7 @@ struct stackframe {
unsigned long fp;
unsigned long pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- unsigned int graph;
+ int graph;
#endif
};
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 543e11f0f657..e66b0fca99c2 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -72,15 +72,15 @@ static inline void set_fs(mm_segment_t fs)
* This is equivalent to the following test:
* (u65)addr + (u65)size <= (u65)current->addr_limit + 1
*/
-static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
+static inline unsigned long __range_ok(const void __user *addr, unsigned long size)
{
- unsigned long limit = current_thread_info()->addr_limit;
+ unsigned long ret, limit = current_thread_info()->addr_limit;
__chk_user_ptr(addr);
asm volatile(
// A + B <= C + 1 for all A,B,C, in four easy steps:
// 1: X = A + B; X' = X % 2^64
- " adds %0, %0, %2\n"
+ " adds %0, %3, %2\n"
// 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4
" csel %1, xzr, %1, hi\n"
// 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X'
@@ -92,9 +92,9 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
// testing X' - C == 0, subject to the previous adjustments.
" sbcs xzr, %0, %1\n"
" cset %0, ls\n"
- : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc");
+ : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc");
- return addr;
+ return ret;
}
/*
@@ -104,7 +104,7 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
*/
#define untagged_addr(addr) sign_extend64(addr, 55)
-#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size)
+#define access_ok(type, addr, size) __range_ok(addr, size)
#define user_addr_max get_fs
#define _ASM_EXTABLE(from, to) \
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index c33b5e4010ab..68450e954d47 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -370,6 +370,7 @@ static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
static int swp_handler(struct pt_regs *regs, u32 instr)
{
u32 destreg, data, type, address = 0;
+ const void __user *user_ptr;
int rn, rt2, res = 0;
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
@@ -401,7 +402,8 @@ static int swp_handler(struct pt_regs *regs, u32 instr)
aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data);
/* Check access in reasonable access range for both SWP and SWPB */
- if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+ user_ptr = (const void __user *)(unsigned long)(address & ~3);
+ if (!access_ok(VERIFY_WRITE, user_ptr, 4)) {
pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n",
address);
goto fault;
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 07823595b7f0..52f15cd896e1 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -408,6 +408,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+ .enable = qcom_enable_link_stack_sanitization,
+ },
+ {
+ .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
+ MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
.enable = enable_smccc_arch_workaround_1,
},
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 29b1f873e337..2985a067fc13 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -199,9 +199,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
};
static const struct arm64_ftr_bits ftr_ctr[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1), /* DIC */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index f85ac58d08a3..a8bf1c892b90 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
- pte_t pte = *ptep;
+ pte_t pte = READ_ONCE(*ptep);
if (md->attribute & EFI_MEMORY_RO)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index f20cf7e99249..1ec5f28c39fc 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length,
gfp_t mask)
{
int rc = 0;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
unsigned long dst = (unsigned long)allocator(mask);
if (!dst) {
@@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length,
memcpy((void *)dst, src_start, length);
flush_icache_range(dst, dst + length);
- pgd = pgd_offset_raw(allocator(mask), dst_addr);
- if (pgd_none(*pgd)) {
- pud = allocator(mask);
- if (!pud) {
+ pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+ if (pgd_none(READ_ONCE(*pgdp))) {
+ pudp = allocator(mask);
+ if (!pudp) {
rc = -ENOMEM;
goto out;
}
- pgd_populate(&init_mm, pgd, pud);
+ pgd_populate(&init_mm, pgdp, pudp);
}
- pud = pud_offset(pgd, dst_addr);
- if (pud_none(*pud)) {
- pmd = allocator(mask);
- if (!pmd) {
+ pudp = pud_offset(pgdp, dst_addr);
+ if (pud_none(READ_ONCE(*pudp))) {
+ pmdp = allocator(mask);
+ if (!pmdp) {
rc = -ENOMEM;
goto out;
}
- pud_populate(&init_mm, pud, pmd);
+ pud_populate(&init_mm, pudp, pmdp);
}
- pmd = pmd_offset(pud, dst_addr);
- if (pmd_none(*pmd)) {
- pte = allocator(mask);
- if (!pte) {
+ pmdp = pmd_offset(pudp, dst_addr);
+ if (pmd_none(READ_ONCE(*pmdp))) {
+ ptep = allocator(mask);
+ if (!ptep) {
rc = -ENOMEM;
goto out;
}
- pmd_populate_kernel(&init_mm, pmd, pte);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
}
- pte = pte_offset_kernel(pmd, dst_addr);
- set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+ ptep = pte_offset_kernel(pmdp, dst_addr);
+ set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
/*
* Load our new page tables. A strict BBM approach requires that we
@@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
- write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
+ write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
isb();
*phys_dst_addr = virt_to_phys((void *)dst);
@@ -320,9 +320,9 @@ int swsusp_arch_suspend(void)
return ret;
}
-static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
- pte_t pte = *src_pte;
+ pte_t pte = READ_ONCE(*src_ptep);
if (pte_valid(pte)) {
/*
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
- set_pte(dst_pte, pte_mkwrite(pte));
+ set_pte(dst_ptep, pte_mkwrite(pte));
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
@@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
- set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
+ set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
}
}
-static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
unsigned long end)
{
- pte_t *src_pte;
- pte_t *dst_pte;
+ pte_t *src_ptep;
+ pte_t *dst_ptep;
unsigned long addr = start;
- dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pte)
+ dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_ptep)
return -ENOMEM;
- pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
- dst_pte = pte_offset_kernel(dst_pmd, start);
+ pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
+ dst_ptep = pte_offset_kernel(dst_pmdp, start);
- src_pte = pte_offset_kernel(src_pmd, start);
+ src_ptep = pte_offset_kernel(src_pmdp, start);
do {
- _copy_pte(dst_pte, src_pte, addr);
- } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+ _copy_pte(dst_ptep, src_ptep, addr);
+ } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
return 0;
}
-static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
unsigned long end)
{
- pmd_t *src_pmd;
- pmd_t *dst_pmd;
+ pmd_t *src_pmdp;
+ pmd_t *dst_pmdp;
unsigned long next;
unsigned long addr = start;
- if (pud_none(*dst_pud)) {
- dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pmd)
+ if (pud_none(READ_ONCE(*dst_pudp))) {
+ dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pmdp)
return -ENOMEM;
- pud_populate(&init_mm, dst_pud, dst_pmd);
+ pud_populate(&init_mm, dst_pudp, dst_pmdp);
}
- dst_pmd = pmd_offset(dst_pud, start);
+ dst_pmdp = pmd_offset(dst_pudp, start);
- src_pmd = pmd_offset(src_pud, start);
+ src_pmdp = pmd_offset(src_pudp, start);
do {
+ pmd_t pmd = READ_ONCE(*src_pmdp);
+
next = pmd_addr_end(addr, end);
- if (pmd_none(*src_pmd))
+ if (pmd_none(pmd))
continue;
- if (pmd_table(*src_pmd)) {
- if (copy_pte(dst_pmd, src_pmd, addr, next))
+ if (pmd_table(pmd)) {
+ if (copy_pte(dst_pmdp, src_pmdp, addr, next))
return -ENOMEM;
} else {
- set_pmd(dst_pmd,
- __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+ set_pmd(dst_pmdp,
+ __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
}
- } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+ } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
return 0;
}
-static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
unsigned long end)
{
- pud_t *dst_pud;
- pud_t *src_pud;
+ pud_t *dst_pudp;
+ pud_t *src_pudp;
unsigned long next;
unsigned long addr = start;
- if (pgd_none(*dst_pgd)) {
- dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pud)
+ if (pgd_none(READ_ONCE(*dst_pgdp))) {
+ dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pudp)
return -ENOMEM;
- pgd_populate(&init_mm, dst_pgd, dst_pud);
+ pgd_populate(&init_mm, dst_pgdp, dst_pudp);
}
- dst_pud = pud_offset(dst_pgd, start);
+ dst_pudp = pud_offset(dst_pgdp, start);
- src_pud = pud_offset(src_pgd, start);
+ src_pudp = pud_offset(src_pgdp, start);
do {
+ pud_t pud = READ_ONCE(*src_pudp);
+
next = pud_addr_end(addr, end);
- if (pud_none(*src_pud))
+ if (pud_none(pud))
continue;
- if (pud_table(*(src_pud))) {
- if (copy_pmd(dst_pud, src_pud, addr, next))
+ if (pud_table(pud)) {
+ if (copy_pmd(dst_pudp, src_pudp, addr, next))
return -ENOMEM;
} else {
- set_pud(dst_pud,
- __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+ set_pud(dst_pudp,
+ __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
}
- } while (dst_pud++, src_pud++, addr = next, addr != end);
+ } while (dst_pudp++, src_pudp++, addr = next, addr != end);
return 0;
}
-static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
unsigned long end)
{
unsigned long next;
unsigned long addr = start;
- pgd_t *src_pgd = pgd_offset_k(start);
+ pgd_t *src_pgdp = pgd_offset_k(start);
- dst_pgd = pgd_offset_raw(dst_pgd, start);
+ dst_pgdp = pgd_offset_raw(dst_pgdp, start);
do {
next = pgd_addr_end(addr, end);
- if (pgd_none(*src_pgd))
+ if (pgd_none(READ_ONCE(*src_pgdp)))
continue;
- if (copy_pud(dst_pgd, src_pgd, addr, next))
+ if (copy_pud(dst_pgdp, src_pgdp, addr, next))
return -ENOMEM;
- } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+ } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
return 0;
}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 75b220ba73a3..85a251b6dfa8 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -908,9 +908,9 @@ static void __armv8pmu_probe_pmu(void *info)
int pmuver;
dfr0 = read_sysreg(id_aa64dfr0_el1);
- pmuver = cpuid_feature_extract_signed_field(dfr0,
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
ID_AA64DFR0_PMUVER_SHIFT);
- if (pmuver < 1)
+ if (pmuver == 0xf || pmuver == 0)
return;
probe->present = true;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index ad8aeb098b31..c0da6efe5465 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -220,8 +220,15 @@ void __show_regs(struct pt_regs *regs)
show_regs_print_info(KERN_DEFAULT);
print_pstate(regs);
- printk("pc : %pS\n", (void *)regs->pc);
- printk("lr : %pS\n", (void *)lr);
+
+ if (!user_mode(regs)) {
+ printk("pc : %pS\n", (void *)regs->pc);
+ printk("lr : %pS\n", (void *)lr);
+ } else {
+ printk("pc : %016llx\n", regs->pc);
+ printk("lr : %016llx\n", lr);
+ }
+
printk("sp : %016llx\n", sp);
i = top_reg;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6618036ae6d4..9ae31f7e2243 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1419,7 +1419,7 @@ static int compat_ptrace_hbp_get(unsigned int note_type,
u64 addr = 0;
u32 ctrl = 0;
- int err, idx = compat_ptrace_hbp_num_to_idx(num);;
+ int err, idx = compat_ptrace_hbp_num_to_idx(num);
if (num & 1) {
err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr);
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 76809ccd309c..d5718a060672 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -59,6 +59,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk->ret_stack &&
(frame->pc == (unsigned long)return_to_handler)) {
+ if (WARN_ON_ONCE(frame->graph == -1))
+ return -EINVAL;
+ if (frame->graph < -1)
+ frame->graph += FTRACE_NOTRACE_DEPTH;
+
/*
* This is a case where function graph tracer has
* modified a return address (LR) in a stack frame
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 8b8bbd3eaa52..a382b2a1b84e 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -57,7 +57,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags)
if (end < start || flags)
return -EINVAL;
- if (!access_ok(VERIFY_READ, start, end - start))
+ if (!access_ok(VERIFY_READ, (const void __user *)start, end - start))
return -EFAULT;
return __do_compat_cache_op(start, end);
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index a4391280fba9..f258636273c9 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -52,7 +52,7 @@ unsigned long profile_pc(struct pt_regs *regs)
frame.fp = regs->regs[29];
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = -1; /* no task info */
+ frame.graph = current->curr_ret_stack;
#endif
do {
int ret = unwind_frame(NULL, &frame);
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index bbb0fde2780e..eb2d15147e8d 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -57,7 +57,7 @@ static const char *handler[]= {
"Error"
};
-int show_unhandled_signals = 1;
+int show_unhandled_signals = 0;
static void dump_backtrace_entry(unsigned long where)
{
@@ -526,14 +526,6 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs)
}
#endif
- if (show_unhandled_signals_ratelimited()) {
- pr_info("%s[%d]: syscall %d\n", current->comm,
- task_pid_nr(current), regs->syscallno);
- dump_instr("", regs);
- if (user_mode(regs))
- __show_regs(regs);
- }
-
return sys_ni_syscall();
}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 116252a8d3a5..870f4b1587f9 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -407,8 +407,10 @@ again:
u32 midr = read_cpuid_id();
/* Apply BTAC predictors mitigation to all Falkor chips */
- if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
+ if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+ ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
__qcom_hyp_sanitize_btac_predictors();
+ }
}
fp_enabled = __fpsimd_enabled();
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 7b60d62ac593..65dfc8571bf8 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
}
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
{
- pte_t *pte = pte_offset_kernel(pmd, 0UL);
+ pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
unsigned long addr;
unsigned i;
- for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+ for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
addr = start + i * PAGE_SIZE;
- note_page(st, addr, 4, pte_val(*pte));
+ note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
}
}
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
{
- pmd_t *pmd = pmd_offset(pud, 0UL);
+ pmd_t *pmdp = pmd_offset(pudp, 0UL);
unsigned long addr;
unsigned i;
- for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+ for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
+ pmd_t pmd = READ_ONCE(*pmdp);
+
addr = start + i * PMD_SIZE;
- if (pmd_none(*pmd) || pmd_sect(*pmd)) {
- note_page(st, addr, 3, pmd_val(*pmd));
+ if (pmd_none(pmd) || pmd_sect(pmd)) {
+ note_page(st, addr, 3, pmd_val(pmd));
} else {
- BUG_ON(pmd_bad(*pmd));
- walk_pte(st, pmd, addr);
+ BUG_ON(pmd_bad(pmd));
+ walk_pte(st, pmdp, addr);
}
}
}
-static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
{
- pud_t *pud = pud_offset(pgd, 0UL);
+ pud_t *pudp = pud_offset(pgdp, 0UL);
unsigned long addr;
unsigned i;
- for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+ for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
+ pud_t pud = READ_ONCE(*pudp);
+
addr = start + i * PUD_SIZE;
- if (pud_none(*pud) || pud_sect(*pud)) {
- note_page(st, addr, 2, pud_val(*pud));
+ if (pud_none(pud) || pud_sect(pud)) {
+ note_page(st, addr, 2, pud_val(pud));
} else {
- BUG_ON(pud_bad(*pud));
- walk_pmd(st, pud, addr);
+ BUG_ON(pud_bad(pud));
+ walk_pmd(st, pudp, addr);
}
}
}
@@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
unsigned long start)
{
- pgd_t *pgd = pgd_offset(mm, 0UL);
+ pgd_t *pgdp = pgd_offset(mm, 0UL);
unsigned i;
unsigned long addr;
- for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+ for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
+ pgd_t pgd = READ_ONCE(*pgdp);
+
addr = start + i * PGDIR_SIZE;
- if (pgd_none(*pgd)) {
- note_page(st, addr, 1, pgd_val(*pgd));
+ if (pgd_none(pgd)) {
+ note_page(st, addr, 1, pgd_val(pgd));
} else {
- BUG_ON(pgd_bad(*pgd));
- walk_pud(st, pgd, addr);
+ BUG_ON(pgd_bad(pgd));
+ walk_pud(st, pgdp, addr);
}
}
}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f76bb2c3c943..bff11553eb05 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr)
void show_pte(unsigned long addr)
{
struct mm_struct *mm;
- pgd_t *pgd;
+ pgd_t *pgdp;
+ pgd_t pgd;
if (addr < TASK_SIZE) {
/* TTBR0 */
@@ -149,33 +150,37 @@ void show_pte(unsigned long addr)
return;
}
- pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+ pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
VA_BITS, mm->pgd);
- pgd = pgd_offset(mm, addr);
- pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
+ pgdp = pgd_offset(mm, addr);
+ pgd = READ_ONCE(*pgdp);
+ pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
do {
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
- if (pgd_none(*pgd) || pgd_bad(*pgd))
+ if (pgd_none(pgd) || pgd_bad(pgd))
break;
- pud = pud_offset(pgd, addr);
- pr_cont(", *pud=%016llx", pud_val(*pud));
- if (pud_none(*pud) || pud_bad(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ pr_cont(", pud=%016llx", pud_val(pud));
+ if (pud_none(pud) || pud_bad(pud))
break;
- pmd = pmd_offset(pud, addr);
- pr_cont(", *pmd=%016llx", pmd_val(*pmd));
- if (pmd_none(*pmd) || pmd_bad(*pmd))
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ pr_cont(", pmd=%016llx", pmd_val(pmd));
+ if (pmd_none(pmd) || pmd_bad(pmd))
break;
- pte = pte_offset_map(pmd, addr);
- pr_cont(", *pte=%016llx", pte_val(*pte));
- pte_unmap(pte);
+ ptep = pte_offset_map(pmdp, addr);
+ pte = READ_ONCE(*ptep);
+ pr_cont(", pte=%016llx", pte_val(pte));
+ pte_unmap(ptep);
} while(0);
pr_cont("\n");
@@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
pte_t entry, int dirty)
{
pteval_t old_pteval, pteval;
+ pte_t pte = READ_ONCE(*ptep);
- if (pte_same(*ptep, entry))
+ if (pte_same(pte, entry))
return 0;
/* only preserve the access flags and write permission */
@@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
* (calculated as: a & b == ~(~a | ~b)).
*/
pte_val(entry) ^= PTE_RDONLY;
- pteval = READ_ONCE(pte_val(*ptep));
+ pteval = pte_val(pte);
do {
old_pteval = pteval;
pteval ^= PTE_RDONLY;
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 6cb0fa92a651..ecc6818191df 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte)
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, size_t *pgsize)
{
- pgd_t *pgd = pgd_offset(mm, addr);
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp = pgd_offset(mm, addr);
+ pud_t *pudp;
+ pmd_t *pmdp;
*pgsize = PAGE_SIZE;
- pud = pud_offset(pgd, addr);
- pmd = pmd_offset(pud, addr);
- if ((pte_t *)pmd == ptep) {
+ pudp = pud_offset(pgdp, addr);
+ pmdp = pmd_offset(pudp, addr);
+ if ((pte_t *)pmdp == ptep) {
*pgsize = PMD_SIZE;
return CONT_PMDS;
}
@@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
clear_flush(mm, addr, ptep, pgsize, ncontig);
- for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
- pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
- pte_val(pfn_pte(pfn, hugeprot)));
+ for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
- }
}
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
- pgd_t *pgd;
- pud_t *pud;
- pte_t *pte = NULL;
-
- pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
- pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
- if (!pud)
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep = NULL;
+
+ pgdp = pgd_offset(mm, addr);
+ pudp = pud_alloc(mm, pgdp, addr);
+ if (!pudp)
return NULL;
if (sz == PUD_SIZE) {
- pte = (pte_t *)pud;
+ ptep = (pte_t *)pudp;
} else if (sz == (PAGE_SIZE * CONT_PTES)) {
- pmd_t *pmd = pmd_alloc(mm, pud, addr);
+ pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
/*
@@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
* will be no pte_unmap() to correspond with this
* pte_alloc_map().
*/
- pte = pte_alloc_map(mm, pmd, addr);
+ ptep = pte_alloc_map(mm, pmdp, addr);
} else if (sz == PMD_SIZE) {
if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
- pud_none(*pud))
- pte = huge_pmd_share(mm, addr, pud);
+ pud_none(READ_ONCE(*pudp)))
+ ptep = huge_pmd_share(mm, addr, pudp);
else
- pte = (pte_t *)pmd_alloc(mm, pud, addr);
+ ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
} else if (sz == (PMD_SIZE * CONT_PMDS)) {
- pmd_t *pmd;
-
- pmd = pmd_alloc(mm, pud, addr);
+ pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
- return (pte_t *)pmd;
+ return (pte_t *)pmdp;
}
- pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
- sz, pte, pte_val(*pte));
- return pte;
+ return ptep;
}
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
- pgd = pgd_offset(mm, addr);
- pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
- if (!pgd_present(*pgd))
+ pgdp = pgd_offset(mm, addr);
+ if (!pgd_present(READ_ONCE(*pgdp)))
return NULL;
- pud = pud_offset(pgd, addr);
- if (sz != PUD_SIZE && pud_none(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ if (sz != PUD_SIZE && pud_none(pud))
return NULL;
/* hugepage or swap? */
- if (pud_huge(*pud) || !pud_present(*pud))
- return (pte_t *)pud;
+ if (pud_huge(pud) || !pud_present(pud))
+ return (pte_t *)pudp;
/* table; check the next level */
if (sz == CONT_PMD_SIZE)
addr &= CONT_PMD_MASK;
- pmd = pmd_offset(pud, addr);
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
- pmd_none(*pmd))
+ pmd_none(pmd))
return NULL;
- if (pmd_huge(*pmd) || !pmd_present(*pmd))
- return (pte_t *)pmd;
+ if (pmd_huge(pmd) || !pmd_present(pmd))
+ return (pte_t *)pmdp;
- if (sz == CONT_PTE_SIZE) {
- pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
- return pte;
- }
+ if (sz == CONT_PTE_SIZE)
+ return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
return NULL;
}
@@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
size_t pgsize;
pte_t pte;
- if (!pte_cont(*ptep)) {
+ if (!pte_cont(READ_ONCE(*ptep))) {
ptep_set_wrprotect(mm, addr, ptep);
return;
}
@@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
size_t pgsize;
int ncontig;
- if (!pte_cont(*ptep)) {
+ if (!pte_cont(READ_ONCE(*ptep))) {
ptep_clear_flush(vma, addr, ptep);
return;
}
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 6e02e6fb4c7b..dabfc1ecda3d 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)
return __pa(p);
}
-static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
bool early)
{
- if (pmd_none(*pmd)) {
+ if (pmd_none(READ_ONCE(*pmdp))) {
phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
: kasan_alloc_zeroed_page(node);
- __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+ __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
}
- return early ? pte_offset_kimg(pmd, addr)
- : pte_offset_kernel(pmd, addr);
+ return early ? pte_offset_kimg(pmdp, addr)
+ : pte_offset_kernel(pmdp, addr);
}
-static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
bool early)
{
- if (pud_none(*pud)) {
+ if (pud_none(READ_ONCE(*pudp))) {
phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
: kasan_alloc_zeroed_page(node);
- __pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
+ __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
}
- return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
+ return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
}
-static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
+static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,
bool early)
{
- if (pgd_none(*pgd)) {
+ if (pgd_none(READ_ONCE(*pgdp))) {
phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
: kasan_alloc_zeroed_page(node);
- __pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
+ __pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);
}
- return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
+ return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);
}
-static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
+ pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
do {
phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
: kasan_alloc_zeroed_page(node);
next = addr + PAGE_SIZE;
- set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
- } while (pte++, addr = next, addr != end && pte_none(*pte));
+ set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+ } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
}
-static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
+ pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
do {
next = pmd_addr_end(addr, end);
- kasan_pte_populate(pmd, addr, next, node, early);
- } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+ kasan_pte_populate(pmdp, addr, next, node, early);
+ } while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
}
-static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,
unsigned long end, int node, bool early)
{
unsigned long next;
- pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
+ pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);
do {
next = pud_addr_end(addr, end);
- kasan_pmd_populate(pud, addr, next, node, early);
- } while (pud++, addr = next, addr != end && pud_none(*pud));
+ kasan_pmd_populate(pudp, addr, next, node, early);
+ } while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
}
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
int node, bool early)
{
unsigned long next;
- pgd_t *pgd;
+ pgd_t *pgdp;
- pgd = pgd_offset_k(addr);
+ pgdp = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
- kasan_pud_populate(pgd, addr, next, node, early);
- } while (pgd++, addr = next, addr != end);
+ kasan_pud_populate(pgdp, addr, next, node, early);
+ } while (pgdp++, addr = next, addr != end);
}
/* The early shadow maps everything to a single page of zeroes */
@@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
*/
void __init kasan_copy_shadow(pgd_t *pgdir)
{
- pgd_t *pgd, *pgd_new, *pgd_end;
+ pgd_t *pgdp, *pgdp_new, *pgdp_end;
- pgd = pgd_offset_k(KASAN_SHADOW_START);
- pgd_end = pgd_offset_k(KASAN_SHADOW_END);
- pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+ pgdp = pgd_offset_k(KASAN_SHADOW_START);
+ pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
+ pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
do {
- set_pgd(pgd_new, *pgd);
- } while (pgd++, pgd_new++, pgd != pgd_end);
+ set_pgd(pgdp_new, READ_ONCE(*pgdp));
+ } while (pgdp++, pgdp_new++, pgdp != pgdp_end);
}
static void __init clear_pgds(unsigned long start,
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4694cda823c9..84a019f55022 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -125,45 +125,48 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
return ((old ^ new) & ~mask) == 0;
}
-static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
+static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot)
{
- pte_t *pte;
+ pte_t *ptep;
- pte = pte_set_fixmap_offset(pmd, addr);
+ ptep = pte_set_fixmap_offset(pmdp, addr);
do {
- pte_t old_pte = *pte;
+ pte_t old_pte = READ_ONCE(*ptep);
- set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot));
+ set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
/*
* After the PTE entry has been populated once, we
* only allow updates to the permission attributes.
*/
- BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
+ READ_ONCE(pte_val(*ptep))));
phys += PAGE_SIZE;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
pte_clear_fixmap();
}
-static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
+static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
int flags)
{
unsigned long next;
+ pmd_t pmd = READ_ONCE(*pmdp);
- BUG_ON(pmd_sect(*pmd));
- if (pmd_none(*pmd)) {
+ BUG_ON(pmd_sect(pmd));
+ if (pmd_none(pmd)) {
phys_addr_t pte_phys;
BUG_ON(!pgtable_alloc);
pte_phys = pgtable_alloc();
- __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+ __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+ pmd = READ_ONCE(*pmdp);
}
- BUG_ON(pmd_bad(*pmd));
+ BUG_ON(pmd_bad(pmd));
do {
pgprot_t __prot = prot;
@@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pte(pmd, addr, next, phys, __prot);
+ init_pte(pmdp, addr, next, phys, __prot);
phys += next - addr;
} while (addr = next, addr != end);
}
-static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void), int flags)
{
unsigned long next;
- pmd_t *pmd;
+ pmd_t *pmdp;
- pmd = pmd_set_fixmap_offset(pud, addr);
+ pmdp = pmd_set_fixmap_offset(pudp, addr);
do {
- pmd_t old_pmd = *pmd;
+ pmd_t old_pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(addr, end);
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
- pmd_set_huge(pmd, phys, prot);
+ pmd_set_huge(pmdp, phys, prot);
/*
* After the PMD entry has been populated once, we
* only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
- pmd_val(*pmd)));
+ READ_ONCE(pmd_val(*pmdp))));
} else {
- alloc_init_cont_pte(pmd, addr, next, phys, prot,
+ alloc_init_cont_pte(pmdp, addr, next, phys, prot,
pgtable_alloc, flags);
BUG_ON(pmd_val(old_pmd) != 0 &&
- pmd_val(old_pmd) != pmd_val(*pmd));
+ pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
}
phys += next - addr;
- } while (pmd++, addr = next, addr != end);
+ } while (pmdp++, addr = next, addr != end);
pmd_clear_fixmap();
}
-static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
+static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void), int flags)
{
unsigned long next;
+ pud_t pud = READ_ONCE(*pudp);
/*
* Check for initial section mappings in the pgd/pud.
*/
- BUG_ON(pud_sect(*pud));
- if (pud_none(*pud)) {
+ BUG_ON(pud_sect(pud));
+ if (pud_none(pud)) {
phys_addr_t pmd_phys;
BUG_ON(!pgtable_alloc);
pmd_phys = pgtable_alloc();
- __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
+ __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+ pud = READ_ONCE(*pudp);
}
- BUG_ON(pud_bad(*pud));
+ BUG_ON(pud_bad(pud));
do {
pgprot_t __prot = prot;
@@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags);
+ init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
phys += next - addr;
} while (addr = next, addr != end);
@@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
return true;
}
-static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
- phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void),
- int flags)
+static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(void),
+ int flags)
{
- pud_t *pud;
unsigned long next;
+ pud_t *pudp;
+ pgd_t pgd = READ_ONCE(*pgdp);
- if (pgd_none(*pgd)) {
+ if (pgd_none(pgd)) {
phys_addr_t pud_phys;
BUG_ON(!pgtable_alloc);
pud_phys = pgtable_alloc();
- __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
+ __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
+ pgd = READ_ONCE(*pgdp);
}
- BUG_ON(pgd_bad(*pgd));
+ BUG_ON(pgd_bad(pgd));
- pud = pud_set_fixmap_offset(pgd, addr);
+ pudp = pud_set_fixmap_offset(pgdp, addr);
do {
- pud_t old_pud = *pud;
+ pud_t old_pud = READ_ONCE(*pudp);
next = pud_addr_end(addr, end);
@@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
*/
if (use_1G_block(addr, next, phys) &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
- pud_set_huge(pud, phys, prot);
+ pud_set_huge(pudp, phys, prot);
/*
* After the PUD entry has been populated once, we
* only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
- pud_val(*pud)));
+ READ_ONCE(pud_val(*pudp))));
} else {
- alloc_init_cont_pmd(pud, addr, next, phys, prot,
+ alloc_init_cont_pmd(pudp, addr, next, phys, prot,
pgtable_alloc, flags);
BUG_ON(pud_val(old_pud) != 0 &&
- pud_val(old_pud) != pud_val(*pud));
+ pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
}
phys += next - addr;
- } while (pud++, addr = next, addr != end);
+ } while (pudp++, addr = next, addr != end);
pud_clear_fixmap();
}
@@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
int flags)
{
unsigned long addr, length, end, next;
- pgd_t *pgd = pgd_offset_raw(pgdir, virt);
+ pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
/*
* If the virtual and physical address don't have the same offset
@@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+ alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
flags);
phys += next - addr;
- } while (pgd++, addr = next, addr != end);
+ } while (pgdp++, addr = next, addr != end);
}
static phys_addr_t pgd_pgtable_alloc(void)
@@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
flush_tlb_kernel_range(virt, virt + size);
}
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
phys_addr_t end, pgprot_t prot, int flags)
{
- __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+ __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
prot, early_pgtable_alloc, flags);
}
@@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void)
PAGE_KERNEL_RO);
}
-static void __init map_mem(pgd_t *pgd)
+static void __init map_mem(pgd_t *pgdp)
{
phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
@@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd)
if (memblock_is_nomap(reg))
continue;
- __map_memblock(pgd, start, end, PAGE_KERNEL, flags);
+ __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
}
/*
@@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd)
* Note that contiguous mappings cannot be remapped in this way,
* so we should avoid them here.
*/
- __map_memblock(pgd, kernel_start, kernel_end,
+ __map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
@@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd)
* through /sys/kernel/kexec_crash_size interface.
*/
if (crashk_res.end) {
- __map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+ __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
PAGE_KERNEL,
NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
memblock_clear_nomap(crashk_res.start,
@@ -499,7 +506,7 @@ void mark_rodata_ro(void)
debug_checkwx();
}
-static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
pgprot_t prot, struct vm_struct *vma,
int flags, unsigned long vm_flags)
{
@@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
BUG_ON(!PAGE_ALIGNED(pa_start));
BUG_ON(!PAGE_ALIGNED(size));
- __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+ __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
early_pgtable_alloc, flags);
if (!(vm_flags & VM_NO_GUARD))
@@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline);
/*
* Create fine-grained mappings for the kernel.
*/
-static void __init map_kernel(pgd_t *pgd)
+static void __init map_kernel(pgd_t *pgdp)
{
static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
vmlinux_initdata, vmlinux_data;
@@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd)
* Only rodata will be remapped with different permissions later on,
* all other segments are allowed to use contiguous mappings.
*/
- map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0,
+ map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
VM_NO_GUARD);
- map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL,
+ map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
&vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
- map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot,
+ map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
&vmlinux_inittext, 0, VM_NO_GUARD);
- map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL,
+ map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
&vmlinux_initdata, 0, VM_NO_GUARD);
- map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
+ map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
- if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+ if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
/*
* The fixmap falls in a separate pgd to the kernel, and doesn't
* live in the carveout for the swapper_pg_dir. We can simply
* re-use the existing dir for the fixmap.
*/
- set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
- *pgd_offset_k(FIXADDR_START));
+ set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
+ READ_ONCE(*pgd_offset_k(FIXADDR_START)));
} else if (CONFIG_PGTABLE_LEVELS > 3) {
/*
* The fixmap shares its top level pgd entry with the kernel
@@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd)
* entry instead.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
- pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+ pud_populate(&init_mm,
+ pud_set_fixmap_offset(pgdp, FIXADDR_START),
lm_alias(bm_pmd));
pud_clear_fixmap();
} else {
BUG();
}
- kasan_copy_shadow(pgd);
+ kasan_copy_shadow(pgdp);
}
/*
@@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd)
void __init paging_init(void)
{
phys_addr_t pgd_phys = early_pgtable_alloc();
- pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+ pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
- map_kernel(pgd);
- map_mem(pgd);
+ map_kernel(pgdp);
+ map_mem(pgdp);
/*
* We want to reuse the original swapper_pg_dir so we don't have to
@@ -635,7 +643,7 @@ void __init paging_init(void)
* To do this we need to go via a temporary pgd.
*/
cpu_replace_ttbr1(__va(pgd_phys));
- memcpy(swapper_pg_dir, pgd, PGD_SIZE);
+ memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
pgd_clear_fixmap();
@@ -655,37 +663,40 @@ void __init paging_init(void)
*/
int kern_addr_valid(unsigned long addr)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgdp;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
if ((((long)addr) >> VA_BITS) != -1UL)
return 0;
- pgd = pgd_offset_k(addr);
- if (pgd_none(*pgd))
+ pgdp = pgd_offset_k(addr);
+ if (pgd_none(READ_ONCE(*pgdp)))
return 0;
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
return 0;
- if (pud_sect(*pud))
- return pfn_valid(pud_pfn(*pud));
+ if (pud_sect(pud))
+ return pfn_valid(pud_pfn(pud));
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_none(pmd))
return 0;
- if (pmd_sect(*pmd))
- return pfn_valid(pmd_pfn(*pmd));
+ if (pmd_sect(pmd))
+ return pfn_valid(pmd_pfn(pmd));
- pte = pte_offset_kernel(pmd, addr);
- if (pte_none(*pte))
+ ptep = pte_offset_kernel(pmdp, addr);
+ pte = READ_ONCE(*ptep);
+ if (pte_none(pte))
return 0;
- return pfn_valid(pte_pfn(*pte));
+ return pfn_valid(pte_pfn(pte));
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
@@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
{
unsigned long addr = start;
unsigned long next;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
do {
next = pmd_addr_end(addr, end);
- pgd = vmemmap_pgd_populate(addr, node);
- if (!pgd)
+ pgdp = vmemmap_pgd_populate(addr, node);
+ if (!pgdp)
return -ENOMEM;
- pud = vmemmap_pud_populate(pgd, addr, node);
- if (!pud)
+ pudp = vmemmap_pud_populate(pgdp, addr, node);
+ if (!pudp)
return -ENOMEM;
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
+ pmdp = pmd_offset(pudp, addr);
+ if (pmd_none(READ_ONCE(*pmdp))) {
void *p = NULL;
p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (!p)
return -ENOMEM;
- pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+ pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
} else
- vmemmap_verify((pte_t *)pmd, node, addr, next);
+ vmemmap_verify((pte_t *)pmdp, node, addr, next);
} while (addr = next, addr != end);
return 0;
@@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end,
static inline pud_t * fixmap_pud(unsigned long addr)
{
- pgd_t *pgd = pgd_offset_k(addr);
+ pgd_t *pgdp = pgd_offset_k(addr);
+ pgd_t pgd = READ_ONCE(*pgdp);
- BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+ BUG_ON(pgd_none(pgd) || pgd_bad(pgd));
- return pud_offset_kimg(pgd, addr);
+ return pud_offset_kimg(pgdp, addr);
}
static inline pmd_t * fixmap_pmd(unsigned long addr)
{
- pud_t *pud = fixmap_pud(addr);
+ pud_t *pudp = fixmap_pud(addr);
+ pud_t pud = READ_ONCE(*pudp);
- BUG_ON(pud_none(*pud) || pud_bad(*pud));
+ BUG_ON(pud_none(pud) || pud_bad(pud));
- return pmd_offset_kimg(pud, addr);
+ return pmd_offset_kimg(pudp, addr);
}
static inline pte_t * fixmap_pte(unsigned long addr)
@@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr)
*/
void __init early_fixmap_init(void)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ pgd_t *pgdp, pgd;
+ pud_t *pudp;
+ pmd_t *pmdp;
unsigned long addr = FIXADDR_START;
- pgd = pgd_offset_k(addr);
+ pgdp = pgd_offset_k(addr);
+ pgd = READ_ONCE(*pgdp);
if (CONFIG_PGTABLE_LEVELS > 3 &&
- !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
+ !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
/*
* We only end up here if the kernel mapping and the fixmap
* share the top level pgd entry, which should only happen on
* 16k/4 levels configurations.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
- pud = pud_offset_kimg(pgd, addr);
+ pudp = pud_offset_kimg(pgdp, addr);
} else {
- if (pgd_none(*pgd))
- __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
- pud = fixmap_pud(addr);
+ if (pgd_none(pgd))
+ __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
+ pudp = fixmap_pud(addr);
}
- if (pud_none(*pud))
- __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
- pmd = fixmap_pmd(addr);
- __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
+ if (pud_none(READ_ONCE(*pudp)))
+ __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
+ pmdp = fixmap_pmd(addr);
+ __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
/*
* The boot-ioremap range spans multiple pmds, for which
@@ -800,11 +814,11 @@ void __init early_fixmap_init(void)
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
- if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
- || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+ if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+ || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
- pr_warn("pmd %p != %p, %p\n",
- pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+ pr_warn("pmdp %p != %p, %p\n",
+ pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
@@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags)
{
unsigned long addr = __fix_to_virt(idx);
- pte_t *pte;
+ pte_t *ptep;
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
- pte = fixmap_pte(addr);
+ ptep = fixmap_pte(addr);
if (pgprot_val(flags)) {
- set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+ set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
} else {
- pte_clear(&init_mm, addr, pte);
+ pte_clear(&init_mm, addr, ptep);
flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
}
}
@@ -915,36 +929,46 @@ int __init arch_ioremap_pmd_supported(void)
return 1;
}
-int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+
+ /* ioremap_page_range doesn't honour BBM */
+ if (pud_present(READ_ONCE(*pudp)))
+ return 0;
+
BUG_ON(phys & ~PUD_MASK);
- set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
+ set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
return 1;
}
-int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+
+ /* ioremap_page_range doesn't honour BBM */
+ if (pmd_present(READ_ONCE(*pmdp)))
+ return 0;
+
BUG_ON(phys & ~PMD_MASK);
- set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+ set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
return 1;
}
-int pud_clear_huge(pud_t *pud)
+int pud_clear_huge(pud_t *pudp)
{
- if (!pud_sect(*pud))
+ if (!pud_sect(READ_ONCE(*pudp)))
return 0;
- pud_clear(pud);
+ pud_clear(pudp);
return 1;
}
-int pmd_clear_huge(pmd_t *pmd)
+int pmd_clear_huge(pmd_t *pmdp)
{
- if (!pmd_sect(*pmd))
+ if (!pmd_sect(READ_ONCE(*pmdp)))
return 0;
- pmd_clear(pmd);
+ pmd_clear(pmdp);
return 1;
}
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index a682a0a2a0fa..a56359373d8b 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
void *data)
{
struct page_change_data *cdata = data;
- pte_t pte = *ptep;
+ pte_t pte = READ_ONCE(*ptep);
pte = clear_pte_bit(pte, cdata->clear_mask);
pte = set_pte_bit(pte, cdata->set_mask);
@@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
*/
bool kernel_page_present(struct page *page)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgdp;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
- pgd = pgd_offset_k(addr);
- if (pgd_none(*pgd))
+ pgdp = pgd_offset_k(addr);
+ if (pgd_none(READ_ONCE(*pgdp)))
return false;
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ pudp = pud_offset(pgdp, addr);
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
return false;
- if (pud_sect(*pud))
+ if (pud_sect(pud))
return true;
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ pmdp = pmd_offset(pudp, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_none(pmd))
return false;
- if (pmd_sect(*pmd))
+ if (pmd_sect(pmd))
return true;
- pte = pte_offset_kernel(pmd, addr);
- return pte_valid(*pte);
+ ptep = pte_offset_kernel(pmdp, addr);
+ return pte_valid(READ_ONCE(*ptep));
}
#endif /* CONFIG_HIBERNATION */
#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 71baed7e592a..c0af47617299 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)
dc cvac, cur_\()\type\()p // Ensure any existing dirty
dmb sy // lines are written back before
ldr \type, [cur_\()\type\()p] // loading the entry
- tbz \type, #0, next_\()\type // Skip invalid entries
+ tbz \type, #0, skip_\()\type // Skip invalid and
+ tbnz \type, #11, skip_\()\type // non-global entries
.endm
.macro __idmap_kpti_put_pgtable_ent_ng, type
@@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings)
add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
do_pgd: __idmap_kpti_get_pgtable_ent pgd
tbnz pgd, #1, walk_puds
- __idmap_kpti_put_pgtable_ent_ng pgd
next_pgd:
+ __idmap_kpti_put_pgtable_ent_ng pgd
+skip_pgd:
add cur_pgdp, cur_pgdp, #8
cmp cur_pgdp, end_pgdp
b.ne do_pgd
@@ -294,8 +296,9 @@ walk_puds:
add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
do_pud: __idmap_kpti_get_pgtable_ent pud
tbnz pud, #1, walk_pmds
- __idmap_kpti_put_pgtable_ent_ng pud
next_pud:
+ __idmap_kpti_put_pgtable_ent_ng pud
+skip_pud:
add cur_pudp, cur_pudp, 8
cmp cur_pudp, end_pudp
b.ne do_pud
@@ -314,8 +317,9 @@ walk_pmds:
add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
do_pmd: __idmap_kpti_get_pgtable_ent pmd
tbnz pmd, #1, walk_ptes
- __idmap_kpti_put_pgtable_ent_ng pmd
next_pmd:
+ __idmap_kpti_put_pgtable_ent_ng pmd
+skip_pmd:
add cur_pmdp, cur_pmdp, #8
cmp cur_pmdp, end_pmdp
b.ne do_pmd
@@ -333,7 +337,7 @@ walk_ptes:
add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
do_pte: __idmap_kpti_get_pgtable_ent pte
__idmap_kpti_put_pgtable_ent_ng pte
-next_pte:
+skip_pte:
add cur_ptep, cur_ptep, #8
cmp cur_ptep, end_ptep
b.ne do_pte
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 1d4f1da7c58f..a93350451e8e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
off = offsetof(struct bpf_array, map.max_entries);
emit_a64_mov_i64(tmp, off, ctx);
emit(A64_LDR32(tmp, r2, tmp), ctx);
+ emit(A64_MOV(0, r3, r3), ctx);
emit(A64_CMP(0, r3, tmp), ctx);
- emit(A64_B_(A64_COND_GE, jmp_offset), ctx);
+ emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
@@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
*/
emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
emit(A64_CMP(1, tcc, tmp), ctx);
- emit(A64_B_(A64_COND_GT, jmp_offset), ctx);
+ emit(A64_B_(A64_COND_HI, jmp_offset), ctx);
emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
/* prog = array->ptrs[index];
diff --git a/arch/cris/include/arch-v10/arch/bug.h b/arch/cris/include/arch-v10/arch/bug.h
index 905afeacfedf..06da9d49152a 100644
--- a/arch/cris/include/arch-v10/arch/bug.h
+++ b/arch/cris/include/arch-v10/arch/bug.h
@@ -44,18 +44,25 @@ struct bug_frame {
* not be used like this with newer versions of gcc.
*/
#define BUG() \
+do { \
__asm__ __volatile__ ("clear.d [" __stringify(BUG_MAGIC) "]\n\t"\
"movu.w " __stringify(__LINE__) ",$r0\n\t"\
"jump 0f\n\t" \
".section .rodata\n" \
"0:\t.string \"" __FILE__ "\"\n\t" \
- ".previous")
+ ".previous"); \
+ unreachable(); \
+} while (0)
#endif
#else
/* This just causes an oops. */
-#define BUG() (*(int *)0 = 0)
+#define BUG() \
+do { \
+ barrier_before_unreachable(); \
+ __builtin_trap(); \
+} while (0)
#endif
diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h
index bd3eeb8d1cfa..66b37a532765 100644
--- a/arch/ia64/include/asm/bug.h
+++ b/arch/ia64/include/asm/bug.h
@@ -4,7 +4,11 @@
#ifdef CONFIG_BUG
#define ia64_abort() __builtin_trap()
-#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0)
+#define BUG() do { \
+ printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ barrier_before_unreachable(); \
+ ia64_abort(); \
+} while (0)
/* should this BUG be made generic? */
#define HAVE_ARCH_BUG
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 0b4c65a1af25..498f3da3f225 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),)
obj-y += esi_stub.o # must be in kernel proper
endif
obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
-obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_BINFMT_ELF) += elfcore.o
diff --git a/arch/m68k/include/asm/bug.h b/arch/m68k/include/asm/bug.h
index b7e2bf1ba4a6..275dca1435bf 100644
--- a/arch/m68k/include/asm/bug.h
+++ b/arch/m68k/include/asm/bug.h
@@ -8,16 +8,19 @@
#ifndef CONFIG_SUN3
#define BUG() do { \
pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#else
#define BUG() do { \
pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ barrier_before_unreachable(); \
panic("BUG!"); \
} while (0)
#endif
#else
#define BUG() do { \
+ barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#endif
diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile
index 1bd5c4f00d19..c22da16d67b8 100644
--- a/arch/mips/boot/Makefile
+++ b/arch/mips/boot/Makefile
@@ -126,6 +126,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS
quiet_cmd_cpp_its_S = ITS $@
cmd_cpp_its_S = $(CPP) $(cpp_flags) -P -C -o $@ $< \
+ -D__ASSEMBLY__ \
-DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \
-DVMLINUX_BINARY="\"$(3)\"" \
-DVMLINUX_COMPRESSION="\"$(2)\"" \
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 946681db8dc3..9a0fa66b81ac 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -86,7 +86,6 @@ struct compat_flock {
compat_off_t l_len;
s32 l_sysid;
compat_pid_t l_pid;
- short __unused;
s32 pad[4];
};
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 85bc601e9a0d..5f8b0a9e30b3 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -375,6 +375,7 @@ static void __init bootmem_init(void)
unsigned long reserved_end;
unsigned long mapstart = ~0UL;
unsigned long bootmap_size;
+ phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
bool bootmap_valid = false;
int i;
@@ -395,7 +396,8 @@ static void __init bootmem_init(void)
max_low_pfn = 0;
/*
- * Find the highest page frame number we have available.
+ * Find the highest page frame number we have available
+ * and the lowest used RAM address
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
@@ -407,6 +409,8 @@ static void __init bootmem_init(void)
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
+ ramstart = min(ramstart, boot_mem_map.map[i].addr);
+
#ifndef CONFIG_HIGHMEM
/*
* Skip highmem here so we get an accurate max_low_pfn if low
@@ -436,6 +440,13 @@ static void __init bootmem_init(void)
mapstart = max(reserved_end, start);
}
+ /*
+ * Reserve any memory between the start of RAM and PHYS_OFFSET
+ */
+ if (ramstart > PHYS_OFFSET)
+ add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
+ BOOT_MEM_RESERVED);
+
if (min_low_pfn >= max_low_pfn)
panic("Incorrect memory mapping !!!");
if (min_low_pfn > ARCH_PFN_OFFSET) {
@@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p)
add_memory_region(start, size, BOOT_MEM_RAM);
- if (start && start > PHYS_OFFSET)
- add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
- BOOT_MEM_RESERVED);
return 0;
}
early_param("mem", early_parse_mem);
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 87dcac2447c8..9d41732a9146 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void)
*/
}
-void __init bmips_cpu_setup(void)
+void bmips_cpu_setup(void)
{
void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
u32 __maybe_unused cfg;
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 511acfd7ab0d..535add3f7791 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,7 +52,7 @@
#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000)
-#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000)
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 593248110902..9f421641a35c 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -81,6 +81,9 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
{
return 0;
}
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index beea2182d754..0c0b66fc5bfb 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -384,7 +384,8 @@ static void *eeh_report_resume(void *data, void *userdata)
eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
- eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+ if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
+ eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
#endif
return NULL;
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index adf044daafd7..d22c41c26bb3 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -874,7 +874,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.mmu = 0,
.hash_ext = 0,
.radix_ext = 0,
- .byte22 = OV5_FEAT(OV5_DRC_INFO),
+ .byte22 = 0,
},
/* option vector 6: IBM PAPR hints */
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 5a8bfee6e187..04d0bbd7a1dd 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_create_file(s, &dev_attr_pir);
- if (cpu_has_feature(CPU_FTR_ARCH_206))
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
@@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_remove_file(s, &dev_attr_pir);
- if (cpu_has_feature(CPU_FTR_ARCH_206))
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index f0f5cd4d2fe7..f9818d7d3381 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -188,7 +188,7 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
if (!qpage) {
pr_err("Failed to allocate queue %d for VCPU %d\n",
prio, xc->server_num);
- return -ENOMEM;;
+ return -ENOMEM;
}
memset(qpage, 0, 1 << xive->q_order);
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 1604110c4238..3f1803672c9b 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -98,7 +98,7 @@ static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
- dr_cell->flags = cpu_to_be32(lmb->flags);
+ dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
}
static int drmem_update_dt_v2(struct device_node *memory,
@@ -121,7 +121,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
}
if (prev_lmb->aa_index != lmb->aa_index ||
- prev_lmb->flags != lmb->flags)
+ drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb))
lmb_sets++;
prev_lmb = lmb;
@@ -150,7 +150,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
}
if (prev_lmb->aa_index != lmb->aa_index ||
- prev_lmb->flags != lmb->flags) {
+ drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) {
/* end of one set, start of another */
dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
dr_cell++;
@@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
u32 i, n_lmbs;
n_lmbs = of_read_number(prop++, 1);
+ if (n_lmbs == 0)
+ return;
for (i = 0; i < n_lmbs; i++) {
read_drconf_v1_cell(&lmb, &prop);
@@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
u32 i, j, lmb_sets;
lmb_sets = of_read_number(prop++, 1);
+ if (lmb_sets == 0)
+ return;
for (i = 0; i < lmb_sets; i++) {
read_drconf_v2_cell(&dr_cell, &prop);
@@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
struct drmem_lmb *lmb;
drmem_info->n_lmbs = of_read_number(prop++, 1);
+ if (drmem_info->n_lmbs == 0)
+ return;
drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
GFP_KERNEL);
@@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
int lmb_index;
lmb_sets = of_read_number(prop++, 1);
+ if (lmb_sets == 0)
+ return;
/* first pass, calculate the number of LMBs */
p = prop;
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 872d1f6dd11e..a9636d8cba15 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -327,6 +327,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
break;
+ case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
+ PPC_LWZ_OFFS(r_A, r_skb, K);
+ break;
case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
break;
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index dd4c9b8b8a81..f6f55ab4980e 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)
const struct cpumask *l_cpumask;
get_online_cpus();
- for_each_online_node(nid) {
+ for_each_node_with_cpus(nid) {
l_cpumask = cpumask_of_node(nid);
- cpu = cpumask_first(l_cpumask);
+ cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ continue;
opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(cpu));
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 496e47696ed0..a6c92c78c9b2 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1854,7 +1854,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
s64 rc;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return -ENODEV;;
+ return -ENODEV;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (pe->tce_bypass_enabled) {
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 4fb21e17504a..092715b9674b 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -80,6 +80,10 @@ static void pnv_setup_rfi_flush(void)
if (np && of_property_read_bool(np, "disabled"))
enable--;
+ np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable = 0;
+
of_node_put(np);
of_node_put(fw_features);
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 372d7ada1a0c..1a527625acf7 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -482,7 +482,8 @@ static void pseries_setup_rfi_flush(void)
if (types == L1D_FLUSH_NONE)
types = L1D_FLUSH_FALLBACK;
- if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+ if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
+ (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
enable = false;
} else {
/* Default to fallback if case hcall is not available */
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index d9c4c9366049..091f1d0d0af1 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
if (rc) {
- pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+ pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
goto fail;
}
@@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
/* Configure and enable the queue in HW */
rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
if (rc) {
- pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+ target, prio);
rc = -EIO;
} else {
q->qpage = qpage;
@@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
if (IS_ERR(qpage))
return PTR_ERR(qpage);
- return xive_spapr_configure_queue(cpu, q, prio, qpage,
- xive_queue_shift);
+ return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+ q, prio, qpage, xive_queue_shift);
}
static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
struct xive_q *q = &xc->queue[prio];
unsigned int alloc_order;
long rc;
+ int hw_cpu = get_hard_smp_processor_id(cpu);
- rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+ rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
if (rc)
- pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+ pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+ hw_cpu, prio);
alloc_order = xive_alloc_order(xive_queue_shift);
free_pages((unsigned long)q->qpage, alloc_order);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b6722c246d9c..04807c7f64cc 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -8,7 +8,6 @@ config RISCV
select OF
select OF_EARLY_FLATTREE
select OF_IRQ
- select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_WANT_FRAME_POINTERS
select CLONE_BACKWARDS
select COMMON_CLK
@@ -20,7 +19,6 @@ config RISCV
select GENERIC_STRNLEN_USER
select GENERIC_SMP_IDLE_THREAD
select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
- select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_DMA_API_DEBUG
@@ -34,7 +32,6 @@ config RISCV
select HAVE_ARCH_TRACEHOOK
select MODULES_USE_ELF_RELA if MODULES
select THREAD_INFO_IN_TASK
- select RISCV_IRQ_INTC
select RISCV_TIMER
config MMU
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 87fc045be51f..56fa592cfa34 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -172,6 +172,9 @@ ENTRY(handle_exception)
move a1, sp /* pt_regs */
tail do_IRQ
1:
+ /* Exceptions run with interrupts enabled */
+ csrs sstatus, SR_SIE
+
/* Handle syscalls */
li t0, EXC_SYSCALL
beq s4, t0, handle_syscall
@@ -198,8 +201,6 @@ handle_syscall:
*/
addi s2, s2, 0x4
REG_S s2, PT_SEPC(sp)
- /* System calls run with interrupts enabled */
- csrs sstatus, SR_SIE
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_TRACE
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 226eeb190f90..6e07ed37bbff 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -64,7 +64,7 @@ ENTRY(_start)
/* Start the kernel */
mv a0, s0
mv a1, s1
- call sbi_save
+ call parse_dtb
tail start_kernel
relocate:
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 09f7064e898c..c11f40c1b2a8 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -144,7 +144,7 @@ asmlinkage void __init setup_vm(void)
#endif
}
-void __init sbi_save(unsigned int hartid, void *dtb)
+void __init parse_dtb(unsigned int hartid, void *dtb)
{
early_init_dt_scan(__va(dtb));
}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 6bf594ace663..8767e45f1b2b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -430,6 +430,8 @@ config SPARC_LEON
depends on SPARC32
select USB_EHCI_BIG_ENDIAN_MMIO
select USB_EHCI_BIG_ENDIAN_DESC
+ select USB_UHCI_BIG_ENDIAN_MMIO
+ select USB_UHCI_BIG_ENDIAN_DESC
---help---
If you say Y here if you are running on a SPARC-LEON processor.
The LEON processor is a synthesizable VHDL model of the
diff --git a/arch/sparc/include/asm/bug.h b/arch/sparc/include/asm/bug.h
index 6f17528356b2..ea53e418f6c0 100644
--- a/arch/sparc/include/asm/bug.h
+++ b/arch/sparc/include/asm/bug.h
@@ -9,10 +9,14 @@
void do_BUG(const char *file, int line);
#define BUG() do { \
do_BUG(__FILE__, __LINE__); \
+ barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#else
-#define BUG() __builtin_trap()
+#define BUG() do { \
+ barrier_before_unreachable(); \
+ __builtin_trap(); \
+} while (0)
#endif
#define HAVE_ARCH_BUG
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index aff152c87cf4..5a82bac5e0bc 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,6 +1,7 @@
boot/compressed/vmlinux
tools/test_get_len
tools/insn_sanity
+tools/insn_decoder_test
purgatory/kexec-purgatory.c
purgatory/purgatory.ro
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 09c599e0900d..18233e459bff 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -423,12 +423,6 @@ config X86_MPPARSE
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-config X86_BIGSMP
- bool "Support for big SMP systems with more than 8 CPUs"
- depends on X86_32 && SMP
- ---help---
- This option is needed for the systems that have more than 8 CPUs
-
config GOLDFISH
def_bool y
depends on X86_GOLDFISH
@@ -461,6 +455,12 @@ config INTEL_RDT
Say N if unsure.
if X86_32
+config X86_BIGSMP
+ bool "Support for big SMP systems with more than 8 CPUs"
+ depends on SMP
+ ---help---
+ This option is needed for the systems that have more than 8 CPUs
+
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
default y
@@ -950,25 +950,66 @@ config MAXSMP
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+# interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+ int
+ default NR_CPUS_RANGE_END if MAXSMP
+ default 1 if !SMP
+ default 2
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_32
+ default 64 if SMP && X86_BIGSMP
+ default 8 if SMP && !X86_BIGSMP
+ default 1 if !SMP
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_64
+ default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK)
+ default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_32
+ default 32 if X86_BIGSMP
+ default 8 if SMP
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_64
+ default 8192 if MAXSMP
+ default 64 if SMP
+ default 1 if !SMP
+
config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
- range 2 8 if SMP && X86_32 && !X86_BIGSMP
- range 2 64 if SMP && X86_32 && X86_BIGSMP
- range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
- range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
- default "1" if !SMP
- default "8192" if MAXSMP
- default "32" if SMP && X86_BIGSMP
- default "8" if SMP && X86_32
- default "64" if SMP
+ range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+ default NR_CPUS_DEFAULT
---help---
This allows you to specify the maximum number of CPUs which this
kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
supported value is 8192, otherwise the maximum value is 512. The
minimum value which makes sense is 2.
- This is purely to save memory - each supported CPU adds
- approximately eight kilobytes to the kernel image.
+ This is purely to save memory: each supported CPU adds about 8KB
+ to the kernel image.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
@@ -1364,7 +1405,7 @@ config HIGHMEM4G
config HIGHMEM64G
bool "64GB"
- depends on !M486
+ depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
select X86_PAE
---help---
Select this if you have a 32-bit processor and more than 4
@@ -1431,6 +1472,8 @@ config X86_PAE
config X86_5LEVEL
bool "Enable 5-level page tables support"
+ select DYNAMIC_MEMORY_LAYOUT
+ select SPARSEMEM_VMEMMAP
depends on X86_64
---help---
5-level paging enables access to larger address space:
@@ -1439,8 +1482,8 @@ config X86_5LEVEL
It will be supported by future Intel CPUs.
- Note: a kernel with this option enabled can only be booted
- on machines that support the feature.
+ A kernel with the option enabled can be booted on machines that
+ support 4- or 5-level paging.
See Documentation/x86/x86_64/5level-paging.txt for more
information.
@@ -2144,10 +2187,17 @@ config PHYSICAL_ALIGN
Don't change this unless you know what you are doing.
+config DYNAMIC_MEMORY_LAYOUT
+ bool
+ ---help---
+ This option makes base addresses of vmalloc and vmemmap as well as
+ __PAGE_OFFSET movable during boot.
+
config RANDOMIZE_MEMORY
bool "Randomize the kernel memory sections"
depends on X86_64
depends on RANDOMIZE_BASE
+ select DYNAMIC_MEMORY_LAYOUT
default RANDOMIZE_BASE
---help---
Randomizes the base virtual address of kernel memory sections
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 65a9a4716e34..8b8d2297d486 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -374,7 +374,7 @@ config X86_TSC
config X86_CMPXCHG64
def_bool y
- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
+ depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
# this should be set for all -march=.. options where the compiler
# generates cmov.
@@ -385,7 +385,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && X86_P6_NOP
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f25e1530e064..1f734cd98fd3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -78,7 +78,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
- vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
+ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 353e20c3f114..886a9115af62 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -439,7 +439,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
struct efi_uga_draw_protocol *uga = NULL, *first_uga;
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
- u32 *handles = (u32 *)uga_handle;;
+ u32 *handles = (u32 *)uga_handle;
efi_status_t status = EFI_INVALID_PARAMETER;
int i;
@@ -484,7 +484,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
struct efi_uga_draw_protocol *uga = NULL, *first_uga;
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
- u64 *handles = (u64 *)uga_handle;;
+ u64 *handles = (u64 *)uga_handle;
efi_status_t status = EFI_INVALID_PARAMETER;
int i;
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fc313e29fe2c..fca012baba19 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,7 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
+#include "pgtable.h"
/*
* Locally defined symbols should be marked hidden:
@@ -304,55 +305,77 @@ ENTRY(startup_64)
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp
-#ifdef CONFIG_X86_5LEVEL
/*
- * Check if we need to enable 5-level paging.
- * RSI holds real mode data and need to be preserved across
- * a function call.
+ * At this point we are in long mode with 4-level paging enabled,
+ * but we might want to enable 5-level paging or vice versa.
+ *
+ * The problem is that we cannot do it directly. Setting or clearing
+ * CR4.LA57 in long mode would trigger #GP. So we need to switch off
+ * long mode and paging first.
+ *
+ * We also need a trampoline in lower memory to switch over from
+ * 4- to 5-level paging for cases when the bootloader puts the kernel
+ * above 4G, but didn't enable 5-level paging for us.
+ *
+ * The same trampoline can be used to switch from 5- to 4-level paging
+ * mode, like when starting 4-level paging kernel via kexec() when
+ * original kernel worked in 5-level paging mode.
+ *
+ * For the trampoline, we need the top page table to reside in lower
+ * memory as we don't have a way to load 64-bit values into CR3 in
+ * 32-bit mode.
+ *
+ * We go though the trampoline even if we don't have to: if we're
+ * already in a desired paging mode. This way the trampoline code gets
+ * tested on every boot.
*/
- pushq %rsi
- call l5_paging_required
- popq %rsi
- /* If l5_paging_required() returned zero, we're done here. */
- cmpq $0, %rax
- je lvl5
+ /* Make sure we have GDT with 32-bit code segment */
+ leaq gdt(%rip), %rax
+ movq %rax, gdt64+2(%rip)
+ lgdt gdt64(%rip)
/*
- * At this point we are in long mode with 4-level paging enabled,
- * but we want to enable 5-level paging.
+ * paging_prepare() sets up the trampoline and checks if we need to
+ * enable 5-level paging.
*
- * The problem is that we cannot do it directly. Setting LA57 in
- * long mode would trigger #GP. So we need to switch off long mode
- * first.
+ * Address of the trampoline is returned in RAX.
+ * Non zero RDX on return means we need to enable 5-level paging.
*
- * NOTE: This is not going to work if bootloader put us above 4G
- * limit.
- *
- * The first step is go into compatibility mode.
+ * RSI holds real mode data and needs to be preserved across
+ * this function call.
*/
+ pushq %rsi
+ call paging_prepare
+ popq %rsi
- /* Clear additional page table */
- leaq lvl5_pgtable(%rbx), %rdi
- xorq %rax, %rax
- movq $(PAGE_SIZE/8), %rcx
- rep stosq
+ /* Save the trampoline address in RCX */
+ movq %rax, %rcx
/*
- * Setup current CR3 as the first and only entry in a new top level
- * page table.
+ * Load the address of trampoline_return() into RDI.
+ * It will be used by the trampoline to return to the main code.
*/
- movq %cr3, %rdi
- leaq 0x7 (%rdi), %rax
- movq %rax, lvl5_pgtable(%rbx)
+ leaq trampoline_return(%rip), %rdi
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
pushq $__KERNEL32_CS
- leaq compatible_mode(%rip), %rax
+ leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
pushq %rax
lretq
-lvl5:
-#endif
+trampoline_return:
+ /* Restore the stack, the 32-bit trampoline uses its own stack */
+ leaq boot_stack_end(%rbx), %rsp
+
+ /*
+ * cleanup_trampoline() would restore trampoline memory.
+ *
+ * RSI holds real mode data and needs to be preserved across
+ * this function call.
+ */
+ pushq %rsi
+ call cleanup_trampoline
+ popq %rsi
/* Zero EFLAGS */
pushq $0
@@ -490,46 +513,82 @@ relocated:
jmp *%rax
.code32
-#ifdef CONFIG_X86_5LEVEL
-compatible_mode:
- /* Setup data and stack segments */
+/*
+ * This is the 32-bit trampoline that will be copied over to low memory.
+ *
+ * RDI contains the return address (might be above 4G).
+ * ECX contains the base address of the trampoline memory.
+ * Non zero RDX on return means we need to enable 5-level paging.
+ */
+ENTRY(trampoline_32bit_src)
+ /* Set up data and stack segments */
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %ss
+ /* Set up new stack */
+ leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
+
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
- /* Point CR3 to 5-level paging */
- leal lvl5_pgtable(%ebx), %eax
- movl %eax, %cr3
+ /* Check what paging mode we want to be in after the trampoline */
+ cmpl $0, %edx
+ jz 1f
- /* Enable PAE and LA57 mode */
+ /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
+ movl %cr4, %eax
+ testl $X86_CR4_LA57, %eax
+ jnz 3f
+ jmp 2f
+1:
+ /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
movl %cr4, %eax
- orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
+ testl $X86_CR4_LA57, %eax
+ jz 3f
+2:
+ /* Point CR3 to the trampoline's new top level page table */
+ leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
+ movl %eax, %cr3
+3:
+ /* Enable PAE and LA57 (if required) paging modes */
+ movl $X86_CR4_PAE, %eax
+ cmpl $0, %edx
+ jz 1f
+ orl $X86_CR4_LA57, %eax
+1:
movl %eax, %cr4
- /* Calculate address we are running at */
- call 1f
-1: popl %edi
- subl $1b, %edi
+ /* Calculate address of paging_enabled() once we are executing in the trampoline */
+ leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
- /* Prepare stack for far return to Long Mode */
+ /* Prepare the stack for far return to Long Mode */
pushl $__KERNEL_CS
- leal lvl5(%edi), %eax
- push %eax
+ pushl %eax
- /* Enable paging back */
+ /* Enable paging again */
movl $(X86_CR0_PG | X86_CR0_PE), %eax
movl %eax, %cr0
lret
-#endif
+ .code64
+paging_enabled:
+ /* Return from the trampoline */
+ jmp *%rdi
+
+ /*
+ * The trampoline code has a size limit.
+ * Make sure we fail to compile if the trampoline code grows
+ * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
+ */
+ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
+
+ .code32
no_longmode:
- /* This isn't an x86-64 CPU so hang */
+ /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
hlt
jmp 1b
@@ -537,6 +596,11 @@ no_longmode:
#include "../../kernel/verify_cpu.S"
.data
+gdt64:
+ .word gdt_end - gdt
+ .long 0
+ .word 0
+ .quad 0
gdt:
.word gdt_end - gdt
.long gdt
@@ -585,7 +649,3 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
-#ifdef CONFIG_X86_5LEVEL
-lvl5_pgtable:
- .fill PAGE_SIZE, 1, 0
-#endif
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 8199a6187251..66e42a098d70 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -46,6 +46,12 @@
#define STATIC
#include <linux/decompress/mm.h>
+#ifdef CONFIG_X86_5LEVEL
+unsigned int pgtable_l5_enabled __ro_after_init;
+unsigned int pgdir_shift __ro_after_init = 39;
+unsigned int ptrs_per_p4d __ro_after_init = 1;
+#endif
+
extern unsigned long get_cmd_line_ptr(void);
/* Simplified build-specific string for starting entropy. */
@@ -723,6 +729,14 @@ void choose_random_location(unsigned long input,
return;
}
+#ifdef CONFIG_X86_5LEVEL
+ if (__read_cr4() & X86_CR4_LA57) {
+ pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
+ }
+#endif
+
boot_params->hdr.loadflags |= KASLR_FLAG;
/* Prepare to add new identity pagetables on demand. */
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/kaslr_64.c
index b5e5e02f8cde..b5e5e02f8cde 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/kaslr_64.c
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 98761a1576ce..8e4b55dd5df9 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -14,6 +14,7 @@
#include "misc.h"
#include "error.h"
+#include "pgtable.h"
#include "../string.h"
#include "../voffset.h"
@@ -169,16 +170,6 @@ void __puthex(unsigned long value)
}
}
-static bool l5_supported(void)
-{
- /* Check if leaf 7 is supported. */
- if (native_cpuid_eax(0) < 7)
- return 0;
-
- /* Check if la57 is supported. */
- return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
-}
-
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
@@ -372,12 +363,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
console_init();
debug_putstr("early console in extract_kernel\n");
- if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
- error("This linux kernel as configured requires 5-level paging\n"
- "This CPU does not support the required 'cr4.la57' feature\n"
- "Unable to boot - please use a kernel appropriate for your CPU\n");
- }
-
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
@@ -388,6 +373,11 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
debug_putaddr(output_len);
debug_putaddr(kernel_total_size);
+#ifdef CONFIG_X86_64
+ /* Report address of 32-bit trampoline */
+ debug_putaddr(trampoline_32bit);
+#endif
+
/*
* The memory hole needed for the kernel is the larger of either
* the entire decompressed kernel plus relocation table, or the
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 9d323dc6b159..4d369c308ed7 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -12,6 +12,11 @@
#undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_KASAN
+#ifdef CONFIG_X86_5LEVEL
+/* cpu_feature_enabled() cannot be used that early */
+#define pgtable_l5_enabled __pgtable_l5_enabled
+#endif
+
#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <linux/elf.h>
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
new file mode 100644
index 000000000000..91f75638f6e6
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -0,0 +1,20 @@
+#ifndef BOOT_COMPRESSED_PAGETABLE_H
+#define BOOT_COMPRESSED_PAGETABLE_H
+
+#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
+
+#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
+
+#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE 0x60
+
+#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
+
+#ifndef __ASSEMBLER__
+
+extern unsigned long *trampoline_32bit;
+
+extern void trampoline_32bit_src(void *return_ptr);
+
+#endif /* __ASSEMBLER__ */
+#endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index b4469a37e9a1..32af1cbcd903 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,4 +1,6 @@
#include <asm/processor.h>
+#include "pgtable.h"
+#include "../string.h"
/*
* __force_order is used by special_insns.h asm code to force instruction
@@ -9,20 +11,144 @@
*/
unsigned long __force_order;
-int l5_paging_required(void)
+#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
+#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
+
+struct paging_config {
+ unsigned long trampoline_start;
+ unsigned long l5_required;
+};
+
+/* Buffer to preserve trampoline memory */
+static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ *
+ * It must not be in BSS as BSS is cleared after cleanup_trampoline().
+ */
+static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
+
+/*
+ * Trampoline address will be printed by extract_kernel() for debugging
+ * purposes.
+ *
+ * Avoid putting the pointer into .bss as it will be cleared between
+ * paging_prepare() and extract_kernel().
+ */
+unsigned long *trampoline_32bit __section(.data);
+
+struct paging_config paging_prepare(void)
{
- /* Check if leaf 7 is supported. */
+ struct paging_config paging_config = {};
+ unsigned long bios_start, ebda_start;
+
+ /*
+ * Check if LA57 is desired and supported.
+ *
+ * There are two parts to the check:
+ * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
+ * - if the machine supports 5-level paging:
+ * + CPUID leaf 7 is supported
+ * + the leaf has the feature bit set
+ *
+ * That's substitute for boot_cpu_has() in early boot code.
+ */
+ if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
+ native_cpuid_eax(0) >= 7 &&
+ (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
+ paging_config.l5_required = 1;
+ }
+
+ /*
+ * Find a suitable spot for the trampoline.
+ * This code is based on reserve_bios_regions().
+ */
+
+ ebda_start = *(unsigned short *)0x40e << 4;
+ bios_start = *(unsigned short *)0x413 << 10;
- if (native_cpuid_eax(0) < 7)
- return 0;
+ if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
+ bios_start = BIOS_START_MAX;
+
+ if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
+ bios_start = ebda_start;
+
+ /* Place the trampoline just below the end of low memory, aligned to 4k */
+ paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE;
+ paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE);
+
+ trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
+
+ /* Preserve trampoline memory */
+ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
+
+ /* Clear trampoline memory first */
+ memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
+
+ /* Copy trampoline code in place */
+ memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+ &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
+
+ /*
+ * The code below prepares page table in trampoline memory.
+ *
+ * The new page table will be used by trampoline code for switching
+ * from 4- to 5-level paging or vice versa.
+ *
+ * If switching is not required, the page table is unused: trampoline
+ * code wouldn't touch CR3.
+ */
+
+ /*
+ * We are not going to use the page table in trampoline memory if we
+ * are already in the desired paging mode.
+ */
+ if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+ goto out;
+
+ if (paging_config.l5_required) {
+ /*
+ * For 4- to 5-level paging transition, set up current CR3 as
+ * the first and the only entry in a new top-level page table.
+ */
+ trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
+ } else {
+ unsigned long src;
+
+ /*
+ * For 5- to 4-level paging transition, copy page table pointed
+ * by first entry in the current top-level page table as our
+ * new top-level page table.
+ *
+ * We cannot just point to the page table from trampoline as it
+ * may be above 4G.
+ */
+ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
+ memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
+ (void *)src, PAGE_SIZE);
+ }
+
+out:
+ return paging_config;
+}
+
+void cleanup_trampoline(void)
+{
+ void *trampoline_pgtable;
- /* Check if la57 is supported. */
- if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
- return 0;
+ trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET;
- /* Check if 5-level paging has already been enabled. */
- if (native_read_cr4() & X86_CR4_LA57)
- return 0;
+ /*
+ * Move the top level page table out of trampoline memory,
+ * if it's there.
+ */
+ if ((void *)__native_read_cr3() == trampoline_pgtable) {
+ memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
+ native_write_cr3((unsigned long)top_pgtable);
+ }
- return 1;
+ /* Restore trampoline memory */
+ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
}
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d5c7f18f79ac..8a78030a82f2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -260,8 +260,13 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
* Change top bits to match most significant bit (47th or 56th bit
* depending on paging mode) in the address.
*/
+#ifdef CONFIG_X86_5LEVEL
+ ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
+ "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
+#else
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
+#endif
/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 11881726ed37..6609dd7289b5 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -31,6 +31,7 @@
#include <asm/mmu.h>
#include <asm/mpspec.h>
#include <asm/realmode.h>
+#include <asm/x86_init.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
@@ -133,6 +134,12 @@ static inline bool acpi_has_cpu_in_madt(void)
return !!acpi_lapic;
}
+#define ACPI_HAVE_ARCH_GET_ROOT_POINTER
+static inline u64 acpi_arch_get_root_pointer(void)
+{
+ return x86_init.acpi.get_root_pointer();
+}
+
#else /* !CONFIG_ACPI */
#define acpi_lapic 0
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 70eddb3922ff..736771c9822e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P1\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
- : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
- [bitnum] "i" (1 << (bit & 7)),
- [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
- : : t_yes, t_no);
- t_yes:
- return true;
- t_no:
- return false;
+ asm_volatile_goto("1: jmp 6f\n"
+ "2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+ "3:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 4f - .\n" /* repl offset */
+ " .word %P[always]\n" /* always replace */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 5f - 4f\n" /* repl len */
+ " .byte 3b - 2b\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "4: jmp %l[t_no]\n"
+ "5:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 0\n" /* no replacement */
+ " .word %P[feature]\n" /* feature bit */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 0\n" /* repl len */
+ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
+ : : [feature] "i" (bit),
+ [always] "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
+t_yes:
+ return true;
+t_no:
+ return false;
}
#define static_cpu_has(bit) \
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 460991e3b529..db7ba2feb947 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -5,10 +5,6 @@
unsigned long kaslr_get_random_long(const char *purpose);
#ifdef CONFIG_RANDOMIZE_MEMORY
-extern unsigned long page_offset_base;
-extern unsigned long vmalloc_base;
-extern unsigned long vmemmap_base;
-
void kernel_randomize_memory(void);
#else
static inline void kernel_randomize_memory(void) { }
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 22c5f3e6f820..8fe61ad21047 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -22,6 +22,7 @@
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern u64 sme_me_mask;
+extern bool sev_enabled;
void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
unsigned long decrypted_kernel_vaddr,
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index b7063cfa19f9..d0dabeae0505 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -275,4 +275,41 @@ do { \
} while (0)
#endif /* __ASSEMBLY__ */
+
+/*
+ * Below is used in the eBPF JIT compiler and emits the byte sequence
+ * for the following assembly:
+ *
+ * With retpolines configured:
+ *
+ * callq do_rop
+ * spec_trap:
+ * pause
+ * lfence
+ * jmp spec_trap
+ * do_rop:
+ * mov %rax,(%rsp)
+ * retq
+ *
+ * Without retpolines configured:
+ *
+ * jmp *%rax
+ */
+#ifdef CONFIG_RETPOLINE
+# define RETPOLINE_RAX_BPF_JIT_SIZE 17
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT1_off32(0xE8, 7); /* callq do_rop */ \
+ /* spec_trap: */ \
+ EMIT2(0xF3, 0x90); /* pause */ \
+ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+ /* do_rop: */ \
+ EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
+ EMIT1(0xC3); /* retq */
+#else
+# define RETPOLINE_RAX_BPF_JIT_SIZE 2
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT2(0xFF, 0xE0); /* jmp *%rax */
+#endif
+
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4baa6bceb232..e7f7a4f7375a 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -11,6 +11,10 @@
extern unsigned long max_pfn;
extern unsigned long phys_base;
+extern unsigned long page_offset_base;
+extern unsigned long vmalloc_base;
+extern unsigned long vmemmap_base;
+
static inline unsigned long __phys_addr_nodebug(unsigned long x)
{
unsigned long y = x - __START_KERNEL_map;
@@ -52,10 +56,6 @@ static inline void clear_page(void *page)
void copy_page(void *to, void *from);
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index e1407312c412..2c5a966dc222 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -37,26 +37,24 @@
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires.
*/
-#ifdef CONFIG_X86_5LEVEL
-#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL)
-#else
-#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
-#endif
+#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL)
+#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL)
-#ifdef CONFIG_RANDOMIZE_MEMORY
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base
#else
-#define __PAGE_OFFSET __PAGE_OFFSET_BASE
-#endif /* CONFIG_RANDOMIZE_MEMORY */
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
+#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#ifdef CONFIG_X86_5LEVEL
+
#define __PHYSICAL_MASK_SHIFT 52
-#define __VIRTUAL_MASK_SHIFT 56
+
+#ifdef CONFIG_X86_5LEVEL
+#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47)
#else
-#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 47
#endif
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c83a2f418cea..9be2bf13825b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -568,17 +568,22 @@ static inline p4dval_t p4d_val(p4d_t p4d)
return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d);
}
-static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- pgdval_t val = native_pgd_val(pgd);
-
- PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val);
+ PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd));
}
-static inline void pgd_clear(pgd_t *pgdp)
-{
- set_pgd(pgdp, __pgd(0));
-}
+#define set_pgd(pgdp, pgdval) do { \
+ if (pgtable_l5_enabled) \
+ __set_pgd(pgdp, pgdval); \
+ else \
+ set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \
+} while (0)
+
+#define pgd_clear(pgdp) do { \
+ if (pgtable_l5_enabled) \
+ set_pgd(pgdp, __pgd(0)); \
+} while (0)
#endif /* CONFIG_PGTABLE_LEVELS == 5 */
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index aff42e1da6ee..263c142a6a6c 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#if CONFIG_PGTABLE_LEVELS > 4
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
+ if (!pgtable_l5_enabled)
+ return;
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
}
@@ -191,7 +193,8 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
- ___p4d_free_tlb(tlb, p4d);
+ if (pgtable_l5_enabled)
+ ___p4d_free_tlb(tlb, p4d);
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 876b4c77d983..6a59a6d0cc50 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -44,5 +44,6 @@ typedef union {
*/
#define PTRS_PER_PTE 512
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index b444d83cfc95..89d5c8886c85 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags;
#ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
-#define pgd_clear(pgd) native_pgd_clear(pgd)
+#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0)
#endif
#ifndef set_p4d
@@ -859,6 +859,8 @@ static inline unsigned long p4d_index(unsigned long address)
#if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd)
{
+ if (!pgtable_l5_enabled)
+ return 1;
return pgd_flags(pgd) & _PAGE_PRESENT;
}
@@ -876,6 +878,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
/* to find an entry in a page-table-directory. */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
+ if (!pgtable_l5_enabled)
+ return (p4d_t *)pgd;
return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}
@@ -883,6 +887,9 @@ static inline int pgd_bad(pgd_t pgd)
{
unsigned long ignore_flags = _PAGE_USER;
+ if (!pgtable_l5_enabled)
+ return 0;
+
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
@@ -891,6 +898,8 @@ static inline int pgd_bad(pgd_t pgd)
static inline int pgd_none(pgd_t pgd)
{
+ if (!pgtable_l5_enabled)
+ return 0;
/*
* There is no need to do a workaround for the KNL stray
* A/D bit erratum here. PGDs only point to page tables
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index b3ec519e3982..88a056b01db4 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -34,6 +34,8 @@ static inline void check_pgt_cache(void) { }
void paging_init(void);
void sync_initial_page_table(void);
+static inline int pgd_large(pgd_t pgd) { return 0; }
+
/*
* Define this if things work differently on an i386 and an i486:
* it will (on an i486) warn about kernel memory accesses that are
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 0777e18a1d23..e3225e83db7d 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -15,6 +15,8 @@
# include <asm/pgtable-2level_types.h>
#endif
+#define pgtable_l5_enabled 0
+
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 1149d2112b2e..877bc27718ae 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -218,29 +218,26 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
-#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
- p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
-#else
- *p4dp = p4d;
-#endif
+ pgd_t pgd;
+
+ if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+ *p4dp = p4d;
+ return;
+ }
+
+ pgd = native_make_pgd(native_p4d_val(p4d));
+ pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd);
+ *p4dp = native_make_p4d(native_pgd_val(pgd));
}
static inline void native_p4d_clear(p4d_t *p4d)
{
-#ifdef CONFIG_X86_5LEVEL
native_set_p4d(p4d, native_make_p4d(0));
-#else
- native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
-#endif
}
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
*pgdp = pti_set_user_pgd(pgdp, pgd);
-#else
- *pgdp = pgd;
-#endif
}
static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6b8f73dcbc2c..d5c21a382475 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -20,6 +20,18 @@ typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t;
+#ifdef CONFIG_X86_5LEVEL
+extern unsigned int __pgtable_l5_enabled;
+#ifndef pgtable_l5_enabled
+#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
+#endif
+#else
+#define pgtable_l5_enabled 0
+#endif
+
+extern unsigned int pgdir_shift;
+extern unsigned int ptrs_per_p4d;
+
#endif /* !__ASSEMBLY__ */
#define SHARED_KERNEL_PMD 0
@@ -29,24 +41,28 @@ typedef struct { pteval_t pte; } pte_t;
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT 48
+#define PGDIR_SHIFT pgdir_shift
#define PTRS_PER_PGD 512
/*
* 4th level page in 5-level paging case
*/
-#define P4D_SHIFT 39
-#define PTRS_PER_P4D 512
-#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
-#define P4D_MASK (~(P4D_SIZE - 1))
+#define P4D_SHIFT 39
+#define MAX_PTRS_PER_P4D 512
+#define PTRS_PER_P4D ptrs_per_p4d
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE - 1))
+
+#define MAX_POSSIBLE_PHYSMEM_BITS 52
#else /* CONFIG_X86_5LEVEL */
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT 39
-#define PTRS_PER_PGD 512
+#define PGDIR_SHIFT 39
+#define PTRS_PER_PGD 512
+#define MAX_PTRS_PER_P4D 1
#endif /* CONFIG_X86_5LEVEL */
@@ -82,31 +98,33 @@ typedef struct { pteval_t pte; } pte_t;
* range must not overlap with anything except the KASAN shadow area, which
* is correct as KASAN disables KASLR.
*/
-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define MAXMEM (1UL << MAX_PHYSMEM_BITS)
-#ifdef CONFIG_X86_5LEVEL
-# define VMALLOC_SIZE_TB _AC(12800, UL)
-# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
-# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
-# define LDT_PGD_ENTRY _AC(-112, UL)
-# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
-#else
-# define VMALLOC_SIZE_TB _AC(32, UL)
-# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
-# define LDT_PGD_ENTRY _AC(-3, UL)
-# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
-#endif
+#define LDT_PGD_ENTRY_L4 -3UL
+#define LDT_PGD_ENTRY_L5 -112UL
+#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
+#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
+
+#define __VMALLOC_BASE_L4 0xffffc90000000000
+#define __VMALLOC_BASE_L5 0xffa0000000000000
+
+#define VMALLOC_SIZE_TB_L4 32UL
+#define VMALLOC_SIZE_TB_L5 12800UL
+
+#define __VMEMMAP_BASE_L4 0xffffea0000000000
+#define __VMEMMAP_BASE_L5 0xffd4000000000000
-#ifdef CONFIG_RANDOMIZE_MEMORY
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
+# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMEMMAP_START vmemmap_base
#else
-# define VMALLOC_START __VMALLOC_BASE
-# define VMEMMAP_START __VMEMMAP_BASE
-#endif /* CONFIG_RANDOMIZE_MEMORY */
+# define VMALLOC_START __VMALLOC_BASE_L4
+# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4
+# define VMEMMAP_START __VMEMMAP_BASE_L4
+#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
-#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fb3a6de7440b..6847d85400a8 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -53,12 +53,6 @@
# define NEED_MOVBE 0
#endif
-#ifdef CONFIG_X86_5LEVEL
-# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31))
-#else
-# define NEED_LA57 0
-#endif
-
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
@@ -104,7 +98,7 @@
#define REQUIRED_MASK13 0
#define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 (NEED_LA57)
+#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 461f53d27708..a4189762b266 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
+void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 4fc1e9d3c43e..4617a2bf123c 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,13 +27,8 @@
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-# ifdef CONFIG_X86_5LEVEL
-# define MAX_PHYSADDR_BITS 52
-# define MAX_PHYSMEM_BITS 52
-# else
-# define MAX_PHYSADDR_BITS 44
-# define MAX_PHYSMEM_BITS 46
-# endif
+# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44)
+# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46)
#endif
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index fc2f082ac635..2e2c34d2bb00 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -131,6 +131,14 @@ struct x86_hyper_init {
};
/**
+ * struct x86_init_acpi - x86 ACPI init functions
+ * @get_root_pointer: get RSDP address
+ */
+struct x86_init_acpi {
+ u64 (*get_root_pointer)(void);
+};
+
+/**
* struct x86_init_ops - functions for platform specific setup
*
*/
@@ -144,6 +152,7 @@ struct x86_init_ops {
struct x86_init_iommu iommu;
struct x86_init_pci pci;
struct x86_hyper_init hyper;
+ struct x86_init_acpi acpi;
};
/**
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 197c2e6c7376..099414345865 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -241,24 +241,24 @@
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
struct hv_reenlightenment_control {
- u64 vector:8;
- u64 reserved1:8;
- u64 enabled:1;
- u64 reserved2:15;
- u64 target_vp:32;
+ __u64 vector:8;
+ __u64 reserved1:8;
+ __u64 enabled:1;
+ __u64 reserved2:15;
+ __u64 target_vp:32;
};
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
struct hv_tsc_emulation_control {
- u64 enabled:1;
- u64 reserved:63;
+ __u64 enabled:1;
+ __u64 reserved:63;
};
struct hv_tsc_emulation_status {
- u64 inprogress:1;
- u64 reserved:63;
+ __u64 inprogress:1;
+ __u64 reserved:63;
};
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3cc471beb50b..bb6f7a2148d7 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
struct irq_desc *desc = irq_data_to_desc(irqd);
+ bool managed = irqd_affinity_is_managed(irqd);
lockdep_assert_held(&vector_lock);
trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
apicd->cpu);
- /* Setup the vector move, if required */
- if (apicd->vector && cpu_online(apicd->cpu)) {
+ /*
+ * If there is no vector associated or if the associated vector is
+ * the shutdown vector, which is associated to make PCI/MSI
+ * shutdown mode work, then there is nothing to release. Clear out
+ * prev_vector for this and the offlined target case.
+ */
+ apicd->prev_vector = 0;
+ if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR)
+ goto setnew;
+ /*
+ * If the target CPU of the previous vector is online, then mark
+ * the vector as move in progress and store it for cleanup when the
+ * first interrupt on the new vector arrives. If the target CPU is
+ * offline then the regular release mechanism via the cleanup
+ * vector is not possible and the vector can be immediately freed
+ * in the underlying matrix allocator.
+ */
+ if (cpu_online(apicd->cpu)) {
apicd->move_in_progress = true;
apicd->prev_vector = apicd->vector;
apicd->prev_cpu = apicd->cpu;
} else {
- apicd->prev_vector = 0;
+ irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
+ managed);
}
+setnew:
apicd->vector = newvec;
apicd->cpu = newcpu;
BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 46b675aaf20b..f11910b44638 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
uv_gre_table = gre;
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ unsigned long size = ((unsigned long)(gre->limit - lgre)
+ << UV_GAM_RANGE_SHFT);
+ int order = 0;
+ char suffix[] = " KMGTPE";
+
+ while (size > 9999 && order < sizeof(suffix)) {
+ size /= 1024;
+ order++;
+ }
+
if (!index) {
pr_info("UV: GAM Range Table...\n");
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
}
- pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
+ pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
- ((unsigned long)(gre->limit - lgre)) >>
- (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+ size, suffix[order],
gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index bdab7d2f51af..fca759d272a1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
goto out_common_fail;
}
closid = ret;
+ ret = 0;
rdtgrp->closid = closid;
list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index aa0d5df9dc60..e956eb267061 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
extern struct mca_config mca_cfg;
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
+ * are only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ * recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3a8e88a611eb..3d8c573a9a27 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -105,6 +105,10 @@ static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -234,7 +238,7 @@ static void __print_mce(struct mce *m)
m->cs, m->ip);
if (m->cs == __KERNEL_CS)
- pr_cont("{%pS}", (void *)m->ip);
+ pr_cont("{%pS}", (void *)(unsigned long)m->ip);
pr_cont("\n");
}
@@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
- memory_failure(pfn, 0);
+ if (!memory_failure(pfn, 0))
+ mce_unmap_kpfn(pfn);
}
return NOTIFY_OK;
@@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m)
ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
if (ret)
pr_err("Memory error not recovered");
+ else
+ mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
return ret;
}
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
{
unsigned long decoy_addr;
@@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn)
* We would like to just call:
* set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
* but doing that would radically increase the odds of a
- * speculative access to the posion page because we'd have
+ * speculative access to the poison page because we'd have
* the virtual address of the kernel 1:1 mapping sitting
* around in registers.
* Instead we get tricky. We create a non-canonical address
@@ -1082,23 +1088,10 @@ void arch_unmap_kpfn(unsigned long pfn)
* a legal address.
*/
-/*
- * Build time check to see if we have a spare virtual bit. Don't want
- * to leave this until run time because most developers don't have a
- * system that can exercise this code path. This will only become a
- * problem if/when we move beyond 5-level page tables.
- *
- * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
- */
-#if PGDIR_SHIFT + 9 < 63
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
-#else
-#error "no unused virtual bit available"
-#endif
if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
}
#endif
@@ -2328,6 +2321,12 @@ static __init int mcheck_init_device(void)
{
int err;
+ /*
+ * Check if we have a spare virtual bit. This will only become
+ * a problem if/when we move beyond 5-level page tables.
+ */
+ MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
+
if (!mce_available(&boot_cpu_data)) {
err = -EIO;
goto err_out;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 71c11ad5643e..6a2cb1442e05 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -924,6 +924,24 @@ static int __init parse_memmap_one(char *p)
} else if (*p == '!') {
start_at = memparse(p+1, &p);
e820__range_add(start_at, mem_size, E820_TYPE_PRAM);
+ } else if (*p == '%') {
+ enum e820_type from = 0, to = 0;
+
+ start_at = memparse(p + 1, &p);
+ if (*p == '-')
+ from = simple_strtoull(p + 1, &p, 0);
+ if (*p == '+')
+ to = simple_strtoull(p + 1, &p, 0);
+ if (*p != '\0')
+ return -EINVAL;
+ if (from && to)
+ e820__range_update(start_at, mem_size, from, to);
+ else if (to)
+ e820__range_add(start_at, mem_size, to);
+ else if (from)
+ e820__range_remove(start_at, mem_size, from, 1);
+ else
+ e820__range_remove(start_at, mem_size, 0, 0);
} else {
e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7ba5d819ebe3..0c855deee165 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -32,6 +32,11 @@
#include <asm/microcode.h>
#include <asm/kasan.h>
+#ifdef CONFIG_X86_5LEVEL
+#undef pgtable_l5_enabled
+#define pgtable_l5_enabled __pgtable_l5_enabled
+#endif
+
/*
* Manage page tables very early on.
*/
@@ -39,6 +44,24 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
+#ifdef CONFIG_X86_5LEVEL
+unsigned int __pgtable_l5_enabled __ro_after_init;
+EXPORT_SYMBOL(__pgtable_l5_enabled);
+unsigned int pgdir_shift __ro_after_init = 39;
+EXPORT_SYMBOL(pgdir_shift);
+unsigned int ptrs_per_p4d __ro_after_init = 1;
+EXPORT_SYMBOL(ptrs_per_p4d);
+#endif
+
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
+unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
+EXPORT_SYMBOL(page_offset_base);
+unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
+EXPORT_SYMBOL(vmalloc_base);
+unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
+EXPORT_SYMBOL(vmemmap_base);
+#endif
+
#define __head __section(.head.text)
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
@@ -46,6 +69,41 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
return ptr - (void *)_text + (void *)physaddr;
}
+static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
+{
+ return fixup_pointer(ptr, physaddr);
+}
+
+#ifdef CONFIG_X86_5LEVEL
+static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
+{
+ return fixup_pointer(ptr, physaddr);
+}
+
+static bool __head check_la57_support(unsigned long physaddr)
+{
+ if (native_cpuid_eax(0) < 7)
+ return false;
+
+ if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ return false;
+
+ *fixup_int(&pgtable_l5_enabled, physaddr) = 1;
+ *fixup_int(&pgdir_shift, physaddr) = 48;
+ *fixup_int(&ptrs_per_p4d, physaddr) = 512;
+ *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
+ *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5;
+ *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5;
+
+ return true;
+}
+#else
+static bool __head check_la57_support(unsigned long physaddr)
+{
+ return false;
+}
+#endif
+
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
@@ -55,9 +113,12 @@ unsigned long __head __startup_64(unsigned long physaddr,
p4dval_t *p4d;
pudval_t *pud;
pmdval_t *pmd, pmd_entry;
+ bool la57;
int i;
unsigned int *next_pgt_ptr;
+ la57 = check_la57_support(physaddr);
+
/* Is the address too large? */
if (physaddr >> MAX_PHYSMEM_BITS)
for (;;);
@@ -81,9 +142,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Fixup the physical addresses in the page table */
pgd = fixup_pointer(&early_top_pgt, physaddr);
- pgd[pgd_index(__START_KERNEL_map)] += load_delta;
-
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p = pgd + pgd_index(__START_KERNEL_map);
+ if (la57)
+ *p = (unsigned long)level4_kernel_pgt;
+ else
+ *p = (unsigned long)level3_kernel_pgt;
+ *p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta;
+
+ if (la57) {
p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
p4d[511] += load_delta;
}
@@ -108,7 +174,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (la57) {
p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
@@ -154,8 +220,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
* Fixup phys_base - remove the memory encryption mask to obtain
* the true physical address.
*/
- p = fixup_pointer(&phys_base, physaddr);
- *p += load_delta - sme_get_me_mask();
+ *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
@@ -206,7 +271,7 @@ again:
* critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever...
*/
- if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (!pgtable_l5_enabled)
p4d_p = pgd_p;
else if (pgd)
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
@@ -322,7 +387,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0f545b3cf926..48385c1074a5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -39,12 +39,12 @@
*
*/
+#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
-PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
-PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
-#endif
+L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
+L4_START_KERNEL = l4_index(__START_KERNEL_map)
+
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
@@ -125,7 +125,10 @@ ENTRY(secondary_startup_64)
/* Enable PAE mode, PGE and LA57 */
movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
#ifdef CONFIG_X86_5LEVEL
+ testl $1, __pgtable_l5_enabled(%rip)
+ jz 1f
orl $X86_CR4_LA57, %ecx
+1:
#endif
movq %rcx, %cr4
@@ -374,12 +377,7 @@ GLOBAL(name)
__INITDATA
NEXT_PGD_PAGE(early_top_pgt)
- .fill 511,8,0
-#ifdef CONFIG_X86_5LEVEL
- .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
-#else
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
-#endif
+ .fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts)
@@ -390,9 +388,9 @@ NEXT_PAGE(early_dynamic_pgts)
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PGD_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
- .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
+ .org init_top_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
- .org init_top_pgt + PGD_START_KERNEL*8, 0
+ .org init_top_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 1f790cf9d38f..02f913cb27b5 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -350,6 +350,7 @@ void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_top_pgt);
+ VMCOREINFO_NUMBER(pgtable_l5_enabled);
#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
@@ -542,6 +543,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
value -= (u64)address;
*(u32 *)location = value;
break;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index da0c160e5589..f58336af095c 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
if (*(u32 *)loc != 0)
goto invalid_relocation;
val -= (u64)loc;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4c616be28506..6285697b6e56 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -189,9 +189,7 @@ struct ist_info ist_info;
#endif
#else
-struct cpuinfo_x86 boot_cpu_data __read_mostly = {
- .x86_phys_bits = MAX_PHYSMEM_BITS,
-};
+struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);
#endif
@@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p)
__flush_tlb_all();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
+ boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS;
#endif
/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f27facbaa9b..ff99e2b6fc54 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1281,11 +1281,10 @@ void __init native_smp_prepare_boot_cpu(void)
cpu_set_state_online(me);
}
-void __init native_smp_cpus_done(unsigned int max_cpus)
+void __init calculate_max_logical_packages(void)
{
int ncpus;
- pr_debug("Boot done\n");
/*
* Today neither Intel nor AMD support heterogenous systems so
* extrapolate the boot cpu's data to all packages.
@@ -1293,6 +1292,13 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
pr_info("Max logical packages: %u\n", __max_logical_packages);
+}
+
+void __init native_smp_cpus_done(unsigned int max_cpus)
+{
+ pr_debug("Boot done\n");
+
+ calculate_max_logical_packages();
if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology);
@@ -1430,8 +1436,8 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
- c->phys_proc_id = 0;
c->cpu_core_id = 0;
+ c->booted_cores = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
recompute_smt_state();
}
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 1151ccd72ce9..b8cff22a8785 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,10 +26,11 @@
void x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
-int __init iommu_init_noop(void) { return 0; }
-void iommu_shutdown_noop(void) { }
-bool __init bool_x86_init_noop(void) { return false; }
-void x86_op_int_noop(int cpu) { }
+static int __init iommu_init_noop(void) { return 0; }
+static void iommu_shutdown_noop(void) { }
+static bool __init bool_x86_init_noop(void) { return false; }
+static void x86_op_int_noop(int cpu) { }
+static u64 u64_x86_init_noop(void) { return 0; }
/*
* The platform setup functions are preset with the default functions
@@ -91,6 +92,10 @@ struct x86_init_ops x86_init __initdata = {
.x2apic_available = bool_x86_init_noop,
.init_mem_mapping = x86_init_noop,
},
+
+ .acpi = {
+ .get_root_pointer = u64_x86_init_noop,
+ },
};
struct x86_cpuinit_ops x86_cpuinit = {
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 7b881d03d0dd..3cdf06128d13 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -7,6 +7,7 @@ asmlinkage void just_return_func(void);
asm(
".type just_return_func, @function\n"
+ ".globl just_return_func\n"
"just_return_func:\n"
" ret\n"
".size just_return_func, .-just_return_func\n"
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 27e9e90a8d35..4b101dd6e52f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,12 +1,15 @@
# SPDX-License-Identifier: GPL-2.0
-# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c
-KCOV_INSTRUMENT_tlb.o := n
-KCOV_INSTRUMENT_mem_encrypt.o := n
+# Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c
+KCOV_INSTRUMENT_tlb.o := n
+KCOV_INSTRUMENT_mem_encrypt.o := n
+KCOV_INSTRUMENT_mem_encrypt_identity.o := n
-KASAN_SANITIZE_mem_encrypt.o := n
+KASAN_SANITIZE_mem_encrypt.o := n
+KASAN_SANITIZE_mem_encrypt_identity.o := n
ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_mem_encrypt.o = -pg
+CFLAGS_REMOVE_mem_encrypt.o = -pg
+CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
@@ -16,6 +19,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_physaddr.o := $(nostackp)
CFLAGS_setup_nx.o := $(nostackp)
+CFLAGS_mem_encrypt_identity.o := $(nostackp)
CFLAGS_fault.o := -I$(src)/../include/asm/trace
@@ -47,4 +51,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index 421f2664ffa0..51a6f92da2bf 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -72,6 +72,31 @@ static const struct file_operations ptdump_curusr_fops = {
};
#endif
+#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
+extern pgd_t *efi_pgd;
+static struct dentry *pe_efi;
+
+static int ptdump_show_efi(struct seq_file *m, void *v)
+{
+ if (efi_pgd)
+ ptdump_walk_pgd_level_debugfs(m, efi_pgd, false);
+ return 0;
+}
+
+static int ptdump_open_efi(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show_efi, NULL);
+}
+
+static const struct file_operations ptdump_efi_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open_efi,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
static struct dentry *dir, *pe_knl, *pe_curknl;
static int __init pt_dump_debug_init(void)
@@ -96,6 +121,13 @@ static int __init pt_dump_debug_init(void)
if (!pe_curusr)
goto err;
#endif
+
+#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
+ pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
+ if (!pe_efi)
+ goto err;
+#endif
+
return 0;
err:
debugfs_remove_recursive(dir);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 2a4849e92831..62a7e9f65dec 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -29,6 +29,7 @@
struct pg_state {
int level;
pgprot_t current_prot;
+ pgprotval_t effective_prot;
unsigned long start_address;
unsigned long current_address;
const struct addr_marker *marker;
@@ -85,11 +86,15 @@ static struct addr_marker address_markers[] = {
[VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
[VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
#ifdef CONFIG_KASAN
- [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
- [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
+ /*
+ * These fields get initialized with the (dynamic)
+ * KASAN_SHADOW_{START,END} values in pt_dump_init().
+ */
+ [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" },
+ [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
+ [LDT_NR] = { 0UL, "LDT remap" },
#endif
[CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
#ifdef CONFIG_X86_ESPFIX64
@@ -231,9 +236,9 @@ static unsigned long normalize_addr(unsigned long u)
* print what we collected so far.
*/
static void note_page(struct seq_file *m, struct pg_state *st,
- pgprot_t new_prot, int level)
+ pgprot_t new_prot, pgprotval_t new_eff, int level)
{
- pgprotval_t prot, cur;
+ pgprotval_t prot, cur, eff;
static const char units[] = "BKMGTPE";
/*
@@ -243,23 +248,24 @@ static void note_page(struct seq_file *m, struct pg_state *st,
*/
prot = pgprot_val(new_prot);
cur = pgprot_val(st->current_prot);
+ eff = st->effective_prot;
if (!st->level) {
/* First entry */
st->current_prot = new_prot;
+ st->effective_prot = new_eff;
st->level = level;
st->marker = address_markers;
st->lines = 0;
pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
st->marker->name);
- } else if (prot != cur || level != st->level ||
+ } else if (prot != cur || new_eff != eff || level != st->level ||
st->current_address >= st->marker[1].start_address) {
const char *unit = units;
unsigned long delta;
int width = sizeof(unsigned long) * 2;
- pgprotval_t pr = pgprot_val(st->current_prot);
- if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
+ if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) {
WARN_ONCE(1,
"x86/mm: Found insecure W+X mapping at address %p/%pS\n",
(void *)st->start_address,
@@ -313,21 +319,30 @@ static void note_page(struct seq_file *m, struct pg_state *st,
st->start_address = st->current_address;
st->current_prot = new_prot;
+ st->effective_prot = new_eff;
st->level = level;
}
}
-static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P)
+static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+{
+ return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
+ ((prot1 | prot2) & _PAGE_NX);
+}
+
+static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
+ pgprotval_t eff_in, unsigned long P)
{
int i;
pte_t *start;
- pgprotval_t prot;
+ pgprotval_t prot, eff;
start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
prot = pte_flags(*start);
+ eff = effective_prot(eff_in, prot);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
- note_page(m, st, __pgprot(prot), 5);
+ note_page(m, st, __pgprot(prot), eff, 5);
start++;
}
}
@@ -344,12 +359,10 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
void *pt)
{
if (__pa(pt) == __pa(kasan_zero_pmd) ||
-#ifdef CONFIG_X86_5LEVEL
- __pa(pt) == __pa(kasan_zero_p4d) ||
-#endif
+ (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) ||
__pa(pt) == __pa(kasan_zero_pud)) {
pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
- note_page(m, st, __pgprot(prot), 5);
+ note_page(m, st, __pgprot(prot), 0, 5);
return true;
}
return false;
@@ -364,42 +377,45 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
#if PTRS_PER_PMD > 1
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
+ pgprotval_t eff_in, unsigned long P)
{
int i;
pmd_t *start, *pmd_start;
- pgprotval_t prot;
+ pgprotval_t prot, eff;
pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
+ prot = pmd_flags(*start);
+ eff = effective_prot(eff_in, prot);
if (pmd_large(*start) || !pmd_present(*start)) {
- prot = pmd_flags(*start);
- note_page(m, st, __pgprot(prot), 4);
+ note_page(m, st, __pgprot(prot), eff, 4);
} else if (!kasan_page_table(m, st, pmd_start)) {
- walk_pte_level(m, st, *start,
+ walk_pte_level(m, st, *start, eff,
P + i * PMD_LEVEL_MULT);
}
} else
- note_page(m, st, __pgprot(0), 4);
+ note_page(m, st, __pgprot(0), 0, 4);
start++;
}
}
#else
-#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
+#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
#endif
#if PTRS_PER_PUD > 1
-static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
+static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
+ pgprotval_t eff_in, unsigned long P)
{
int i;
pud_t *start, *pud_start;
- pgprotval_t prot;
+ pgprotval_t prot, eff;
pud_t *prev_pud = NULL;
pud_start = start = (pud_t *)p4d_page_vaddr(addr);
@@ -407,15 +423,16 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!pud_none(*start)) {
+ prot = pud_flags(*start);
+ eff = effective_prot(eff_in, prot);
if (pud_large(*start) || !pud_present(*start)) {
- prot = pud_flags(*start);
- note_page(m, st, __pgprot(prot), 3);
+ note_page(m, st, __pgprot(prot), eff, 3);
} else if (!kasan_page_table(m, st, pud_start)) {
- walk_pmd_level(m, st, *start,
+ walk_pmd_level(m, st, *start, eff,
P + i * PUD_LEVEL_MULT);
}
} else
- note_page(m, st, __pgprot(0), 3);
+ note_page(m, st, __pgprot(0), 0, 3);
prev_pud = start;
start++;
@@ -423,43 +440,43 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
}
#else
-#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p)
+#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p)
#define p4d_large(a) pud_large(__pud(p4d_val(a)))
#define p4d_none(a) pud_none(__pud(p4d_val(a)))
#endif
-#if PTRS_PER_P4D > 1
-
-static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
+ pgprotval_t eff_in, unsigned long P)
{
int i;
p4d_t *start, *p4d_start;
- pgprotval_t prot;
+ pgprotval_t prot, eff;
+
+ if (PTRS_PER_P4D == 1)
+ return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P);
p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_P4D; i++) {
st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
if (!p4d_none(*start)) {
+ prot = p4d_flags(*start);
+ eff = effective_prot(eff_in, prot);
if (p4d_large(*start) || !p4d_present(*start)) {
- prot = p4d_flags(*start);
- note_page(m, st, __pgprot(prot), 2);
+ note_page(m, st, __pgprot(prot), eff, 2);
} else if (!kasan_page_table(m, st, p4d_start)) {
- walk_pud_level(m, st, *start,
+ walk_pud_level(m, st, *start, eff,
P + i * P4D_LEVEL_MULT);
}
} else
- note_page(m, st, __pgprot(0), 2);
+ note_page(m, st, __pgprot(0), 0, 2);
start++;
}
}
-#else
-#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
-#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
-#define pgd_none(a) p4d_none(__p4d(pgd_val(a)))
-#endif
+#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
static inline bool is_hypervisor_range(int idx)
{
@@ -483,7 +500,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
#else
pgd_t *start = swapper_pg_dir;
#endif
- pgprotval_t prot;
+ pgprotval_t prot, eff;
int i;
struct pg_state st = {};
@@ -499,15 +516,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
if (!pgd_none(*start) && !is_hypervisor_range(i)) {
+ prot = pgd_flags(*start);
+#ifdef CONFIG_X86_PAE
+ eff = _PAGE_USER | _PAGE_RW;
+#else
+ eff = prot;
+#endif
if (pgd_large(*start) || !pgd_present(*start)) {
- prot = pgd_flags(*start);
- note_page(m, &st, __pgprot(prot), 1);
+ note_page(m, &st, __pgprot(prot), eff, 1);
} else {
- walk_p4d_level(m, &st, *start,
+ walk_p4d_level(m, &st, *start, eff,
i * PGD_LEVEL_MULT);
}
} else
- note_page(m, &st, __pgprot(0), 1);
+ note_page(m, &st, __pgprot(0), 0, 1);
cond_resched();
start++;
@@ -515,7 +537,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
/* Flush out the last page */
st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
- note_page(m, &st, __pgprot(0), 0);
+ note_page(m, &st, __pgprot(0), 0, 0);
if (!checkwx)
return;
if (st.wx_pages)
@@ -570,6 +592,13 @@ static int __init pt_dump_init(void)
address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
+#endif
+#ifdef CONFIG_KASAN
+ address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
+ address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
+#endif
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c88573d90f3e..e6af2b464c3d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -439,7 +439,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_ref))
return -1;
- if (CONFIG_PGTABLE_LEVELS > 4) {
+ if (pgtable_l5_enabled) {
if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
arch_flush_lazy_mmu_mode();
@@ -454,7 +454,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (p4d_none(*p4d_ref))
return -1;
- if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) {
+ if (p4d_none(*p4d) && !pgtable_l5_enabled) {
set_p4d(p4d, *p4d_ref);
arch_flush_lazy_mmu_mode();
} else {
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index ab33a32df2a8..9aa22be8331e 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -120,7 +120,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (pgtable_l5_enabled) {
set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
} else {
/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index fecb0c0a6077..9bbc51ae54a6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -88,12 +88,7 @@ static int __init nonx32_setup(char *str)
}
__setup("noexec32=", nonx32_setup);
-/*
- * When memory was added make sure all the processes MM have
- * suitable PGD entries in the local PGD level page.
- */
-#ifdef CONFIG_X86_5LEVEL
-void sync_global_pgds(unsigned long start, unsigned long end)
+static void sync_global_pgds_l5(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -129,8 +124,8 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(&pgd_lock);
}
}
-#else
-void sync_global_pgds(unsigned long start, unsigned long end)
+
+static void sync_global_pgds_l4(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -143,7 +138,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
* With folded p4d, pgd_none() is always false, we need to
* handle synchonization on p4d level.
*/
- BUILD_BUG_ON(pgd_none(*pgd_ref));
+ MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
p4d_ref = p4d_offset(pgd_ref, addr);
if (p4d_none(*p4d_ref))
@@ -173,7 +168,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(&pgd_lock);
}
}
-#endif
+
+/*
+ * When memory was added make sure all the processes MM have
+ * suitable PGD entries in the local PGD level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+ if (pgtable_l5_enabled)
+ sync_global_pgds_l5(start, end);
+ else
+ sync_global_pgds_l4(start, end);
+}
/*
* NOTE: This function is marked __ref because it calls __init function
@@ -632,7 +638,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
- if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (!pgtable_l5_enabled)
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
@@ -712,7 +718,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
page_size_mask);
spin_lock(&init_mm.page_table_lock);
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (pgtable_l5_enabled)
pgd_populate(&init_mm, pgd, p4d);
else
p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
@@ -1093,7 +1099,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
* 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
- if (CONFIG_PGTABLE_LEVELS == 5)
+ if (pgtable_l5_enabled)
free_pud_table(pud_base, p4d, altmap);
}
@@ -1193,8 +1199,8 @@ void __init mem_init(void)
register_page_bootmem_info();
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
- PAGE_SIZE, KCORE_OTHER);
+ if (get_gate_vma(&init_mm))
+ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
mem_init_print_info(NULL);
}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index af6f2f9c6a26..d8ff013ea9d0 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,6 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt
+
+#ifdef CONFIG_X86_5LEVEL
+/* Too early to use cpu_feature_enabled() */
+#define pgtable_l5_enabled __pgtable_l5_enabled
+#endif
+
#include <linux/bootmem.h>
#include <linux/kasan.h>
#include <linux/kdebug.h>
@@ -19,7 +25,7 @@
extern struct range pfn_mapped[E820_MAX_ENTRIES];
-static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
static __init void *early_alloc(size_t size, int nid, bool panic)
{
@@ -176,10 +182,10 @@ static void __init clear_pgds(unsigned long start,
* With folded p4d, pgd_clear() is nop, use p4d_clear()
* instead.
*/
- if (CONFIG_PGTABLE_LEVELS < 5)
- p4d_clear(p4d_offset(pgd, start));
- else
+ if (pgtable_l5_enabled)
pgd_clear(pgd);
+ else
+ p4d_clear(p4d_offset(pgd, start));
}
pgd = pgd_offset_k(start);
@@ -191,7 +197,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
{
unsigned long p4d;
- if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (!pgtable_l5_enabled)
return (p4d_t *)pgd;
p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
@@ -272,7 +278,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);
- for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
+ for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);
kasan_map_early_shadow(early_top_pgt);
@@ -303,7 +309,7 @@ void __init kasan_init(void)
* bunch of things like kernel code, modules, EFI mapping, etc.
* We need to take extra steps to not overwrite them.
*/
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (pgtable_l5_enabled) {
void *ptr;
ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aedebd2ebf1e..615cc03ced84 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,23 +34,12 @@
#define TB_SHIFT 40
/*
- * Virtual address start and end range for randomization.
- *
* The end address could depend on more configuration options to make the
* highest amount of space for randomization available, but that's too hard
* to keep straight and caused issues already.
*/
-static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
-/* Default values */
-unsigned long page_offset_base = __PAGE_OFFSET_BASE;
-EXPORT_SYMBOL(page_offset_base);
-unsigned long vmalloc_base = __VMALLOC_BASE;
-EXPORT_SYMBOL(vmalloc_base);
-unsigned long vmemmap_base = __VMEMMAP_BASE;
-EXPORT_SYMBOL(vmemmap_base);
-
/*
* Memory regions randomized by KASLR (except modules that use a separate logic
* earlier during boot). The list is ordered based on virtual addresses. This
@@ -60,8 +49,8 @@ static __initdata struct kaslr_memory_region {
unsigned long *base;
unsigned long size_tb;
} kaslr_regions[] = {
- { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ },
- { &vmalloc_base, VMALLOC_SIZE_TB },
+ { &page_offset_base, 0 },
+ { &vmalloc_base, 0 },
{ &vmemmap_base, 1 },
};
@@ -84,11 +73,14 @@ static inline bool kaslr_memory_enabled(void)
void __init kernel_randomize_memory(void)
{
size_t i;
- unsigned long vaddr = vaddr_start;
+ unsigned long vaddr_start, vaddr;
unsigned long rand, memory_tb;
struct rnd_state rand_state;
unsigned long remain_entropy;
+ vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
+ vaddr = vaddr_start;
+
/*
* These BUILD_BUG_ON checks ensure the memory layout is consistent
* with the vaddr_start/vaddr_end variables. These checks are very
@@ -101,6 +93,9 @@ void __init kernel_randomize_memory(void)
if (!kaslr_memory_enabled())
return;
+ kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+ kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
+
/*
* Update Physical memory mapping to available and
* add padding if needed (especially for memory hotplug support).
@@ -129,7 +124,7 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (pgtable_l5_enabled)
entropy = (rand % (entropy + 1)) & P4D_MASK;
else
entropy = (rand % (entropy + 1)) & PUD_MASK;
@@ -141,7 +136,7 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (pgtable_l5_enabled)
vaddr = round_up(vaddr + 1, P4D_SIZE);
else
vaddr = round_up(vaddr + 1, PUD_SIZE);
@@ -217,7 +212,7 @@ void __meminit init_trampoline(void)
return;
}
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (pgtable_l5_enabled)
init_trampoline_p4d();
else
init_trampoline_pud();
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a53071e2e17..3a1b5fe4c2ca 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -25,17 +25,12 @@
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
-#include <asm/sections.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>
#include "mm_internal.h"
-static char sme_cmdline_arg[] __initdata = "mem_encrypt";
-static char sme_cmdline_on[] __initdata = "on";
-static char sme_cmdline_off[] __initdata = "off";
-
/*
* Since SME related variables are set early in the boot process they must
* reside in the .data section so as not to be zeroed out when the .bss
@@ -46,7 +41,7 @@ EXPORT_SYMBOL(sme_me_mask);
DEFINE_STATIC_KEY_FALSE(sev_enable_key);
EXPORT_SYMBOL_GPL(sev_enable_key);
-static bool sev_enabled __section(.data);
+bool sev_enabled __section(.data);
/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
@@ -463,574 +458,3 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
/* Make the SWIOTLB buffer area decrypted */
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}
-
-struct sme_populate_pgd_data {
- void *pgtable_area;
- pgd_t *pgd;
-
- pmdval_t pmd_flags;
- pteval_t pte_flags;
- unsigned long paddr;
-
- unsigned long vaddr;
- unsigned long vaddr_end;
-};
-
-static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
-{
- unsigned long pgd_start, pgd_end, pgd_size;
- pgd_t *pgd_p;
-
- pgd_start = ppd->vaddr & PGDIR_MASK;
- pgd_end = ppd->vaddr_end & PGDIR_MASK;
-
- pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
-
- pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
-
- memset(pgd_p, 0, pgd_size);
-}
-
-#define PGD_FLAGS _KERNPG_TABLE_NOENC
-#define P4D_FLAGS _KERNPG_TABLE_NOENC
-#define PUD_FLAGS _KERNPG_TABLE_NOENC
-#define PMD_FLAGS _KERNPG_TABLE_NOENC
-
-#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
-
-#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
-#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
- (_PAGE_PAT | _PAGE_PWT))
-
-#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
-
-#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
-
-#define PTE_FLAGS_DEC PTE_FLAGS
-#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
- (_PAGE_PAT | _PAGE_PWT))
-
-#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
-
-static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
-{
- pgd_t *pgd_p;
- p4d_t *p4d_p;
- pud_t *pud_p;
- pmd_t *pmd_p;
-
- pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
- if (native_pgd_val(*pgd_p)) {
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
- p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
- else
- pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
- } else {
- pgd_t pgd;
-
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p = ppd->pgtable_area;
- memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
- ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
-
- pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
- } else {
- pud_p = ppd->pgtable_area;
- memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
-
- pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
- }
- native_set_pgd(pgd_p, pgd);
- }
-
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p += p4d_index(ppd->vaddr);
- if (native_p4d_val(*p4d_p)) {
- pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
- } else {
- p4d_t p4d;
-
- pud_p = ppd->pgtable_area;
- memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
-
- p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
- native_set_p4d(p4d_p, p4d);
- }
- }
-
- pud_p += pud_index(ppd->vaddr);
- if (native_pud_val(*pud_p)) {
- if (native_pud_val(*pud_p) & _PAGE_PSE)
- return NULL;
-
- pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
- } else {
- pud_t pud;
-
- pmd_p = ppd->pgtable_area;
- memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
- ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
-
- pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
- native_set_pud(pud_p, pud);
- }
-
- return pmd_p;
-}
-
-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
-{
- pmd_t *pmd_p;
-
- pmd_p = sme_prepare_pgd(ppd);
- if (!pmd_p)
- return;
-
- pmd_p += pmd_index(ppd->vaddr);
- if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
- native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
-}
-
-static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
-{
- pmd_t *pmd_p;
- pte_t *pte_p;
-
- pmd_p = sme_prepare_pgd(ppd);
- if (!pmd_p)
- return;
-
- pmd_p += pmd_index(ppd->vaddr);
- if (native_pmd_val(*pmd_p)) {
- if (native_pmd_val(*pmd_p) & _PAGE_PSE)
- return;
-
- pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
- } else {
- pmd_t pmd;
-
- pte_p = ppd->pgtable_area;
- memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
- ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
-
- pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
- native_set_pmd(pmd_p, pmd);
- }
-
- pte_p += pte_index(ppd->vaddr);
- if (!native_pte_val(*pte_p))
- native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
-}
-
-static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
-{
- while (ppd->vaddr < ppd->vaddr_end) {
- sme_populate_pgd_large(ppd);
-
- ppd->vaddr += PMD_PAGE_SIZE;
- ppd->paddr += PMD_PAGE_SIZE;
- }
-}
-
-static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
-{
- while (ppd->vaddr < ppd->vaddr_end) {
- sme_populate_pgd(ppd);
-
- ppd->vaddr += PAGE_SIZE;
- ppd->paddr += PAGE_SIZE;
- }
-}
-
-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
- pmdval_t pmd_flags, pteval_t pte_flags)
-{
- unsigned long vaddr_end;
-
- ppd->pmd_flags = pmd_flags;
- ppd->pte_flags = pte_flags;
-
- /* Save original end value since we modify the struct value */
- vaddr_end = ppd->vaddr_end;
-
- /* If start is not 2MB aligned, create PTE entries */
- ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
- __sme_map_range_pte(ppd);
-
- /* Create PMD entries */
- ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
- __sme_map_range_pmd(ppd);
-
- /* If end is not 2MB aligned, create PTE entries */
- ppd->vaddr_end = vaddr_end;
- __sme_map_range_pte(ppd);
-}
-
-static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
-{
- __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
-}
-
-static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
-{
- __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
-}
-
-static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
-{
- __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
-}
-
-static unsigned long __init sme_pgtable_calc(unsigned long len)
-{
- unsigned long p4d_size, pud_size, pmd_size, pte_size;
- unsigned long total;
-
- /*
- * Perform a relatively simplistic calculation of the pagetable
- * entries that are needed. Those mappings will be covered mostly
- * by 2MB PMD entries so we can conservatively calculate the required
- * number of P4D, PUD and PMD structures needed to perform the
- * mappings. For mappings that are not 2MB aligned, PTE mappings
- * would be needed for the start and end portion of the address range
- * that fall outside of the 2MB alignment. This results in, at most,
- * two extra pages to hold PTE entries for each range that is mapped.
- * Incrementing the count for each covers the case where the addresses
- * cross entries.
- */
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
- p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
- pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
- pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
- } else {
- p4d_size = 0;
- pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
- pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
- }
- pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
- pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
- pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
-
- total = p4d_size + pud_size + pmd_size + pte_size;
-
- /*
- * Now calculate the added pagetable structures needed to populate
- * the new pagetables.
- */
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
- p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
- pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
- pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
- } else {
- p4d_size = 0;
- pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
- pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
- }
- pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
- pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
-
- total += p4d_size + pud_size + pmd_size;
-
- return total;
-}
-
-void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp)
-{
- unsigned long workarea_start, workarea_end, workarea_len;
- unsigned long execute_start, execute_end, execute_len;
- unsigned long kernel_start, kernel_end, kernel_len;
- unsigned long initrd_start, initrd_end, initrd_len;
- struct sme_populate_pgd_data ppd;
- unsigned long pgtable_area_len;
- unsigned long decrypted_base;
-
- if (!sme_active())
- return;
-
- /*
- * Prepare for encrypting the kernel and initrd by building new
- * pagetables with the necessary attributes needed to encrypt the
- * kernel in place.
- *
- * One range of virtual addresses will map the memory occupied
- * by the kernel and initrd as encrypted.
- *
- * Another range of virtual addresses will map the memory occupied
- * by the kernel and initrd as decrypted and write-protected.
- *
- * The use of write-protect attribute will prevent any of the
- * memory from being cached.
- */
-
- /* Physical addresses gives us the identity mapped virtual addresses */
- kernel_start = __pa_symbol(_text);
- kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
- kernel_len = kernel_end - kernel_start;
-
- initrd_start = 0;
- initrd_end = 0;
- initrd_len = 0;
-#ifdef CONFIG_BLK_DEV_INITRD
- initrd_len = (unsigned long)bp->hdr.ramdisk_size |
- ((unsigned long)bp->ext_ramdisk_size << 32);
- if (initrd_len) {
- initrd_start = (unsigned long)bp->hdr.ramdisk_image |
- ((unsigned long)bp->ext_ramdisk_image << 32);
- initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
- initrd_len = initrd_end - initrd_start;
- }
-#endif
-
- /* Set the encryption workarea to be immediately after the kernel */
- workarea_start = kernel_end;
-
- /*
- * Calculate required number of workarea bytes needed:
- * executable encryption area size:
- * stack page (PAGE_SIZE)
- * encryption routine page (PAGE_SIZE)
- * intermediate copy buffer (PMD_PAGE_SIZE)
- * pagetable structures for the encryption of the kernel
- * pagetable structures for workarea (in case not currently mapped)
- */
- execute_start = workarea_start;
- execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
- execute_len = execute_end - execute_start;
-
- /*
- * One PGD for both encrypted and decrypted mappings and a set of
- * PUDs and PMDs for each of the encrypted and decrypted mappings.
- */
- pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
- pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
- if (initrd_len)
- pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
-
- /* PUDs and PMDs needed in the current pagetables for the workarea */
- pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
-
- /*
- * The total workarea includes the executable encryption area and
- * the pagetable area. The start of the workarea is already 2MB
- * aligned, align the end of the workarea on a 2MB boundary so that
- * we don't try to create/allocate PTE entries from the workarea
- * before it is mapped.
- */
- workarea_len = execute_len + pgtable_area_len;
- workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
-
- /*
- * Set the address to the start of where newly created pagetable
- * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
- * structures are created when the workarea is added to the current
- * pagetables and when the new encrypted and decrypted kernel
- * mappings are populated.
- */
- ppd.pgtable_area = (void *)execute_end;
-
- /*
- * Make sure the current pagetable structure has entries for
- * addressing the workarea.
- */
- ppd.pgd = (pgd_t *)native_read_cr3_pa();
- ppd.paddr = workarea_start;
- ppd.vaddr = workarea_start;
- ppd.vaddr_end = workarea_end;
- sme_map_range_decrypted(&ppd);
-
- /* Flush the TLB - no globals so cr3 is enough */
- native_write_cr3(__native_read_cr3());
-
- /*
- * A new pagetable structure is being built to allow for the kernel
- * and initrd to be encrypted. It starts with an empty PGD that will
- * then be populated with new PUDs and PMDs as the encrypted and
- * decrypted kernel mappings are created.
- */
- ppd.pgd = ppd.pgtable_area;
- memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
- ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
-
- /*
- * A different PGD index/entry must be used to get different
- * pagetable entries for the decrypted mapping. Choose the next
- * PGD index and convert it to a virtual address to be used as
- * the base of the mapping.
- */
- decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
- if (initrd_len) {
- unsigned long check_base;
-
- check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
- decrypted_base = max(decrypted_base, check_base);
- }
- decrypted_base <<= PGDIR_SHIFT;
-
- /* Add encrypted kernel (identity) mappings */
- ppd.paddr = kernel_start;
- ppd.vaddr = kernel_start;
- ppd.vaddr_end = kernel_end;
- sme_map_range_encrypted(&ppd);
-
- /* Add decrypted, write-protected kernel (non-identity) mappings */
- ppd.paddr = kernel_start;
- ppd.vaddr = kernel_start + decrypted_base;
- ppd.vaddr_end = kernel_end + decrypted_base;
- sme_map_range_decrypted_wp(&ppd);
-
- if (initrd_len) {
- /* Add encrypted initrd (identity) mappings */
- ppd.paddr = initrd_start;
- ppd.vaddr = initrd_start;
- ppd.vaddr_end = initrd_end;
- sme_map_range_encrypted(&ppd);
- /*
- * Add decrypted, write-protected initrd (non-identity) mappings
- */
- ppd.paddr = initrd_start;
- ppd.vaddr = initrd_start + decrypted_base;
- ppd.vaddr_end = initrd_end + decrypted_base;
- sme_map_range_decrypted_wp(&ppd);
- }
-
- /* Add decrypted workarea mappings to both kernel mappings */
- ppd.paddr = workarea_start;
- ppd.vaddr = workarea_start;
- ppd.vaddr_end = workarea_end;
- sme_map_range_decrypted(&ppd);
-
- ppd.paddr = workarea_start;
- ppd.vaddr = workarea_start + decrypted_base;
- ppd.vaddr_end = workarea_end + decrypted_base;
- sme_map_range_decrypted(&ppd);
-
- /* Perform the encryption */
- sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
- kernel_len, workarea_start, (unsigned long)ppd.pgd);
-
- if (initrd_len)
- sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
- initrd_len, workarea_start,
- (unsigned long)ppd.pgd);
-
- /*
- * At this point we are running encrypted. Remove the mappings for
- * the decrypted areas - all that is needed for this is to remove
- * the PGD entry/entries.
- */
- ppd.vaddr = kernel_start + decrypted_base;
- ppd.vaddr_end = kernel_end + decrypted_base;
- sme_clear_pgd(&ppd);
-
- if (initrd_len) {
- ppd.vaddr = initrd_start + decrypted_base;
- ppd.vaddr_end = initrd_end + decrypted_base;
- sme_clear_pgd(&ppd);
- }
-
- ppd.vaddr = workarea_start + decrypted_base;
- ppd.vaddr_end = workarea_end + decrypted_base;
- sme_clear_pgd(&ppd);
-
- /* Flush the TLB - no globals so cr3 is enough */
- native_write_cr3(__native_read_cr3());
-}
-
-void __init __nostackprotector sme_enable(struct boot_params *bp)
-{
- const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
- unsigned int eax, ebx, ecx, edx;
- unsigned long feature_mask;
- bool active_by_default;
- unsigned long me_mask;
- char buffer[16];
- u64 msr;
-
- /* Check for the SME/SEV support leaf */
- eax = 0x80000000;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- if (eax < 0x8000001f)
- return;
-
-#define AMD_SME_BIT BIT(0)
-#define AMD_SEV_BIT BIT(1)
- /*
- * Set the feature mask (SME or SEV) based on whether we are
- * running under a hypervisor.
- */
- eax = 1;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
-
- /*
- * Check for the SME/SEV feature:
- * CPUID Fn8000_001F[EAX]
- * - Bit 0 - Secure Memory Encryption support
- * - Bit 1 - Secure Encrypted Virtualization support
- * CPUID Fn8000_001F[EBX]
- * - Bits 5:0 - Pagetable bit position used to indicate encryption
- */
- eax = 0x8000001f;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- if (!(eax & feature_mask))
- return;
-
- me_mask = 1UL << (ebx & 0x3f);
-
- /* Check if memory encryption is enabled */
- if (feature_mask == AMD_SME_BIT) {
- /* For SME, check the SYSCFG MSR */
- msr = __rdmsr(MSR_K8_SYSCFG);
- if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
- return;
- } else {
- /* For SEV, check the SEV MSR */
- msr = __rdmsr(MSR_AMD64_SEV);
- if (!(msr & MSR_AMD64_SEV_ENABLED))
- return;
-
- /* SEV state cannot be controlled by a command line option */
- sme_me_mask = me_mask;
- sev_enabled = true;
- return;
- }
-
- /*
- * Fixups have not been applied to phys_base yet and we're running
- * identity mapped, so we must obtain the address to the SME command
- * line argument data using rip-relative addressing.
- */
- asm ("lea sme_cmdline_arg(%%rip), %0"
- : "=r" (cmdline_arg)
- : "p" (sme_cmdline_arg));
- asm ("lea sme_cmdline_on(%%rip), %0"
- : "=r" (cmdline_on)
- : "p" (sme_cmdline_on));
- asm ("lea sme_cmdline_off(%%rip), %0"
- : "=r" (cmdline_off)
- : "p" (sme_cmdline_off));
-
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
- active_by_default = true;
- else
- active_by_default = false;
-
- cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
- ((u64)bp->ext_cmd_line_ptr << 32));
-
- cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
-
- if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
- sme_me_mask = me_mask;
- else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
- sme_me_mask = 0;
- else
- sme_me_mask = active_by_default ? me_mask : 0;
-}
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
new file mode 100644
index 000000000000..1b2197d13832
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -0,0 +1,564 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define DISABLE_BRANCH_PROFILING
+
+/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+
+/*
+ * Special hack: we have to be careful, because no indirections are
+ * allowed here, and paravirt_ops is a kind of one. As it will only run in
+ * baremetal anyway, we just keep it from happening. (This list needs to
+ * be extended when new paravirt and debugging variants are added.)
+ */
+#undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_SPINLOCKS
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/mem_encrypt.h>
+
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/cmdline.h>
+
+#include "mm_internal.h"
+
+#define PGD_FLAGS _KERNPG_TABLE_NOENC
+#define P4D_FLAGS _KERNPG_TABLE_NOENC
+#define PUD_FLAGS _KERNPG_TABLE_NOENC
+#define PMD_FLAGS _KERNPG_TABLE_NOENC
+
+#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
+
+#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC PTE_FLAGS
+#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
+
+struct sme_populate_pgd_data {
+ void *pgtable_area;
+ pgd_t *pgd;
+
+ pmdval_t pmd_flags;
+ pteval_t pte_flags;
+ unsigned long paddr;
+
+ unsigned long vaddr;
+ unsigned long vaddr_end;
+};
+
+static char sme_cmdline_arg[] __initdata = "mem_encrypt";
+static char sme_cmdline_on[] __initdata = "on";
+static char sme_cmdline_off[] __initdata = "off";
+
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
+{
+ unsigned long pgd_start, pgd_end, pgd_size;
+ pgd_t *pgd_p;
+
+ pgd_start = ppd->vaddr & PGDIR_MASK;
+ pgd_end = ppd->vaddr_end & PGDIR_MASK;
+
+ pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
+
+ pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
+
+ memset(pgd_p, 0, pgd_size);
+}
+
+static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = ppd->pgd + pgd_index(ppd->vaddr);
+ if (pgd_none(*pgd)) {
+ p4d = ppd->pgtable_area;
+ memset(p4d, 0, sizeof(*p4d) * PTRS_PER_P4D);
+ ppd->pgtable_area += sizeof(*p4d) * PTRS_PER_P4D;
+ set_pgd(pgd, __pgd(PGD_FLAGS | __pa(p4d)));
+ }
+
+ p4d = p4d_offset(pgd, ppd->vaddr);
+ if (p4d_none(*p4d)) {
+ pud = ppd->pgtable_area;
+ memset(pud, 0, sizeof(*pud) * PTRS_PER_PUD);
+ ppd->pgtable_area += sizeof(*pud) * PTRS_PER_PUD;
+ set_p4d(p4d, __p4d(P4D_FLAGS | __pa(pud)));
+ }
+
+ pud = pud_offset(p4d, ppd->vaddr);
+ if (pud_none(*pud)) {
+ pmd = ppd->pgtable_area;
+ memset(pmd, 0, sizeof(*pmd) * PTRS_PER_PMD);
+ ppd->pgtable_area += sizeof(*pmd) * PTRS_PER_PMD;
+ set_pud(pud, __pud(PUD_FLAGS | __pa(pmd)));
+ }
+
+ if (pud_large(*pud))
+ return NULL;
+
+ return pud;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pud = sme_prepare_pgd(ppd);
+ if (!pud)
+ return;
+
+ pmd = pmd_offset(pud, ppd->vaddr);
+ if (pmd_large(*pmd))
+ return;
+
+ set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
+}
+
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pud = sme_prepare_pgd(ppd);
+ if (!pud)
+ return;
+
+ pmd = pmd_offset(pud, ppd->vaddr);
+ if (pmd_none(*pmd)) {
+ pte = ppd->pgtable_area;
+ memset(pte, 0, sizeof(pte) * PTRS_PER_PTE);
+ ppd->pgtable_area += sizeof(pte) * PTRS_PER_PTE;
+ set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte)));
+ }
+
+ if (pmd_large(*pmd))
+ return;
+
+ pte = pte_offset_map(pmd, ppd->vaddr);
+ if (pte_none(*pte))
+ set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd_large(ppd);
+
+ ppd->vaddr += PMD_PAGE_SIZE;
+ ppd->paddr += PMD_PAGE_SIZE;
+ }
+}
+
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd(ppd);
+
+ ppd->vaddr += PAGE_SIZE;
+ ppd->paddr += PAGE_SIZE;
+ }
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+ pmdval_t pmd_flags, pteval_t pte_flags)
+{
+ unsigned long vaddr_end;
+
+ ppd->pmd_flags = pmd_flags;
+ ppd->pte_flags = pte_flags;
+
+ /* Save original end value since we modify the struct value */
+ vaddr_end = ppd->vaddr_end;
+
+ /* If start is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+ __sme_map_range_pte(ppd);
+
+ /* Create PMD entries */
+ ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+ __sme_map_range_pmd(ppd);
+
+ /* If end is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = vaddr_end;
+ __sme_map_range_pte(ppd);
+}
+
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
+}
+
+static unsigned long __init sme_pgtable_calc(unsigned long len)
+{
+ unsigned long entries = 0, tables = 0;
+
+ /*
+ * Perform a relatively simplistic calculation of the pagetable
+ * entries that are needed. Those mappings will be covered mostly
+ * by 2MB PMD entries so we can conservatively calculate the required
+ * number of P4D, PUD and PMD structures needed to perform the
+ * mappings. For mappings that are not 2MB aligned, PTE mappings
+ * would be needed for the start and end portion of the address range
+ * that fall outside of the 2MB alignment. This results in, at most,
+ * two extra pages to hold PTE entries for each range that is mapped.
+ * Incrementing the count for each covers the case where the addresses
+ * cross entries.
+ */
+
+ /* PGDIR_SIZE is equal to P4D_SIZE on 4-level machine. */
+ if (PTRS_PER_P4D > 1)
+ entries += (DIV_ROUND_UP(len, PGDIR_SIZE) + 1) * sizeof(p4d_t) * PTRS_PER_P4D;
+ entries += (DIV_ROUND_UP(len, P4D_SIZE) + 1) * sizeof(pud_t) * PTRS_PER_PUD;
+ entries += (DIV_ROUND_UP(len, PUD_SIZE) + 1) * sizeof(pmd_t) * PTRS_PER_PMD;
+ entries += 2 * sizeof(pte_t) * PTRS_PER_PTE;
+
+ /*
+ * Now calculate the added pagetable structures needed to populate
+ * the new pagetables.
+ */
+
+ if (PTRS_PER_P4D > 1)
+ tables += DIV_ROUND_UP(entries, PGDIR_SIZE) * sizeof(p4d_t) * PTRS_PER_P4D;
+ tables += DIV_ROUND_UP(entries, P4D_SIZE) * sizeof(pud_t) * PTRS_PER_PUD;
+ tables += DIV_ROUND_UP(entries, PUD_SIZE) * sizeof(pmd_t) * PTRS_PER_PMD;
+
+ return entries + tables;
+}
+
+void __init sme_encrypt_kernel(struct boot_params *bp)
+{
+ unsigned long workarea_start, workarea_end, workarea_len;
+ unsigned long execute_start, execute_end, execute_len;
+ unsigned long kernel_start, kernel_end, kernel_len;
+ unsigned long initrd_start, initrd_end, initrd_len;
+ struct sme_populate_pgd_data ppd;
+ unsigned long pgtable_area_len;
+ unsigned long decrypted_base;
+
+ if (!sme_active())
+ return;
+
+ /*
+ * Prepare for encrypting the kernel and initrd by building new
+ * pagetables with the necessary attributes needed to encrypt the
+ * kernel in place.
+ *
+ * One range of virtual addresses will map the memory occupied
+ * by the kernel and initrd as encrypted.
+ *
+ * Another range of virtual addresses will map the memory occupied
+ * by the kernel and initrd as decrypted and write-protected.
+ *
+ * The use of write-protect attribute will prevent any of the
+ * memory from being cached.
+ */
+
+ /* Physical addresses gives us the identity mapped virtual addresses */
+ kernel_start = __pa_symbol(_text);
+ kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
+ kernel_len = kernel_end - kernel_start;
+
+ initrd_start = 0;
+ initrd_end = 0;
+ initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+ initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+ ((unsigned long)bp->ext_ramdisk_size << 32);
+ if (initrd_len) {
+ initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+ ((unsigned long)bp->ext_ramdisk_image << 32);
+ initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+ initrd_len = initrd_end - initrd_start;
+ }
+#endif
+
+ /* Set the encryption workarea to be immediately after the kernel */
+ workarea_start = kernel_end;
+
+ /*
+ * Calculate required number of workarea bytes needed:
+ * executable encryption area size:
+ * stack page (PAGE_SIZE)
+ * encryption routine page (PAGE_SIZE)
+ * intermediate copy buffer (PMD_PAGE_SIZE)
+ * pagetable structures for the encryption of the kernel
+ * pagetable structures for workarea (in case not currently mapped)
+ */
+ execute_start = workarea_start;
+ execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
+ execute_len = execute_end - execute_start;
+
+ /*
+ * One PGD for both encrypted and decrypted mappings and a set of
+ * PUDs and PMDs for each of the encrypted and decrypted mappings.
+ */
+ pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
+ pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+ if (initrd_len)
+ pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
+
+ /* PUDs and PMDs needed in the current pagetables for the workarea */
+ pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
+
+ /*
+ * The total workarea includes the executable encryption area and
+ * the pagetable area. The start of the workarea is already 2MB
+ * aligned, align the end of the workarea on a 2MB boundary so that
+ * we don't try to create/allocate PTE entries from the workarea
+ * before it is mapped.
+ */
+ workarea_len = execute_len + pgtable_area_len;
+ workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
+
+ /*
+ * Set the address to the start of where newly created pagetable
+ * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
+ * structures are created when the workarea is added to the current
+ * pagetables and when the new encrypted and decrypted kernel
+ * mappings are populated.
+ */
+ ppd.pgtable_area = (void *)execute_end;
+
+ /*
+ * Make sure the current pagetable structure has entries for
+ * addressing the workarea.
+ */
+ ppd.pgd = (pgd_t *)native_read_cr3_pa();
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
+
+ /* Flush the TLB - no globals so cr3 is enough */
+ native_write_cr3(__native_read_cr3());
+
+ /*
+ * A new pagetable structure is being built to allow for the kernel
+ * and initrd to be encrypted. It starts with an empty PGD that will
+ * then be populated with new PUDs and PMDs as the encrypted and
+ * decrypted kernel mappings are created.
+ */
+ ppd.pgd = ppd.pgtable_area;
+ memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+ ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
+
+ /*
+ * A different PGD index/entry must be used to get different
+ * pagetable entries for the decrypted mapping. Choose the next
+ * PGD index and convert it to a virtual address to be used as
+ * the base of the mapping.
+ */
+ decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+ if (initrd_len) {
+ unsigned long check_base;
+
+ check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+ decrypted_base = max(decrypted_base, check_base);
+ }
+ decrypted_base <<= PGDIR_SHIFT;
+
+ /* Add encrypted kernel (identity) mappings */
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start;
+ ppd.vaddr_end = kernel_end;
+ sme_map_range_encrypted(&ppd);
+
+ /* Add decrypted, write-protected kernel (non-identity) mappings */
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
+
+ if (initrd_len) {
+ /* Add encrypted initrd (identity) mappings */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start;
+ ppd.vaddr_end = initrd_end;
+ sme_map_range_encrypted(&ppd);
+ /*
+ * Add decrypted, write-protected initrd (non-identity) mappings
+ */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
+ }
+
+ /* Add decrypted workarea mappings to both kernel mappings */
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
+
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_map_range_decrypted(&ppd);
+
+ /* Perform the encryption */
+ sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
+ kernel_len, workarea_start, (unsigned long)ppd.pgd);
+
+ if (initrd_len)
+ sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+ initrd_len, workarea_start,
+ (unsigned long)ppd.pgd);
+
+ /*
+ * At this point we are running encrypted. Remove the mappings for
+ * the decrypted areas - all that is needed for this is to remove
+ * the PGD entry/entries.
+ */
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+
+ if (initrd_len) {
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+ }
+
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+
+ /* Flush the TLB - no globals so cr3 is enough */
+ native_write_cr3(__native_read_cr3());
+}
+
+void __init sme_enable(struct boot_params *bp)
+{
+ const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
+ unsigned int eax, ebx, ecx, edx;
+ unsigned long feature_mask;
+ bool active_by_default;
+ unsigned long me_mask;
+ char buffer[16];
+ u64 msr;
+
+ /* Check for the SME/SEV support leaf */
+ eax = 0x80000000;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (eax < 0x8000001f)
+ return;
+
+#define AMD_SME_BIT BIT(0)
+#define AMD_SEV_BIT BIT(1)
+ /*
+ * Set the feature mask (SME or SEV) based on whether we are
+ * running under a hypervisor.
+ */
+ eax = 1;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
+
+ /*
+ * Check for the SME/SEV feature:
+ * CPUID Fn8000_001F[EAX]
+ * - Bit 0 - Secure Memory Encryption support
+ * - Bit 1 - Secure Encrypted Virtualization support
+ * CPUID Fn8000_001F[EBX]
+ * - Bits 5:0 - Pagetable bit position used to indicate encryption
+ */
+ eax = 0x8000001f;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (!(eax & feature_mask))
+ return;
+
+ me_mask = 1UL << (ebx & 0x3f);
+
+ /* Check if memory encryption is enabled */
+ if (feature_mask == AMD_SME_BIT) {
+ /* For SME, check the SYSCFG MSR */
+ msr = __rdmsr(MSR_K8_SYSCFG);
+ if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ return;
+ } else {
+ /* For SEV, check the SEV MSR */
+ msr = __rdmsr(MSR_AMD64_SEV);
+ if (!(msr & MSR_AMD64_SEV_ENABLED))
+ return;
+
+ /* SEV state cannot be controlled by a command line option */
+ sme_me_mask = me_mask;
+ sev_enabled = true;
+ return;
+ }
+
+ /*
+ * Fixups have not been applied to phys_base yet and we're running
+ * identity mapped, so we must obtain the address to the SME command
+ * line argument data using rip-relative addressing.
+ */
+ asm ("lea sme_cmdline_arg(%%rip), %0"
+ : "=r" (cmdline_arg)
+ : "p" (sme_cmdline_arg));
+ asm ("lea sme_cmdline_on(%%rip), %0"
+ : "=r" (cmdline_on)
+ : "p" (sme_cmdline_on));
+ asm ("lea sme_cmdline_off(%%rip), %0"
+ : "=r" (cmdline_off)
+ : "p" (sme_cmdline_off));
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
+ active_by_default = true;
+ else
+ active_by_default = false;
+
+ cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
+ ((u64)bp->ext_cmd_line_ptr << 32));
+
+ cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
+
+ if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
+ sme_me_mask = me_mask;
+ else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
+ sme_me_mask = 0;
+ else
+ sme_me_mask = active_by_default ? me_mask : 0;
+}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 7f1a51399674..e055d1a06699 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
unsigned long sp = current_stack_pointer;
pgd_t *pgd = pgd_offset(mm, sp);
- if (CONFIG_PGTABLE_LEVELS > 4) {
+ if (pgtable_l5_enabled) {
if (unlikely(pgd_none(*pgd))) {
pgd_t *pgd_ref = pgd_offset_k(sp);
@@ -613,7 +613,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
{
int cpu;
- struct flush_tlb_info info = {
+ struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
.mm = mm,
};
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4923d92f918d..45e4eb5bcbb2 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -13,6 +13,7 @@
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
+#include <asm/nospec-branch.h>
#include <linux/bpf.h>
/*
@@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 43 /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
@@ -299,7 +300,7 @@ static void emit_bpf_tail_call(u8 **pprog)
*/
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 32
+#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
@@ -313,7 +314,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* goto out;
*/
EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
-#define OFFSET3 10
+#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
label3 = cnt;
@@ -326,7 +327,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* rdi == ctx (1st arg)
* rax == prog->bpf_func + prologue_size
*/
- EMIT2(0xFF, 0xE0); /* jmp rax */
+ RETPOLINE_RAX_BPF_JIT();
/* out: */
BUILD_BUG_ON(cnt - label1 != OFFSET1);
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 174c59774cc9..a7a7677265b6 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -460,7 +460,7 @@ static int nmi_setup(void)
goto fail;
for_each_possible_cpu(cpu) {
- if (!cpu)
+ if (!IS_ENABLED(CONFIG_SMP) || !cpu)
continue;
memcpy(per_cpu(cpu_msrs, cpu).counters,
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index c310a8284358..4845871a2006 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -27,6 +27,7 @@
#include <linux/ioport.h>
#include <linux/mc146818rtc.h>
#include <linux/efi.h>
+#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/reboot.h>
@@ -190,7 +191,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
early_code_mapping_set_exec(0);
}
-static pgd_t *efi_pgd;
+pgd_t *efi_pgd;
+EXPORT_SYMBOL_GPL(efi_pgd);
/*
* We need our own copy of the higher levels of the page tables
@@ -225,7 +227,7 @@ int __init efi_alloc_page_tables(void)
pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
if (!pud) {
- if (CONFIG_PGTABLE_LEVELS > 4)
+ if (pgtable_l5_enabled)
free_page((unsigned long) pgd_page_vaddr(*pgd));
free_page((unsigned long)efi_pgd);
return -ENOMEM;
@@ -255,8 +257,8 @@ void efi_sync_low_kernel_mappings(void)
* only span a single PGD entry and that the entry also maps
* other important kernel regions.
*/
- BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
- BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
+ MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
+ MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
(EFI_VA_END & PGDIR_MASK));
pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 0ef5e5204968..74a532989308 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -50,7 +50,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
{
pmd_t *pmd;
pud_t *pud;
- p4d_t *p4d;
+ p4d_t *p4d = NULL;
/*
* The new mapping only has to cover the page containing the image
@@ -66,7 +66,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
* tables used by the image kernel.
*/
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (pgtable_l5_enabled) {
p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
if (!p4d)
return -ENOMEM;
@@ -84,7 +84,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
set_pud(pud + pud_index(restore_jump_address),
__pud(__pa(pmd) | _KERNPG_TABLE));
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (p4d) {
set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
} else {
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5d73c443e778..220e97841e49 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
/*
* PC relative relocations don't need to be adjusted unless
* referencing a percpu symbol.
+ *
+ * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.
*/
if (is_percpu_sym(sym, symname))
add_reloc(&relocs32neg, offset);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index f605825a04ab..c1f98f32c45f 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -18,9 +18,6 @@ config XEN_PV
bool "Xen PV guest support"
default y
depends on XEN
- # XEN_PV is not ready to work with 5-level paging.
- # Changes to hypervisor are also required.
- depends on !X86_5LEVEL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
help
@@ -79,6 +76,4 @@ config XEN_DEBUG_FS
config XEN_PVH
bool "Support for running as a PVH guest"
depends on XEN && XEN_PVHVM && ACPI
- # Pre-built page tables are not ready to handle 5-level paging.
- depends on !X86_5LEVEL
def_bool n
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 436c4f003e17..aa1c6a6831a9 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -6,6 +6,7 @@
#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
+#include <asm/x86_init.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
@@ -16,15 +17,20 @@
/*
* PVH variables.
*
- * xen_pvh and pvh_bootparams need to live in data segment since they
- * are used after startup_{32|64}, which clear .bss, are invoked.
+ * xen_pvh pvh_bootparams and pvh_start_info need to live in data segment
+ * since they are used after startup_{32|64}, which clear .bss, are invoked.
*/
bool xen_pvh __attribute__((section(".data"))) = 0;
struct boot_params pvh_bootparams __attribute__((section(".data")));
+struct hvm_start_info pvh_start_info __attribute__((section(".data")));
-struct hvm_start_info pvh_start_info;
unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
+static u64 pvh_get_root_pointer(void)
+{
+ return pvh_start_info.rsdp_paddr;
+}
+
static void __init init_pvh_bootparams(void)
{
struct xen_memory_map memmap;
@@ -71,6 +77,8 @@ static void __init init_pvh_bootparams(void)
*/
pvh_bootparams.hdr.version = 0x212;
pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
+
+ x86_init.acpi.get_root_pointer = pvh_get_root_pointer;
}
/*
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index aae88fec9941..d20763472920 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -538,6 +538,22 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+__visible p4dval_t xen_p4d_val(p4d_t p4d)
+{
+ return pte_mfn_to_pfn(p4d.p4d);
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val);
+
+__visible p4d_t xen_make_p4d(p4dval_t p4d)
+{
+ p4d = pte_pfn_to_mfn(p4d);
+
+ return native_make_p4d(p4d);
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
+#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
#endif /* CONFIG_X86_64 */
static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
@@ -2411,6 +2427,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.alloc_pud = xen_alloc_pmd_init,
.release_pud = xen_release_pmd_init,
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+ .p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
+ .make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
+#endif
#endif /* CONFIG_X86_64 */
.activate_mm = xen_activate_mm,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 77c959cf81e7..7a43b2ae19f1 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -122,6 +122,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)
if (xen_hvm_domain())
native_smp_cpus_done(max_cpus);
+ else
+ calculate_max_logical_packages();
if (xen_have_vcpu_info_placement)
return;
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 623720a11143..732631ce250f 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -16,6 +16,7 @@
*/
#include <linux/dma-contiguous.h>
+#include <linux/dma-direct.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/mm.h>
@@ -123,7 +124,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
unsigned long attrs)
{
unsigned long ret;
- unsigned long uncached = 0;
+ unsigned long uncached;
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page *page = NULL;
@@ -144,15 +145,27 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
if (!page)
return NULL;
- ret = (unsigned long)page_address(page);
+ *handle = phys_to_dma(dev, page_to_phys(page));
- /* We currently don't support coherent memory outside KSEG */
+#ifdef CONFIG_MMU
+ if (PageHighMem(page)) {
+ void *p;
+ p = dma_common_contiguous_remap(page, size, VM_MAP,
+ pgprot_noncached(PAGE_KERNEL),
+ __builtin_return_address(0));
+ if (!p) {
+ if (!dma_release_from_contiguous(dev, page, count))
+ __free_pages(page, get_order(size));
+ }
+ return p;
+ }
+#endif
+ ret = (unsigned long)page_address(page);
BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
- *handle = virt_to_bus((void *)ret);
__invalidate_dcache_range(ret, size);
return (void *)uncached;
@@ -161,13 +174,20 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
- unsigned long addr = (unsigned long)vaddr +
- XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
- struct page *page = virt_to_page(addr);
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR ||
- addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
+ unsigned long addr = (unsigned long)vaddr;
+ struct page *page;
+
+ if (addr >= XCHAL_KSEG_BYPASS_VADDR &&
+ addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE) {
+ addr += XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
+ page = virt_to_page(addr);
+ } else {
+#ifdef CONFIG_MMU
+ dma_common_free_remap(vaddr, size, VM_MAP);
+#endif
+ page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle)));
+ }
if (!dma_release_from_contiguous(dev, page, count))
__free_pages(page, get_order(size));
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index d776ec0d7b22..34aead7dcb48 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -79,19 +79,75 @@ void __init zones_init(void)
free_area_init_node(0, zones_size, ARCH_PFN_OFFSET, NULL);
}
+#ifdef CONFIG_HIGHMEM
+static void __init free_area_high(unsigned long pfn, unsigned long end)
+{
+ for (; pfn < end; pfn++)
+ free_highmem_page(pfn_to_page(pfn));
+}
+
+static void __init free_highpages(void)
+{
+ unsigned long max_low = max_low_pfn;
+ struct memblock_region *mem, *res;
+
+ reset_all_zones_managed_pages();
+ /* set highmem page free */
+ for_each_memblock(memory, mem) {
+ unsigned long start = memblock_region_memory_base_pfn(mem);
+ unsigned long end = memblock_region_memory_end_pfn(mem);
+
+ /* Ignore complete lowmem entries */
+ if (end <= max_low)
+ continue;
+
+ if (memblock_is_nomap(mem))
+ continue;
+
+ /* Truncate partial highmem entries */
+ if (start < max_low)
+ start = max_low;
+
+ /* Find and exclude any reserved regions */
+ for_each_memblock(reserved, res) {
+ unsigned long res_start, res_end;
+
+ res_start = memblock_region_reserved_base_pfn(res);
+ res_end = memblock_region_reserved_end_pfn(res);
+
+ if (res_end < start)
+ continue;
+ if (res_start < start)
+ res_start = start;
+ if (res_start > end)
+ res_start = end;
+ if (res_end > end)
+ res_end = end;
+ if (res_start != start)
+ free_area_high(start, res_start);
+ start = res_end;
+ if (start == end)
+ break;
+ }
+
+ /* And now free anything which remains */
+ if (start < end)
+ free_area_high(start, end);
+ }
+}
+#else
+static void __init free_highpages(void)
+{
+}
+#endif
+
/*
* Initialize memory pages.
*/
void __init mem_init(void)
{
-#ifdef CONFIG_HIGHMEM
- unsigned long tmp;
-
- reset_all_zones_managed_pages();
- for (tmp = max_low_pfn; tmp < max_pfn; tmp++)
- free_highmem_page(pfn_to_page(tmp));
-#endif
+ free_highpages();
max_mapnr = max_pfn - ARCH_PFN_OFFSET;
high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT);