Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Makefile | 15
-rw-r--r--  arch/powerpc/configs/pseries_defconfig | 1
-rw-r--r--  arch/powerpc/crypto/crc-vpmsum_test.c | 10
-rw-r--r--  arch/powerpc/include/asm/book3s/32/kup.h | 145
-rw-r--r--  arch/powerpc/include/asm/book3s/32/mmu-hash.h | 5
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-4k.h | 23
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-64k.h | 21
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h | 95
-rw-r--r--  arch/powerpc/include/asm/book3s/64/kup-radix.h | 108
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h | 70
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu.h | 107
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 12
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix-4k.h | 9
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix-64k.h | 8
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix.h | 40
-rw-r--r--  arch/powerpc/include/asm/book3s/64/slice.h | 13
-rw-r--r--  arch/powerpc/include/asm/drmem.h | 21
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 2
-rw-r--r--  arch/powerpc/include/asm/feature-fixups.h | 3
-rw-r--r--  arch/powerpc/include/asm/futex.h | 4
-rw-r--r--  arch/powerpc/include/asm/kup.h | 73
-rw-r--r--  arch/powerpc/include/asm/mce.h | 1
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 25
-rw-r--r--  arch/powerpc/include/asm/nohash/32/kup-8xx.h | 58
-rw-r--r--  arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 76
-rw-r--r--  arch/powerpc/include/asm/nohash/64/mmu.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h | 3
-rw-r--r--  arch/powerpc/include/asm/page.h | 11
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 2
-rw-r--r--  arch/powerpc/include/asm/processor.h | 3
-rw-r--r--  arch/powerpc/include/asm/ptrace.h | 11
-rw-r--r--  arch/powerpc/include/asm/sparsemem.h | 4
-rw-r--r--  arch/powerpc/include/asm/uaccess.h | 38
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 7
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 28
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 27
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 15
-rw-r--r--  arch/powerpc/kernel/head_32.S | 54
-rw-r--r--  arch/powerpc/kernel/head_64.S | 4
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 2
-rw-r--r--  arch/powerpc/kernel/paca.c | 12
-rw-r--r--  arch/powerpc/kernel/process.c | 3
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 10
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 10
-rw-r--r--  arch/powerpc/kernel/time.c | 1
-rw-r--r--  arch/powerpc/kernel/vdso32/gettimeofday.S | 2
-rw-r--r--  arch/powerpc/kernel/watchdog.c | 81
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c | 2
-rw-r--r--  arch/powerpc/lib/checksum_wrappers.c | 4
-rw-r--r--  arch/powerpc/lib/code-patching.c | 5
-rw-r--r--  arch/powerpc/lib/memcmp_64.S | 17
-rw-r--r--  arch/powerpc/mm/8xx_mmu.c | 24
-rw-r--r--  arch/powerpc/mm/copro_fault.c | 18
-rw-r--r--  arch/powerpc/mm/drmem.c | 6
-rw-r--r--  arch/powerpc/mm/fault.c | 49
-rw-r--r--  arch/powerpc/mm/hash_low_32.S | 6
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 81
-rw-r--r--  arch/powerpc/mm/highmem.c | 14
-rw-r--r--  arch/powerpc/mm/init-common.c | 26
-rw-r--r--  arch/powerpc/mm/init_32.c | 3
-rw-r--r--  arch/powerpc/mm/mem.c | 11
-rw-r--r--  arch/powerpc/mm/mmu_context_book3s64.c | 29
-rw-r--r--  arch/powerpc/mm/numa.c | 33
-rw-r--r--  arch/powerpc/mm/pgtable-hash64.c | 13
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c | 55
-rw-r--r--  arch/powerpc/mm/pgtable_64.c | 9
-rw-r--r--  arch/powerpc/mm/pkeys.c | 1
-rw-r--r--  arch/powerpc/mm/ppc_mmu_32.c | 23
-rw-r--r--  arch/powerpc/mm/ptdump/hashpagetable.c | 2
-rw-r--r--  arch/powerpc/mm/ptdump/ptdump.c | 3
-rw-r--r--  arch/powerpc/mm/slb.c | 28
-rw-r--r--  arch/powerpc/mm/slice.c | 49
-rw-r--r--  arch/powerpc/mm/subpage-prot.c | 34
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c | 6
-rw-r--r--  arch/powerpc/net/bpf_jit.h | 17
-rw-r--r--  arch/powerpc/net/bpf_jit32.h | 4
-rw-r--r--  arch/powerpc/net/bpf_jit64.h | 20
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 12
-rw-r--r--  arch/powerpc/platforms/83xx/usb.c | 4
-rw-r--r--  arch/powerpc/platforms/8xx/pic.c | 3
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 37
-rw-r--r--  arch/powerpc/platforms/cell/spu_base.c | 4
-rw-r--r--  arch/powerpc/platforms/embedded6xx/holly.c | 19
-rw-r--r--  arch/powerpc/platforms/powernv/opal-call.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c | 17
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 13
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 3
-rw-r--r--  arch/powerpc/platforms/pseries/pmem.c | 3
-rw-r--r--  arch/powerpc/platforms/pseries/pseries_energy.c | 27
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c | 134
90 files changed, 1622 insertions, 523 deletions
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 7de49889bd5d..258ea6b2f2e7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -34,11 +34,10 @@ ifdef CONFIG_PPC_BOOK3S_32
KBUILD_CFLAGS += -mcpu=powerpc
endif
-ifeq ($(CROSS_COMPILE),)
-KBUILD_DEFCONFIG := $(shell uname -m)_defconfig
-else
-KBUILD_DEFCONFIG := ppc64_defconfig
-endif
+# If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use
+# ppc64_defconfig because we have nothing better to go on.
+uname := $(shell uname -m)
+KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig
ifdef CONFIG_PPC64
new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
@@ -367,6 +366,10 @@ ppc32_allmodconfig:
$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \
-f $(srctree)/Makefile allmodconfig
+PHONY += ppc_defconfig
+ppc_defconfig:
+ $(call merge_into_defconfig,book3s_32.config,)
+
PHONY += ppc64le_allmodconfig
ppc64le_allmodconfig:
$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \
@@ -406,7 +409,9 @@ vdso_install:
ifdef CONFIG_PPC64
$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
endif
+ifdef CONFIG_VDSO32
$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
+endif
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index ea79c519863d..62e12f61a3b2 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -217,6 +217,7 @@ CONFIG_USB_MON=m
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_HCD_PPC_OF is not set
CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_XHCI_HCD=y
CONFIG_USB_STORAGE=m
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=m
diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c
index 0153a9c6f4af..98ea4f4d3dde 100644
--- a/arch/powerpc/crypto/crc-vpmsum_test.c
+++ b/arch/powerpc/crypto/crc-vpmsum_test.c
@@ -78,16 +78,12 @@ static int __init crc_test_init(void)
pr_info("crc-vpmsum_test begins, %lu iterations\n", iterations);
for (i=0; i<iterations; i++) {
- size_t len, offset;
+ size_t offset = prandom_u32_max(16);
+ size_t len = prandom_u32_max(MAX_CRC_LENGTH);
- get_random_bytes(data, MAX_CRC_LENGTH);
- get_random_bytes(&len, sizeof(len));
- get_random_bytes(&offset, sizeof(offset));
-
- len %= MAX_CRC_LENGTH;
- offset &= 15;
if (len <= offset)
continue;
+ prandom_bytes(data, len);
len -= offset;
crypto_shash_update(crct10dif_shash, data+offset, len);
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
new file mode 100644
index 000000000000..677e9babef80
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_KUP_H
+#define _ASM_POWERPC_BOOK3S_32_KUP_H
+
+#include <asm/book3s/32/mmu-hash.h>
+
+#ifdef __ASSEMBLY__
+
+.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */
+101: mtsrin \gpr1, \gpr2
+ addi \gpr1, \gpr1, 0x111 /* next VSID */
+ rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */
+ addis \gpr2, \gpr2, 0x1000 /* address of next segment */
+ bdnz 101b
+ isync
+.endm
+
+.macro kuep_lock gpr1, gpr2
+#ifdef CONFIG_PPC_KUEP
+ li \gpr1, NUM_USER_SEGMENTS
+ li \gpr2, 0
+ mtctr \gpr1
+ mfsrin \gpr1, \gpr2
+ oris \gpr1, \gpr1, SR_NX@h /* set Nx */
+ kuep_update_sr \gpr1, \gpr2
+#endif
+.endm
+
+.macro kuep_unlock gpr1, gpr2
+#ifdef CONFIG_PPC_KUEP
+ li \gpr1, NUM_USER_SEGMENTS
+ li \gpr2, 0
+ mtctr \gpr1
+ mfsrin \gpr1, \gpr2
+ rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */
+ kuep_update_sr \gpr1, \gpr2
+#endif
+.endm
+
+#ifdef CONFIG_PPC_KUAP
+
+.macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */
+101: mtsrin \gpr1, \gpr2
+ addi \gpr1, \gpr1, 0x111 /* next VSID */
+ rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */
+ addis \gpr2, \gpr2, 0x1000 /* address of next segment */
+ cmplw \gpr2, \gpr3
+ blt- 101b
+ isync
+.endm
+
+.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
+ lwz \gpr2, KUAP(\thread)
+ rlwinm. \gpr3, \gpr2, 28, 0xf0000000
+ stw \gpr2, STACK_REGS_KUAP(\sp)
+ beq+ 102f
+ li \gpr1, 0
+ stw \gpr1, KUAP(\thread)
+ mfsrin \gpr1, \gpr2
+ oris \gpr1, \gpr1, SR_KS@h /* set Ks */
+ kuap_update_sr \gpr1, \gpr2, \gpr3
+102:
+.endm
+
+.macro kuap_restore sp, current, gpr1, gpr2, gpr3
+ lwz \gpr2, STACK_REGS_KUAP(\sp)
+ rlwinm. \gpr3, \gpr2, 28, 0xf0000000
+ stw \gpr2, THREAD + KUAP(\current)
+ beq+ 102f
+ mfsrin \gpr1, \gpr2
+ rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */
+ kuap_update_sr \gpr1, \gpr2, \gpr3
+102:
+.endm
+
+.macro kuap_check current, gpr
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ lwz \gpr, THREAD + KUAP(\current)
+999: twnei \gpr, 0
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+.endm
+
+#endif /* CONFIG_PPC_KUAP */
+
+#else /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <linux/sched.h>
+
+static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
+{
+ barrier(); /* make sure thread.kuap is updated before playing with SRs */
+ while (addr < end) {
+ mtsrin(sr, addr);
+ sr += 0x111; /* next VSID */
+ sr &= 0xf0ffffff; /* clear VSID overflow */
+ addr += 0x10000000; /* address of next segment */
+ }
+ isync(); /* Context sync required after mtsrin() */
+}
+
+static inline void allow_user_access(void __user *to, const void __user *from, u32 size)
+{
+ u32 addr, end;
+
+ if (__builtin_constant_p(to) && to == NULL)
+ return;
+
+ addr = (__force u32)to;
+
+ if (!addr || addr >= TASK_SIZE || !size)
+ return;
+
+ end = min(addr + size, TASK_SIZE);
+ current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
+ kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from, u32 size)
+{
+ u32 addr = (__force u32)to;
+ u32 end = min(addr + size, TASK_SIZE);
+
+ if (!addr || addr >= TASK_SIZE || !size)
+ return;
+
+ current->thread.kuap = 0;
+ kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ if (!is_write)
+ return false;
+
+ return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !");
+}
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */
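As a rough illustration of the segment-window encoding used by allow_user_access() above: thread.kuap keeps the 256MB-segment-aligned start of the access in its upper bits and the index one past the last opened segment in the low nibble. The following stand-alone, user-space sketch (hypothetical helper name, illustrative TASK_SIZE, not part of the patch) recomputes that value:

#include <stdio.h>
#include <stdint.h>

#define TASK_SIZE 0xb0000000u		/* illustrative only, not the kernel's value */

static uint32_t kuap_window(uint32_t addr, uint32_t size)
{
	uint32_t end = addr + size;

	if (end > TASK_SIZE)
		end = TASK_SIZE;
	/* same formula as allow_user_access() above */
	return (addr & 0xf0000000u) | ((((end - 1) >> 28) + 1) & 0xf);
}

int main(void)
{
	/* a 4KB access at 0x30001000 opens segment 3 only -> 0x30000004 */
	printf("%08x\n", (unsigned int)kuap_window(0x30001000u, 0x1000));
	/* an access spanning 0x3fffff00..0x40000100 opens segments 3 and 4 -> 0x30000005 */
	printf("%08x\n", (unsigned int)kuap_window(0x3fffff00u, 0x200));
	return 0;
}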
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 5cb588395fdc..f9eae105a9f4 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -63,6 +63,11 @@ typedef pte_t *pgtable_t;
#define PP_RWRW 2 /* Supervisor read/write, User read/write */
#define PP_RXRX 3 /* Supervisor read, User read */
+/* Values for Segment Registers */
+#define SR_NX 0x10000000 /* No Execute */
+#define SR_KP 0x20000000 /* User key */
+#define SR_KS 0x40000000 /* Supervisor key */
+
#ifndef __ASSEMBLY__
/*
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index cf5ba5254299..8fd8599c9395 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -2,10 +2,10 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_4K_H
-#define H_PTE_INDEX_SIZE 9
-#define H_PMD_INDEX_SIZE 7
-#define H_PUD_INDEX_SIZE 9
-#define H_PGD_INDEX_SIZE 9
+#define H_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 4KB = 2MB
+#define H_PMD_INDEX_SIZE 7 // size: 8B << 7 = 1KB, maps: 2^7 x 2MB = 256MB
+#define H_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB
+#define H_PGD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 128GB = 64TB
/*
* Each context is 512TB. But on 4k we restrict our max TASK size to 64TB
@@ -13,6 +13,21 @@
*/
#define MAX_EA_BITS_PER_CONTEXT 46
+#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2)
+
+/*
+ * Our page table limits us to 64TB. Hence for the kernel mapping,
+ * each MAP area is limited to 16 TB.
+ * The four map areas are: linear mapping, vmap, IO and vmemmap
+ */
+#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 16TB
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000)
+
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index f82ee8a3b561..d1d9177d9ebd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -2,16 +2,29 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
-#define H_PTE_INDEX_SIZE 8
-#define H_PMD_INDEX_SIZE 10
-#define H_PUD_INDEX_SIZE 10
-#define H_PGD_INDEX_SIZE 8
+#define H_PTE_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 64KB = 16MB
+#define H_PMD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB
+#define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB
+#define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB
+
/*
* Each context is 512TB size. SLB miss for first context/default context
* is handled in the hotpath.
*/
#define MAX_EA_BITS_PER_CONTEXT 49
+#define REGION_SHIFT MAX_EA_BITS_PER_CONTEXT
+
+/*
+ * We use one context for each MAP area.
+ */
+#define H_KERN_MAP_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 2PB
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc008000000000000)
/*
* 64k aligned address free up few of the lower bits of RPN for us
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 54b7af6cd27f..1d1183048cfd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -29,6 +29,10 @@
#define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
+/*
+ * Top 2 bits are ignored in page table walk.
+ */
+#define EA_MASK (~(0xcUL << 60))
/*
* We store the slot details in the second half of page table.
@@ -42,59 +46,63 @@
#endif
/*
- * Define the address range of the kernel non-linear virtual area. In contrast
- * to the linear mapping, this is managed using the kernel page tables and then
- * inserted into the hash page table to actually take effect, similarly to user
- * mappings.
+ * +------------------------------+
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel virtual map end (0xc00e000000000000)
+ * | |
+ * | |
+ * | 512TB/16TB of vmemmap |
+ * | |
+ * | |
+ * +------------------------------+ Kernel vmemmap start
+ * | |
+ * | 512TB/16TB of IO map |
+ * | |
+ * +------------------------------+ Kernel IO map start
+ * | |
+ * | 512TB/16TB of vmap |
+ * | |
+ * +------------------------------+ Kernel virt start (0xc008000000000000)
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel linear (0xc.....)
*/
-#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
-/*
- * Allow virtual mapping of one context size.
- * 512TB for 64K page size
- * 64TB for 4K page size
- */
-#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+#define H_VMALLOC_START H_KERN_VIRT_START
+#define H_VMALLOC_SIZE H_KERN_MAP_SIZE
+#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
-/*
- * 8TB IO mapping size
- */
-#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */
+#define H_KERN_IO_START H_VMALLOC_END
+#define H_KERN_IO_SIZE H_KERN_MAP_SIZE
+#define H_KERN_IO_END (H_KERN_IO_START + H_KERN_IO_SIZE)
-/*
- * The vmalloc space starts at the beginning of the kernel non-linear virtual
- * region, and occupies 504T (64K) or 56T (4K)
- */
-#define H_VMALLOC_START H_KERN_VIRT_START
-#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE)
-#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
+#define H_VMEMMAP_START H_KERN_IO_END
+#define H_VMEMMAP_SIZE H_KERN_MAP_SIZE
+#define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE)
-#define H_KERN_IO_START H_VMALLOC_END
+#define NON_LINEAR_REGION_ID(ea) ((((unsigned long)ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2)
/*
* Region IDs
*/
-#define REGION_SHIFT 60UL
-#define REGION_MASK (0xfUL << REGION_SHIFT)
-#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START))
-#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
-#define USER_REGION_ID (0UL)
+#define USER_REGION_ID 0
+#define LINEAR_MAP_REGION_ID 1
+#define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START)
+#define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START)
+#define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START)
/*
* Defines the address of the vmemap area, in its own region on
* hash table CPUs.
*/
-#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
-
#ifdef CONFIG_PPC_MM_SLICES
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#endif /* CONFIG_PPC_MM_SLICES */
-
/* PTEIDX nibble */
#define _PTEIDX_SECONDARY 0x8
#define _PTEIDX_GROUP_IX 0x7
@@ -103,6 +111,25 @@
#define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1)
#ifndef __ASSEMBLY__
+static inline int get_region_id(unsigned long ea)
+{
+ int region_id;
+ int id = (ea >> 60UL);
+
+ if (id == 0)
+ return USER_REGION_ID;
+
+ if (ea < H_KERN_VIRT_START)
+ return LINEAR_MAP_REGION_ID;
+
+ VM_BUG_ON(id != 0xc);
+ BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2);
+
+ region_id = NON_LINEAR_REGION_ID(ea);
+ VM_BUG_ON(region_id > VMEMMAP_REGION_ID);
+ return region_id;
+}
+
#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS)
#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS)
static inline int hash__pgd_bad(pgd_t pgd)
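As a quick check of the new region numbering, here is a stand-alone sketch (not kernel code; hypothetical helper name, and it assumes the 64K-page constants H_KERN_VIRT_START = 0xc008000000000000 and a 49-bit REGION_SHIFT) that mirrors get_region_id() above:

#include <stdio.h>
#include <stdint.h>

#define H_KERN_VIRT_START 0xc008000000000000ULL
#define REGION_SHIFT	  49	/* MAX_EA_BITS_PER_CONTEXT with 64K pages */

static int region_id(uint64_t ea)
{
	if ((ea >> 60) == 0)
		return 0;				/* USER_REGION_ID */
	if (ea < H_KERN_VIRT_START)
		return 1;				/* LINEAR_MAP_REGION_ID */
	return (int)(((ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2);
}

int main(void)
{
	printf("%d\n", region_id(0x0000700000000000ULL));	/* user        -> 0 */
	printf("%d\n", region_id(0xc000000012345678ULL));	/* linear map  -> 1 */
	printf("%d\n", region_id(0xc008000000001000ULL));	/* vmalloc     -> 2 */
	printf("%d\n", region_id(0xc00a000000001000ULL));	/* IO          -> 3 */
	printf("%d\n", region_id(0xc00c000000001000ULL));	/* vmemmap     -> 4 */
	return 0;
}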
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
new file mode 100644
index 000000000000..7679bd0c5af0
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
+#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
+
+#include <linux/const.h>
+
+#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000)
+#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000)
+#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
+#define AMR_KUAP_SHIFT 62
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_restore_amr gpr
+#ifdef CONFIG_PPC_KUAP
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ ld \gpr, STACK_REGS_KUAP(r1)
+ mtspr SPRN_AMR, \gpr
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+.macro kuap_check_amr gpr1, gpr2
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ mfspr \gpr1, SPRN_AMR
+ li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
+ sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
+999: tdne \gpr1, \gpr2
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
+#ifdef CONFIG_PPC_KUAP
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ .ifnb \msr_pr_cr
+ bne \msr_pr_cr, 99f
+ .endif
+ mfspr \gpr1, SPRN_AMR
+ std \gpr1, STACK_REGS_KUAP(r1)
+ li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
+ sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
+ cmpd \use_cr, \gpr1, \gpr2
+ beq \use_cr, 99f
+ // We don't isync here because we very recently entered via rfid
+ mtspr SPRN_AMR, \gpr2
+ isync
+99:
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <asm/reg.h>
+
+/*
+ * We support individually allowing read or write, but we don't support nesting
+ * because that would require an expensive read/modify write of the AMR.
+ */
+
+static inline void set_kuap(unsigned long value)
+{
+ if (!mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ return;
+
+ /*
+ * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both
+ * before and after the move to AMR. See table 6 on page 1134.
+ */
+ isync();
+ mtspr(SPRN_AMR, value);
+ isync();
+}
+
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ // This is written so we can resolve to a single case at build time
+ if (__builtin_constant_p(to) && to == NULL)
+ set_kuap(AMR_KUAP_BLOCK_WRITE);
+ else if (__builtin_constant_p(from) && from == NULL)
+ set_kuap(AMR_KUAP_BLOCK_READ);
+ else
+ set_kuap(0);
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ set_kuap(AMR_KUAP_BLOCKED);
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
+ (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
+ "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
+}
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */
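The allow_read_from_user()/allow_write_to_user() wrappers added in asm/kup.h further down pass NULL for the unused direction, which is what lets allow_user_access() above pick a single AMR value at build time. A small user-space sketch of that mapping (hypothetical helper name, constants copied from above):

#include <stdio.h>
#include <stdint.h>

#define AMR_KUAP_BLOCK_READ  0x4000000000000000ULL
#define AMR_KUAP_BLOCK_WRITE 0x8000000000000000ULL
#define AMR_KUAP_BLOCKED     (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)

/* mirrors allow_user_access(): to == NULL means "read only", and so on */
static uint64_t amr_for(const void *to, const void *from)
{
	if (to == NULL)
		return AMR_KUAP_BLOCK_WRITE;	/* allow_read_from_user() */
	if (from == NULL)
		return AMR_KUAP_BLOCK_READ;	/* allow_write_to_user() */
	return 0;				/* raw_copy_in_user(): both directions */
}

int main(void)
{
	char dst[8], src[8] = "x";

	printf("read:  %016llx\n", (unsigned long long)amr_for(NULL, src));
	printf("write: %016llx\n", (unsigned long long)amr_for(dst, NULL));
	printf("both:  %016llx\n", (unsigned long long)amr_for(dst, src));
	printf("lock:  %016llx\n", (unsigned long long)AMR_KUAP_BLOCKED);
	return 0;
}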
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index a28a28079edb..1e4705516a54 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -588,7 +588,8 @@ extern void slb_set_size(u16 size);
#endif
#define MAX_VMALLOC_CTX_CNT 1
-#define MAX_MEMMAP_CTX_CNT 1
+#define MAX_IO_CTX_CNT 1
+#define MAX_VMEMMAP_CTX_CNT 1
/*
* 256MB segment
@@ -601,13 +602,10 @@ extern void slb_set_size(u16 size);
* would give a protovsid of 0x1fffffffff. That will result in a VSID 0
* because of the modulo operation in vsid scramble.
*
- * We add one extra context to MIN_USER_CONTEXT so that we can map kernel
- * context easily. The +1 is to map the unused 0xe region mapping.
*/
#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
- MAX_MEMMAP_CTX_CNT + 2)
-
+ MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT)
/*
* For platforms that support on 65bit VA we limit the context bits
*/
@@ -657,8 +655,8 @@ extern void slb_set_size(u16 size);
/* 4 bits per slice and we have one slice per 1TB */
#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
-#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.slb_addr_limit >> 41)
-
+#define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE)
+#define TASK_SLICE_ARRAY_SZ(x) ((x)->hash_context->slb_addr_limit >> 41)
#ifndef __ASSEMBLY__
#ifdef CONFIG_PPC_SUBPAGE_PROT
@@ -687,12 +685,41 @@ struct subpage_prot_table {
#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS)
extern void subpage_prot_free(struct mm_struct *mm);
-extern void subpage_prot_init_new_context(struct mm_struct *mm);
#else
static inline void subpage_prot_free(struct mm_struct *mm) {}
-static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
#endif /* CONFIG_PPC_SUBPAGE_PROT */
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * up to the 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
+struct hash_mm_context {
+ u16 user_psize; /* page size index */
+
+ /* SLB page size encodings*/
+ unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
+ unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+ unsigned long slb_addr_limit;
+#ifdef CONFIG_PPC_64K_PAGES
+ struct slice_mask mask_64k;
+#endif
+ struct slice_mask mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_16m;
+ struct slice_mask mask_16g;
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ struct subpage_prot_table *spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+};
+
#if 0
/*
* The code below is equivalent to this function for arguments
@@ -747,7 +774,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
/*
* Bad address. We return VSID 0 for that
*/
- if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+ if ((ea & EA_MASK) >= H_PGTABLE_RANGE)
return 0;
if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
@@ -774,28 +801,29 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
* 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
* 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
* 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
-
- * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ]
- * 0x00006 - Not used - Can map 0xe000000000000000 range.
- * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ]
*
- * So we can compute the context from the region (top nibble) by
- * subtracting 11, or 0xc - 1.
+ * vmap, IO, vmemmap
+ *
+ * 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff]
+ * 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff]
+ * 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff]
+ *
*/
static inline unsigned long get_kernel_context(unsigned long ea)
{
- unsigned long region_id = REGION_ID(ea);
+ unsigned long region_id = get_region_id(ea);
unsigned long ctx;
/*
- * For linear mapping we do support multiple context
+ * Depending on kernel config, the kernel region can have one or more
+ * contexts.
*/
- if (region_id == KERNEL_REGION_ID) {
+ if (region_id == LINEAR_MAP_REGION_ID) {
/*
* We already verified ea to be not beyond the addr limit.
*/
- ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
+ ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
} else
- ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
+ ctx = region_id + MAX_KERNEL_CTX_CNT - 1;
return ctx;
}
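A stand-alone sketch (hypothetical helper name, not kernel code) of the resulting kernel context numbering, assuming the 64K-page value MAX_EA_BITS_PER_CONTEXT = 49 and MAX_KERNEL_CTX_CNT = 4 as the table above implies; it reproduces contexts 1-4 for the linear map and 5-7 for vmalloc/IO/vmemmap:

#include <stdio.h>
#include <stdint.h>

#define EA_MASK			(~(0xcULL << 60))
#define MAX_EA_BITS_PER_CONTEXT	49
#define MAX_KERNEL_CTX_CNT	4
#define H_KERN_VIRT_START	0xc008000000000000ULL

static unsigned long kernel_context(uint64_t ea)
{
	if (ea < H_KERN_VIRT_START)	/* linear map region */
		return 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
	/* vmalloc/IO/vmemmap region ids are 2, 3, 4 -> contexts 5, 6, 7 */
	return (((ea - H_KERN_VIRT_START) >> MAX_EA_BITS_PER_CONTEXT) + 2)
	       + MAX_KERNEL_CTX_CNT - 1;
}

int main(void)
{
	printf("%lu\n", kernel_context(0xc000000000000000ULL));	/* 1 */
	printf("%lu\n", kernel_context(0xc006000000000000ULL));	/* 4 */
	printf("%lu\n", kernel_context(0xc008000000000000ULL));	/* 5, vmalloc */
	printf("%lu\n", kernel_context(0xc00c000000000000ULL));	/* 7, vmemmap */
	return 0;
}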
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 1ceee000c18d..230a9dec7677 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -35,6 +35,21 @@ typedef pte_t *pgtable_t;
#endif /* __ASSEMBLY__ */
+/*
+ * If we store section details in page->flags, we can't increase MAX_PHYSMEM_BITS:
+ * increasing SECTIONS_WIDTH means node details no longer fit in page->flags and
+ * page_to_nid() has to do a page->section->node lookup instead.
+ * Hence only increase it for VMEMMAP. Further, we depend on SPARSEMEM_EXTREME to
+ * keep the memory requirements reasonable with a large number of sections.
+ * 51 bits is the max physical real address on POWER9.
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
+ defined(CONFIG_PPC_64K_PAGES)
+#define MAX_PHYSMEM_BITS 51
+#else
+#define MAX_PHYSMEM_BITS 46
+#endif
+
/* 64-bit classic hash table MMU */
#include <asm/book3s/64/mmu-hash.h>
@@ -89,16 +104,6 @@ struct spinlock;
/* Maximum possible number of NPUs in a system. */
#define NV_MAX_NPUS 8
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
- u64 low_slices;
- DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
-
typedef struct {
union {
/*
@@ -112,7 +117,6 @@ typedef struct {
mm_context_id_t id;
mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
};
- u16 user_psize; /* page size index */
/* Number of bits in the mm_cpumask */
atomic_t active_cpus;
@@ -122,27 +126,9 @@ typedef struct {
/* NPU NMMU context */
struct npu_context *npu_context;
+ struct hash_mm_context *hash_context;
-#ifdef CONFIG_PPC_MM_SLICES
- /* SLB page size encodings*/
- unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
- unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
- unsigned long slb_addr_limit;
-# ifdef CONFIG_PPC_64K_PAGES
- struct slice_mask mask_64k;
-# endif
- struct slice_mask mask_4k;
-# ifdef CONFIG_HUGETLB_PAGE
- struct slice_mask mask_16m;
- struct slice_mask mask_16g;
-# endif
-#else
- u16 sllp; /* SLB page size encoding */
-#endif
unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
- struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
/*
* pagetable fragment support
*/
@@ -163,6 +149,67 @@ typedef struct {
#endif
} mm_context_t;
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+ return ctx->hash_context->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+ ctx->hash_context->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+ return ctx->hash_context->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+ ctx->hash_context->slb_addr_limit = limit;
+}
+
+#ifdef CONFIG_PPC_64K_PAGES
+static inline struct slice_mask *mm_ctx_slice_mask_64k(mm_context_t *ctx)
+{
+ return &ctx->hash_context->mask_64k;
+}
+#endif
+
+static inline struct slice_mask *mm_ctx_slice_mask_4k(mm_context_t *ctx)
+{
+ return &ctx->hash_context->mask_4k;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline struct slice_mask *mm_ctx_slice_mask_16m(mm_context_t *ctx)
+{
+ return &ctx->hash_context->mask_16m;
+}
+
+static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx)
+{
+ return &ctx->hash_context->mask_16g;
+}
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
+{
+ return ctx->hash_context->spt;
+}
+#endif
+
/*
* The current system page and segment sizes
*/
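The accessors above only add one level of indirection through the new hash_context pointer. A stubbed sketch (types faked, values illustrative, not kernel code) of what callers now do instead of touching mm->context.user_psize directly:

#include <stdio.h>

struct hash_mm_context { unsigned short user_psize; };
typedef struct { struct hash_mm_context *hash_context; } mm_context_t;

/* same shape as the kernel accessor above */
static unsigned short mm_ctx_user_psize(mm_context_t *ctx)
{
	return ctx->hash_context->user_psize;
}

int main(void)
{
	struct hash_mm_context hctx = { .user_psize = 2 };	/* illustrative page size index */
	mm_context_t ctx = { .hash_context = &hctx };

	/* old code read ctx.user_psize; new code goes through the accessor */
	printf("%u\n", (unsigned int)mm_ctx_user_psize(&ctx));
	return 0;
}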
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 581f91be9dd4..7dede2e34b70 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -277,9 +277,11 @@ extern unsigned long __vmalloc_end;
extern unsigned long __kernel_virt_start;
extern unsigned long __kernel_virt_size;
extern unsigned long __kernel_io_start;
+extern unsigned long __kernel_io_end;
#define KERN_VIRT_START __kernel_virt_start
-#define KERN_VIRT_SIZE __kernel_virt_size
#define KERN_IO_START __kernel_io_start
+#define KERN_IO_END __kernel_io_end
+
extern struct page *vmemmap;
extern unsigned long ioremap_bot;
extern unsigned long pci_io_base;
@@ -296,8 +298,7 @@ extern unsigned long pci_io_base;
#include <asm/barrier.h>
/*
- * The second half of the kernel virtual space is used for IO mappings,
- * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * The IO space is itself carved into the PIO region (ISA and PHB IO space) and
* the ioremap space
*
* ISA_IO_BASE = KERN_IO_START, 64K reserved area
@@ -310,7 +311,7 @@ extern unsigned long pci_io_base;
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
-#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
+#define IOREMAP_END (KERN_IO_END)
/* Advertise special mapping type for AGP */
#define HAVE_PAGE_AGP
@@ -992,7 +993,8 @@ extern struct page *pgd_page(pgd_t pgd);
(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while(0)
+
+static inline void pte_unmap(pte_t *pte) { }
/* to find an entry in a kernel page-table-directory */
/* This now only contains the vmalloc pages */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index 863c3e8286fb..d5f5ab73dc7f 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -5,10 +5,11 @@
/*
* For 4K page size supported index is 13/9/9/9
*/
-#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */
-#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
-#define RADIX_PUD_INDEX_SIZE 9
-#define RADIX_PGD_INDEX_SIZE 13
+#define RADIX_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 4K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
+
/*
* One fragment per page
*/
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
index ccb78ca9d0c5..54e33828b0fb 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -5,10 +5,10 @@
/*
* For 64K page size supported index is 13/9/9/5
*/
-#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */
-#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
-#define RADIX_PUD_INDEX_SIZE 9
-#define RADIX_PGD_INDEX_SIZE 13
+#define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
/*
* We use a 256 byte PTE page fragment in radix
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 5ab134eeed20..574eca33f893 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -72,19 +72,17 @@
* | |
* | |
* | |
- * +------------------------------+ Kernel IO map end (0xc010000000000000)
+ * +------------------------------+ Kernel vmemmap end (0xc010000000000000)
* | |
+ * | 512TB |
* | |
- * | 1/2 of virtual map |
+ * +------------------------------+ Kernel IO map end/vmemmap start
* | |
+ * | 512TB |
* | |
- * +------------------------------+ Kernel IO map start
+ * +------------------------------+ Kernel vmap end/IO map start
* | |
- * | 1/4 of virtual map |
- * | |
- * +------------------------------+ Kernel vmemap start
- * | |
- * | 1/4 of virtual map |
+ * | 512TB |
* | |
* +------------------------------+ Kernel virt start (0xc008000000000000)
* | |
@@ -93,24 +91,24 @@
* +------------------------------+ Kernel linear (0xc.....)
*/
-#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
-#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000)
-
+#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
/*
- * The vmalloc space starts at the beginning of that region, and
- * occupies a quarter of it on radix config.
- * (we keep a quarter for the virtual memmap)
+ * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick
+ * the same value as hash.
*/
+#define RADIX_KERN_MAP_SIZE (1UL << 49)
+
#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START
-#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2)
+#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE
#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)
-/*
- * Defines the address of the vmemap area, in its own region on
- * hash table CPUs.
- */
-#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END)
-#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1))
+#define RADIX_KERN_IO_START RADIX_VMALLOC_END
+#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE)
+
+#define RADIX_VMEMMAP_START RADIX_KERN_IO_END
+#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE)
#ifndef __ASSEMBLY__
#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index db0dedab65ee..062e11136e9c 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -2,8 +2,6 @@
#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
#define _ASM_POWERPC_BOOK3S_64_SLICE_H
-#ifdef CONFIG_PPC_MM_SLICES
-
#define SLICE_LOW_SHIFT 28
#define SLICE_LOW_TOP (0x100000000ul)
#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
@@ -13,15 +11,4 @@
#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-#else /* CONFIG_PPC_MM_SLICES */
-
-#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize) \
-do { \
- (mm)->context.user_psize = (psize); \
- (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-
-#endif /* CONFIG_PPC_MM_SLICES */
-
#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 7c1d8e74b25d..7f3279b014db 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -17,6 +17,9 @@ struct drmem_lmb {
u32 drc_index;
u32 aa_index;
u32 flags;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ int nid;
+#endif
};
struct drmem_lmb_info {
@@ -104,4 +107,22 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
lmb->aa_index = 0xffffffff;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline void lmb_set_nid(struct drmem_lmb *lmb)
+{
+ lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
+}
+static inline void lmb_clear_nid(struct drmem_lmb *lmb)
+{
+ lmb->nid = -1;
+}
+#else
+static inline void lmb_set_nid(struct drmem_lmb *lmb)
+{
+}
+static inline void lmb_clear_nid(struct drmem_lmb *lmb)
+{
+}
+#endif
+
#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 937bb630093f..bef4e05a6823 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -497,6 +497,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
RESTORE_CTR(r1, area); \
b bad_stack; \
3: EXCEPTION_PROLOG_COMMON_1(); \
+ kuap_save_amr_and_lock r9, r10, cr1, cr0; \
beq 4f; /* if from kernel mode */ \
ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
SAVE_PPR(area, r9); \
@@ -691,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
*/
#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \
EXCEPTION_PROLOG_COMMON_1(); \
+ kuap_save_amr_and_lock r9, r10, cr1; \
EXCEPTION_PROLOG_COMMON_2(area); \
EXCEPTION_PROLOG_COMMON_3(trap); \
/* Volatile regs are potentially clobbered here */ \
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 40a6c9261a6b..f6fc31f8baff 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -100,6 +100,9 @@ label##5: \
#define END_MMU_FTR_SECTION(msk, val) \
END_MMU_FTR_SECTION_NESTED(msk, val, 97)
+#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \
+ END_MMU_FTR_SECTION_NESTED((msk), (msk), label)
+
#define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk))
#define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0)
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 88b38b37c21b..3a6aa57b9d90 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
{
int oldval = 0, ret;
+ allow_write_to_user(uaddr, sizeof(*uaddr));
pagefault_disable();
switch (op) {
@@ -62,6 +63,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
if (!ret)
*oval = oldval;
+ prevent_write_to_user(uaddr, sizeof(*uaddr));
return ret;
}
@@ -75,6 +77,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
+ allow_write_to_user(uaddr, sizeof(*uaddr));
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
@@ -95,6 +98,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "cc", "memory");
*uval = prev;
+ prevent_write_to_user(uaddr, sizeof(*uaddr));
return ret;
}
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
new file mode 100644
index 000000000000..5b5e39643a27
--- /dev/null
+++ b/arch/powerpc/include/asm/kup.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_H_
+#define _ASM_POWERPC_KUP_H_
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/kup-radix.h>
+#endif
+#ifdef CONFIG_PPC_8xx
+#include <asm/nohash/32/kup-8xx.h>
+#endif
+#ifdef CONFIG_PPC_BOOK3S_32
+#include <asm/book3s/32/kup.h>
+#endif
+
+#ifdef __ASSEMBLY__
+#ifndef CONFIG_PPC_KUAP
+.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
+.endm
+
+.macro kuap_restore sp, current, gpr1, gpr2, gpr3
+.endm
+
+.macro kuap_check current, gpr
+.endm
+
+#endif
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/pgtable.h>
+
+void setup_kup(void);
+
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled);
+#else
+static inline void setup_kuep(bool disabled) { }
+#endif /* CONFIG_PPC_KUEP */
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled);
+#else
+static inline void setup_kuap(bool disabled) { }
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size) { }
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size) { }
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; }
+#endif /* CONFIG_PPC_KUAP */
+
+static inline void allow_read_from_user(const void __user *from, unsigned long size)
+{
+ allow_user_access(NULL, from, size);
+}
+
+static inline void allow_write_to_user(void __user *to, unsigned long size)
+{
+ allow_user_access(to, NULL, size);
+}
+
+static inline void prevent_read_from_user(const void __user *from, unsigned long size)
+{
+ prevent_user_access(NULL, from, size);
+}
+
+static inline void prevent_write_to_user(void __user *to, unsigned long size)
+{
+ prevent_user_access(to, NULL, size);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_KUP_H_ */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 17996bc9382b..ad47fa865324 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -210,6 +210,7 @@ extern void release_mce_event(void);
extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest);
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void);
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 598cdcdd1355..d86c5641bd97 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -107,6 +107,11 @@
*/
#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000)
+/*
+ * Supports KUAP (key 0 controlling userspace addresses) on radix
+ */
+#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000)
+
/* MMU feature bit sets for various CPUs */
#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
@@ -164,7 +169,10 @@ enum {
#endif
#ifdef CONFIG_PPC_RADIX_MMU
MMU_FTR_TYPE_RADIX |
-#endif
+#ifdef CONFIG_PPC_KUAP
+ MMU_FTR_RADIX_KUAP |
+#endif /* CONFIG_PPC_KUAP */
+#endif /* CONFIG_PPC_RADIX_MMU */
0,
};
@@ -341,21 +349,6 @@ static inline bool strict_kernel_rwx_enabled(void)
*/
#define MMU_PAGE_COUNT 16
-/*
- * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
- * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
- * page_to_nid does a page->section->node lookup
- * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
- * memory requirements with large number of sections.
- * 51 bits is the max physical real address on POWER9
- */
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
- defined (CONFIG_PPC_64K_PAGES)
-#define MAX_PHYSMEM_BITS 51
-#elif defined(CONFIG_SPARSEMEM)
-#define MAX_PHYSMEM_BITS 46
-#endif
-
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/mmu.h>
#else /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
new file mode 100644
index 000000000000..1c3133b5f86a
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_8XX_H_
+#define _ASM_POWERPC_KUP_8XX_H_
+
+#include <asm/bug.h>
+
+#ifdef CONFIG_PPC_KUAP
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
+ lis \gpr2, MD_APG_KUAP@h /* only APG0 and APG1 are used */
+ mfspr \gpr1, SPRN_MD_AP
+ mtspr SPRN_MD_AP, \gpr2
+ stw \gpr1, STACK_REGS_KUAP(\sp)
+.endm
+
+.macro kuap_restore sp, current, gpr1, gpr2, gpr3
+ lwz \gpr1, STACK_REGS_KUAP(\sp)
+ mtspr SPRN_MD_AP, \gpr1
+.endm
+
+.macro kuap_check current, gpr
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ mfspr \gpr, SPRN_MD_AP
+ rlwinm \gpr, \gpr, 16, 0xffff
+999: twnei \gpr, MD_APG_KUAP@h
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/reg.h>
+
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ mtspr(SPRN_MD_AP, MD_APG_INIT);
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
+ "Bug: fault blocked by AP register !");
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* _ASM_POWERPC_KUP_8XX_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 0a1a3fc54e54..c503e2f05e61 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -35,11 +35,18 @@
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
* Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
* 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * We define all 16 groups so that all other bits of APG can take any value
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
*/
-#define MI_APG_INIT 0x44444444
+#define MI_APG_INIT 0x4fffffff
+
+/*
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
+ * 1 => User => 10 (all accesses performed according to swapped page definition)
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
+ */
+#define MI_APG_KUEP 0x6fffffff
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
@@ -108,11 +115,18 @@
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
* Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
* 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * We define all 16 groups so that all other bits of APG can take any value
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
*/
-#define MD_APG_INIT 0x44444444
+#define MD_APG_INIT 0x4fffffff
+
+/*
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 10 (all accesses performed according to swapped page definition)
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
+ */
+#define MD_APG_KUAP 0x6fffffff
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MD_RPN is written, bits in
@@ -167,6 +181,7 @@
#ifdef CONFIG_PPC_MM_SLICES
#include <asm/nohash/32/slice.h>
#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
+#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE
#endif
#ifndef __ASSEMBLY__
@@ -193,6 +208,55 @@ typedef struct {
void *pte_frag;
} mm_context_t;
+#ifdef CONFIG_PPC_MM_SLICES
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+ return ctx->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+ ctx->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+ return ctx->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+ return ctx->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+ return ctx->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+ ctx->slb_addr_limit = limit;
+}
+
+static inline struct slice_mask *mm_ctx_slice_mask_base(mm_context_t *ctx)
+{
+ return &ctx->mask_base_psize;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline struct slice_mask *mm_ctx_slice_mask_512k(mm_context_t *ctx)
+{
+ return &ctx->mask_512k;
+}
+
+static inline struct slice_mask *mm_ctx_slice_mask_8m(mm_context_t *ctx)
+{
+ return &ctx->mask_8m;
+}
+#endif
+#endif /* CONFIG_PPC_MM_SLICES */
+
#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
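The MI_APG_*/MD_APG_* values above pack sixteen 2-bit access-protection-group codes, MSB first, with group 0 used for kernel pages and group 1 for user pages. A stand-alone sketch (hypothetical helper name, not kernel code) that rebuilds the constants from those per-group codes:

#include <stdio.h>
#include <stdint.h>

static uint32_t apg_word(unsigned g0, unsigned g1)
{
	uint32_t val = 0;
	int i;

	/* group 0 ends up in the top two bits, unused groups are coded 0b11 */
	for (i = 0; i < 16; i++) {
		unsigned code = (i == 0) ? g0 : (i == 1) ? g1 : 0x3;

		val = (val << 2) | (code & 0x3);
	}
	return val;
}

int main(void)
{
	printf("INIT %08x\n", (unsigned int)apg_word(0x1, 0x0));	/* 0x4fffffff = M?_APG_INIT */
	printf("KUAP %08x\n", (unsigned int)apg_word(0x1, 0x2));	/* 0x6fffffff = MD_APG_KUAP / MI_APG_KUEP */
	return 0;
}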
diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h
index e6585480dfc4..81cf30c370e5 100644
--- a/arch/powerpc/include/asm/nohash/64/mmu.h
+++ b/arch/powerpc/include/asm/nohash/64/mmu.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_NOHASH_64_MMU_H_
#define _ASM_POWERPC_NOHASH_64_MMU_H_
+#define MAX_PHYSMEM_BITS 44
+
/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
#include <asm/nohash/mmu-book3e.h>
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index e77ed9761632..0384a3302fb6 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -205,7 +205,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
(((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while(0)
+
+static inline void pte_unmap(pte_t *pte) { }
/* to find an entry in a kernel page-table-directory */
/* This now only contains the vmalloc pages */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index ed870468ef6f..748f5db2e2b7 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -132,18 +132,7 @@ static inline bool pfn_valid(unsigned long pfn)
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#ifdef CONFIG_PPC_BOOK3S_64
-/*
- * On hash the vmalloc and other regions alias to the kernel region when passed
- * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can
- * return true for some vmalloc addresses, which is incorrect. So explicitly
- * check that the address is in the kernel region.
- */
-#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \
- pfn_valid(virt_to_pfn(kaddr)))
-#else
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
-#endif
/*
* On Book-E parts we need __va to parse the device tree and we can't
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index c5698a523bb1..23f7ed796f38 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -302,6 +302,7 @@
/* Misc instructions for BPF compiler */
#define PPC_INST_LBZ 0x88000000
#define PPC_INST_LD 0xe8000000
+#define PPC_INST_LDX 0x7c00002a
#define PPC_INST_LHZ 0xa0000000
#define PPC_INST_LWZ 0x80000000
#define PPC_INST_LHBRX 0x7c00062c
@@ -309,6 +310,7 @@
#define PPC_INST_STB 0x98000000
#define PPC_INST_STH 0xb0000000
#define PPC_INST_STD 0xf8000000
+#define PPC_INST_STDX 0x7c00012a
#define PPC_INST_STDU 0xf8000001
#define PPC_INST_STW 0x90000000
#define PPC_INST_STWU 0x94000000
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 3120cca72e1f..706ac5df546f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -164,6 +164,9 @@ struct thread_struct {
unsigned long rtas_sp; /* stack pointer for when in RTAS */
#endif
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ unsigned long kuap; /* opened segments for user access */
+#endif
/* Debug Registers */
struct debug_reg debug;
struct thread_fp_state fp_state;
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 64271e562fed..6f047730e642 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -52,10 +52,17 @@ struct pt_regs
};
};
+ union {
+ struct {
#ifdef CONFIG_PPC64
- unsigned long ppr;
- unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */
+ unsigned long ppr;
+#endif
+#ifdef CONFIG_PPC_KUAP
+ unsigned long kuap;
#endif
+ };
+ unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */
+ };
};
#endif
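The union keeps the tail of pt_regs at exactly two longs regardless of which of CONFIG_PPC64/CONFIG_PPC_KUAP is enabled, which is what preserves the 16-byte interrupt stack alignment the old ppr/__pad pair provided. A stubbed sketch (not the real kernel struct) of that invariant, with both fields present:

#include <stdio.h>
#include <assert.h>

struct regs_tail {
	union {
		struct {
			unsigned long ppr;	/* would be under CONFIG_PPC64 */
			unsigned long kuap;	/* would be under CONFIG_PPC_KUAP */
		};
		unsigned long __pad[2];
	};
};

int main(void)
{
	/* __pad[2] stops the union from shrinking when a field is compiled out */
	assert(sizeof(struct regs_tail) == 2 * sizeof(unsigned long));
	printf("tail size: %zu bytes\n", sizeof(struct regs_tail));
	return 0;
}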
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 68da49320592..3192d454a733 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -17,9 +17,9 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int ni
extern int remove_section_mapping(unsigned long start, unsigned long end);
#ifdef CONFIG_PPC_BOOK3S_64
-extern void resize_hpt_for_hotplug(unsigned long new_mem_size);
+extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
#else
-static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { }
+static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; }
#endif
#ifdef CONFIG_NUMA
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4d6d905e9138..76f34346b642 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -6,6 +6,7 @@
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/extable.h>
+#include <asm/kup.h>
/*
* The fs value determines whether argument validity checking should be
@@ -140,6 +141,7 @@ extern long __put_user_bad(void);
#define __put_user_size(x, ptr, size, retval) \
do { \
retval = 0; \
+ allow_write_to_user(ptr, size); \
switch (size) { \
case 1: __put_user_asm(x, ptr, retval, "stb"); break; \
case 2: __put_user_asm(x, ptr, retval, "sth"); break; \
@@ -147,6 +149,7 @@ do { \
case 8: __put_user_asm2(x, ptr, retval); break; \
default: __put_user_bad(); \
} \
+ prevent_write_to_user(ptr, size); \
} while (0)
#define __put_user_nocheck(x, ptr, size) \
@@ -239,6 +242,7 @@ do { \
__chk_user_ptr(ptr); \
if (size > sizeof(x)) \
(x) = __get_user_bad(); \
+ allow_read_from_user(ptr, size); \
switch (size) { \
case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \
case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \
@@ -246,6 +250,7 @@ do { \
case 8: __get_user_asm2(x, ptr, retval); break; \
default: (x) = __get_user_bad(); \
} \
+ prevent_read_from_user(ptr, size); \
} while (0)
/*
@@ -305,15 +310,21 @@ extern unsigned long __copy_tofrom_user(void __user *to,
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
- return __copy_tofrom_user(to, from, n);
+ unsigned long ret;
+
+ allow_user_access(to, from, n);
+ ret = __copy_tofrom_user(to, from, n);
+ prevent_user_access(to, from, n);
+ return ret;
}
#endif /* __powerpc64__ */
static inline unsigned long raw_copy_from_user(void *to,
const void __user *from, unsigned long n)
{
+ unsigned long ret;
if (__builtin_constant_p(n) && (n <= 8)) {
- unsigned long ret = 1;
+ ret = 1;
switch (n) {
case 1:
@@ -338,14 +349,18 @@ static inline unsigned long raw_copy_from_user(void *to,
}
barrier_nospec();
- return __copy_tofrom_user((__force void __user *)to, from, n);
+ allow_read_from_user(from, n);
+ ret = __copy_tofrom_user((__force void __user *)to, from, n);
+ prevent_read_from_user(from, n);
+ return ret;
}
static inline unsigned long raw_copy_to_user(void __user *to,
const void *from, unsigned long n)
{
+ unsigned long ret;
if (__builtin_constant_p(n) && (n <= 8)) {
- unsigned long ret = 1;
+ ret = 1;
switch (n) {
case 1:
@@ -365,17 +380,24 @@ static inline unsigned long raw_copy_to_user(void __user *to,
return 0;
}
- return __copy_tofrom_user(to, (__force const void __user *)from, n);
+ allow_write_to_user(to, n);
+ ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
+ prevent_write_to_user(to, n);
+ return ret;
}
extern unsigned long __clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
{
+ unsigned long ret = size;
might_fault();
- if (likely(access_ok(addr, size)))
- return __clear_user(addr, size);
- return size;
+ if (likely(access_ok(addr, size))) {
+ allow_write_to_user(addr, size);
+ ret = __clear_user(addr, size);
+ prevent_write_to_user(addr, size);
+ }
+ return ret;
}
extern long strncpy_from_user(char *dst, const char __user *src, long count);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 83ad99f9f05d..8e02444e9d3d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -147,6 +147,9 @@ int main(void)
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ OFFSET(KUAP, thread_struct, kuap);
+#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
@@ -331,6 +334,10 @@ int main(void)
STACK_PT_REGS_OFFSET(_PPR, ppr);
#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_KUAP
+ STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap);
+#endif
+
#if defined(CONFIG_PPC32)
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index b61cfd29c76f..2f3d159c11d7 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -36,6 +36,7 @@
#include <asm/asm-405.h>
#include <asm/feature-fixups.h>
#include <asm/barrier.h>
+#include <asm/kup.h>
/*
 * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it includes MSR_CE.
@@ -150,8 +151,8 @@ transfer_to_handler:
stw r12,_CTR(r11)
stw r2,_XER(r11)
mfspr r12,SPRN_SPRG_THREAD
- addi r2,r12,-THREAD
beq 2f /* if from user, fix up THREAD.regs */
+ addi r2, r12, -THREAD
addi r11,r1,STACK_FRAME_OVERHEAD
stw r11,PT_REGS(r12)
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
@@ -161,6 +162,9 @@ transfer_to_handler:
andis. r12,r12,DBCR0_IDM@h
#endif
ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_lock r11, r12
+#endif
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
@@ -186,6 +190,8 @@ transfer_to_handler:
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
+ kuap_save_and_lock r11, r12, r9, r2, r0
+ addi r2, r12, -THREAD
lwz r9,KSP_LIMIT(r12)
cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
@@ -272,6 +278,7 @@ reenable_mmu: /* re-enable mmu so we can */
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
+ kuap_restore r11, r2, r3, r4, r5
b fast_exception_return
#endif
@@ -422,12 +429,11 @@ BEGIN_FTR_SECTION
lwarx r7,0,r1
END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
stwcx. r0,0,r1 /* to clear the reservation */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- andi. r4,r8,MSR_PR
- beq 3f
ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
-3:
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_unlock r5, r7
#endif
+ kuap_check r2, r4
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
@@ -678,6 +684,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
stw r10,_CCR(r1)
stw r1,KSP(r3) /* Set old stack pointer */
+ kuap_check r2, r4
#ifdef CONFIG_SMP
/* We need a sync somewhere here to make sure that if the
* previous task gets rescheduled on another CPU, it sees all
@@ -820,6 +827,9 @@ restore_user:
bnel- load_dbcr0
#endif
ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_unlock r10, r11
+#endif
b restore
@@ -866,12 +876,12 @@ resume_kernel:
/* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r2)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
- bne restore
+ bne restore_kuap
andi. r8,r8,_TIF_NEED_RESCHED
- beq+ restore
+ beq+ restore_kuap
lwz r3,_MSR(r1)
andi. r0,r3,MSR_EE /* interrupts off? */
- beq restore /* don't schedule if so */
+ beq restore_kuap /* don't schedule if so */
#ifdef CONFIG_TRACE_IRQFLAGS
/* Lockdep thinks irqs are enabled, we need to call
* preempt_schedule_irq with IRQs off, so we inform lockdep
@@ -890,6 +900,8 @@ resume_kernel:
bl trace_hardirqs_on
#endif
#endif /* CONFIG_PREEMPT */
+restore_kuap:
+ kuap_restore r1, r2, r9, r10, r0
/* interrupts are hard-disabled at this point */
restore:
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 15c67d2c0534..7cc25389c6bd 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -46,6 +46,7 @@
#include <asm/exception-64e.h>
#endif
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
/*
* System calls.
@@ -120,6 +121,9 @@ END_BTB_FLUSH_SECTION
addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
std r11,-16(r9) /* "regshere" marker */
+
+ kuap_check_amr r10, r11
+
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
beq 33f
@@ -275,6 +279,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
+ kuap_check_amr r10, r11
+
#ifdef CONFIG_PPC_BOOK3S
/*
* Clear MSR_RI, MSR_EE is already and remains disabled. We could do
@@ -296,6 +302,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r8, PACATMSCRATCH(r13)
#endif
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
ld r2,GPR2(r1)
ld r1,GPR1(r1)
@@ -306,8 +316,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
RFI_TO_USER
b . /* prevent speculative execution */
- /* exit to kernel */
-1: ld r2,GPR2(r1)
+1: /* exit to kernel */
+ kuap_restore_amr r2
+
+ ld r2,GPR2(r1)
ld r1,GPR1(r1)
mtlr r4
mtcr r5
@@ -594,6 +606,8 @@ _GLOBAL(_switch)
std r23,_CCR(r1)
std r1,KSP(r3) /* Set old stack pointer */
+ kuap_check_amr r9, r10
+
FLUSH_COUNT_CACHE
/*
@@ -942,6 +956,8 @@ fast_exception_return:
ld r4,_XER(r1)
mtspr SPRN_XER,r4
+ kuap_check_amr r5, r6
+
REST_8GPRS(5, r1)
andi. r0,r3,MSR_RI
@@ -974,6 +990,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
mtspr SPRN_SRR1,r3
ld r2,_CCR(r1)
@@ -1006,6 +1026,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r0,GPR0(r1)
ld r2,GPR2(r1)
ld r3,GPR3(r1)
+
+ kuap_restore_amr r4
+
ld r4,GPR4(r1)
ld r1,GPR1(r1)
RFI_TO_KERNEL
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6247b5bbfa5c..6b86055e5251 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -19,6 +19,7 @@
#include <asm/cpuidle.h>
#include <asm/head-64.h>
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
/*
* There are a few constraints to be concerned with.
@@ -314,6 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
mfspr r11,SPRN_DSISR /* Save DSISR */
std r11,_DSISR(r1)
std r9,_CCR(r1) /* Save CR in stackframe */
+ kuap_save_amr_and_lock r9, r10, cr1
/* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
mfmsr r11 /* get MSR value */
@@ -661,11 +663,17 @@ EXC_COMMON_BEGIN(data_access_slb_common)
ld r4,PACA_EXSLB+EX_DAR(r13)
std r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
+BEGIN_MMU_FTR_SECTION
+ /* HPT case, do SLB fault */
bl do_slb_fault
cmpdi r3,0
bne- 1f
b fast_exception_return
1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
@@ -710,11 +718,17 @@ EXC_COMMON_BEGIN(instruction_access_slb_common)
EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
ld r4,_NIP(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
+BEGIN_MMU_FTR_SECTION
+ /* HPT case, do SLB fault */
bl do_slb_fault
cmpdi r3,0
bne- 1f
b fast_exception_return
1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
@@ -1102,6 +1116,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
EXCEPTION_PROLOG_COMMON_1()
+ /* We don't touch AMR here, we never go to virtual mode */
EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addi r3,r1,STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 48051c8977c5..40aec3f00a05 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -387,7 +387,11 @@ DataAccess:
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
stw r10,_DSISR(r11)
+#ifdef CONFIG_PPC_KUAP
+ andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
+#else
andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
+#endif
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
@@ -522,9 +526,9 @@ InstructionTLBMiss:
andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- ori r1, r1, 0xe05 /* clear out reserved bits */
- andc r1, r0, r1 /* PP = user? 2 : 0 */
+ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+ ori r1, r1, 0xe06 /* clear out reserved bits */
+ andc r1, r0, r1 /* PP = user? 1 : 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -590,11 +594,11 @@ DataLoadTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
+ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */
+ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -670,9 +674,9 @@ DataStoreTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- li r1,0xe05 /* clear out reserved bits & PP lsb */
- andc r1,r0,r1 /* PP = user? 2: 0 */
+ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+ li r1,0xe06 /* clear out reserved bits & PP msb */
+ andc r1,r0,r1 /* PP = user? 1: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -851,10 +855,6 @@ __secondary_start:
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
mtspr SPRN_SPRG_THREAD,r4
-#ifdef CONFIG_PPC_RTAS
- li r3,0
- stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
-#endif
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
mtspr SPRN_SPRG_PGDIR, r4
@@ -900,14 +900,29 @@ load_up_mmu:
tophys(r6,r6)
lwz r6,_SDR1@l(r6)
mtspr SPRN_SDR1,r6
- li r0,16 /* load up segment register values */
+ li r0, NUM_USER_SEGMENTS /* load up user segment register values */
mtctr r0 /* for context 0 */
- lis r3,0x2000 /* Ku = 1, VSID = 0 */
+ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
+#ifdef CONFIG_PPC_KUEP
+ oris r3, r3, SR_NX@h /* Set Nx */
+#endif
+#ifdef CONFIG_PPC_KUAP
+ oris r3, r3, SR_KS@h /* Set Ks */
+#endif
li r4,0
3: mtsrin r3,r4
addi r3,r3,0x111 /* increment VSID */
addis r4,r4,0x1000 /* address of next segment */
bdnz 3b
+ li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */
+ mtctr r0 /* for context 0 */
+ rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */
+ rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */
+ oris r3, r3, SR_KP@h /* Kp = 1 */
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
@@ -941,10 +956,6 @@ start_here:
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
mtspr SPRN_SPRG_THREAD,r4
-#ifdef CONFIG_PPC_RTAS
- li r3,0
- stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
-#endif
lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
mtspr SPRN_SPRG_PGDIR, r4
@@ -1014,7 +1025,12 @@ _ENTRY(switch_mmu_context)
blt- 4f
mulli r3,r3,897 /* multiply context by skew factor */
rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
- addis r3,r3,0x6000 /* Set Ks, Ku bits */
+#ifdef CONFIG_PPC_KUEP
+ oris r3, r3, SR_NX@h /* Set Nx */
+#endif
+#ifdef CONFIG_PPC_KUAP
+ oris r3, r3, SR_KS@h /* Set Ks */
+#endif
li r0,NUM_USER_SEGMENTS
mtctr r0
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3fad8d499767..5321a11c2835 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -968,7 +968,9 @@ start_here_multiplatform:
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
- bl early_setup /* also sets r13 and SPRG_PACA */
+ LOAD_REG_ADDR(r12, DOTSYM(early_setup))
+ mtctr r12
+ bctrl /* also sets r13 and SPRG_PACA */
LOAD_REG_ADDR(r3, start_here_common)
ld r4,PACAKMSR(r13)
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 6b800eec31f2..367fbfa2e835 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -36,7 +36,7 @@
* Convert an address related to an mm to a PFN. NOTE: we are in real
* mode, we could potentially race with page table updates.
*/
-static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
pte_t *ptep;
unsigned long flags;
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e7382abee868..9cc91d03ab62 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -267,12 +267,12 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_id = context->id;
#ifdef CONFIG_PPC_MM_SLICES
- VM_BUG_ON(!mm->context.slb_addr_limit);
- get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
- memcpy(&get_paca()->mm_ctx_low_slices_psize,
- &context->low_slices_psize, sizeof(context->low_slices_psize));
- memcpy(&get_paca()->mm_ctx_high_slices_psize,
- &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+ VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
+ get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
+ memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
+ LOW_SLICE_ARRAY_SZ);
+ memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
+ TASK_SLICE_ARRAY_SZ(context));
#else /* CONFIG_PPC_MM_SLICES */
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 225705aac814..64e1494d3a1d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1730,7 +1730,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
#ifdef CONFIG_PPC_BOOK3S_64
- preload_new_slb_context(start, sp);
+ if (!radix_enabled())
+ preload_new_slb_context(start, sp);
#endif
#endif
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 8b4858f82229..70dc10aa0ccf 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -831,6 +831,9 @@ static __init void print_system_info(void)
pr_info("htab_address = 0x%p\n", htab_address);
if (htab_hash_mask)
pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
+ pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START);
+ pr_info("kernel IO start = 0x%lx\n", KERN_IO_START);
+ pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
#endif
#ifdef CONFIG_PPC_BOOK3S_32
if (Hash)
@@ -948,13 +951,8 @@ void __init setup_arch(char **cmdline_p)
init_mm.brk = klimit;
#ifdef CONFIG_PPC_MM_SLICES
-#ifdef CONFIG_PPC64
- if (!radix_enabled())
- init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-#elif defined(CONFIG_PPC_8xx)
+#if defined(CONFIG_PPC_8xx)
init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
-#else
-#error "context.addr_limit not initialized."
#endif
#endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ba404dd9ce1d..684e34493bf5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -68,6 +68,7 @@
#include <asm/cputhreads.h>
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
#include "setup.h"
@@ -331,6 +332,12 @@ void __init early_setup(unsigned long dt_ptr)
*/
configure_exceptions();
+ /*
+ * Configure Kernel Userspace Protection. This needs to happen before
+ * feature fixups for platforms that implement this using features.
+ */
+ setup_kup();
+
/* Apply all the dynamic patching */
apply_feature_fixups();
setup_feature_keys();
@@ -383,6 +390,9 @@ void early_setup_secondary(void)
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
+ /* Perform any KUP setup that is per-cpu */
+ setup_kup();
+
/*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index bc0503ef9c9c..6ef32472ee1d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -43,7 +43,6 @@
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
-#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 1e0bc5955a40..afd516b572f8 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -98,7 +98,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
* can be used, r7 contains NSEC_PER_SEC.
*/
- lwz r5,WTOM_CLOCK_SEC(r9)
+ lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9)
lwz r6,WTOM_CLOCK_NSEC(r9)
/* We now have our offset in r5,r6. We create a fake dependency
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3c6ab22a0c4e..af3c15a1d41e 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -77,7 +77,7 @@ static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
-static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
static DEFINE_PER_CPU(u64, wd_timer_tb);
/* SMP checker bits */
@@ -293,21 +293,21 @@ out:
nmi_exit();
}
-static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
-{
- t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
- if (wd_timer_period_ms > 1000)
- t->expires = __round_jiffies_up(t->expires, cpu);
- add_timer_on(t, cpu);
-}
-
-static void wd_timer_fn(struct timer_list *t)
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
int cpu = smp_processor_id();
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ return HRTIMER_NORESTART;
+
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return HRTIMER_NORESTART;
+
watchdog_timer_interrupt(cpu);
- wd_timer_reset(cpu, t);
+ hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));
+
+ return HRTIMER_RESTART;
}
void arch_touch_nmi_watchdog(void)
@@ -323,37 +323,22 @@ void arch_touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
-static void start_watchdog_timer_on(unsigned int cpu)
-{
- struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
- per_cpu(wd_timer_tb, cpu) = get_tb();
-
- timer_setup(t, wd_timer_fn, TIMER_PINNED);
- wd_timer_reset(cpu, t);
-}
-
-static void stop_watchdog_timer_on(unsigned int cpu)
-{
- struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
- del_timer_sync(t);
-}
-
-static int start_wd_on_cpu(unsigned int cpu)
+static void start_watchdog(void *arg)
{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
unsigned long flags;
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
- return 0;
+ return;
}
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
- return 0;
+ return;
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
- return 0;
+ return;
wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
@@ -363,27 +348,40 @@ static int start_wd_on_cpu(unsigned int cpu)
}
wd_smp_unlock(&flags);
- start_watchdog_timer_on(cpu);
+ *this_cpu_ptr(&wd_timer_tb) = get_tb();
- return 0;
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer->function = watchdog_timer_fn;
+ hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms),
+ HRTIMER_MODE_REL_PINNED);
}
-static int stop_wd_on_cpu(unsigned int cpu)
+static int start_watchdog_on_cpu(unsigned int cpu)
{
+ return smp_call_function_single(cpu, start_watchdog, NULL, true);
+}
+
+static void stop_watchdog(void *arg)
+{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
unsigned long flags;
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
- return 0; /* Can happen in CPU unplug case */
+ return; /* Can happen in CPU unplug case */
- stop_watchdog_timer_on(cpu);
+ hrtimer_cancel(hrtimer);
wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
wd_smp_clear_cpu_pending(cpu, get_tb());
+}
- return 0;
+static int stop_watchdog_on_cpu(unsigned int cpu)
+{
+ return smp_call_function_single(cpu, stop_watchdog, NULL, true);
}
static void watchdog_calc_timeouts(void)
@@ -402,7 +400,7 @@ void watchdog_nmi_stop(void)
int cpu;
for_each_cpu(cpu, &wd_cpus_enabled)
- stop_wd_on_cpu(cpu);
+ stop_watchdog_on_cpu(cpu);
}
void watchdog_nmi_start(void)
@@ -411,7 +409,7 @@ void watchdog_nmi_start(void)
watchdog_calc_timeouts();
for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
- start_wd_on_cpu(cpu);
+ start_watchdog_on_cpu(cpu);
}
/*
@@ -423,7 +421,8 @@ int __init watchdog_nmi_probe(void)
err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"powerpc/watchdog:online",
- start_wd_on_cpu, stop_wd_on_cpu);
+ start_watchdog_on_cpu,
+ stop_watchdog_on_cpu);
if (err < 0) {
pr_warn("could not be initialized");
return err;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3b9662a4207e..085509148d95 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -822,7 +822,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
raddr = per_cpu_ptr(addr, cpu);
l = (unsigned long)raddr;
- if (REGION_ID(l) == VMALLOC_REGION_ID) {
+ if (get_region_id(l) == VMALLOC_REGION_ID) {
l = vmalloc_to_phys(raddr);
raddr = (unsigned int *)l;
}
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
index 890d4ddd91d6..bb9307ce2440 100644
--- a/arch/powerpc/lib/checksum_wrappers.c
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -29,6 +29,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
unsigned int csum;
might_sleep();
+ allow_read_from_user(src, len);
*err_ptr = 0;
@@ -60,6 +61,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
}
out:
+ prevent_read_from_user(src, len);
return (__force __wsum)csum;
}
EXPORT_SYMBOL(csum_and_copy_from_user);
@@ -70,6 +72,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
unsigned int csum;
might_sleep();
+ allow_write_to_user(dst, len);
*err_ptr = 0;
@@ -97,6 +100,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
}
out:
+ prevent_write_to_user(dst, len);
return (__force __wsum)csum;
}
EXPORT_SYMBOL(csum_and_copy_to_user);
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 506413a2c25e..90c9d4a1e36f 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -15,7 +15,6 @@
#include <linux/cpuhotplug.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
-#include <linux/kprobes.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -26,9 +25,9 @@
static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
unsigned int *patch_addr)
{
- int err;
+ int err = 0;
- __put_user_size(instr, patch_addr, 4, err);
+ __put_user_asm(instr, patch_addr, err, "stw");
if (err)
return err;
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
index 844d8e774492..b7f6f6e0b6e8 100644
--- a/arch/powerpc/lib/memcmp_64.S
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -215,11 +215,20 @@ _GLOBAL_TOC(memcmp)
beq .Lzero
.Lcmp_rest_lt8bytes:
- /* Here we have only less than 8 bytes to compare with. at least s1
- * Address is aligned with 8 bytes.
- * The next double words are load and shift right with appropriate
- * bits.
+ /*
+ * Here we have less than 8 bytes to compare. At least s1 is aligned to
+ * 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't cross a
+ * page boundary, otherwise we might read past the end of the buffer and
+ * trigger a page fault. We use 4K as the conservative minimum page
+ * size. If we detect that case we go to the byte-by-byte loop.
+ *
+ * Otherwise the next double word is loaded from s1 and s2, and shifted
+ * right to compare the appropriate bits.
*/
+ clrldi r6,r4,(64-12) // r6 = r4 & 0xfff
+ cmpdi r6,0xff8
+ bgt .Lshort
+
subfic r6,r5,8
slwi r6,r6,3
LD rA,0,r3
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index fe1f6443d57f..87648b58d295 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -213,3 +213,27 @@ void flush_instruction_cache(void)
mtspr(SPRN_IC_CST, IDC_INVALL);
isync();
}
+
+#ifdef CONFIG_PPC_KUEP
+void __init setup_kuep(bool disabled)
+{
+ if (disabled)
+ return;
+
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+ mtspr(SPRN_MI_AP, MI_APG_KUEP);
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void __init setup_kuap(bool disabled)
+{
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ if (disabled)
+ pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n");
+
+ mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+#endif
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index c8da352e8686..f137286740cb 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -105,7 +105,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
u64 vsid, vsidkey;
int psize, ssize;
- switch (REGION_ID(ea)) {
+ switch (get_region_id(ea)) {
case USER_REGION_ID:
pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
if (mm == NULL)
@@ -117,16 +117,20 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
break;
case VMALLOC_REGION_ID:
pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
- if (ea < VMALLOC_END)
- psize = mmu_vmalloc_psize;
- else
- psize = mmu_io_psize;
+ psize = mmu_vmalloc_psize;
ssize = mmu_kernel_ssize;
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
vsidkey = SLB_VSID_KERNEL;
break;
- case KERNEL_REGION_ID:
- pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea);
+ case IO_REGION_ID:
+ pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea);
+ psize = mmu_io_psize;
+ ssize = mmu_kernel_ssize;
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ vsidkey = SLB_VSID_KERNEL;
+ break;
+ case LINEAR_MAP_REGION_ID:
+ pr_devel("%s: 0x%llx -- LINEAR_MAP_REGION_ID\n", __func__, ea);
psize = mmu_linear_psize;
ssize = mmu_kernel_ssize;
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 3f1803672c9b..641891df2046 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -366,8 +366,10 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
if (!drmem_info->lmbs)
return;
- for_each_drmem_lmb(lmb)
+ for_each_drmem_lmb(lmb) {
read_drconf_v1_cell(lmb, &prop);
+ lmb_set_nid(lmb);
+ }
}
static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -412,6 +414,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
lmb->aa_index = dr_cell.aa_index;
lmb->flags = dr_cell.flags;
+
+ lmb_set_nid(lmb);
}
}
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 887f11bcf330..b5d3578d9f65 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -44,6 +44,7 @@
#include <asm/mmu_context.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
+#include <asm/kup.h>
static inline bool notify_page_fault(struct pt_regs *regs)
{
@@ -223,19 +224,46 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
}
/* Is this a bad kernel fault ? */
-static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
- unsigned long address)
+static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, bool is_write)
{
+ int is_exec = TRAP(regs) == 0x400;
+
/* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
DSISR_PROTFAULT))) {
- printk_ratelimited(KERN_CRIT "kernel tried to execute"
- " exec-protected page (%lx) -"
- "exploit attempt? (uid: %d)\n",
- address, from_kuid(&init_user_ns,
- current_uid()));
+ pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
+ address >= TASK_SIZE ? "exec-protected" : "user",
+ address,
+ from_kuid(&init_user_ns, current_uid()));
+
+ // Kernel exec fault is always bad
+ return true;
}
- return is_exec || (address >= TASK_SIZE);
+
+ if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) &&
+ !search_exception_tables(regs->nip)) {
+ pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
+ address,
+ from_kuid(&init_user_ns, current_uid()));
+ }
+
+ // Kernel fault on kernel address is bad
+ if (address >= TASK_SIZE)
+ return true;
+
+ // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
+ if (!search_exception_tables(regs->nip))
+ return true;
+
+ // Read/write fault in a valid region (the exception table search passed
+ // above), but blocked by KUAP is bad, it can never succeed.
+ if (bad_kuap_fault(regs, is_write))
+ return true;
+
+ // What's left? Kernel fault on user in well defined regions (extable
+ // matched), and allowed by KUAP in the faulting context.
+ return false;
}
static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
@@ -455,9 +483,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
/*
* The kernel should never take an execute fault nor should it
- * take a page fault to a kernel address.
+ * take a page fault to a kernel address or a page fault to a user
+ * address outside of dedicated places
*/
- if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address)))
+ if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write)))
return SIGSEGV;
/*
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index a6c491f18a04..e27792d0b744 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -309,13 +309,13 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64)
_GLOBAL(create_hpte)
/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
- rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */
+ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */
and r8,r8,r0 /* writable if _RW & _DIRTY */
rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */
rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */
ori r8,r8,0xe04 /* clear out reserved bits */
- andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */
+ andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */
BEGIN_FTR_SECTION
rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0a4f939a8161..6eb89643ce58 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -755,12 +755,12 @@ static unsigned long __init htab_get_table_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-void resize_hpt_for_hotplug(unsigned long new_mem_size)
+int resize_hpt_for_hotplug(unsigned long new_mem_size)
{
unsigned target_hpt_shift;
if (!mmu_hash_ops.resize_hpt)
- return;
+ return 0;
target_hpt_shift = htab_shift_for_mem_size(new_mem_size);
@@ -772,23 +772,25 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
* reduce unless the target shift is at least 2 below the
* current shift
*/
- if ((target_hpt_shift > ppc64_pft_size)
- || (target_hpt_shift < (ppc64_pft_size - 1))) {
- int rc;
-
- rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
- if (rc && (rc != -ENODEV))
- printk(KERN_WARNING
- "Unable to resize hash page table to target order %d: %d\n",
- target_hpt_shift, rc);
- }
+ if (target_hpt_shift > ppc64_pft_size ||
+ target_hpt_shift < ppc64_pft_size - 1)
+ return mmu_hash_ops.resize_hpt(target_hpt_shift);
+
+ return 0;
}
int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
- int rc = htab_bolt_mapping(start, end, __pa(start),
- pgprot_val(PAGE_KERNEL), mmu_linear_psize,
- mmu_kernel_ssize);
+ int rc;
+
+ if (end >= H_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
+ rc = htab_bolt_mapping(start, end, __pa(start),
+ pgprot_val(PAGE_KERNEL), mmu_linear_psize,
+ mmu_kernel_ssize);
if (rc < 0) {
int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
@@ -929,6 +931,11 @@ static void __init htab_initialize(void)
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot);
+ if ((base + size) >= H_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ continue;
+ }
+
BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
prot, mmu_linear_psize, mmu_kernel_ssize));
}
@@ -968,6 +975,7 @@ void __init hash__early_init_devtree(void)
htab_scan_page_sizes();
}
+struct hash_mm_context init_hash_mm_context;
void __init hash__early_init_mmu(void)
{
#ifndef CONFIG_PPC_64K_PAGES
@@ -1013,11 +1021,11 @@ void __init hash__early_init_mmu(void)
__pgd_val_bits = HASH_PGD_VAL_BITS;
__kernel_virt_start = H_KERN_VIRT_START;
- __kernel_virt_size = H_KERN_VIRT_SIZE;
__vmalloc_start = H_VMALLOC_START;
__vmalloc_end = H_VMALLOC_END;
__kernel_io_start = H_KERN_IO_START;
- vmemmap = (struct page *)H_VMEMMAP_BASE;
+ __kernel_io_end = H_KERN_IO_END;
+ vmemmap = (struct page *)H_VMEMMAP_START;
ioremap_bot = IOREMAP_BASE;
#ifdef CONFIG_PCI
@@ -1041,6 +1049,9 @@ void __init hash__early_init_mmu(void)
*/
htab_initialize();
+ init_mm.context.hash_context = &init_hash_mm_context;
+ init_mm.context.hash_context->slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+
pr_info("Initializing hash mmu with SLB\n");
/* Initialize SLB management */
slb_initialize();
@@ -1147,10 +1158,13 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
*/
static int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
u32 spp = 0;
u32 **sbpm, *sbpp;
+ if (!spt)
+ return 0;
+
if (ea >= spt->maxaddr)
return 0;
if (ea < 0x100000000UL) {
@@ -1238,7 +1252,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
trace_hash_fault(ea, access, trap);
/* Get region & vsid */
- switch (REGION_ID(ea)) {
+ switch (get_region_id(ea)) {
case USER_REGION_ID:
user_region = 1;
if (! mm) {
@@ -1252,10 +1266,13 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
break;
case VMALLOC_REGION_ID:
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
- if (ea < VMALLOC_END)
- psize = mmu_vmalloc_psize;
- else
- psize = mmu_io_psize;
+ psize = mmu_vmalloc_psize;
+ ssize = mmu_kernel_ssize;
+ break;
+
+ case IO_REGION_ID:
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ psize = mmu_io_psize;
ssize = mmu_kernel_ssize;
break;
default:
@@ -1421,7 +1438,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
unsigned long flags = 0;
struct mm_struct *mm = current->mm;
- if (REGION_ID(ea) == VMALLOC_REGION_ID)
+ if ((get_region_id(ea) == VMALLOC_REGION_ID) ||
+ (get_region_id(ea) == IO_REGION_ID))
mm = &init_mm;
if (dsisr & DSISR_NOHPTE)
@@ -1437,8 +1455,9 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
struct mm_struct *mm = current->mm;
+ unsigned int region_id = get_region_id(ea);
- if (REGION_ID(ea) == VMALLOC_REGION_ID)
+ if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
mm = &init_mm;
if (dsisr & DSISR_NOHPTE)
@@ -1455,7 +1474,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
* 2) user space access kernel space.
*/
access |= _PAGE_PRIVILEGED;
- if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
+ if ((msr & MSR_PR) || (region_id == USER_REGION_ID))
access &= ~_PAGE_PRIVILEGED;
if (trap == 0x400)
@@ -1470,7 +1489,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
int psize = get_slice_psize(mm, ea);
/* We only prefault standard pages for now */
- if (unlikely(psize != mm->context.user_psize))
+ if (unlikely(psize != mm_ctx_user_psize(&mm->context)))
return false;
/*
@@ -1499,7 +1518,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
int rc, ssize, update_flags = 0;
unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
- BUG_ON(REGION_ID(ea) != USER_REGION_ID);
+ BUG_ON(get_region_id(ea) != USER_REGION_ID);
if (!should_hash_preload(mm, ea))
return;
@@ -1549,7 +1568,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Hash it in */
#ifdef CONFIG_PPC_64K_PAGES
- if (mm->context.user_psize == MMU_PAGE_64K)
+ if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K)
rc = __hash_page_64K(ea, access, vsid, ptep, trap,
update_flags, ssize);
else
@@ -1562,8 +1581,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize,
- mm->context.user_psize,
- mm->context.user_psize,
+ mm_ctx_user_psize(&mm->context),
+ mm_ctx_user_psize(&mm->context),
pte_val(*ptep));
out_exit:
local_irq_restore(flags);
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
index 82a0e37557a5..320c1672b2ae 100644
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -43,9 +43,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
- BUG_ON(!pte_none(*(kmap_pte-idx)));
-#endif
+ WARN_ON(IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !pte_none(*(kmap_pte - idx)));
__set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
local_flush_tlb_page(NULL, vaddr);
@@ -56,7 +54,6 @@ EXPORT_SYMBOL(kmap_atomic_prot);
void __kunmap_atomic(void *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- int type __maybe_unused;
if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
@@ -64,14 +61,12 @@ void __kunmap_atomic(void *kvaddr)
return;
}
- type = kmap_atomic_idx();
-
-#ifdef CONFIG_DEBUG_HIGHMEM
- {
+ if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) {
+ int type = kmap_atomic_idx();
unsigned int idx;
idx = type + KM_TYPE_NR * smp_processor_id();
- BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ WARN_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
/*
* force other mappings to Oops if they'll try to access
@@ -80,7 +75,6 @@ void __kunmap_atomic(void *kvaddr)
pte_clear(&init_mm, vaddr, kmap_pte-idx);
local_flush_tlb_page(NULL, vaddr);
}
-#endif
kmap_atomic_idx_pop();
pagefault_enable();
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 1e6910eb70ed..6ea5607fc564 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -24,6 +24,32 @@
#include <linux/string.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
+#include <asm/kup.h>
+
+static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
+static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
+
+static int __init parse_nosmep(char *p)
+{
+ disable_kuep = true;
+ pr_warn("Disabling Kernel Userspace Execution Prevention\n");
+ return 0;
+}
+early_param("nosmep", parse_nosmep);
+
+static int __init parse_nosmap(char *p)
+{
+ disable_kuap = true;
+ pr_warn("Disabling Kernel Userspace Access Protection\n");
+ return 0;
+}
+early_param("nosmap", parse_nosmap);
+
+void setup_kup(void)
+{
+ setup_kuep(disable_kuep);
+ setup_kuap(disable_kuap);
+}
#define CTOR(shift) static void ctor_##shift(void *addr) \
{ \
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 41a3513cadc9..80cc97cd8878 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -45,6 +45,7 @@
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/hugetlb.h>
+#include <asm/kup.h>
#include "mmu_decl.h"
@@ -178,6 +179,8 @@ void __init MMU_init(void)
btext_unmap();
#endif
+ setup_kup();
+
/* Shortly after that, the entire linear mapping will be available */
memblock_set_current_limit(lowmem_end_addr);
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f6787f90e158..e12bec98366f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
- bool want_memblock)
+int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -131,8 +131,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int __meminit arch_remove_memory(int nid, u64 start, u64 size,
- struct vmem_altmap *altmap)
+int __ref arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -161,7 +161,8 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size,
*/
vm_unmap_aliases();
- resize_hpt_for_hotplug(memblock_phys_mem_size());
+ if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
+ pr_warn("Hash collision while resizing HPT\n");
return ret;
}
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index f720c5cc0b5e..cb2b08635508 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -63,6 +63,13 @@ static int hash__init_new_context(struct mm_struct *mm)
if (index < 0)
return index;
+ mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
+ GFP_KERNEL);
+ if (!mm->context.hash_context) {
+ ida_free(&mmu_context_ida, index);
+ return -ENOMEM;
+ }
+
/*
* The old code would re-promote on fork, we don't do that when using
* slices as it could cause problem promoting slices that have been
@@ -77,10 +84,26 @@ static int hash__init_new_context(struct mm_struct *mm)
* We should not be calling init_new_context() on init_mm. Hence a
* check against 0 is OK.
*/
- if (mm->context.id == 0)
+ if (mm->context.id == 0) {
+ memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
slice_init_new_context_exec(mm);
+ } else {
+ /* This is fork. Copy hash_context details from current->mm */
+ memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ /* inherit subpage prot details if we have one. */
+ if (current->mm->context.hash_context->spt) {
+ mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
+ GFP_KERNEL);
+ if (!mm->context.hash_context->spt) {
+ ida_free(&mmu_context_ida, index);
+ kfree(mm->context.hash_context);
+ return -ENOMEM;
+ }
+ }
+#endif
- subpage_prot_init_new_context(mm);
+ }
pkey_mm_init(mm);
return index;
@@ -118,6 +141,7 @@ static int radix__init_new_context(struct mm_struct *mm)
asm volatile("ptesync;isync" : : : "memory");
mm->context.npu_context = NULL;
+ mm->context.hash_context = NULL;
return index;
}
@@ -162,6 +186,7 @@ static void destroy_contexts(mm_context_t *ctx)
if (context_id)
ida_free(&mmu_context_ida, context_id);
}
+ kfree(ctx->hash_context);
}
static void pmd_frag_destroy(void *pmd_frag)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f976676004ad..6ef36d553cde 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -32,7 +32,6 @@
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/smp.h>
-#include <asm/cputhreads.h>
#include <asm/topology.h>
#include <asm/firmware.h>
#include <asm/paca.h>
@@ -908,16 +907,22 @@ static int __init early_numa(char *p)
}
early_param("numa", early_numa);
-static bool topology_updates_enabled = true;
+/*
+ * The platform can inform us through one of several mechanisms
+ * (post-migration device tree updates, PRRN or VPHN) that the NUMA
+ * assignment of a resource has changed. This controls whether we act
+ * on that. Disabled by default.
+ */
+static bool topology_updates_enabled;
static int __init early_topology_updates(char *p)
{
if (!p)
return 0;
- if (!strcmp(p, "off")) {
- pr_info("Disabling topology updates\n");
- topology_updates_enabled = false;
+ if (!strcmp(p, "on")) {
+ pr_warn("Caution: enabling topology updates\n");
+ topology_updates_enabled = true;
}
return 0;
@@ -1498,6 +1503,9 @@ int start_topology_update(void)
{
int rc = 0;
+ if (!topology_updates_enabled)
+ return 0;
+
if (firmware_has_feature(FW_FEATURE_PRRN)) {
if (!prrn_enabled) {
prrn_enabled = 1;
@@ -1531,6 +1539,9 @@ int stop_topology_update(void)
{
int rc = 0;
+ if (!topology_updates_enabled)
+ return 0;
+
if (prrn_enabled) {
prrn_enabled = 0;
#ifdef CONFIG_SMP
@@ -1588,11 +1599,13 @@ static ssize_t topology_write(struct file *file, const char __user *buf,
kbuf[read_len] = '\0';
- if (!strncmp(kbuf, "on", 2))
+ if (!strncmp(kbuf, "on", 2)) {
+ topology_updates_enabled = true;
start_topology_update();
- else if (!strncmp(kbuf, "off", 3))
+ } else if (!strncmp(kbuf, "off", 3)) {
stop_topology_update();
- else
+ topology_updates_enabled = false;
+ } else
return -EINVAL;
return count;
@@ -1607,9 +1620,7 @@ static const struct file_operations topology_ops = {
static int topology_update_init(void)
{
- /* Do not poll for changes if disabled at boot */
- if (topology_updates_enabled)
- start_topology_update();
+ start_topology_update();
if (vphn_enabled)
topology_schedule_update();
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index c08d49046a96..097a3b3538b1 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -112,9 +112,16 @@ int __meminit hash__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys)
{
- int rc = htab_bolt_mapping(start, start + page_size, phys,
- pgprot_val(PAGE_KERNEL),
- mmu_vmemmap_psize, mmu_kernel_ssize);
+ int rc;
+
+ if ((start + page_size) >= H_VMEMMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
+ rc = htab_bolt_mapping(start, start + page_size, phys,
+ pgprot_val(PAGE_KERNEL),
+ mmu_vmemmap_psize, mmu_kernel_ssize);
if (rc < 0) {
int rc2 = htab_remove_mapping(start, start + page_size,
mmu_vmemmap_psize,
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 154472a28c77..fcb0169e2d32 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -29,6 +29,7 @@
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/trace.h>
+#include <asm/uaccess.h>
#include <trace/events/thp.h>
@@ -135,6 +136,10 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa,
*/
BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
+#ifdef CONFIG_PPC_64K_PAGES
+ BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
+#endif
+
if (unlikely(!slab_is_available()))
return early_map_kernel_page(ea, pa, flags, map_page_size,
nid, region_start, region_end);
@@ -334,6 +339,12 @@ void __init radix_init_pgtable(void)
* page tables will be allocated within the range. No
 * need for a node (which we don't have yet).
*/
+
+ if ((reg->base + reg->size) >= RADIX_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ continue;
+ }
+
WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size,
-1));
@@ -531,8 +542,15 @@ static void radix_init_amor(void)
mtspr(SPRN_AMOR, (3ul << 62));
}
-static void radix_init_iamr(void)
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled)
{
+ if (disabled || !early_radix_enabled())
+ return;
+
+ if (smp_processor_id() == boot_cpuid)
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
/*
* Radix always uses key0 of the IAMR to determine if an access is
* allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
@@ -540,6 +558,25 @@ static void radix_init_iamr(void)
*/
mtspr(SPRN_IAMR, (1ul << 62));
}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled)
+{
+ if (disabled || !early_radix_enabled())
+ return;
+
+ if (smp_processor_id() == boot_cpuid) {
+ pr_info("Activating Kernel Userspace Access Prevention\n");
+ cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
+ }
+
+ /* Make sure userspace can't change the AMR */
+ mtspr(SPRN_UAMOR, 0);
+ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+ isync();
+}
+#endif
void __init radix__early_init_mmu(void)
{
@@ -574,11 +611,11 @@ void __init radix__early_init_mmu(void)
__pgd_val_bits = RADIX_PGD_VAL_BITS;
__kernel_virt_start = RADIX_KERN_VIRT_START;
- __kernel_virt_size = RADIX_KERN_VIRT_SIZE;
__vmalloc_start = RADIX_VMALLOC_START;
__vmalloc_end = RADIX_VMALLOC_END;
__kernel_io_start = RADIX_KERN_IO_START;
- vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
+ __kernel_io_end = RADIX_KERN_IO_END;
+ vmemmap = (struct page *)RADIX_VMEMMAP_START;
ioremap_bot = IOREMAP_BASE;
#ifdef CONFIG_PCI
@@ -601,7 +638,6 @@ void __init radix__early_init_mmu(void)
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- radix_init_iamr();
radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, &init_mm);
@@ -623,7 +659,6 @@ void radix__early_init_mmu_secondary(void)
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
radix_init_amor();
}
- radix_init_iamr();
radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
@@ -866,6 +901,11 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
+ if (end >= RADIX_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
return create_physical_mapping(start, end, nid);
}
@@ -893,6 +933,11 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
int ret;
+ if ((start + page_size) >= RADIX_VMEMMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
BUG_ON(ret);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index fb1375c07e8c..95ad2a09501c 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -90,14 +90,14 @@ unsigned long __pgd_val_bits;
EXPORT_SYMBOL(__pgd_val_bits);
unsigned long __kernel_virt_start;
EXPORT_SYMBOL(__kernel_virt_start);
-unsigned long __kernel_virt_size;
-EXPORT_SYMBOL(__kernel_virt_size);
unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);
unsigned long __vmalloc_end;
EXPORT_SYMBOL(__vmalloc_end);
unsigned long __kernel_io_start;
EXPORT_SYMBOL(__kernel_io_start);
+unsigned long __kernel_io_end;
+EXPORT_SYMBOL(__kernel_io_end);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;
@@ -121,6 +121,11 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
if (pgprot_val(prot) & H_PAGE_4K_PFN)
return NULL;
+ if ((ea + size) >= (void *)IOREMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return NULL;
+ }
+
WARN_ON(pa & ~PAGE_MASK);
WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
WARN_ON(size & ~PAGE_MASK);
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index 587807763737..ae7fca40e5b3 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -7,6 +7,7 @@
#include <asm/mman.h>
#include <asm/mmu_context.h>
+#include <asm/mmu.h>
#include <asm/setup.h>
#include <linux/pkeys.h>
#include <linux/of_device.h>
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index f29d2f118b44..bf1de3ca39bc 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -394,3 +394,26 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
else /* Anything else has 256M mapped */
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
}
+
+#ifdef CONFIG_PPC_KUEP
+void __init setup_kuep(bool disabled)
+{
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+ if (cpu_has_feature(CPU_FTR_601))
+ pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
+
+ if (disabled)
+ pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void __init setup_kuap(bool disabled)
+{
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ if (disabled)
+ pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n");
+}
+#endif
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index b430e4e08af6..b9bda0105841 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -500,7 +500,7 @@ static void populate_markers(void)
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
#ifdef CONFIG_PPC_BOOK3S_64
- address_markers[9].start_address = H_VMEMMAP_BASE;
+ address_markers[9].start_address = H_VMEMMAP_START;
#else
address_markers[9].start_address = VMEMMAP_BASE;
#endif
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 37138428ab55..63fc56feea15 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -303,8 +303,9 @@ static void populate_markers(void)
address_markers[i++].start_address = PHB_IO_END;
address_markers[i++].start_address = IOREMAP_BASE;
address_markers[i++].start_address = IOREMAP_END;
+ /* What is the ifdef about? */
#ifdef CONFIG_PPC_BOOK3S_64
- address_markers[i++].start_address = H_VMEMMAP_BASE;
+ address_markers[i++].start_address = H_VMEMMAP_START;
#else
address_markers[i++].start_address = VMEMMAP_BASE;
#endif
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5986df48359b..89e4531de64b 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -691,10 +691,10 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
unsigned long flags;
int ssize;
- if (id == KERNEL_REGION_ID) {
+ if (id == LINEAR_MAP_REGION_ID) {
/* We only support upto MAX_PHYSMEM_BITS */
- if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS))
+ if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS))
return -EFAULT;
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
@@ -702,20 +702,25 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
#ifdef CONFIG_SPARSEMEM_VMEMMAP
} else if (id == VMEMMAP_REGION_ID) {
- if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+ if (ea >= H_VMEMMAP_END)
return -EFAULT;
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
} else if (id == VMALLOC_REGION_ID) {
- if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+ if (ea >= H_VMALLOC_END)
return -EFAULT;
- if (ea < H_VMALLOC_END)
- flags = local_paca->vmalloc_sllp;
- else
- flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+ flags = local_paca->vmalloc_sllp;
+
+ } else if (id == IO_REGION_ID) {
+
+ if (ea >= H_KERN_IO_END)
+ return -EFAULT;
+
+ flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+
} else {
return -EFAULT;
}
@@ -725,6 +730,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
ssize = MMU_SEGSIZE_256M;
context = get_kernel_context(ea);
+
return slb_insert_entry(ea, context, flags, ssize, true);
}
@@ -739,7 +745,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
* consider this as bad access if we take a SLB miss
* on an address above addr limit.
*/
- if (ea >= mm->context.slb_addr_limit)
+ if (ea >= mm_ctx_slb_addr_limit(&mm->context))
return -EFAULT;
context = get_user_context(&mm->context, ea);
@@ -761,7 +767,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
long do_slb_fault(struct pt_regs *regs, unsigned long ea)
{
- unsigned long id = REGION_ID(ea);
+ unsigned long id = get_region_id(ea);
/* IRQs are not reconciled here, so can't check irqs_disabled */
VM_WARN_ON(mfmsr() & MSR_EE);
@@ -784,7 +790,7 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea)
* first class kernel code. But for performance it's probably nicer
* if they go via fast_exception_return too.
*/
- if (id >= KERNEL_REGION_ID) {
+ if (id >= LINEAR_MAP_REGION_ID) {
long err;
#ifdef CONFIG_DEBUG_VM
/* Catch recursive kernel SLB faults. */
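
slb_allocate_kernel() is now keyed on per-purpose region ids with explicit per-region end addresses (H_VMEMMAP_END, H_VMALLOC_END, H_KERN_IO_END) instead of masking the EA against a single region size. A hedged sketch of the numbering this relies on (the names are taken from the hunk above; the concrete values are an assumption):

enum region_id_sketch {
	USER_REGION_ID		= 0,	/* 0x0000.... user addresses   */
	LINEAR_MAP_REGION_ID	= 1,	/* kernel linear mapping       */
	VMALLOC_REGION_ID	= 2,	/* vmalloc space               */
	IO_REGION_ID		= 3,	/* ioremap space, new case     */
	VMEMMAP_REGION_ID	= 4,	/* vmemmap                     */
};

The only ordering property do_slb_fault() depends on is that the user region id sits below every kernel region id, so "id >= LINEAR_MAP_REGION_ID" selects all kernel-address faults.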
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index aec91dbcdc0b..35b278082391 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -101,7 +101,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
{
struct vm_area_struct *vma;
- if ((mm->context.slb_addr_limit - len) < addr)
+ if ((mm_ctx_slb_addr_limit(&mm->context) - len) < addr)
return 0;
vma = find_vma(mm, addr);
return (!vma || (addr + len) <= vm_start_gap(vma));
@@ -155,15 +155,15 @@ static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
{
#ifdef CONFIG_PPC_64K_PAGES
if (psize == MMU_PAGE_64K)
- return &mm->context.mask_64k;
+ return mm_ctx_slice_mask_64k(&mm->context);
#endif
if (psize == MMU_PAGE_4K)
- return &mm->context.mask_4k;
+ return mm_ctx_slice_mask_4k(&mm->context);
#ifdef CONFIG_HUGETLB_PAGE
if (psize == MMU_PAGE_16M)
- return &mm->context.mask_16m;
+ return mm_ctx_slice_mask_16m(&mm->context);
if (psize == MMU_PAGE_16G)
- return &mm->context.mask_16g;
+ return mm_ctx_slice_mask_16g(&mm->context);
#endif
BUG();
}
@@ -253,7 +253,7 @@ static void slice_convert(struct mm_struct *mm,
*/
spin_lock_irqsave(&slice_convert_lock, flags);
- lpsizes = mm->context.low_slices_psize;
+ lpsizes = mm_ctx_low_slices(&mm->context);
for (i = 0; i < SLICE_NUM_LOW; i++) {
if (!(mask->low_slices & (1u << i)))
continue;
@@ -272,8 +272,8 @@ static void slice_convert(struct mm_struct *mm,
(((unsigned long)psize) << (mask_index * 4));
}
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+ hpsizes = mm_ctx_high_slices(&mm->context);
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) {
if (!test_bit(i, mask->high_slices))
continue;
@@ -292,8 +292,8 @@ static void slice_convert(struct mm_struct *mm,
}
slice_dbg(" lsps=%lx, hsps=%lx\n",
- (unsigned long)mm->context.low_slices_psize,
- (unsigned long)mm->context.high_slices_psize);
+ (unsigned long)mm_ctx_low_slices(&mm->context),
+ (unsigned long)mm_ctx_high_slices(&mm->context));
spin_unlock_irqrestore(&slice_convert_lock, flags);
@@ -393,7 +393,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* DEFAULT_MAP_WINDOW we should apply this.
*/
if (high_limit > DEFAULT_MAP_WINDOW)
- addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW;
+ addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW;
while (addr > min_addr) {
info.high_limit = addr;
@@ -505,20 +505,20 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
return -ENOMEM;
}
- if (high_limit > mm->context.slb_addr_limit) {
+ if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) {
/*
* Increasing the slb_addr_limit does not require
* slice mask cache to be recalculated because it should
* be already initialised beyond the old address limit.
*/
- mm->context.slb_addr_limit = high_limit;
+ mm_ctx_set_slb_addr_limit(&mm->context, high_limit);
on_each_cpu(slice_flush_segments, mm, 1);
}
/* Sanity checks */
BUG_ON(mm->task_size == 0);
- BUG_ON(mm->context.slb_addr_limit == 0);
+ BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0);
VM_BUG_ON(radix_enabled());
slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
@@ -696,7 +696,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
unsigned long flags)
{
return slice_get_unmapped_area(addr, len, flags,
- current->mm->context.user_psize, 0);
+ mm_ctx_user_psize(&current->mm->context), 0);
}
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -706,7 +706,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
const unsigned long flags)
{
return slice_get_unmapped_area(addr0, len, flags,
- current->mm->context.user_psize, 1);
+ mm_ctx_user_psize(&current->mm->context), 1);
}
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
@@ -717,10 +717,10 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
VM_BUG_ON(radix_enabled());
if (slice_addr_is_low(addr)) {
- psizes = mm->context.low_slices_psize;
+ psizes = mm_ctx_low_slices(&mm->context);
index = GET_LOW_SLICE_INDEX(addr);
} else {
- psizes = mm->context.high_slices_psize;
+ psizes = mm_ctx_high_slices(&mm->context);
index = GET_HIGH_SLICE_INDEX(addr);
}
mask_index = index & 0x1;
@@ -742,20 +742,19 @@ void slice_init_new_context_exec(struct mm_struct *mm)
* duplicated.
*/
#ifdef CONFIG_PPC64
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+ mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW_USER64);
#else
mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
#endif
-
- mm->context.user_psize = psize;
+ mm_ctx_set_user_psize(&mm->context, psize);
/*
* Set all slice psizes to the default.
*/
- lpsizes = mm->context.low_slices_psize;
+ lpsizes = mm_ctx_low_slices(&mm->context);
memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
- hpsizes = mm->context.high_slices_psize;
+ hpsizes = mm_ctx_high_slices(&mm->context);
memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
/*
@@ -777,7 +776,7 @@ void slice_setup_new_exec(void)
if (!is_32bit_task())
return;
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+ mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
}
#endif
@@ -816,7 +815,7 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
const struct slice_mask *maskp;
- unsigned int psize = mm->context.user_psize;
+ unsigned int psize = mm_ctx_user_psize(&mm->context);
VM_BUG_ON(radix_enabled());
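
Every direct poke at mm->context in slice.c is replaced with an mm_ctx_*() accessor, which lets book3s64 move the hash-specific slice state behind a separately allocated structure. The accessor bodies are not part of this hunk; a plausible shape, assuming the hash_context pointer that the subpage-prot hunk further down dereferences:

static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
{
	return ctx->hash_context->slb_addr_limit;
}

static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx,
					     unsigned long limit)
{
	ctx->hash_context->slb_addr_limit = limit;
}

static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
{
	return ctx->hash_context->low_slices_psize;
}

On non-book3s64 configurations the accessors presumably map straight onto the existing mm->context fields, which is why the !CONFIG_PPC64 branch of slice_init_new_context_exec() still assigns mm->context.slb_addr_limit directly.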
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 5e4178790dee..c9dff4e1f295 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -25,10 +25,13 @@
*/
void subpage_prot_free(struct mm_struct *mm)
{
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
unsigned long i, j, addr;
u32 **p;
+ if (!spt)
+ return;
+
for (i = 0; i < 4; ++i) {
if (spt->low_prot[i]) {
free_page((unsigned long)spt->low_prot[i]);
@@ -48,13 +51,7 @@ void subpage_prot_free(struct mm_struct *mm)
free_page((unsigned long)p);
}
spt->maxaddr = 0;
-}
-
-void subpage_prot_init_new_context(struct mm_struct *mm)
-{
- struct subpage_prot_table *spt = &mm->context.spt;
-
- memset(spt, 0, sizeof(*spt));
+ kfree(spt);
}
static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
@@ -93,12 +90,15 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
static void subpage_prot_clear(unsigned long addr, unsigned long len)
{
struct mm_struct *mm = current->mm;
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
u32 **spm, *spp;
unsigned long i;
size_t nw;
unsigned long next, limit;
+ if (!spt)
+ return;
+
down_write(&mm->mmap_sem);
limit = addr + len;
if (limit > spt->maxaddr)
@@ -189,7 +189,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
unsigned long, len, u32 __user *, map)
{
struct mm_struct *mm = current->mm;
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
u32 **spm, *spp;
unsigned long i;
size_t nw;
@@ -218,6 +218,20 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
return -EFAULT;
down_write(&mm->mmap_sem);
+
+ if (!spt) {
+ /*
+ * Allocate the subpage prot table if not already done.
+ * Do this with mmap_sem held.
+ */
+ spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
+ if (!spt) {
+ err = -ENOMEM;
+ goto out;
+ }
+ mm->context.hash_context->spt = spt;
+ }
+
subpage_mark_vma_nohuge(mm, addr, len);
for (limit = addr + len; addr < limit; addr = next) {
next = pmd_addr_end(addr, limit);
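
The subpage protection table is now allocated lazily on first use of the subpage_prot syscall and freed with the context, so subpage_prot_free() and subpage_prot_clear() must tolerate a NULL table. Given the mm->context.hash_context->spt assignment above, the accessor is presumably just:

static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
{
	return ctx->hash_context->spt;	/* NULL until the first subpage_prot() call */
}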
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index ac23dc1c6535..088e0a6b5ade 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -800,5 +800,11 @@ void __init early_init_mmu(void)
#ifdef CONFIG_PPC_47x
early_init_mmu_47x();
#endif
+
+#ifdef CONFIG_PPC_MM_SLICES
+#if defined(CONFIG_PPC_8xx)
+ init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
+#endif
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 549e9490ff2a..dcac37745b05 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,6 +51,8 @@
#define PPC_LIS(r, i) PPC_ADDIS(r, 0, i)
#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \
___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \
+ ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \
___PPC_RA(base) | ((i) & 0xfffc))
#define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \
@@ -65,7 +67,9 @@
#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \
___PPC_RA(base) | IMM_L(i))
#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
+ ___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \
+ ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \
___PPC_RA(base) | IMM_L(i))
#define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \
@@ -85,17 +89,6 @@
___PPC_RA(a) | ___PPC_RB(b))
#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \
___PPC_RA(a) | ___PPC_RB(b))
-
-#ifdef CONFIG_PPC64
-#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0)
-#define PPC_BPF_STL(r, base, i) do { PPC_STD(r, base, i); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
-#else
-#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
-#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
-#endif
-
#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \
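
PPC_LD now masks its displacement with 0xfffc rather than IMM_L() because ld/std are DS-form instructions: the low two bits of the 16-bit field are part of the sub-opcode, so only word-aligned displacements can be encoded, and an unaligned value ORed in via IMM_L() would leak into the sub-opcode bits. PPC_LDX/PPC_STDX provide the indexed (X-form) alternative for offsets that do not fit:

/* Encoding sketch:
 *   ld  rt, d(ra)     DS-form: d must satisfy (d & 3) == 0
 *   ldx rt, ra, rb    X-form:  offset held in rb, no alignment limit
 */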
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
index dc50a8d4b3b9..21744d8aa053 100644
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -122,6 +122,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i)
#endif
+#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
+#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
+#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
+
#define SEEN_DATAREF 0x10000 /* might call external helpers */
#define SEEN_XREG 0x20000 /* X reg is used */
#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 3609be4692b3..47f441f351a6 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -68,6 +68,26 @@ static const int b2p[] = {
/* PPC NVR range -- update this if we ever use NVRs below r27 */
#define BPF_PPC_NVR_MIN 27
+/*
+ * WARNING: These can clobber TMP_REG_2 if the offset is not at a word
+ * boundary, so ensure that it is not already in use.
+ */
+#define PPC_BPF_LL(r, base, i) do { \
+ if ((i) % 4) { \
+ PPC_LI(b2p[TMP_REG_2], (i)); \
+ PPC_LDX(r, base, b2p[TMP_REG_2]); \
+ } else \
+ PPC_LD(r, base, i); \
+ } while(0)
+#define PPC_BPF_STL(r, base, i) do { \
+ if ((i) % 4) { \
+ PPC_LI(b2p[TMP_REG_2], (i)); \
+ PPC_STDX(r, base, b2p[TMP_REG_2]); \
+ } else \
+ PPC_STD(r, base, i); \
+ } while(0)
+#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
+
#define SEEN_FUNC 0x1000 /* might call external helpers */
#define SEEN_STACK 0x2000 /* uses BPF stack */
#define SEEN_TAILCALL 0x4000 /* uses tail calls */
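
PPC_BPF_LL/PPC_BPF_STL pick between the immediate and indexed forms depending on the offset, which is why the comment above reserves TMP_REG_2. A worked expansion (illustrative register names):

/*
 *   PPC_BPF_LL(dst, src, 30)             offset not a multiple of 4
 *       -> PPC_LI(b2p[TMP_REG_2], 30)    li   tmp2, 30
 *       -> PPC_LDX(dst, src, tmp2)       ldx  dst, src, tmp2
 *
 *   PPC_BPF_LL(dst, src, 32)             word-aligned offset
 *       -> PPC_LD(dst, src, 32)          ld   dst, 32(src)
 *
 * Callers therefore must not keep live data in TMP_REG_2 across these
 * macros when the offset can be arbitrary (e.g. BPF_LDX/BPF_STX with
 * insn->off), which is exactly how bpf_jit_comp64.c now uses them.
 */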
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 4194d3cfb60c..21a1dcd4b156 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -252,7 +252,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
+ PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
PPC_BCC(COND_GT, out);
@@ -265,7 +265,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* prog = array->ptrs[index]; */
PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
- PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
+ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
/*
* if (prog == NULL)
@@ -275,7 +275,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
PPC_BCC(COND_EQ, out);
/* goto *(prog->bpf_func + prologue_size); */
- PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
+ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
/* skip past the function descriptor */
PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
@@ -606,7 +606,7 @@ bpf_alu32_trunc:
* the instructions generated will remain the
* same across all passes
*/
- PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx));
+ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
break;
@@ -662,7 +662,7 @@ emit_clear:
PPC_LI32(b2p[TMP_REG_1], imm);
src_reg = b2p[TMP_REG_1];
}
- PPC_STD(src_reg, dst_reg, off);
+ PPC_BPF_STL(src_reg, dst_reg, off);
break;
/*
@@ -709,7 +709,7 @@ emit_clear:
break;
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
- PPC_LD(dst_reg, src_reg, off);
+ PPC_BPF_LL(dst_reg, src_reg, off);
break;
/*
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c
index 5c31d8292d3b..e7c2c3fb011a 100644
--- a/arch/powerpc/platforms/83xx/usb.c
+++ b/arch/powerpc/platforms/83xx/usb.c
@@ -221,8 +221,10 @@ int mpc837x_usb_cfg(void)
int ret = 0;
np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (!np || !of_device_is_available(np))
+ if (!np || !of_device_is_available(np)) {
+ of_node_put(np);
return -ENODEV;
+ }
prop = of_get_property(np, "phy_type", NULL);
if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
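
The early-return path previously leaked the reference taken by of_find_compatible_node(). A note on why the combined check is safe:

/* of_find_compatible_node() returns its result with an elevated refcount,
 * so every exit path must drop it.  of_node_put(NULL) is a no-op, which is
 * why the "!np || !of_device_is_available(np)" branch can call it
 * unconditionally. */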
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index 8d5a25d43ef3..e9617d35fd1f 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,10 +153,9 @@ int mpc8xx_pic_init(void)
if (mpc8xx_pic_host == NULL) {
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
ret = -ENOMEM;
- goto out;
}
- return 0;
+ ret = 0;
out:
of_node_put(np);
return ret;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 842b2c7e156a..60a7c7095b05 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -25,6 +25,8 @@ config PPC_BOOK3S_32
bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
config PPC_85xx
bool "Freescale 85xx"
@@ -34,6 +36,8 @@ config PPC_8xx
bool "Freescale 8xx"
select FSL_SOC
select SYS_SUPPORTS_HUGETLBFS
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
config 40x
bool "AMCC 40x"
@@ -326,6 +330,8 @@ config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
default y
help
Enable support for the Power ISA 3.0 Radix style MMU. Currently this
@@ -345,6 +351,37 @@ config PPC_RADIX_MMU_DEFAULT
If you're unsure, say Y.
+config PPC_HAVE_KUEP
+ bool
+
+config PPC_KUEP
+ bool "Kernel Userspace Execution Prevention"
+ depends on PPC_HAVE_KUEP
+ default y
+ help
+ Enable support for Kernel Userspace Execution Prevention (KUEP)
+
+ If you're unsure, say Y.
+
+config PPC_HAVE_KUAP
+ bool
+
+config PPC_KUAP
+ bool "Kernel Userspace Access Protection"
+ depends on PPC_HAVE_KUAP
+ default y
+ help
+ Enable support for Kernel Userspace Access Protection (KUAP)
+
+ If you're unsure, say Y.
+
+config PPC_KUAP_DEBUG
+ bool "Extra debugging for Kernel Userspace Access Protection"
+ depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32)
+ help
+ Add extra debugging for Kernel Userspace Access Protection (KUAP)
+ If you're unsure, say N.
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 7f12c7b78c0f..6646f152d57b 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -194,7 +194,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
* faults need to be deferred to process context.
*/
if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
- (REGION_ID(ea) != USER_REGION_ID)) {
+ (get_region_id(ea) != USER_REGION_ID)) {
spin_unlock(&spu->register_lock);
ret = hash_page(ea,
@@ -224,7 +224,7 @@ static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
unsigned long ea = (unsigned long)addr;
u64 llp;
- if (REGION_ID(ea) == KERNEL_REGION_ID)
+ if (get_region_id(ea) == LINEAR_MAP_REGION_ID)
llp = mmu_psize_defs[mmu_linear_psize].sllp;
else
llp = mmu_psize_defs[mmu_virtual_psize].sllp;
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 0409714e8070..829bf3697dc9 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -44,7 +44,8 @@
#define HOLLY_PCI_CFG_PHYS 0x7c000000
-int holly_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn)
+static int holly_exclude_device(struct pci_controller *hose, u_char bus,
+ u_char devfn)
{
if (bus == 0 && PCI_SLOT(devfn) == 0)
return PCIBIOS_DEVICE_NOT_FOUND;
@@ -187,13 +188,13 @@ static void __init holly_init_IRQ(void)
tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
}
-void holly_show_cpuinfo(struct seq_file *m)
+static void holly_show_cpuinfo(struct seq_file *m)
{
seq_printf(m, "vendor\t\t: IBM\n");
seq_printf(m, "machine\t\t: PPC750 GX/CL\n");
}
-void __noreturn holly_restart(char *cmd)
+static void __noreturn holly_restart(char *cmd)
{
__be32 __iomem *ocn_bar1 = NULL;
unsigned long bar;
@@ -233,18 +234,6 @@ void __noreturn holly_restart(char *cmd)
for (;;) ;
}
-void holly_power_off(void)
-{
- local_irq_disable();
- /* No way to shut power off with software */
- for (;;) ;
-}
-
-void holly_halt(void)
-{
- holly_power_off();
-}
-
/*
* Called very early, device-tree isn't unflattened
*/
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 7472244e7f30..7cba0d5da3ff 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -121,6 +121,8 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
#define OPAL_CALL(name, opcode) \
int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7); \
+int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
int64_t a4, int64_t a5, int64_t a6, int64_t a7) \
{ \
return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \
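
Each OPAL_CALL() instantiation now emits a declaration immediately before the definition, presumably so every generated wrapper has a prototype in scope. For instance, OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE) expands to roughly:

int64_t opal_console_write(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
			   int64_t a4, int64_t a5, int64_t a6, int64_t a7);
int64_t opal_console_write(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
			   int64_t a4, int64_t a5, int64_t a6, int64_t a7)
{
	return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, OPAL_CONSOLE_WRITE);
}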
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index d291b618a559..47087832f8b2 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -379,7 +379,7 @@ static int dlpar_add_lmb(struct drmem_lmb *);
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int nid, rc;
+ int rc;
if (!lmb_is_removable(lmb))
return -EINVAL;
@@ -389,14 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
return rc;
block_sz = pseries_memory_block_size();
- nid = memory_add_physaddr_to_nid(lmb->base_addr);
- __remove_memory(nid, lmb->base_addr, block_sz);
+ __remove_memory(lmb->nid, lmb->base_addr, block_sz);
/* Update memory regions for memory remove */
memblock_remove(lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
+ lmb_clear_nid(lmb);
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
return 0;
@@ -653,7 +653,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
static int dlpar_add_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int nid, rc;
+ int rc;
if (lmb->flags & DRCONF_MEM_ASSIGNED)
return -EINVAL;
@@ -664,13 +664,11 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
return rc;
}
+ lmb_set_nid(lmb);
block_sz = memory_block_size_bytes();
- /* Find the node id for this address */
- nid = memory_add_physaddr_to_nid(lmb->base_addr);
-
/* Add the memory */
- rc = __add_memory(nid, lmb->base_addr, block_sz);
+ rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;
@@ -678,8 +676,9 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
rc = dlpar_online_lmb(lmb);
if (rc) {
- __remove_memory(nid, lmb->base_addr, block_sz);
+ __remove_memory(lmb->nid, lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
+ lmb_clear_nid(lmb);
} else {
lmb->flags |= DRCONF_MEM_ASSIGNED;
}
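
The node id for an LMB is now looked up once and cached in lmb->nid rather than recomputed with memory_add_physaddr_to_nid() on every add and remove. The lmb_set_nid()/lmb_clear_nid() helpers are not shown in this hunk; a plausible shape, consistent with the calls removed above:

static inline void lmb_set_nid(struct drmem_lmb *lmb)
{
	lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
}

static inline void lmb_clear_nid(struct drmem_lmb *lmb)
{
	lmb->nid = -1;	/* assumption: -1 marks "no node cached" */
}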
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 36eb1ddbac69..03bbb299320e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -105,7 +105,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
unsigned long attrs)
{
u64 proto_tce;
- __be64 *tcep, *tces;
+ __be64 *tcep;
u64 rpn;
proto_tce = TCE_PCI_READ; // Read allowed
@@ -113,7 +113,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--) {
/* can't move this out since we might cross MEMBLOCK boundary */
@@ -129,9 +129,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
- __be64 *tcep, *tces;
+ __be64 *tcep;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--)
*(tcep++) = 0;
@@ -945,7 +945,7 @@ static phys_addr_t ddw_memory_hotplug_max(void)
for_each_node_by_type(memory, "memory") {
unsigned long start, size;
- int ranges, n_mem_addr_cells, n_mem_size_cells, len;
+ int n_mem_addr_cells, n_mem_size_cells, len;
const __be32 *memcell_buf;
memcell_buf = of_get_property(memory, "reg", &len);
@@ -955,9 +955,6 @@ static phys_addr_t ddw_memory_hotplug_max(void)
n_mem_addr_cells = of_n_addr_cells(memory);
n_mem_size_cells = of_n_size_cells(memory);
- /* ranges in cell */
- ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
-
start = of_read_number(memcell_buf, n_mem_addr_cells);
memcell_buf += n_mem_addr_cells;
size = of_read_number(memcell_buf, n_mem_size_cells);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f2a9f0adc2d3..1034ef1fe2b4 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -901,8 +901,10 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
break;
case H_PARAMETER:
+ pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
return -EINVAL;
case H_RESOURCE:
+ pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
return -EPERM;
default:
pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
@@ -918,7 +920,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
if (rc != 0) {
switch (state.commit_rc) {
case H_PTEG_FULL:
- pr_warn("Hash collision while resizing HPT\n");
return -ENOSPC;
default:
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index 27f0a915c8a9..f860a897a9e0 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -106,7 +106,7 @@ static ssize_t pmem_drc_remove_node(u32 drc_index)
int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
{
- u32 count, drc_index;
+ u32 drc_index;
int rc;
/* slim chance, but we might get a hotplug event while booting */
@@ -123,7 +123,6 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
return -EINVAL;
}
- count = hp_elog->_drc_u.drc_count;
drc_index = hp_elog->_drc_u.drc_index;
lock_device_hotplug();
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index 6ed22127391b..921f12182f3e 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -77,18 +77,27 @@ static u32 cpu_to_drc_index(int cpu)
ret = drc.drc_index_start + (thread_index * drc.sequential_inc);
} else {
- const __be32 *indexes;
-
- indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
- if (indexes == NULL)
- goto err_of_node_put;
+ u32 nr_drc_indexes, thread_drc_index;
/*
- * The first element indexes[0] is the number of drc_indexes
- * returned in the list. Hence thread_index+1 will get the
- * drc_index corresponding to core number thread_index.
+ * The first element of ibm,drc-indexes array is the
+ * number of drc_indexes returned in the list. Hence
+ * thread_index+1 will get the drc_index corresponding
+ * to core number thread_index.
*/
- ret = indexes[thread_index + 1];
+ rc = of_property_read_u32_index(dn, "ibm,drc-indexes",
+ 0, &nr_drc_indexes);
+ if (rc)
+ goto err_of_node_put;
+
+ WARN_ON_ONCE(thread_index > nr_drc_indexes);
+ rc = of_property_read_u32_index(dn, "ibm,drc-indexes",
+ thread_index + 1,
+ &thread_drc_index);
+ if (rc)
+ goto err_of_node_put;
+
+ ret = thread_drc_index;
}
rc = 0;
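
Reading the cells through of_property_read_u32_index() bounds-checks the access against the property length and returns a CPU-endian value, unlike the removed direct indexing of the raw __be32 array:

/* Old access, shown for contrast:
 *   const __be32 *indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
 *   ret = indexes[thread_index + 1];     // raw big-endian cell, no bounds check
 *
 * New access:
 *   u32 v;
 *   of_property_read_u32_index(dn, "ibm,drc-indexes", thread_index + 1, &v);
 *   ret = v;                             // CPU-endian, length-checked
 */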
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index d97d52772789..c97d15352f9f 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -539,43 +539,44 @@ static void pseries_print_mce_info(struct pt_regs *regs,
int disposition = rtas_error_disposition(errp);
static const char * const initiators[] = {
- "Unknown",
- "CPU",
- "PCI",
- "ISA",
- "Memory",
- "Power Mgmt",
+ [0] = "Unknown",
+ [1] = "CPU",
+ [2] = "PCI",
+ [3] = "ISA",
+ [4] = "Memory",
+ [5] = "Power Mgmt",
};
static const char * const mc_err_types[] = {
- "UE",
- "SLB",
- "ERAT",
- "TLB",
- "D-Cache",
- "Unknown",
- "I-Cache",
+ [0] = "UE",
+ [1] = "SLB",
+ [2] = "ERAT",
+ [3] = "Unknown",
+ [4] = "TLB",
+ [5] = "D-Cache",
+ [6] = "Unknown",
+ [7] = "I-Cache",
};
static const char * const mc_ue_types[] = {
- "Indeterminate",
- "Instruction fetch",
- "Page table walk ifetch",
- "Load/Store",
- "Page table walk Load/Store",
+ [0] = "Indeterminate",
+ [1] = "Instruction fetch",
+ [2] = "Page table walk ifetch",
+ [3] = "Load/Store",
+ [4] = "Page table walk Load/Store",
};
/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
static const char * const mc_slb_types[] = {
- "Parity",
- "Multihit",
- "Indeterminate",
+ [0] = "Parity",
+ [1] = "Multihit",
+ [2] = "Indeterminate",
};
/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
static const char * const mc_soft_types[] = {
- "Unknown",
- "Parity",
- "Multihit",
- "Indeterminate",
+ [0] = "Unknown",
+ [1] = "Parity",
+ [2] = "Multihit",
+ [3] = "Indeterminate",
};
if (!rtas_error_extended(errp)) {
@@ -706,6 +707,87 @@ out:
return disposition;
}
+#ifdef CONFIG_MEMORY_FAILURE
+
+static DEFINE_PER_CPU(int, rtas_ue_count);
+static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
+
+#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
+#define UE_LOGICAL_ADDR_PROVIDED 0x20
+
+
+static void pseries_hwpoison_work_fn(struct work_struct *work)
+{
+ unsigned long paddr;
+ int index;
+
+ while (__this_cpu_read(rtas_ue_count) > 0) {
+ index = __this_cpu_read(rtas_ue_count) - 1;
+ paddr = __this_cpu_read(rtas_ue_paddr[index]);
+ memory_failure(paddr >> PAGE_SHIFT, 0);
+ __this_cpu_dec(rtas_ue_count);
+ }
+}
+
+static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
+
+static void queue_ue_paddr(unsigned long paddr)
+{
+ int index;
+
+ index = __this_cpu_inc_return(rtas_ue_count) - 1;
+ if (index >= MAX_MC_EVT) {
+ __this_cpu_dec(rtas_ue_count);
+ return;
+ }
+ this_cpu_write(rtas_ue_paddr[index], paddr);
+ schedule_work(&hwpoison_work);
+}
+
+static void pseries_do_memory_failure(struct pt_regs *regs,
+ struct pseries_mc_errorlog *mce_log)
+{
+ unsigned long paddr;
+
+ if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
+ paddr = be64_to_cpu(mce_log->logical_address);
+ } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
+ unsigned long pfn;
+
+ pfn = addr_to_pfn(regs,
+ be64_to_cpu(mce_log->effective_address));
+ if (pfn == ULONG_MAX)
+ return;
+ paddr = pfn << PAGE_SHIFT;
+ } else {
+ return;
+ }
+ queue_ue_paddr(paddr);
+}
+
+static void pseries_process_ue(struct pt_regs *regs,
+ struct rtas_error_log *errp)
+{
+ struct pseries_errorlog *pseries_log;
+ struct pseries_mc_errorlog *mce_log;
+
+ if (!rtas_error_extended(errp))
+ return;
+
+ pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+ if (!pseries_log)
+ return;
+
+ mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+
+ if (mce_log->error_type == MC_ERROR_TYPE_UE)
+ pseries_do_memory_failure(regs, mce_log);
+}
+#else
+static inline void pseries_process_ue(struct pt_regs *regs,
+ struct rtas_error_log *errp) { }
+#endif /* CONFIG_MEMORY_FAILURE */
+
/*
* Process MCE rtas errlog event.
*/
@@ -764,6 +846,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
recovered = 1;
}
+ pseries_process_ue(regs, err);
+
/* Queue irq work to log this rtas event later. */
irq_work_queue(&mce_errlog_process_work);