diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-30 12:22:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-30 12:22:28 -0700 |
commit | c70a4be130de333ea079c59da41cc959712bb01c (patch) | |
tree | efa1b9a7aac979dcbf53ce89e2f8ffc61f6d2952 /arch/powerpc/mm | |
parent | 437d1a5b66ca60f209e25f469b395741cc10b731 (diff) | |
parent | 5256426247837feb8703625bda7fcfc824af04cf (diff) | |
download | linux-c70a4be130de333ea079c59da41cc959712bb01c.tar.bz2 |
Merge tag 'powerpc-5.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- Enable KFENCE for 32-bit.
- Implement EBPF for 32-bit.
- Convert 32-bit to do interrupt entry/exit in C.
- Convert 64-bit BookE to do interrupt entry/exit in C.
- Changes to our signal handling code to use user_access_begin/end()
more extensively.
- Add support for time namespaces (CONFIG_TIME_NS)
- A series of fixes that allow us to reenable STRICT_KERNEL_RWX.
- Other smaller features, fixes & cleanups.
Thanks to Alexey Kardashevskiy, Andreas Schwab, Andrew Donnellan, Aneesh
Kumar K.V, Athira Rajeev, Bhaskar Chowdhury, Bixuan Cui, Cédric Le
Goater, Chen Huang, Chris Packham, Christophe Leroy, Christopher M.
Riedl, Colin Ian King, Dan Carpenter, Daniel Axtens, Daniel Henrique
Barboza, David Gibson, Davidlohr Bueso, Denis Efremov, dingsenjie,
Dmitry Safonov, Dominic DeMarco, Fabiano Rosas, Ganesh Goudar, Geert
Uytterhoeven, Geetika Moolchandani, Greg Kurz, Guenter Roeck, Haren
Myneni, He Ying, Jiapeng Chong, Jordan Niethe, Laurent Dufour, Lee
Jones, Leonardo Bras, Li Huafei, Madhavan Srinivasan, Mahesh Salgaonkar,
Masahiro Yamada, Nathan Chancellor, Nathan Lynch, Nicholas Piggin,
Oliver O'Halloran, Paul Menzel, Pu Lehui, Randy Dunlap, Ravi Bangoria,
Rosen Penev, Russell Currey, Santosh Sivaraj, Sebastian Andrzej Siewior,
Segher Boessenkool, Shivaprasad G Bhat, Srikar Dronamraju, Stephen
Rothwell, Thadeu Lima de Souza Cascardo, Thomas Gleixner, Tony Ambardar,
Tyrel Datwyler, Vaibhav Jain, Vincenzo Frascino, Xiongwei Song, Yang Li,
Yu Kuai, and Zhang Yunkai.
* tag 'powerpc-5.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (302 commits)
powerpc/signal32: Fix erroneous SIGSEGV on RT signal return
powerpc: Avoid clang uninitialized warning in __get_user_size_allowed
powerpc/papr_scm: Mark nvdimm as unarmed if needed during probe
powerpc/kvm: Fix build error when PPC_MEM_KEYS/PPC_PSERIES=n
powerpc/kasan: Fix shadow start address with modules
powerpc/kernel/iommu: Use largepool as a last resort when !largealloc
powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs
powerpc/44x: fix spelling mistake in Kconfig "varients" -> "variants"
powerpc/iommu: Annotate nested lock for lockdep
powerpc/iommu: Do not immediately panic when failed IOMMU table allocation
powerpc/iommu: Allocate it_map by vmalloc
selftests/powerpc: remove unneeded semicolon
powerpc/64s: remove unneeded semicolon
powerpc/eeh: remove unneeded semicolon
powerpc/selftests: Add selftest to test concurrent perf/ptrace events
powerpc/selftests/perf-hwbreak: Add testcases for 2nd DAWR
powerpc/selftests/perf-hwbreak: Coalesce event creation code
powerpc/selftests/ptrace-hwbreak: Add testcases for 2nd DAWR
powerpc/configs: Add IBMVNIC to some 64-bit configs
selftests/powerpc: Add uaccess flush test
...
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s32/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s32/hash_low.S | 14 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s32/kuep.c | 40 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s32/mmu.c | 9 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_pgtable.c | 130 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/hash_utils.c | 35 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/mmu_context.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/pkeys.c | 20 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/radix_pgtable.c | 10 | ||||
-rw-r--r-- | arch/powerpc/mm/cacheflush.c | 234 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 60 | ||||
-rw-r--r-- | arch/powerpc/mm/init_32.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/maccess.c | 21 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 283 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context.c | 24 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/nohash/8xx.c | 4 |
18 files changed, 500 insertions, 398 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 3b4e9e4e25ea..c3df3a8501d4 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -8,7 +8,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ - init-common.o mmu_context.o drmem.o + init-common.o mmu_context.o drmem.o \ + cacheflush.o obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/ diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 446d9de88ce4..7f0c8a78ba0c 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -9,3 +9,4 @@ endif obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o +obj-$(CONFIG_PPC_KUEP) += kuep.o diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 0e6dc830c38b..fb4233a5bdf7 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -140,10 +140,6 @@ _GLOBAL(hash_page) bne- .Lretry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ -#ifndef CONFIG_VMAP_STACK - mfctr r0 - stw r0,_CTR(r11) -#endif bl create_hpte /* add the hash table entry */ #ifdef CONFIG_SMP @@ -152,17 +148,7 @@ _GLOBAL(hash_page) li r0,0 stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) #endif - -#ifdef CONFIG_VMAP_STACK b fast_hash_page_return -#else - /* Return from the exception */ - lwz r5,_CTR(r11) - mtctr r5 - lwz r0,GPR0(r11) - lwz r8,GPR8(r11) - b fast_exception_return -#endif #ifdef CONFIG_SMP .Lhash_page_out: diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c new file mode 100644 index 000000000000..8ed1b8634839 --- /dev/null +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <asm/kup.h> +#include <asm/reg.h> +#include <asm/task_size_32.h> +#include <asm/mmu.h> + +#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \ + if (TASK_SIZE > ((n) << 28)) \ + mtsr(val1, (n) << 28); \ + if (TASK_SIZE > (((n) + 1) << 28)) \ + mtsr(val2, ((n) + 1) << 28); \ + val1 = (val1 + 0x222) & 0xf0ffffff; \ + val2 = (val2 + 0x222) & 0xf0ffffff; \ +} while (0) + +static __always_inline void kuep_update(u32 val) +{ + int val1 = val; + int val2 = (val + 0x111) & 0xf0ffffff; + + KUEP_UPDATE_TWO_USER_SEGMENTS(0); + KUEP_UPDATE_TWO_USER_SEGMENTS(2); + KUEP_UPDATE_TWO_USER_SEGMENTS(4); + KUEP_UPDATE_TWO_USER_SEGMENTS(6); + KUEP_UPDATE_TWO_USER_SEGMENTS(8); + KUEP_UPDATE_TWO_USER_SEGMENTS(10); + KUEP_UPDATE_TWO_USER_SEGMENTS(12); + KUEP_UPDATE_TWO_USER_SEGMENTS(14); +} + +void kuep_lock(void) +{ + kuep_update(mfsr(0) | SR_NX); +} + +void kuep_unlock(void) +{ + kuep_update(mfsr(0) & ~SR_NX); +} diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index d7eb266a3f7a..159930351d9f 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -162,7 +162,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (debug_pagealloc_enabled() || __map_without_bats) { + if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) { pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; @@ -184,17 +184,10 @@ static bool is_module_segment(unsigned long addr) { if (!IS_ENABLED(CONFIG_MODULES)) return false; -#ifdef MODULES_VADDR if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) return false; if (addr > ALIGN(MODULES_END, SZ_256M) - 1) return false; -#else - if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) - return false; - if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1) - return false; -#endif return true; } diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 567e0c6b3978..ad5eff097d31 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/mm.h> +#include <linux/stop_machine.h> #include <asm/sections.h> #include <asm/mmu.h> @@ -400,10 +401,103 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX + +struct change_memory_parms { + unsigned long start, end, newpp; + unsigned int step, nr_cpus, master_cpu; + atomic_t cpu_counter; +}; + +// We'd rather this was on the stack but it has to be in the RMO +static struct change_memory_parms chmem_parms; + +// And therefore we need a lock to protect it from concurrent use +static DEFINE_MUTEX(chmem_lock); + +static void change_memory_range(unsigned long start, unsigned long end, + unsigned int step, unsigned long newpp) +{ + unsigned long idx; + + pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", + start, end, newpp, step); + + for (idx = start; idx < end; idx += step) + /* Not sure if we can do much with the return value */ + mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, + mmu_kernel_ssize); +} + +static int notrace chmem_secondary_loop(struct change_memory_parms *parms) +{ + unsigned long msr, tmp, flags; + int *p; + + p = &parms->cpu_counter.counter; + + local_irq_save(flags); + hard_irq_disable(); + + asm volatile ( + // Switch to real mode and leave interrupts off + "mfmsr %[msr] ;" + "li %[tmp], %[MSR_IR_DR] ;" + "andc %[tmp], %[msr], %[tmp] ;" + "mtmsrd %[tmp] ;" + + // Tell the master we are in real mode + "1: " + "lwarx %[tmp], 0, %[p] ;" + "addic %[tmp], %[tmp], -1 ;" + "stwcx. %[tmp], 0, %[p] ;" + "bne- 1b ;" + + // Spin until the counter goes to zero + "2: ;" + "lwz %[tmp], 0(%[p]) ;" + "cmpwi %[tmp], 0 ;" + "bne- 2b ;" + + // Switch back to virtual mode + "mtmsrd %[msr] ;" + + : // outputs + [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p) + : // inputs + [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR) + : // clobbers + "cc", "xer" + ); + + local_irq_restore(flags); + + return 0; +} + +static int change_memory_range_fn(void *data) +{ + struct change_memory_parms *parms = data; + + if (parms->master_cpu != smp_processor_id()) + return chmem_secondary_loop(parms); + + // Wait for all but one CPU (this one) to call-in + while (atomic_read(&parms->cpu_counter) > 1) + barrier(); + + change_memory_range(parms->start, parms->end, parms->step, parms->newpp); + + mb(); + + // Signal the other CPUs that we're done + atomic_dec(&parms->cpu_counter); + + return 0; +} + static bool hash__change_memory_range(unsigned long start, unsigned long end, unsigned long newpp) { - unsigned long idx; unsigned int step, shift; shift = mmu_psize_defs[mmu_linear_psize].shift; @@ -415,25 +509,43 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, if (start >= end) return false; - pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", - start, end, newpp, step); + if (firmware_has_feature(FW_FEATURE_LPAR)) { + mutex_lock(&chmem_lock); - for (idx = start; idx < end; idx += step) - /* Not sure if we can do much with the return value */ - mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, - mmu_kernel_ssize); + chmem_parms.start = start; + chmem_parms.end = end; + chmem_parms.step = step; + chmem_parms.newpp = newpp; + chmem_parms.master_cpu = smp_processor_id(); + + cpus_read_lock(); + + atomic_set(&chmem_parms.cpu_counter, num_online_cpus()); + + // Ensure state is consistent before we call the other CPUs + mb(); + + stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms, + cpu_online_mask); + + cpus_read_unlock(); + mutex_unlock(&chmem_lock); + } else + change_memory_range(start, end, step, newpp); return true; } void hash__mark_rodata_ro(void) { - unsigned long start, end; + unsigned long start, end, pp; start = (unsigned long)_stext; end = (unsigned long)__init_begin; - WARN_ON(!hash__change_memory_range(start, end, PP_RXXX)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY); + + WARN_ON(!hash__change_memory_range(start, end, pp)); } void hash__mark_initmem_nx(void) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 581b20a2feaf..96d9aa164007 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -338,7 +338,7 @@ repeat: int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize) { - unsigned long vaddr; + unsigned long vaddr, time_limit; unsigned int step, shift; int rc; int ret = 0; @@ -351,8 +351,19 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, /* Unmap the full range specificied */ vaddr = ALIGN_DOWN(vstart, step); + time_limit = jiffies + HZ; + for (;vaddr < vend; vaddr += step) { rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); + + /* + * For large number of mappings introduce a cond_resched() + * to prevent softlockup warnings. + */ + if (time_after(jiffies, time_limit)) { + cond_resched(); + time_limit = jiffies + HZ; + } if (rc == -ENOENT) { ret = -ENOENT; continue; @@ -1145,7 +1156,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) /* page is dirty */ if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { - if (trap == 0x400) { + if (trap == INTERRUPT_INST_STORAGE) { flush_dcache_icache_page(page); set_bit(PG_dcache_clean, &page->flags); } else @@ -1545,10 +1556,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (regs->trap == 0x400) + if (TRAP(regs) == INTERRUPT_INST_STORAGE) access |= _PAGE_EXEC; - err = hash_page_mm(mm, ea, access, regs->trap, flags); + err = hash_page_mm(mm, ea, access, TRAP(regs), flags); if (unlikely(err < 0)) { // failed to instert a hash PTE due to an hypervisor error if (user_mode(regs)) { @@ -1572,10 +1583,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) { unsigned long dsisr = regs->dsisr; - long err; - if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) - goto page_fault; + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) { + hash__do_page_fault(regs); + return 0; + } /* * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then @@ -1595,13 +1607,10 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) return 0; } - err = __do_hash_fault(regs); - if (err) { -page_fault: - err = hash__do_page_fault(regs); - } + if (__do_hash_fault(regs)) + hash__do_page_fault(regs); - return err; + return 0; } #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 0c8557220ae2..c10fc8a72fb3 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -119,7 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm) /* This is fork. Copy hash_context details from current->mm */ memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context)); #ifdef CONFIG_PPC_SUBPAGE_PROT - /* inherit subpage prot detalis if we have one. */ + /* inherit subpage prot details if we have one. */ if (current->mm->context.hash_context->spt) { mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 15dcc5ad91c5..a2d9ad138709 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -301,19 +301,6 @@ void setup_kuap(bool disabled) } #endif -static inline void update_current_thread_amr(u64 value) -{ - current->thread.regs->amr = value; -} - -static inline void update_current_thread_iamr(u64 value) -{ - if (!likely(pkey_execute_disable_supported)) - return; - - current->thread.regs->iamr = value; -} - #ifdef CONFIG_PPC_MEM_KEYS void pkey_mm_init(struct mm_struct *mm) { @@ -328,7 +315,7 @@ static inline void init_amr(int pkey, u8 init_bits) u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); - update_current_thread_amr(old_amr | new_amr_bits); + current->thread.regs->amr = old_amr | new_amr_bits; } static inline void init_iamr(int pkey, u8 init_bits) @@ -336,7 +323,10 @@ static inline void init_iamr(int pkey, u8 init_bits) u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); - update_current_thread_iamr(old_iamr | new_iamr_bits); + if (!likely(pkey_execute_disable_supported)) + return; + + current->thread.regs->iamr = old_iamr | new_iamr_bits; } /* diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 98f0b243c1ab..50d536ecc89b 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -108,7 +108,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -168,7 +168,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -180,8 +180,8 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, } #ifdef CONFIG_STRICT_KERNEL_RWX -void radix__change_memory_range(unsigned long start, unsigned long end, - unsigned long clear) +static void radix__change_memory_range(unsigned long start, unsigned long end, + unsigned long clear) { unsigned long idx; pgd_t *pgdp; @@ -1058,7 +1058,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, * Book3S does not require a TLB flush when relaxing access * restrictions when the address space is not attached to a * NMMU, because the core MMU will reload the pte after taking - * an access fault, which is defined by the architectue. + * an access fault, which is defined by the architecture. */ } /* See ptesync comment in radix__set_pte_at */ diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c new file mode 100644 index 000000000000..63363787e000 --- /dev/null +++ b/arch/powerpc/mm/cacheflush.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/highmem.h> +#include <linux/kprobes.h> + +/** + * flush_coherent_icache() - if a CPU has a coherent icache, flush it + * Return true if the cache was flushed, false otherwise + */ +static inline bool flush_coherent_icache(void) +{ + /* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. + */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + mb(); /* sync */ + icbi((void *)PAGE_OFFSET); + mb(); /* sync */ + isync(); + return true; + } + + return false; +} + +/** + * invalidate_icache_range() - Flush the icache by issuing icbi across an address range + * @start: the start address + * @stop: the stop address (exclusive) + */ +static void invalidate_icache_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_icache_shift(); + unsigned long bytes = l1_icache_bytes(); + char *addr = (char *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + icbi(addr); + + mb(); /* sync */ + isync(); +} + +/** + * flush_icache_range: Write any modified data cache blocks out to memory + * and invalidate the corresponding blocks in the instruction cache + * + * Generic code will call this after writing memory, before executing from it. + * + * @start: the start address + * @stop: the stop address (exclusive) + */ +void flush_icache_range(unsigned long start, unsigned long stop) +{ + if (flush_coherent_icache()) + return; + + clean_dcache_range(start, stop); + + if (IS_ENABLED(CONFIG_44x)) { + /* + * Flash invalidate on 44x because we are passed kmapped + * addresses and this doesn't work for userspace pages due to + * the virtually tagged icache. + */ + iccci((void *)start); + mb(); /* sync */ + isync(); + } else + invalidate_icache_range(start, stop); +} +EXPORT_SYMBOL(flush_icache_range); + +#ifdef CONFIG_HIGHMEM +/** + * flush_dcache_icache_phys() - Flush a page by it's physical address + * @physaddr: the physical address of the page + */ +static void flush_dcache_icache_phys(unsigned long physaddr) +{ + unsigned long bytes = l1_dcache_bytes(); + unsigned long nb = PAGE_SIZE / bytes; + unsigned long addr = physaddr & PAGE_MASK; + unsigned long msr, msr0; + unsigned long loop1 = addr, loop2 = addr; + + msr0 = mfmsr(); + msr = msr0 & ~MSR_DR; + /* + * This must remain as ASM to prevent potential memory accesses + * while the data MMU is disabled + */ + asm volatile( + " mtctr %2;\n" + " mtmsr %3;\n" + " isync;\n" + "0: dcbst 0, %0;\n" + " addi %0, %0, %4;\n" + " bdnz 0b;\n" + " sync;\n" + " mtctr %2;\n" + "1: icbi 0, %1;\n" + " addi %1, %1, %4;\n" + " bdnz 1b;\n" + " sync;\n" + " mtmsr %5;\n" + " isync;\n" + : "+&r" (loop1), "+&r" (loop2) + : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) + : "ctr", "memory"); +} +NOKPROBE_SYMBOL(flush_dcache_icache_phys) +#else +static void flush_dcache_icache_phys(unsigned long physaddr) +{ +} +#endif + +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @p: the address of the page to flush + */ +static void __flush_dcache_icache(void *p) +{ + unsigned long addr = (unsigned long)p & PAGE_MASK; + + clean_dcache_range(addr, addr + PAGE_SIZE); + + /* + * We don't flush the icache on 44x. Those have a virtual icache and we + * don't have access to the virtual address here (it's not the page + * vaddr but where it's mapped in user space). The flushing of the + * icache on these is handled elsewhere, when a change in the address + * space occurs, before returning to user space. + */ + + if (mmu_has_feature(MMU_FTR_TYPE_44x)) + return; + + invalidate_icache_range(addr, addr + PAGE_SIZE); +} + +static void flush_dcache_icache_hugepage(struct page *page) +{ + int i; + int nr = compound_nr(page); + + if (!PageHighMem(page)) { + for (i = 0; i < nr; i++) + __flush_dcache_icache(lowmem_page_address(page + i)); + } else { + for (i = 0; i < nr; i++) { + void *start = kmap_local_page(page + i); + + __flush_dcache_icache(start); + kunmap_local(start); + } + } +} + +void flush_dcache_icache_page(struct page *page) +{ + if (flush_coherent_icache()) + return; + + if (PageCompound(page)) + return flush_dcache_icache_hugepage(page); + + if (!PageHighMem(page)) { + __flush_dcache_icache(lowmem_page_address(page)); + } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { + void *start = kmap_local_page(page); + + __flush_dcache_icache(start); + kunmap_local(start); + } else { + flush_dcache_icache_phys(page_to_phys(page)); + } +} +EXPORT_SYMBOL(flush_dcache_icache_page); + +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + /* + * We shouldn't have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + flush_dcache_page(pg); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. + * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + flush_dcache_page(pg); +} + +void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + void *maddr; + + maddr = kmap_local_page(page) + (addr & ~PAGE_MASK); + flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len); + kunmap_local(maddr); +} diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index bb368257b55c..34f641d4a2fe 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,6 +32,8 @@ #include <linux/context_tracking.h> #include <linux/hugetlb.h> #include <linux/uaccess.h> +#include <linux/kfence.h> +#include <linux/pkeys.h> #include <asm/firmware.h> #include <asm/interrupt.h> @@ -87,7 +89,6 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } -#ifdef CONFIG_PPC_MEM_KEYS static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma) { @@ -127,7 +128,6 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, return 0; } -#endif static noinline int bad_access(struct pt_regs *regs, unsigned long address) { @@ -197,7 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address, bool is_write) { - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | @@ -234,7 +234,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } -#ifdef CONFIG_PPC_MEM_KEYS static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, struct vm_area_struct *vma) { @@ -248,7 +247,6 @@ static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, return false; } -#endif static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) { @@ -393,7 +391,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int flags = FAULT_FLAG_DEFAULT; - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; int is_user = user_mode(regs); int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; @@ -418,8 +416,12 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, * take a page fault to a kernel address or a page fault to a user * address outside of dedicated places */ - if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) + if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) { + if (kfence_handle_page_fault(address, is_write, regs)) + return 0; + return SIGSEGV; + } /* * If we're in an interrupt, have no user context or are running @@ -492,11 +494,9 @@ retry: return bad_area(regs, address); } -#ifdef CONFIG_PPC_MEM_KEYS if (unlikely(access_pkey_error(is_write, is_exec, (error_code & DSISR_KEYFAULT), vma))) return bad_access_pkey(regs, address, vma); -#endif /* CONFIG_PPC_MEM_KEYS */ if (unlikely(access_error(is_write, is_exec, vma))) return bad_access(regs, address); @@ -539,39 +539,25 @@ retry: } NOKPROBE_SYMBOL(___do_page_fault); -static long __do_page_fault(struct pt_regs *regs) +static __always_inline void __do_page_fault(struct pt_regs *regs) { - const struct exception_table_entry *entry; long err; err = ___do_page_fault(regs, regs->dar, regs->dsisr); - if (likely(!err)) - return err; - - entry = search_exception_tables(regs->nip); - if (likely(entry)) { - instruction_pointer_set(regs, extable_fixup(entry)); - return 0; - } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { - __bad_page_fault(regs, err); - return 0; - } else { - /* 32 and 64e handle the bad page fault in asm */ - return err; - } + if (unlikely(err)) + bad_page_fault(regs, err); } -NOKPROBE_SYMBOL(__do_page_fault); -DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +DEFINE_INTERRUPT_HANDLER(do_page_fault) { - return __do_page_fault(regs); + __do_page_fault(regs); } #ifdef CONFIG_PPC_BOOK3S_64 /* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ -long hash__do_page_fault(struct pt_regs *regs) +void hash__do_page_fault(struct pt_regs *regs) { - return __do_page_fault(regs); + __do_page_fault(regs); } NOKPROBE_SYMBOL(hash__do_page_fault); #endif @@ -581,27 +567,27 @@ NOKPROBE_SYMBOL(hash__do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void __bad_page_fault(struct pt_regs *regs, int sig) +static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); /* kernel has accessed a bad area */ switch (TRAP(regs)) { - case 0x300: - case 0x380: - case 0xe00: + case INTERRUPT_DATA_STORAGE: + case INTERRUPT_DATA_SEGMENT: + case INTERRUPT_H_DATA_STORAGE: pr_alert("BUG: %s on %s at 0x%08lx\n", regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : "Unable to handle kernel data access", is_write ? "write" : "read", regs->dar); break; - case 0x400: - case 0x480: + case INTERRUPT_INST_STORAGE: + case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n"); break; - case 0x600: + case INTERRUPT_ALIGNMENT: pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n", regs->dar); break; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 02c7db4087cb..3d690be48e84 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -97,6 +97,9 @@ static void __init MMU_setup(void) if (IS_ENABLED(CONFIG_PPC_8xx)) return; + if (IS_ENABLED(CONFIG_KFENCE)) + __map_without_ltlbs = 1; + if (debug_pagealloc_enabled()) __map_without_ltlbs = 1; diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index fa9a7a718fc6..a3c30a884076 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -3,7 +3,28 @@ #include <linux/uaccess.h> #include <linux/kernel.h> +#include <asm/disassemble.h> +#include <asm/inst.h> +#include <asm/ppc-opcode.h> + bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return is_kernel_addr((unsigned long)unsafe_src); } + +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) +{ + unsigned int val, suffix; + int err; + + err = copy_from_kernel_nofault(&val, src, sizeof(val)); + if (err) + return err; + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4e8ce6d85232..6564b4d81324 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -12,49 +12,18 @@ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ -#include <linux/export.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/gfp.h> -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/init.h> #include <linux/memblock.h> #include <linux/highmem.h> -#include <linux/initrd.h> -#include <linux/pagemap.h> #include <linux/suspend.h> -#include <linux/hugetlb.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/memremap.h> #include <linux/dma-direct.h> -#include <linux/kprobes.h> -#include <asm/prom.h> -#include <asm/io.h> -#include <asm/mmu_context.h> -#include <asm/mmu.h> -#include <asm/smp.h> #include <asm/machdep.h> -#include <asm/btext.h> -#include <asm/tlb.h> -#include <asm/sections.h> -#include <asm/sparsemem.h> -#include <asm/vdso.h> -#include <asm/fixmap.h> -#include <asm/swiotlb.h> #include <asm/rtas.h> #include <asm/kasan.h> #include <asm/svm.h> -#include <asm/mmzone.h> #include <mm/mmu_decl.h> -static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; @@ -72,6 +41,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, EXPORT_SYMBOL(phys_mem_access_prot); #ifdef CONFIG_MEMORY_HOTPLUG +static DEFINE_MUTEX(linear_mapping_mutex); #ifdef CONFIG_NUMA int memory_add_physaddr_to_nid(u64 start) @@ -340,257 +310,6 @@ void free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } -/** - * flush_coherent_icache() - if a CPU has a coherent icache, flush it - * @addr: The base address to use (can be any valid address, the whole cache will be flushed) - * Return true if the cache was flushed, false otherwise - */ -static inline bool flush_coherent_icache(unsigned long addr) -{ - /* - * For a snooping icache, we still need a dummy icbi to purge all the - * prefetched instructions from the ifetch buffers. We also need a sync - * before the icbi to order the the actual stores to memory that might - * have modified instructions with the icbi. - */ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { - mb(); /* sync */ - allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - icbi((void *)addr); - prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - mb(); /* sync */ - isync(); - return true; - } - - return false; -} - -/** - * invalidate_icache_range() - Flush the icache by issuing icbi across an address range - * @start: the start address - * @stop: the stop address (exclusive) - */ -static void invalidate_icache_range(unsigned long start, unsigned long stop) -{ - unsigned long shift = l1_icache_shift(); - unsigned long bytes = l1_icache_bytes(); - char *addr = (char *)(start & ~(bytes - 1)); - unsigned long size = stop - (unsigned long)addr + (bytes - 1); - unsigned long i; - - for (i = 0; i < size >> shift; i++, addr += bytes) - icbi(addr); - - mb(); /* sync */ - isync(); -} - -/** - * flush_icache_range: Write any modified data cache blocks out to memory - * and invalidate the corresponding blocks in the instruction cache - * - * Generic code will call this after writing memory, before executing from it. - * - * @start: the start address - * @stop: the stop address (exclusive) - */ -void flush_icache_range(unsigned long start, unsigned long stop) -{ - if (flush_coherent_icache(start)) - return; - - clean_dcache_range(start, stop); - - if (IS_ENABLED(CONFIG_44x)) { - /* - * Flash invalidate on 44x because we are passed kmapped - * addresses and this doesn't work for userspace pages due to - * the virtually tagged icache. - */ - iccci((void *)start); - mb(); /* sync */ - isync(); - } else - invalidate_icache_range(start, stop); -} -EXPORT_SYMBOL(flush_icache_range); - -#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) -/** - * flush_dcache_icache_phys() - Flush a page by it's physical address - * @physaddr: the physical address of the page - */ -static void flush_dcache_icache_phys(unsigned long physaddr) -{ - unsigned long bytes = l1_dcache_bytes(); - unsigned long nb = PAGE_SIZE / bytes; - unsigned long addr = physaddr & PAGE_MASK; - unsigned long msr, msr0; - unsigned long loop1 = addr, loop2 = addr; - - msr0 = mfmsr(); - msr = msr0 & ~MSR_DR; - /* - * This must remain as ASM to prevent potential memory accesses - * while the data MMU is disabled - */ - asm volatile( - " mtctr %2;\n" - " mtmsr %3;\n" - " isync;\n" - "0: dcbst 0, %0;\n" - " addi %0, %0, %4;\n" - " bdnz 0b;\n" - " sync;\n" - " mtctr %2;\n" - "1: icbi 0, %1;\n" - " addi %1, %1, %4;\n" - " bdnz 1b;\n" - " sync;\n" - " mtmsr %5;\n" - " isync;\n" - : "+&r" (loop1), "+&r" (loop2) - : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) - : "ctr", "memory"); -} -NOKPROBE_SYMBOL(flush_dcache_icache_phys) -#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) - -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. - */ -void flush_dcache_page(struct page *page) -{ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* avoid an atomic op if possible */ - if (test_bit(PG_dcache_clean, &page->flags)) - clear_bit(PG_dcache_clean, &page->flags); -} -EXPORT_SYMBOL(flush_dcache_page); - -static void flush_dcache_icache_hugepage(struct page *page) -{ - int i; - void *start; - - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { - start = kmap_atomic(page+i); - __flush_dcache_icache(start); - kunmap_atomic(start); - } - } -} - -void flush_dcache_icache_page(struct page *page) -{ - - if (PageCompound(page)) - return flush_dcache_icache_hugepage(page); - -#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) - /* On 8xx there is no need to kmap since highmem is not supported */ - __flush_dcache_icache(page_address(page)); -#else - if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { - void *start = kmap_atomic(page); - __flush_dcache_icache(start); - kunmap_atomic(start); - } else { - unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; - - if (flush_coherent_icache(addr)) - return; - flush_dcache_icache_phys(addr); - } -#endif -} -EXPORT_SYMBOL(flush_dcache_icache_page); - -/** - * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * @page: the address of the page to flush - */ -void __flush_dcache_icache(void *p) -{ - unsigned long addr = (unsigned long)p; - - if (flush_coherent_icache(addr)) - return; - - clean_dcache_range(addr, addr + PAGE_SIZE); - - /* - * We don't flush the icache on 44x. Those have a virtual icache and we - * don't have access to the virtual address here (it's not the page - * vaddr but where it's mapped in user space). The flushing of the - * icache on these is handled elsewhere, when a change in the address - * space occurs, before returning to user space. - */ - - if (mmu_has_feature(MMU_FTR_TYPE_44x)) - return; - - invalidate_icache_range(addr, addr + PAGE_SIZE); -} - -void clear_user_page(void *page, unsigned long vaddr, struct page *pg) -{ - clear_page(page); - - /* - * We shouldn't have to do this, but some versions of glibc - * require it (ld.so assumes zero filled pages are icache clean) - * - Anton - */ - flush_dcache_page(pg); -} -EXPORT_SYMBOL(clear_user_page); - -void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, - struct page *pg) -{ - copy_page(vto, vfrom); - - /* - * We should be able to use the following optimisation, however - * there are two problems. - * Firstly a bug in some versions of binutils meant PLT sections - * were not marked executable. - * Secondly the first word in the GOT section is blrl, used - * to establish the GOT address. Until recently the GOT was - * not marked executable. - * - Anton - */ -#if 0 - if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) - return; -#endif - - flush_dcache_page(pg); -} - -void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - unsigned long maddr; - - maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); - flush_icache_range(maddr, maddr + len); - kunmap(page); -} - /* * System memory should not be in /proc/iomem but various tools expect it * (eg kdump). diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 18f20da0d348..a857af401738 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -43,24 +43,26 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, /* * This full barrier orders the store to the cpumask above vs - * a subsequent operation which allows this CPU to begin loading - * translations for next. + * a subsequent load which allows this CPU/MMU to begin loading + * translations for 'next' from page table PTEs into the TLB. * - * When using the radix MMU that operation is the load of the + * When using the radix MMU, that operation is the load of the * MMU context id, which is then moved to SPRN_PID. * * For the hash MMU it is either the first load from slb_cache - * in switch_slb(), and/or the store of paca->mm_ctx_id in - * copy_mm_to_paca(). + * in switch_slb() to preload the SLBs, or the load of + * get_user_context which loads the context for the VSID hash + * to insert a new SLB, in the SLB fault handler. * * On the other side, the barrier is in mm/tlb-radix.c for - * radix which orders earlier stores to clear the PTEs vs - * the load of mm_cpumask. And pte_xchg which does the same - * thing for hash. + * radix which orders earlier stores to clear the PTEs before + * the load of mm_cpumask to check which CPU TLBs should be + * flushed. For hash, pte_xchg to clear the PTE includes the + * barrier. * - * This full barrier is needed by membarrier when switching - * between processes after store to rq->curr, before user-space - * memory accesses. + * This full barrier is also needed by membarrier when + * switching between processes after store to rq->curr, before + * user-space memory accesses. */ smp_mb(); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 998810e68562..7dac910c0b21 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -185,3 +185,8 @@ void ptdump_check_wx(void); #else static inline void ptdump_check_wx(void) { } #endif + +static inline bool debug_pagealloc_enabled_or_kfence(void) +{ + return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled(); +} diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 19a3eec1d8c5..71bfdbedacee 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -149,7 +149,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); unsigned long sinittext = __pa(_sinittext); - bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled(); + bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled_or_kfence(); unsigned long boundary = strict_boundary ? sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); @@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return 0; mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); - if (debug_pagealloc_enabled()) { + if (debug_pagealloc_enabled_or_kfence()) { top = boundary; } else { mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); |