From 9be77e11dade414d2fa63750aa5c754fac49d619 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 19 Feb 2021 17:56:48 +0100 Subject: powerpc/mm: Move the linear_mapping_mutex to the ifdef where it is used The mutex linear_mapping_mutex is defined at the top of the file while its only two users are within the CONFIG_MEMORY_HOTPLUG block. A compile without CONFIG_MEMORY_HOTPLUG set fails on PREEMPT_RT because its mutex implementation is smart enough to realize that it is unused. Move the definition of linear_mapping_mutex to the ifdef block where it is used. Fixes: 1f73ad3e8d755 ("powerpc/mm: print warning in arch_remove_linear_mapping()") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210219165648.2505482-1-bigeasy@linutronix.de --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4e8ce6d85232..7a59a5c9aa5d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -54,7 +54,6 @@ #include -static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; @@ -72,6 +71,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, EXPORT_SYMBOL(phys_mem_access_prot); #ifdef CONFIG_MEMORY_HOTPLUG +static DEFINE_MUTEX(linear_mapping_mutex); #ifdef CONFIG_NUMA int memory_add_physaddr_to_nid(u64 start) -- cgit v1.2.3 From 90cbac0e995dd92f7bcf82f74aa50250bf194a4a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 4 Mar 2021 14:35:09 +0000 Subject: powerpc: Enable KFENCE for PPC32 Add architecture-specific implementation details for KFENCE and enable KFENCE for the ppc32 architecture. In particular, this implements the required interface in <asm/kfence.h>. KFENCE requires that attributes for pages from its memory pool can individually be set. Therefore, force the Read/Write linear map to be mapped at page granularity. Signed-off-by: Christophe Leroy Acked-by: Marco Elver Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8dfe1bd2abde26337c1d8c1ad0acfcc82185e0d5.1614868445.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 13 +++++++------ arch/powerpc/include/asm/kfence.h | 33 +++++++++++++++++++++++++++++++++ arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/fault.c | 7 ++++++- arch/powerpc/mm/init_32.c | 3 +++ arch/powerpc/mm/mmu_decl.h | 5 +++++ arch/powerpc/mm/nohash/8xx.c | 4 ++-- 7 files changed, 57 insertions(+), 10 deletions(-) create mode 100644 arch/powerpc/include/asm/kfence.h (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 386ae12d8523..d46db0bfb998 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -185,6 +185,7 @@ config PPC select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KGDB + select HAVE_ARCH_KFENCE if PPC32 select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_NVRAM_OPS @@ -786,7 +787,7 @@ config THREAD_SHIFT config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS - depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC + depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) help This option allows you to set the kernel data alignment. 
When @@ -798,13 +799,13 @@ config DATA_SHIFT int "Data shift" if DATA_SHIFT_BOOL default 24 if STRICT_KERNEL_RWX && PPC64 - range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32 - range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx + range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 + range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32 + default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx - default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA - default 19 if DEBUG_PAGEALLOC && PPC_8xx + default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA + default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h new file mode 100644 index 000000000000..a9846b68c6b9 --- /dev/null +++ b/arch/powerpc/include/asm/kfence.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * powerpc KFENCE support. + * + * Copyright (C) 2020 CS GROUP France + */ + +#ifndef __ASM_POWERPC_KFENCE_H +#define __ASM_POWERPC_KFENCE_H + +#include <linux/mm.h> +#include <asm/pgtable.h> + +static inline bool arch_kfence_init_pool(void) +{ + return true; +} + +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + pte_t *kpte = virt_to_kpte(addr); + + if (protect) { + pte_update(&init_mm, addr, kpte, _PAGE_PRESENT, 0, 0); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } else { + pte_update(&init_mm, addr, kpte, 0, _PAGE_PRESENT, 0); + } + + return true; +} + +#endif /* __ASM_POWERPC_KFENCE_H */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index d7eb266a3f7a..a0db398b5c26 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -162,7 +162,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (debug_pagealloc_enabled() || __map_without_bats) { + if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) { pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index bb368257b55c..bea13682c909 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,6 +32,7 @@ #include #include #include +#include <linux/kfence.h> #include #include @@ -418,8 +419,12 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, * take a page fault to a kernel address or a page fault to a user * address outside of dedicated places */ - if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) + if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) { + if (kfence_handle_page_fault(address, is_write, regs)) + return 0; + + return SIGSEGV; + } /* * If we're in an interrupt, have no user context or are running diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 02c7db4087cb..3d690be48e84 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -97,6 +97,9 @@ static void __init MMU_setup(void) if (IS_ENABLED(CONFIG_PPC_8xx)) return; + if (IS_ENABLED(CONFIG_KFENCE)) + __map_without_ltlbs = 1; + if (debug_pagealloc_enabled()) __map_without_ltlbs = 1; diff --git 
a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 998810e68562..7dac910c0b21 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -185,3 +185,8 @@ void ptdump_check_wx(void); #else static inline void ptdump_check_wx(void) { } #endif + +static inline bool debug_pagealloc_enabled_or_kfence(void) +{ + return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled(); +} diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 19a3eec1d8c5..71bfdbedacee 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -149,7 +149,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); unsigned long sinittext = __pa(_sinittext); - bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled(); + bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled_or_kfence(); unsigned long boundary = strict_boundary ? sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); @@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return 0; mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); - if (debug_pagealloc_enabled()) { + if (debug_pagealloc_enabled_or_kfence()) { top = boundary; } else { mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); -- cgit v1.2.3 From 7aa8dd67f15731f659390018b5c9fd95f5975b3d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:22 +0000 Subject: powerpc/32: Always enable data translation in exception prolog If the code can use a stack in the vmalloc area, it can also use a stack in linear space. Simplify the code by removing the old non-VMAP stack code on PPC32. That means the data translation is now re-enabled early in the exception prolog in all cases, not only when using VMAP stacks. While we are touching the EXCEPTION_PROLOG macros, remove the unused for_rtas parameter from EXCEPTION_PROLOG_1. 
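As a rough C-level illustration of what the unified prolog now does (a sketch only, not the actual code — the real change is pure assembly, shown in the diff below; it assumes the mfspr()/mtmsr()/mfmsr()/isync() helpers from asm/reg.h and asm/synch.h, and the srr0/srr1 thread_struct scratch slots this series relies on):

static inline void prolog_reenable_translation(struct thread_struct *t)
{
	/* Save SRR0/SRR1 first: once translation is back on, a TLB
	 * miss or fault would clobber them. */
	t->srr0 = mfspr(SPRN_SRR0);
	t->srr1 = mfspr(SPRN_SRR1);

	/* Re-enable data translation (MSR_DR) before touching the
	 * stack, so a vmalloc-area stack works like a linear one. */
	mtmsr(mfmsr() | MSR_DR);
	isync();
}
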
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7cd6440c60a7e8f4f035b245c57720f51e225aae.1615552866.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/processor.h | 4 +- arch/powerpc/kernel/asm-offsets.c | 2 - arch/powerpc/kernel/entry_32.S | 19 +++----- arch/powerpc/kernel/fpu.S | 2 - arch/powerpc/kernel/head_32.h | 85 +----------------------------------- arch/powerpc/kernel/head_40x.S | 23 ---------- arch/powerpc/kernel/head_8xx.S | 19 +------- arch/powerpc/kernel/head_book3s_32.S | 47 +------------------- arch/powerpc/kernel/idle_6xx.S | 12 +---- arch/powerpc/kernel/idle_e500.S | 4 +- arch/powerpc/kernel/vector.S | 2 - arch/powerpc/mm/book3s32/hash_low.S | 14 ------ 12 files changed, 17 insertions(+), 216 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 43cbd9281055..eae16facc390 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -147,11 +147,9 @@ struct thread_struct { #ifdef CONFIG_PPC_RTAS unsigned long rtas_sp; /* stack pointer for when in RTAS */ #endif -#endif #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) unsigned long kuap; /* opened segments for user access */ #endif -#ifdef CONFIG_VMAP_STACK unsigned long srr0; unsigned long srr1; unsigned long dar; @@ -160,7 +158,7 @@ struct thread_struct { unsigned long r0, r3, r4, r5, r6, r8, r9, r11; unsigned long lr, ctr; #endif -#endif +#endif /* CONFIG_PPC32 */ /* Debug Registers */ struct debug_reg debug; #ifdef CONFIG_PPC_FPU_REGS diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 73620536c801..85ba2b0bc8d8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -131,7 +131,6 @@ int main(void) OFFSET(KSP_VSID, thread_struct, ksp_vsid); #else /* CONFIG_PPC64 */ OFFSET(PGDIR, thread_struct, pgdir); -#ifdef CONFIG_VMAP_STACK OFFSET(SRR0, thread_struct, srr0); OFFSET(SRR1, thread_struct, srr1); OFFSET(DAR, thread_struct, dar); @@ -148,7 +147,6 @@ int main(void) OFFSET(THLR, thread_struct, lr); OFFSET(THCTR, thread_struct, ctr); #endif -#endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 66198e6e25e7..33e97032ca25 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -129,7 +129,7 @@ transfer_to_handler: stw r12,_CTR(r11) stw r2,_XER(r11) mfspr r12,SPRN_SPRG_THREAD - tovirt_vmstack r12, r12 + tovirt(r12, r12) beq 2f /* if from user, fix up THREAD.regs */ addi r2, r12, -THREAD addi r11,r1,STACK_FRAME_OVERHEAD @@ -153,8 +153,7 @@ transfer_to_handler: transfer_to_handler_cont: 3: mflr r9 - tovirt_novmstack r2, r2 /* set r2 to current */ - tovirt_vmstack r9, r9 + tovirt(r9, r9) lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) @@ -933,7 +932,6 @@ _GLOBAL(enter_rtas) lis r6,1f@ha /* physical return address for rtas */ addi r6,r6,1f@l tophys(r6,r6) - tophys_novmstack r7, r1 lwz r8,RTASENTRY(r4) lwz r4,RTASBASE(r4) mfmsr r9 @@ -942,22 +940,19 @@ _GLOBAL(enter_rtas) mtmsr r0 /* disable interrupts so SRR0/1 don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlr r6 - stw r7, THREAD + RTAS_SP(r2) + stw r1, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 rfi -1: tophys_novmstack r9, r1 -#ifdef 
CONFIG_VMAP_STACK +1: li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ mtmsr r0 isync -#endif - lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ - lwz r9,8(r9) /* original msr value */ + lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */ + lwz r9,8(r1) /* original msr value */ addi r1,r1,INT_FRAME_SIZE li r0,0 - tophys_novmstack r7, r2 - stw r0, THREAD + RTAS_SP(r7) + stw r0, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 rfi /* return to caller */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 3ff9a8fafa46..2c57ece6671c 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -92,9 +92,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* enable use of FP after return */ #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ -#ifdef CONFIG_VMAP_STACK tovirt(r5, r5) -#endif lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 25ee6b26ef5a..1b707755c68e 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -19,7 +19,6 @@ .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0 mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 -#ifdef CONFIG_VMAP_STACK mfspr r10, SPRN_SPRG_THREAD .if \handle_dar_dsisr #ifdef CONFIG_40x @@ -37,17 +36,13 @@ .endif mfspr r11, SPRN_SRR0 stw r11, SRR0(r10) -#endif mfspr r11, SPRN_SRR1 /* check whether user or kernel */ -#ifdef CONFIG_VMAP_STACK stw r11, SRR1(r10) -#endif mfcr r10 andi. r11, r11, MSR_PR .endm -.macro EXCEPTION_PROLOG_1 for_rtas=0 -#ifdef CONFIG_VMAP_STACK +.macro EXCEPTION_PROLOG_1 mtspr SPRN_SPRG_SCRATCH2,r1 subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f @@ -55,20 +50,13 @@ lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 1: +#ifdef CONFIG_VMAP_STACK mtcrf 0x3f, r1 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow -#else - subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ - beq 1f - mfspr r11,SPRN_SPRG_THREAD - lwz r11,TASK_STACK-THREAD(r11) - addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -1: tophys(r11, r11) #endif .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 -#ifdef CONFIG_VMAP_STACK LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */ mtmsr r11 isync @@ -76,11 +64,6 @@ stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 -#else - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt(r1, r11) /* set new kernel sp */ -#endif stw r10,_CCR(r11) /* save registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) @@ -90,7 +73,6 @@ stw r12,GPR11(r11) mflr r10 stw r10,_LINK(r11) -#ifdef CONFIG_VMAP_STACK mfspr r12, SPRN_SPRG_THREAD tovirt(r12, r12) .if \handle_dar_dsisr @@ -101,20 +83,12 @@ .endif lwz r9, SRR1(r12) lwz r12, SRR0(r12) -#else - mfspr r12,SPRN_SRR0 - mfspr r9,SPRN_SRR1 -#endif #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ #elif defined(CONFIG_PPC_8xx) mtspr SPRN_EID, r2 /* Set MSR_RI */ #else -#ifdef CONFIG_VMAP_STACK li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */ -#else - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */ -#endif mtmsr r10 /* (except for mach check in rtas) */ #endif stw r0,GPR0(r11) @@ -166,59 +140,6 @@ b transfer_to_syscall /* jump to handler */ .endm -.macro save_dar_dsisr_on_stack reg1, reg2, sp -#ifndef CONFIG_VMAP_STACK -#ifdef CONFIG_40x - mfspr \reg1, SPRN_DEAR - mfspr \reg2, SPRN_ESR -#else - mfspr \reg1, SPRN_DAR - mfspr \reg2, SPRN_DSISR -#endif - stw \reg1, _DAR(\sp) - stw \reg2, _DSISR(\sp) -#endif -.endm - -.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp -#ifdef CONFIG_VMAP_STACK - lwz \reg1, _DAR(\sp) - lwz \reg2, _DSISR(\sp) -#else - save_dar_dsisr_on_stack \reg1, \reg2, \sp -#endif -.endm - -.macro tovirt_vmstack dst, src -#ifdef CONFIG_VMAP_STACK - tovirt(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - -.macro tovirt_novmstack dst, src -#ifndef CONFIG_VMAP_STACK - tovirt(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - -.macro tophys_novmstack dst, src -#ifndef CONFIG_VMAP_STACK - tophys(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - /* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). @@ -266,7 +187,6 @@ label: ret_from_except) .macro vmap_stack_overflow_exception -#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_SMP mfspr r1, SPRN_SPRG_THREAD lwz r1, TASK_CPU - THREAD(r1) @@ -285,7 +205,6 @@ label: SAVE_NVGPRS(r11) addi r3, r1, STACK_FRAME_OVERHEAD EXC_XFER_STD(0, stack_overflow_exception) -#endif .endm #endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 72e4962902de..7da673ec63ef 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -111,12 +111,10 @@ _ENTRY(crit_esr) mfspr r11,SPRN_SRR1 stw r10,crit_srr0@l(0) stw r11,crit_srr1@l(0) -#ifdef CONFIG_VMAP_STACK mfspr r10,SPRN_DEAR mfspr r11,SPRN_ESR stw r10,crit_dear@l(0) stw r11,crit_esr@l(0) -#endif mfcr r10 /* save CR in r10 for now */ mfspr r11,SPRN_SRR3 /* check whether user or kernel */ andi. 
r11,r11,MSR_PR @@ -126,7 +124,6 @@ _ENTRY(crit_esr) /* COMING FROM USER MODE */ mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */ lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ -#ifdef CONFIG_VMAP_STACK 1: stw r1,crit_r1@l(0) addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ LOAD_REG_IMMEDIATE(r11,MSR_KERNEL & ~(MSR_IR | MSR_RI)) @@ -136,35 +133,18 @@ _ENTRY(crit_esr) stw r11,GPR1(r1) stw r11,0(r1) mr r11,r1 -#else -1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ - tophys(r11,r11) - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt(r1,r11) -#endif stw r10,_CCR(r11) /* save various registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) mflr r10 stw r10,_LINK(r11) -#ifdef CONFIG_VMAP_STACK lis r9,PAGE_OFFSET@ha lwz r10,crit_r10@l(r9) lwz r12,crit_r11@l(r9) -#else - lwz r10,crit_r10@l(0) - lwz r12,crit_r11@l(0) -#endif stw r10,GPR10(r11) stw r12,GPR11(r11) -#ifdef CONFIG_VMAP_STACK lwz r12,crit_dear@l(r9) lwz r9,crit_esr@l(r9) -#else - mfspr r12,SPRN_DEAR /* save DEAR and ESR in the frame */ - mfspr r9,SPRN_ESR /* in them at the point where the */ -#endif stw r12,_DEAR(r11) /* since they may have had stuff */ stw r9,_ESR(r11) /* exception was taken */ mfspr r12,SPRN_SRR2 @@ -220,7 +200,6 @@ _ENTRY(crit_esr) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 EXC_XFER_LITE(0x300, handle_page_fault) /* @@ -240,14 +219,12 @@ _ENTRY(crit_esr) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 46dff3f9c31f..792e2fd86479 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -124,7 +124,6 @@ instruction_counter: . = 0x200 MachineCheck: EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 li r6, RPN_PATTERN mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -137,7 +136,6 @@ MachineCheck: . = 0x600 Alignment: EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 li r6, RPN_PATTERN mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -333,21 +331,16 @@ DataTLBError: cmpwi cr1, r11, RPN_PATTERN beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ -#ifdef CONFIG_VMAP_STACK li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ -#endif EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 + lwz r4, _DAR(r11) + lwz r5, _DSISR(r11) andis. r10,r5,DSISR_NOHPTE@h beq+ .Ldtlbie tlbie r4 .Ldtlbie: -#ifndef CONFIG_VMAP_STACK - li r10,RPN_PATTERN - mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ -#endif /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) @@ -364,10 +357,6 @@ do_databreakpoint: addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_BAR stw r4,_DAR(r11) -#ifndef CONFIG_VMAP_STACK - mfspr r5,SPRN_DSISR - stw r5,_DSISR(r11) -#endif EXC_XFER_STD(0x1c00, do_break) . 
= 0x1c00 @@ -510,14 +499,10 @@ FixupDAR:/* Entry point for dcbx workaround. */ 152: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ -#ifdef CONFIG_VMAP_STACK mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) mfspr r10, SPRN_DSISR stw r10, DSISR(r11) -#else - mtdar r10 /* save fault EA to DAR */ -#endif mfspr r10,SPRN_M_TW b DARFixed /* Go back to normal TLB handling */ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 565e84e20a72..1cf7bd5d5ec1 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -260,21 +260,14 @@ __secondary_hold_acknowledge: MachineCheck: EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK mtspr SPRN_SPRG_SCRATCH2,r1 mfspr r1, SPRN_SPRG_THREAD lwz r1, RTAS_SP(r1) cmpwi cr1, r1, 0 bne cr1, 7f mfspr r1, SPRN_SPRG_SCRATCH2 -#else - mfspr r11, SPRN_SPRG_THREAD - lwz r11, RTAS_SP(r11) - cmpwi cr1, r11, 0 - bne cr1, 7f -#endif #endif /* CONFIG_PPC_CHRP */ - EXCEPTION_PROLOG_1 for_rtas=1 + EXCEPTION_PROLOG_1 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP @@ -288,7 +281,6 @@ MachineCheck: . = 0x300 DO_KVM 0x300 DataAccess: -#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION mtspr SPRN_SPRG_SCRATCH2,r10 @@ -310,29 +302,11 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 b handle_page_fault_tramp_1 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 -#ifdef CONFIG_PPC_BOOK3S_604 -BEGIN_MMU_FTR_SECTION - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h - bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ - rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ - bl hash_page - b handle_page_fault_tramp_1 -MMU_FTR_SECTION_ELSE -#endif - b handle_page_fault_tramp_2 -#ifdef CONFIG_PPC_BOOK3S_604 -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) -#endif -#endif /* CONFIG_VMAP_STACK */ /* Instruction access exception. */ . = 0x400 DO_KVM 0x400 InstructionAccess: -#ifdef CONFIG_VMAP_STACK mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 mfspr r10, SPRN_SPRG_THREAD @@ -353,18 +327,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG - andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ - beq 1f /* if so, try to put a PTE */ - li r3,0 /* into the hash table */ - mr r4,r12 /* SRR0 is fault address */ -#ifdef CONFIG_PPC_BOOK3S_604 -BEGIN_MMU_FTR_SECTION - bl hash_page -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif -#endif /* CONFIG_VMAP_STACK */ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) @@ -378,7 +340,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) DO_KVM 0x600 Alignment: EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD b alignment_exception_tramp @@ -689,18 +650,13 @@ alignment_exception_tramp: EXC_XFER_STD(0x600, alignment_exception) handle_page_fault_tramp_1: -#ifdef CONFIG_VMAP_STACK EXCEPTION_PROLOG_2 handle_dar_dsisr=1 -#endif lwz r5, _DSISR(r11) - /* fall through */ -handle_page_fault_tramp_2: andis. 
r0, r5, DSISR_DABRMATCH@h bne- 1f EXC_XFER_LITE(0x300, handle_page_fault) 1: EXC_XFER_STD(0x300, do_break) -#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_PPC_BOOK3S_604 .macro save_regs_thread thread stw r0, THR0(\thread) @@ -775,6 +731,7 @@ fast_hash_page_return: rfi #endif /* CONFIG_PPC_BOOK3S_604 */ +#ifdef CONFIG_VMAP_STACK stack_overflow: vmap_stack_overflow_exception #endif diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 69df840f7253..153366e178c4 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -145,9 +145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* * Return from NAP/DOZE mode, restore some CPU specific registers, - * we are called with DR/IR still off and r2 containing physical - * address of current. R11 points to the exception frame (physical - * address). We have to preserve r10. + * R11 points to the exception frame. We have to preserve r10. */ _GLOBAL(power_save_ppc32_restore) lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */ @@ -166,11 +164,7 @@ BEGIN_FTR_SECTION mfspr r9,SPRN_HID0 andis. r9,r9,HID0_NAP@h beq 1f -#ifdef CONFIG_VMAP_STACK addis r9, r11, nap_save_msscr0@ha -#else - addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha -#endif lwz r9,nap_save_msscr0@l(r9) mtspr SPRN_MSSCR0, r9 sync @@ -178,11 +172,7 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) BEGIN_FTR_SECTION -#ifdef CONFIG_VMAP_STACK addis r9, r11, nap_save_hid1@ha -#else - addis r9,r11,(nap_save_hid1-KERNELBASE)@ha -#endif lwz r9,nap_save_hid1@l(r9) mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 72c85b6f3898..7795727e7f08 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -74,8 +74,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) /* * Return from NAP/DOZE mode, restore some CPU specific registers, - * r2 containing physical address of current. - * r11 points to the exception frame (physical address). + * r2 containing address of current. + * r11 points to the exception frame. * We have to preserve r10. 
*/ _GLOBAL(power_save_ppc32_restore) diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 801dc28fdcca..f5a52f444e36 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -67,9 +67,7 @@ _GLOBAL(load_up_altivec) #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ oris r9,r9,MSR_VEC@h -#ifdef CONFIG_VMAP_STACK tovirt(r5, r5) -#endif #else ld r4,PACACURRENT(r13) addi r5,r4,THREAD /* Get THREAD */ diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 0e6dc830c38b..fb4233a5bdf7 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -140,10 +140,6 @@ _GLOBAL(hash_page) bne- .Lretry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ -#ifndef CONFIG_VMAP_STACK - mfctr r0 - stw r0,_CTR(r11) -#endif bl create_hpte /* add the hash table entry */ #ifdef CONFIG_SMP @@ -152,17 +148,7 @@ _GLOBAL(hash_page) li r0,0 stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) #endif - -#ifdef CONFIG_VMAP_STACK b fast_hash_page_return -#else - /* Return from the exception */ - lwz r5,_CTR(r11) - mtctr r5 - lwz r0,GPR0(r11) - lwz r8,GPR8(r11) - b fast_exception_return -#endif #ifdef CONFIG_SMP .Lhash_page_out: -- cgit v1.2.3 From af6f2ce84b2f666762f75f085a7e5d6514743a84 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:37 +0000 Subject: powerpc/32: Call bad_page_fault() from do_page_fault() Now that non-volatile registers are saved at all times, there is no need to split bad_page_fault() out of do_page_fault(). Remove handle_page_fault() and use do_page_fault() directly. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cfb95be8863204cc2bf45a22ea44dd1d0dc16b7f.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 16 ---------------- arch/powerpc/kernel/head_40x.S | 4 ++-- arch/powerpc/kernel/head_8xx.S | 4 ++-- arch/powerpc/kernel/head_book3s_32.S | 4 ++-- arch/powerpc/kernel/head_booke.h | 4 ++-- arch/powerpc/kernel/head_fsl_booke.S | 2 +- arch/powerpc/mm/fault.c | 2 +- 7 files changed, 10 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e538ae73394d..76e1502b3e6f 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -220,22 +220,6 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall -/* - * Top-level page fault handling. - * This is in assembler because if do_page_fault tells us that - * it is a bad kernel page fault, we want to save the non-volatile - * registers before calling bad_page_fault. - */ - .globl handle_page_fault -handle_page_fault: - bl do_page_fault - cmpwi r3,0 - beq+ ret_from_except - mr r4,r3 /* err arg for bad_page_fault */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault - b ret_from_except_full - /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. 
Then the state diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 08563d4170c6..a65778380704 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -207,7 +207,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1 - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, do_page_fault) /* * 0x0400 - Instruction Storage Exception @@ -218,7 +218,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, do_page_fault) /* 0x0500 - External Interrupt Exception */ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index eb1d40a8f2c4..4078d0dc2f18 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -301,7 +301,7 @@ instruction_counter: .Litlbie: stw r12, _DAR(r11) stw r5, _DSISR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, do_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to @@ -322,7 +322,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ tlbie r4 .Ldtlbie: /* 0x300 is DataAccess exception, needed by bad_page_fault() */ - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, do_page_fault) #ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 453f6ea959f9..e78b4a7c23af 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -299,7 +299,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) lwz r5, _DSISR(r11) andis. r0, r5, DSISR_DABRMATCH@h bne- 1f - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, do_page_fault) 1: EXC_XFER_STD(0x300, do_break) @@ -328,7 +328,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, do_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 009a56d70d76..036a69d16605 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -462,7 +462,7 @@ label: stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ stw r4, _DEAR(r11); \ - EXC_XFER_LITE(0x0300, handle_page_fault) + EXC_XFER_LITE(0x0300, do_page_fault) #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ @@ -470,7 +470,7 @@ label: mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ - EXC_XFER_LITE(0x0400, handle_page_fault) + EXC_XFER_LITE(0x0400, do_page_fault) #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index f51c66f747ad..72e9aa45b99b 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -370,7 +370,7 @@ interrupt_base: stw r4, _DEAR(r11) andis. 
r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - EXC_XFER_LITE(0x0300, handle_page_fault) + EXC_XFER_LITE(0x0300, do_page_fault) 1: EXC_XFER_LITE(0x0300, CacheLockingException) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index bea13682c909..0d4e4ff77e03 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -557,7 +557,7 @@ static long __do_page_fault(struct pt_regs *regs) if (likely(entry)) { instruction_pointer_set(regs, extable_fixup(entry)); return 0; - } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { + } else if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { __bad_page_fault(regs, err); return 0; } else { -- cgit v1.2.3 From b5efec00b671c5d7e9cb9e73a1d4925dd6ce8dcd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Mar 2021 12:50:47 +0000 Subject: powerpc/32s: Move KUEP locking/unlocking in C This can be done in C, do it. Unrolling the loop gains approx. 15% performance. From now on, prepare_transfer_to_handler() is only for interrupts from kernel. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4eadd873927e9a73c3d1dfe2f9497353465514cf.1615552867.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 31 ------------------------- arch/powerpc/include/asm/interrupt.h | 6 ++++- arch/powerpc/include/asm/kup.h | 8 +++++++ arch/powerpc/kernel/entry_32.S | 16 ------------- arch/powerpc/kernel/head_32.h | 3 +++ arch/powerpc/kernel/head_booke.h | 3 +++ arch/powerpc/kernel/interrupt.c | 4 ++++ arch/powerpc/mm/book3s32/Makefile | 1 + arch/powerpc/mm/book3s32/kuep.c | 40 ++++++++++++++++++++++++++++++++ 9 files changed, 64 insertions(+), 48 deletions(-) create mode 100644 arch/powerpc/mm/book3s32/kuep.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 73bc5d2c431d..b97ea60f6fa3 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -7,37 +7,6 @@ #ifdef __ASSEMBLY__ -.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi \gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000 /* address of next segment */ - bdnz 101b - isync -.endm - -.macro kuep_lock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - oris \gpr1, \gpr1, SR_NX@h /* set Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - -.macro kuep_unlock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - #ifdef CONFIG_PPC_KUAP .macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */ diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 861e6eadc98c..857375309255 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -33,8 +33,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (!arch_irq_disabled_regs(regs)) trace_hardirqs_off(); - if (user_mode(regs)) + if (user_mode(regs)) { + kuep_lock(); account_cpu_user_entry(); + } #endif /* * Book3E reconciles irq soft mask in asm @@ -89,6 +91,8 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt exception_exit(state->ctx_state); #endif + if (user_mode(regs)) + 
kuep_unlock(); /* * Book3S exits to user via interrupt_exit_user_prepare(), which does * context tracking, which is a cleaner way to handle PREEMPT=y diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 7ec21af49a45..25671f711ec2 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -55,6 +55,14 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ +#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) +void kuep_lock(void); +void kuep_unlock(void); +#else +static inline void kuep_lock(void) { } +static inline void kuep_unlock(void) { } +#endif + #ifdef CONFIG_PPC_KUAP void setup_kuap(bool disabled); #else diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 44d0eddf8738..112d6247c391 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -51,16 +51,9 @@ #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) .globl prepare_transfer_to_handler prepare_transfer_to_handler: - andi. r12,r9,MSR_PR addi r12,r2,THREAD - beq 2f -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif - blr /* if from kernel, check interrupted DOZE/NAP mode */ -2: kuap_save_and_lock r11, r12, r9, r5, r6 lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 @@ -86,9 +79,6 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) .globl transfer_to_syscall transfer_to_syscall: SAVE_NVGPRS(r1) -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif /* Calling convention has r9 = orig r0, r10 = regs */ addi r10,r1,STACK_FRAME_OVERHEAD @@ -105,9 +95,6 @@ ret_from_syscall: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r5, r7 -#endif kuap_check r2, r4 lwz r4,_LINK(r1) lwz r5,_CCR(r1) @@ -311,9 +298,6 @@ interrupt_return: bne- .Lrestore_nvgprs .Lfast_user_interrupt_return: -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r10, r11 -#endif kuap_check r2, r4 lwz r11,_NIP(r1) lwz r12,_MSR(r1) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index c018fcdf9157..a8221ddcbd66 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -133,7 +133,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .macro prepare_transfer_to_handler #ifdef CONFIG_PPC_BOOK3S_32 + andi. r12,r9,MSR_PR + bne 777f bl prepare_transfer_to_handler +777: #endif .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index cb96ae002af6..f82470091697 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -105,7 +105,10 @@ END_BTB_FLUSH_SECTION .macro prepare_transfer_to_handler #ifdef CONFIG_E500 + andi. 
r12,r9,MSR_PR + bne 777f bl prepare_transfer_to_handler +777: #endif .endm diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 6875b82f613a..20ace874cd98 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -33,6 +33,8 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; + kuep_lock(); + regs->orig_gpr3 = r3; if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) @@ -354,6 +356,8 @@ again: */ kuap_user_restore(regs); #endif + kuep_unlock(); + return ret; } diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 446d9de88ce4..7f0c8a78ba0c 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -9,3 +9,4 @@ endif obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o +obj-$(CONFIG_PPC_KUEP) += kuep.o diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c new file mode 100644 index 000000000000..8ed1b8634839 --- /dev/null +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \ + if (TASK_SIZE > ((n) << 28)) \ + mtsr(val1, (n) << 28); \ + if (TASK_SIZE > (((n) + 1) << 28)) \ + mtsr(val2, ((n) + 1) << 28); \ + val1 = (val1 + 0x222) & 0xf0ffffff; \ + val2 = (val2 + 0x222) & 0xf0ffffff; \ +} while (0) + +static __always_inline void kuep_update(u32 val) +{ + int val1 = val; + int val2 = (val + 0x111) & 0xf0ffffff; + + KUEP_UPDATE_TWO_USER_SEGMENTS(0); + KUEP_UPDATE_TWO_USER_SEGMENTS(2); + KUEP_UPDATE_TWO_USER_SEGMENTS(4); + KUEP_UPDATE_TWO_USER_SEGMENTS(6); + KUEP_UPDATE_TWO_USER_SEGMENTS(8); + KUEP_UPDATE_TWO_USER_SEGMENTS(10); + KUEP_UPDATE_TWO_USER_SEGMENTS(12); + KUEP_UPDATE_TWO_USER_SEGMENTS(14); +} + +void kuep_lock(void) +{ + kuep_update(mfsr(0) | SR_NX); +} + +void kuep_unlock(void) +{ + kuep_update(mfsr(0) & ~SR_NX); +} -- cgit v1.2.3 From 7a7d744ffe87ae10ab98004d1a6ca1f691af58e1 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Fri, 12 Mar 2021 16:55:37 +0530 Subject: powerpc/mm/book3s64: Fix a typo in mmu_context.c s/detalis/details/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210312112537.4585-1-unixbhaskar@gmail.com --- arch/powerpc/mm/book3s64/mmu_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 0c8557220ae2..c10fc8a72fb3 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -119,7 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm) /* This is fork. Copy hash_context details from current->mm */ memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context)); #ifdef CONFIG_PPC_SUBPAGE_PROT - /* inherit subpage prot detalis if we have one. */ + /* inherit subpage prot details if we have one. 
*/ if (current->mm->context.hash_context->spt) { mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); -- cgit v1.2.3 From c2a2a5d0270c641ce030aee247569afc1a0efbe5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 14 Mar 2021 20:33:20 +1100 Subject: powerpc/64s: Fold update_current_thread_[i]amr() into their only callers lkp reported warnings in some configurations due to update_current_thread_amr() being unused: arch/powerpc/mm/book3s64/pkeys.c:284:20: error: unused function 'update_current_thread_amr' static inline void update_current_thread_amr(u64 value) That is because its only use is inside an ifdef. We could move it inside the ifdef, but it's a single-line function and only has one caller, so just fold it in. Similarly update_current_thread_iamr() is small and only called once, so fold it in also. Fixes: 48a8ab4eeb82 ("powerpc/book3s64/pkeys: Don't update SPRN_AMR when in kernel mode.") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210314093320.132331-1-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/pkeys.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 15dcc5ad91c5..a2d9ad138709 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -301,19 +301,6 @@ void setup_kuap(bool disabled) } #endif -static inline void update_current_thread_amr(u64 value) -{ - current->thread.regs->amr = value; -} - -static inline void update_current_thread_iamr(u64 value) -{ - if (!likely(pkey_execute_disable_supported)) - return; - - current->thread.regs->iamr = value; -} - #ifdef CONFIG_PPC_MEM_KEYS void pkey_mm_init(struct mm_struct *mm) { @@ -328,7 +315,7 @@ static inline void init_amr(int pkey, u8 init_bits) u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); - update_current_thread_amr(old_amr | new_amr_bits); + current->thread.regs->amr = old_amr | new_amr_bits; } static inline void init_iamr(int pkey, u8 init_bits) @@ -336,7 +323,10 @@ static inline void init_iamr(int pkey, u8 init_bits) u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); - update_current_thread_iamr(old_iamr | new_iamr_bits); + if (!likely(pkey_execute_disable_supported)) + return; + + current->thread.regs->iamr = old_iamr | new_iamr_bits; } /* -- cgit v1.2.3 From 98c26a72751ecb2ed247cdfd6cb2385f37195707 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Mar 2021 14:52:51 +0000 Subject: powerpc/mm: Remove unneeded #ifdef CONFIG_PPC_MEM_KEYS In fault.c, #ifdef CONFIG_PPC_MEM_KEYS is not needed because all functions are always defined, and arch_vma_access_permitted() always returns true when CONFIG_PPC_MEM_KEYS is not defined, so access_pkey_error() will return false and bad_access_pkey() will never be called. Include linux/pkeys.h to get a definition of vma_pkey() for bad_access_pkey(). 
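The reasoning rests on ordinary constant folding; a minimal stand-alone sketch of the pattern (hypothetical names, not the kernel's code):

#include <stdbool.h>

/* Stand-in for the !CONFIG_PPC_MEM_KEYS stub of
 * arch_vma_access_permitted(), which is constant-true. */
static inline bool access_permitted(void)
{
	return true;
}

static inline bool pkey_error_sketch(void)
{
	/* Constant-false when the feature is compiled out ... */
	return !access_permitted();
}

int handle_fault_sketch(void)
{
	/* ... so the compiler proves this branch dead and drops the
	 * call, making an #ifdef around it unnecessary. */
	if (pkey_error_sketch())
		return -1;	/* would be bad_access_pkey() */
	return 0;
}
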
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8038392f38d81f2ad169347efac29146f553b238.1615819955.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/fault.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0d4e4ff77e03..0c0b1c2cfb49 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -33,6 +33,7 @@ #include #include #include +#include <linux/pkeys.h> #include #include @@ -88,7 +89,6 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } -#ifdef CONFIG_PPC_MEM_KEYS static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma) { @@ -128,7 +128,6 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, return 0; } -#endif static noinline int bad_access(struct pt_regs *regs, unsigned long address) { @@ -235,7 +234,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } -#ifdef CONFIG_PPC_MEM_KEYS static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, struct vm_area_struct *vma) { @@ -249,7 +247,6 @@ static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, return false; } -#endif static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) { @@ -497,11 +494,9 @@ retry: return bad_area(regs, address); } -#ifdef CONFIG_PPC_MEM_KEYS if (unlikely(access_pkey_error(is_write, is_exec, (error_code & DSISR_KEYFAULT), vma))) return bad_access_pkey(regs, address, vma); -#endif /* CONFIG_PPC_MEM_KEYS */ if (unlikely(access_error(is_write, is_exec, vma))) return bad_access(regs, address); -- cgit v1.2.3 From 1479e3d3b7559133b0a107772b5841e9c2cad450 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:52:05 +1000 Subject: powerpc/64s: Fix hash fault to use TRAP accessor Hash faults use the trap vector to decide whether this is an instruction or data fault. This should use the TRAP() accessor rather than open-coded access to regs->trap. This won't cause a problem at the moment because 64s only uses trap flags for system call interrupts (the norestart flag), but that could change if any other trap flags get used in the future. 
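A sketch of the accessor pattern (the mask value below is illustrative, not copied from ptrace.h): flag bits such as the syscall norestart flag live in the low bits of regs->trap, so the trap vector must be compared with those bits masked off.

#include <asm/ptrace.h>

/* Illustrative mask; the real TRAP_FLAGS_MASK lives in asm/ptrace.h. */
#define TRAP_FLAGS_MASK		0x1fUL
#define TRAP(regs)		((regs)->trap & ~TRAP_FLAGS_MASK)

static inline bool trap_is_insn_fault(const struct pt_regs *regs)
{
	return TRAP(regs) == 0x400;	/* 0x400 = instruction access */
}
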
Fixes: a4922f5442e7e ("powerpc/64s: move the hash fault handling logic to C") Suggested-by: Christophe Leroy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316105205.407767-1-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 581b20a2feaf..7719995323c3 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1545,10 +1545,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (regs->trap == 0x400) + if (TRAP(regs) == 0x400) access |= _PAGE_EXEC; - err = hash_page_mm(mm, ea, access, regs->trap, flags); + err = hash_page_mm(mm, ea, access, TRAP(regs), flags); if (unlikely(err < 0)) { // failed to instert a hash PTE due to an hypervisor error if (user_mode(regs)) { -- cgit v1.2.3 From 4763d37827643750a39a8c7a9205928c09618a6f Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Wed, 20 Jan 2021 19:50:21 +0530 Subject: powerpc: Spelling/typo fixes Various spelling/typo fixes. Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpm2.h | 2 +- arch/powerpc/kernel/head_8xx.S | 2 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- drivers/macintosh/windfarm_smu_controls.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index 2211b934ecb4..bda45788cfcc 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -594,7 +594,7 @@ typedef struct fcc_enet { uint fen_p256c; /* Total packets 256 < bytes <= 511 */ uint fen_p512c; /* Total packets 512 < bytes <= 1023 */ uint fen_p1024c; /* Total packets 1024 < bytes <= 1518 */ - uint fen_cambuf; /* Internal CAM buffer poiner */ + uint fen_cambuf; /* Internal CAM buffer pointer */ ushort fen_rfthr; /* Received frames threshold */ ushort fen_rfcnt; /* Received frames count */ } fcc_enet_t; diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 34feb628c88d..e3b066703eab 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -804,7 +804,7 @@ EXPORT_SYMBOL(empty_zero_page) swapper_pg_dir: .space PGD_TABLE_SIZE -/* Room for two PTE table poiners, usually the kernel and current user +/* Room for two PTE table pointers, usually the kernel and current user * pointer to their respective root page table (pgdir). */ .globl abatron_pteptrs diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 98f0b243c1ab..8b8f1451e944 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1058,7 +1058,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, * Book3S does not require a TLB flush when relaxing access * restrictions when the address space is not attached to a * NMMU, because the core MMU will reload the pte after taking - * an access fault, which is defined by the architectue. + * an access fault, which is defined by the architecture. 
*/ } /* See ptesync comment in radix__set_pte_at */ diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index 79cb1ad09bfd..75966052819a 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -94,7 +94,7 @@ static int smu_set_fan(int pwm, u8 id, u16 value) return rc; wait_for_completion(&comp); - /* Handle fallback (see coment above) */ + /* Handle fallback (see comment above) */ if (cmd.status != 0 && smu_supports_new_fans_ops) { printk(KERN_WARNING "windfarm: SMU failed new fan command " "falling back to old method\n"); -- cgit v1.2.3 From b8b2f37cf632434456182e9002d63cbc4cccc50c Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Mon, 8 Feb 2021 14:29:56 +1100 Subject: powerpc/64s: Fix pte update for kernel memory on radix When adding a PTE, a ptesync is needed to order the update of the PTE with subsequent accesses, otherwise a spurious fault may be raised. radix__set_pte_at() does not do this for performance gains. For non-kernel memory this is not an issue as any faults of this kind are corrected by the page fault handler. For kernel memory these faults are not handled. The current solution is that there is a ptesync in flush_cache_vmap(), which should be called when mapping from the vmalloc region. However, map_kernel_page() does not call flush_cache_vmap(). This is troublesome in particular for code patching with Strict RWX on radix. In do_patch_instruction() the page frame that contains the instruction to be patched is mapped and then immediately patched. With no ordering or synchronization between setting up the PTE and writing to the page, spurious faults are possible. As the code patching is done using __put_user_asm_goto() the resulting fault is obscured - but using a normal store instead it can be seen: BUG: Unable to handle kernel data access on write at 0xc008000008f24a3c Faulting instruction address: 0xc00000000008bd74 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: nop_module(PO+) [last unloaded: nop_module] CPU: 4 PID: 757 Comm: sh Tainted: P O 5.10.0-rc5-01361-ge3c1b78c8440-dirty #43 NIP: c00000000008bd74 LR: c00000000008bd50 CTR: c000000000025810 REGS: c000000016f634a0 TRAP: 0300 Tainted: P O (5.10.0-rc5-01361-ge3c1b78c8440-dirty) MSR: 9000000000009033 CR: 44002884 XER: 00000000 CFAR: c00000000007c68c DAR: c008000008f24a3c DSISR: 42000000 IRQMASK: 1 This results in the kind of issue reported here: https://lore.kernel.org/linuxppc-dev/15AC5B0E-A221-4B8C-9039-FA96B8EF7C88@lca.pw/ Chris Riedl suggested a reliable way to reproduce the issue: $ mount -t debugfs none /sys/kernel/debug $ (while true; do echo function > /sys/kernel/debug/tracing/current_tracer ; echo nop > /sys/kernel/debug/tracing/current_tracer ; done) & Turning ftrace on and off does a large amount of code patching, which will usually crash in less than 5 minutes, giving a trace like: ftrace-powerpc: (____ptrval____): replaced (4b473b11) != old (60000000) ------------[ ftrace bug ]------------ ftrace failed to modify [] napi_busy_loop+0xc/0x390 actual: 11:3b:47:4b Setting ftrace call site to call ftrace function ftrace record flags: 80000001 (1) expected tramp: c00000000006c96c ------------[ cut here ]------------ WARNING: CPU: 4 PID: 809 at kernel/trace/ftrace.c:2065 ftrace_bug+0x28c/0x2e8 Modules linked in: nop_module(PO-) [last unloaded: nop_module] CPU: 4 PID: 809 Comm: sh Tainted: P O 5.10.0-rc5-01360-gf878ccaf250a #1 NIP: c00000000024f334 LR: 
c00000000024f330 CTR: c0000000001a5af0 REGS: c000000004c8b760 TRAP: 0700 Tainted: P O (5.10.0-rc5-01360-gf878ccaf250a) MSR: 900000000282b033 CR: 28008848 XER: 20040000 CFAR: c0000000001a9c98 IRQMASK: 0 GPR00: c00000000024f330 c000000004c8b9f0 c000000002770600 0000000000000022 GPR04: 00000000ffff7fff c000000004c8b6d0 0000000000000027 c0000007fe9bcdd8 GPR08: 0000000000000023 ffffffffffffffd8 0000000000000027 c000000002613118 GPR12: 0000000000008000 c0000007fffdca00 0000000000000000 0000000000000000 GPR16: 0000000023ec37c5 0000000000000000 0000000000000000 0000000000000008 GPR20: c000000004c8bc90 c0000000027a2d20 c000000004c8bcd0 c000000002612fe8 GPR24: 0000000000000038 0000000000000030 0000000000000028 0000000000000020 GPR28: c000000000ff1b68 c000000000bf8e5c c00000000312f700 c000000000fbb9b0 NIP ftrace_bug+0x28c/0x2e8 LR ftrace_bug+0x288/0x2e8 Call Trace: ftrace_bug+0x288/0x2e8 (unreliable) ftrace_modify_all_code+0x168/0x210 arch_ftrace_update_code+0x18/0x30 ftrace_run_update_code+0x44/0xc0 ftrace_startup+0xf8/0x1c0 register_ftrace_function+0x4c/0xc0 function_trace_init+0x80/0xb0 tracing_set_tracer+0x2a4/0x4f0 tracing_set_trace_write+0xd4/0x130 vfs_write+0xf0/0x330 ksys_write+0x84/0x140 system_call_exception+0x14c/0x230 system_call_common+0xf0/0x27c To fix this, use a ptesync when updating kernel memory PTEs. Fixes: f1cb8f9beba8 ("powerpc/64s/radix: avoid ptesync after set_pte and ptep_set_access_flags") Signed-off-by: Jordan Niethe Reviewed-by: Nicholas Piggin [mpe: Tidy up change log slightly] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210208032957.1232102-1-jniethe5@gmail.com --- arch/powerpc/include/asm/book3s/64/radix.h | 6 ++++-- arch/powerpc/mm/book3s64/radix_pgtable.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index c7813dc628fc..59cab558e2f0 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -222,8 +222,10 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr, * from ptesync, it should probably go into update_mmu_cache, rather * than set_pte_at (which is used to set ptes unrelated to faults). * - * Spurious faults to vmalloc region are not tolerated, so there is - * a ptesync in flush_cache_vmap. + * Spurious faults from the kernel memory are not tolerated, so there + * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows + * the pte update sequence from ISA Book III 6.10 Translation Table + * Update Synchronization Requirements. 
*/ } diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 8b8f1451e944..55f26c0e389e 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -108,7 +108,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -168,7 +168,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } -- cgit v1.2.3 From 2c02e656a29d5f64193eb93da92781bcf0517146 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Mar 2021 11:38:42 +1100 Subject: powerpc/64s: Use htab_convert_pte_flags() in hash__mark_rodata_ro() In hash__mark_rodata_ro() we pass the raw PP_RXXX value to hash__change_memory_range(). That has the effect of setting the key to zero, because PP_RXXX contains no key value. Fix it by using htab_convert_pte_flags(), which knows how to convert a pgprot into a pp value, including the key. Fixes: d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation") Signed-off-by: Michael Ellerman Reviewed-by: Daniel Axtens Link: https://lore.kernel.org/r/20210331003845.216246-3-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/hash_pgtable.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 567e0c6b3978..03819c259f0a 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -428,12 +428,14 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, void hash__mark_rodata_ro(void) { - unsigned long start, end; + unsigned long start, end, pp; start = (unsigned long)_stext; end = (unsigned long)__init_begin; - WARN_ON(!hash__change_memory_range(start, end, PP_RXXX)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY); + + WARN_ON(!hash__change_memory_range(start, end, pp)); } void hash__mark_initmem_nx(void) -- cgit v1.2.3 From 6f223ebe9c3f3ed315a06cec156086f1f7f7ded1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Mar 2021 11:38:43 +1100 Subject: powerpc/mm/64s/hash: Factor out change_memory_range() Pull the loop calling hpte_updateboltedpp() out of hash__change_memory_range() into a helper function. We need it to be a separate function for the next patch. 
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331003845.216246-4-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/hash_pgtable.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 03819c259f0a..3663d3cdffac 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -400,10 +400,23 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX +static void change_memory_range(unsigned long start, unsigned long end, + unsigned int step, unsigned long newpp) +{ + unsigned long idx; + + pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", + start, end, newpp, step); + + for (idx = start; idx < end; idx += step) + /* Not sure if we can do much with the return value */ + mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, + mmu_kernel_ssize); +} + static bool hash__change_memory_range(unsigned long start, unsigned long end, unsigned long newpp) { - unsigned long idx; unsigned int step, shift; shift = mmu_psize_defs[mmu_linear_psize].shift; @@ -415,13 +428,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, if (start >= end) return false; - pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", - start, end, newpp, step); - - for (idx = start; idx < end; idx += step) - /* Not sure if we can do much with the return value */ - mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, - mmu_kernel_ssize); + change_memory_range(start, end, step, newpp); return true; } -- cgit v1.2.3 From 87e65ad7bd3a84a992723753fcc23d31c2d063c2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Mar 2021 11:38:44 +1100 Subject: powerpc/mm/64s/hash: Add real-mode change_memory_range() for hash LPAR When we enabled STRICT_KERNEL_RWX we received some reports of boot failures when using the Hash MMU and running under phyp. The crashes are intermittent, and often exhibit as a completely unresponsive system, or possibly an oops. One example, which was caught in xmon: [ 14.068327][ T1] devtmpfs: mounted [ 14.069302][ T1] Freeing unused kernel memory: 5568K [ 14.142060][ T347] BUG: Unable to handle kernel instruction fetch [ 14.142063][ T1] Run /sbin/init as init process [ 14.142074][ T347] Faulting instruction address: 0xc000000000004400 cpu 0x2: Vector: 400 (Instruction Access) at [c00000000c7475e0] pc: c000000000004400: exc_virt_0x4400_instruction_access+0x0/0x80 lr: c0000000001862d4: update_rq_clock+0x44/0x110 sp: c00000000c747880 msr: 8000000040001031 current = 0xc00000000c60d380 paca = 0xc00000001ec9de80 irqmask: 0x03 irq_happened: 0x01 pid = 347, comm = kworker/2:1 ... enter ? 
for help [c00000000c747880] c0000000001862d4 update_rq_clock+0x44/0x110 (unreliable) [c00000000c7478f0] c000000000198794 update_blocked_averages+0xb4/0x6d0 [c00000000c7479f0] c000000000198e40 update_nohz_stats+0x90/0xd0 [c00000000c747a20] c0000000001a13b4 _nohz_idle_balance+0x164/0x390 [c00000000c747b10] c0000000001a1af8 newidle_balance+0x478/0x610 [c00000000c747be0] c0000000001a1d48 pick_next_task_fair+0x58/0x480 [c00000000c747c40] c000000000eaab5c __schedule+0x12c/0x950 [c00000000c747cd0] c000000000eab3e8 schedule+0x68/0x120 [c00000000c747d00] c00000000016b730 worker_thread+0x130/0x640 [c00000000c747da0] c000000000174d50 kthread+0x1a0/0x1b0 [c00000000c747e10] c00000000000e0f0 ret_from_kernel_thread+0x5c/0x6c This shows that CPU 2, which was idle, woke up and then appears to randomly take an instruction fault on a completely valid area of kernel text. The cause turns out to be the call to hash__mark_rodata_ro(), late in boot. Due to the way we lay out text and rodata, that function actually changes the permissions for all of text and rodata to read-only plus execute. To do the permission change we use a hypervisor call, H_PROTECT. On phyp that appears to be implemented by briefly removing the mapping of the kernel text, before putting it back with the updated permissions. If any other CPU is executing during that window, it will see spurious faults on the kernel text and/or data, leading to crashes. To fix it we use stop machine to collect all other CPUs, and then have them drop into real mode (MMU off), while we change the mapping. That way they are unaffected by the mapping temporarily disappearing. We don't see this bug on KVM because KVM always uses VPM=1, where faults are directed to the hypervisor, and the fault will be serialised vs the h_protect() by HPTE_V_HVLOCK.
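The heart of the fix is the real-mode switch: each secondary CPU clears MSR[IR] and MSR[DR] so the temporary disappearance of the linear mapping cannot fault it. Schematically, ignoring the counter handshake shown in the diff below, the switch amounts to the following sketch (the real patch keeps everything inside a single asm block precisely so the compiler cannot emit a stray memory access while translation is off):

	unsigned long msr = mfmsr();

	__mtmsrd(msr & ~(MSR_IR | MSR_DR), 0);	/* MMU off: fetches/loads/stores use real addresses */
	/* ... spin here until the master has finished hpte_updateboltedpp() ... */
	__mtmsrd(msr, 0);			/* restore translation */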
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210331003845.216246-5-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/hash_pgtable.c | 105 +++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 3663d3cdffac..ad5eff097d31 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -400,6 +401,19 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX + +struct change_memory_parms { + unsigned long start, end, newpp; + unsigned int step, nr_cpus, master_cpu; + atomic_t cpu_counter; +}; + +// We'd rather this was on the stack but it has to be in the RMO +static struct change_memory_parms chmem_parms; + +// And therefore we need a lock to protect it from concurrent use +static DEFINE_MUTEX(chmem_lock); + static void change_memory_range(unsigned long start, unsigned long end, unsigned int step, unsigned long newpp) { @@ -414,6 +428,73 @@ static void change_memory_range(unsigned long start, unsigned long end, mmu_kernel_ssize); } +static int notrace chmem_secondary_loop(struct change_memory_parms *parms) +{ + unsigned long msr, tmp, flags; + int *p; + + p = &parms->cpu_counter.counter; + + local_irq_save(flags); + hard_irq_disable(); + + asm volatile ( + // Switch to real mode and leave interrupts off + "mfmsr %[msr] ;" + "li %[tmp], %[MSR_IR_DR] ;" + "andc %[tmp], %[msr], %[tmp] ;" + "mtmsrd %[tmp] ;" + + // Tell the master we are in real mode + "1: " + "lwarx %[tmp], 0, %[p] ;" + "addic %[tmp], %[tmp], -1 ;" + "stwcx. 
%[tmp], 0, %[p] ;" + "bne- 1b ;" + + // Spin until the counter goes to zero + "2: ;" + "lwz %[tmp], 0(%[p]) ;" + "cmpwi %[tmp], 0 ;" + "bne- 2b ;" + + // Switch back to virtual mode + "mtmsrd %[msr] ;" + + : // outputs + [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p) + : // inputs + [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR) + : // clobbers + "cc", "xer" + ); + + local_irq_restore(flags); + + return 0; +} + +static int change_memory_range_fn(void *data) +{ + struct change_memory_parms *parms = data; + + if (parms->master_cpu != smp_processor_id()) + return chmem_secondary_loop(parms); + + // Wait for all but one CPU (this one) to call-in + while (atomic_read(&parms->cpu_counter) > 1) + barrier(); + + change_memory_range(parms->start, parms->end, parms->step, parms->newpp); + + mb(); + + // Signal the other CPUs that we're done + atomic_dec(&parms->cpu_counter); + + return 0; +} + static bool hash__change_memory_range(unsigned long start, unsigned long end, unsigned long newpp) { @@ -428,7 +509,29 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, if (start >= end) return false; - change_memory_range(start, end, step, newpp); + if (firmware_has_feature(FW_FEATURE_LPAR)) { + mutex_lock(&chmem_lock); + + chmem_parms.start = start; + chmem_parms.end = end; + chmem_parms.step = step; + chmem_parms.newpp = newpp; + chmem_parms.master_cpu = smp_processor_id(); + + cpus_read_lock(); + + atomic_set(&chmem_parms.cpu_counter, num_online_cpus()); + + // Ensure state is consistent before we call the other CPUs + mb(); + + stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms, + cpu_online_mask); + + cpus_read_unlock(); + mutex_unlock(&chmem_lock); + } else + change_memory_range(start, end, step, newpp); return true; } -- cgit v1.2.3 From a5d6a3e73acbd619dd5b7b831762b755f9e2db80 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Sun, 4 Apr 2021 22:01:48 +0530 Subject: powerpc/mm: Add cond_resched() while removing hpte mappings While removing a large number of mappings from the hash page tables on large memory systems, a soft-lockup is reported because of the time spent inside htab_remove_mapping(), like the one below: watchdog: BUG: soft lockup - CPU#8 stuck for 23s! NIP plpar_hcall+0x38/0x58 LR pSeries_lpar_hpte_invalidate+0x68/0xb0 Call Trace: 0x1fffffffffff000 (unreliable) pSeries_lpar_hpte_removebolted+0x9c/0x230 hash__remove_section_mapping+0xec/0x1c0 remove_section_mapping+0x28/0x3c arch_remove_memory+0xfc/0x150 devm_memremap_pages_release+0x180/0x2f0 devm_action_release+0x30/0x50 release_nodes+0x28c/0x300 device_release_driver_internal+0x16c/0x280 unbind_store+0x124/0x170 drv_attr_store+0x44/0x60 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1b0/0x290 __vfs_write+0x3c/0x70 vfs_write+0xd4/0x270 ksys_write+0xdc/0x130 system_call+0x5c/0x70 Fix this by adding a cond_resched() to the loop in htab_remove_mapping() that issues hcalls to remove hpte mappings. The call to cond_resched() is issued at most once every HZ jiffies, which should prevent the soft-lockup from being reported.
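The pattern generalises to any long-running kernel loop: bound the time spent between reschedule points to roughly one HZ worth of jiffies. A self-contained sketch of the same idea, where remove_one() is a hypothetical unit of work:

	#include <linux/jiffies.h>
	#include <linux/sched.h>

	static void remove_many(unsigned long *units, unsigned long n)
	{
		unsigned long i, time_limit = jiffies + HZ;

		for (i = 0; i < n; i++) {
			remove_one(units[i]);	/* hypothetical: one hcall-backed removal */

			if (time_after(jiffies, time_limit)) {
				cond_resched();	/* yield so the watchdog sees forward progress */
				time_limit = jiffies + HZ;
			}
		}
	}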
Suggested-by: Aneesh Kumar K.V Signed-off-by: Vaibhav Jain Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210404163148.321346-1-vaibhav@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7719995323c3..12de1906e97b 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -338,7 +338,7 @@ repeat: int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize) { - unsigned long vaddr; + unsigned long vaddr, time_limit; unsigned int step, shift; int rc; int ret = 0; @@ -351,8 +351,19 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, /* Unmap the full range specificied */ vaddr = ALIGN_DOWN(vstart, step); + time_limit = jiffies + HZ; + for (;vaddr < vend; vaddr += step) { rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); + + /* + * For large number of mappings introduce a cond_resched() + * to prevent softlockup warnings. + */ + if (time_after(jiffies, time_limit)) { + cond_resched(); + time_limit = jiffies + HZ; + } if (rc == -ENOENT) { ret = -ENOENT; continue; -- cgit v1.2.3 From 80edc68e0479bafdc4869ec3351e42316b824596 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 1 Apr 2021 13:30:43 +0000 Subject: powerpc/32s: Define a MODULE area below kernel text all the time On book3s/32, the segment below kernel text is used for module allocation when CONFIG_STRICT_KERNEL_RWX is defined. In order to benefit from the powerpc specific module_alloc() function, which allocates modules within 32 Mbytes of the end of kernel text, use that segment below PAGE_OFFSET at all times.
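With the common PAGE_OFFSET of 0xc0000000, the window defined by the pgtable.h hunk below works out as follows (a worked example, not part of the change itself):

	MODULES_END   = ALIGN_DOWN(0xc0000000, SZ_256M) = 0xc0000000
	MODULES_VADDR = MODULES_END - SZ_256M           = 0xb0000000

i.e. modules always live in the 256 Mbyte segment immediately below kernel text, whatever the RWX configuration.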
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a46dcdd39a9e80b012d86c294c4e5cd8d31665f3.1617283827.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/book3s/32/pgtable.h | 2 -- arch/powerpc/mm/book3s32/mmu.c | 7 ------- 3 files changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 36d7c56df91d..7c5c72cbf19f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1220,7 +1220,7 @@ config TASK_SIZE_BOOL config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx - default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX + default "0xb0000000" if PPC_BOOK3S_32 default "0xc0000000" endmenu diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 415ae29fa73a..83c65845a1a9 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -194,10 +194,8 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #define VMALLOC_END ioremap_bot #endif -#ifdef CONFIG_STRICT_KERNEL_RWX #define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M) #define MODULES_VADDR (MODULES_END - SZ_256M) -#endif #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index a0db398b5c26..159930351d9f 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -184,17 +184,10 @@ static bool is_module_segment(unsigned long addr) { if (!IS_ENABLED(CONFIG_MODULES)) return false; -#ifdef MODULES_VADDR if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) return false; if (addr > ALIGN(MODULES_END, SZ_256M) - 1) return false; -#else - if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) - return false; - if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1) - return false; -#endif return true; } -- cgit v1.2.3 From b26e8f27253a47bff90972b987112fd8396e9b8d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:24 +0000 Subject: powerpc/mem: Move cache flushing functions into mm/cacheflush.c Cache flushing functions are in the middle of completely unrelated stuff in mm/mem.c. Create a dedicated mm/cacheflush.c for those functions. Also clean up the list of included headers.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7bf6f1600acad146e541a4e220940062f2e5b03d.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/Makefile | 3 +- arch/powerpc/mm/cacheflush.c | 255 +++++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/mem.c | 281 ------------------------------------------- 3 files changed, 257 insertions(+), 282 deletions(-) create mode 100644 arch/powerpc/mm/cacheflush.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 3b4e9e4e25ea..c3df3a8501d4 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -8,7 +8,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ - init-common.o mmu_context.o drmem.o + init-common.o mmu_context.o drmem.o \ + cacheflush.o obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/ diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c new file mode 100644 index 000000000000..40613d2fda37 --- /dev/null +++ b/arch/powerpc/mm/cacheflush.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +/** + * flush_coherent_icache() - if a CPU has a coherent icache, flush it + * @addr: The base address to use (can be any valid address, the whole cache will be flushed) + * Return true if the cache was flushed, false otherwise + */ +static inline bool flush_coherent_icache(unsigned long addr) +{ + /* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. + */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + mb(); /* sync */ + allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); + icbi((void *)addr); + prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); + mb(); /* sync */ + isync(); + return true; + } + + return false; +} + +/** + * invalidate_icache_range() - Flush the icache by issuing icbi across an address range + * @start: the start address + * @stop: the stop address (exclusive) + */ +static void invalidate_icache_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_icache_shift(); + unsigned long bytes = l1_icache_bytes(); + char *addr = (char *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + icbi(addr); + + mb(); /* sync */ + isync(); +} + +/** + * flush_icache_range: Write any modified data cache blocks out to memory + * and invalidate the corresponding blocks in the instruction cache + * + * Generic code will call this after writing memory, before executing from it. + * + * @start: the start address + * @stop: the stop address (exclusive) + */ +void flush_icache_range(unsigned long start, unsigned long stop) +{ + if (flush_coherent_icache(start)) + return; + + clean_dcache_range(start, stop); + + if (IS_ENABLED(CONFIG_44x)) { + /* + * Flash invalidate on 44x because we are passed kmapped + * addresses and this doesn't work for userspace pages due to + * the virtually tagged icache. 
+ */ + iccci((void *)start); + mb(); /* sync */ + isync(); + } else + invalidate_icache_range(start, stop); +} +EXPORT_SYMBOL(flush_icache_range); + +#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) +/** + * flush_dcache_icache_phys() - Flush a page by it's physical address + * @physaddr: the physical address of the page + */ +static void flush_dcache_icache_phys(unsigned long physaddr) +{ + unsigned long bytes = l1_dcache_bytes(); + unsigned long nb = PAGE_SIZE / bytes; + unsigned long addr = physaddr & PAGE_MASK; + unsigned long msr, msr0; + unsigned long loop1 = addr, loop2 = addr; + + msr0 = mfmsr(); + msr = msr0 & ~MSR_DR; + /* + * This must remain as ASM to prevent potential memory accesses + * while the data MMU is disabled + */ + asm volatile( + " mtctr %2;\n" + " mtmsr %3;\n" + " isync;\n" + "0: dcbst 0, %0;\n" + " addi %0, %0, %4;\n" + " bdnz 0b;\n" + " sync;\n" + " mtctr %2;\n" + "1: icbi 0, %1;\n" + " addi %1, %1, %4;\n" + " bdnz 1b;\n" + " sync;\n" + " mtmsr %5;\n" + " isync;\n" + : "+&r" (loop1), "+&r" (loop2) + : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) + : "ctr", "memory"); +} +NOKPROBE_SYMBOL(flush_dcache_icache_phys) +#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) + +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. + */ +void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} +EXPORT_SYMBOL(flush_dcache_page); + +static void flush_dcache_icache_hugepage(struct page *page) +{ + int i; + void *start; + + BUG_ON(!PageCompound(page)); + + for (i = 0; i < compound_nr(page); i++) { + if (!PageHighMem(page)) { + __flush_dcache_icache(page_address(page+i)); + } else { + start = kmap_atomic(page+i); + __flush_dcache_icache(start); + kunmap_atomic(start); + } + } +} + +void flush_dcache_icache_page(struct page *page) +{ + + if (PageCompound(page)) + return flush_dcache_icache_hugepage(page); + +#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) + /* On 8xx there is no need to kmap since highmem is not supported */ + __flush_dcache_icache(page_address(page)); +#else + if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { + void *start = kmap_atomic(page); + __flush_dcache_icache(start); + kunmap_atomic(start); + } else { + unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; + + if (flush_coherent_icache(addr)) + return; + flush_dcache_icache_phys(addr); + } +#endif +} +EXPORT_SYMBOL(flush_dcache_icache_page); + +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @page: the address of the page to flush + */ +void __flush_dcache_icache(void *p) +{ + unsigned long addr = (unsigned long)p; + + if (flush_coherent_icache(addr)) + return; + + clean_dcache_range(addr, addr + PAGE_SIZE); + + /* + * We don't flush the icache on 44x. Those have a virtual icache and we + * don't have access to the virtual address here (it's not the page + * vaddr but where it's mapped in user space). The flushing of the + * icache on these is handled elsewhere, when a change in the address + * space occurs, before returning to user space. 
+ */ + + if (mmu_has_feature(MMU_FTR_TYPE_44x)) + return; + + invalidate_icache_range(addr, addr + PAGE_SIZE); +} + +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + /* + * We shouldn't have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + flush_dcache_page(pg); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. + * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + flush_dcache_page(pg); +} + +void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + unsigned long maddr; + + maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); + flush_icache_range(maddr, maddr + len); + kunmap(page); +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 7a59a5c9aa5d..6564b4d81324 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -12,45 +12,15 @@ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include #include @@ -340,257 +310,6 @@ void free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } -/** - * flush_coherent_icache() - if a CPU has a coherent icache, flush it - * @addr: The base address to use (can be any valid address, the whole cache will be flushed) - * Return true if the cache was flushed, false otherwise - */ -static inline bool flush_coherent_icache(unsigned long addr) -{ - /* - * For a snooping icache, we still need a dummy icbi to purge all the - * prefetched instructions from the ifetch buffers. We also need a sync - * before the icbi to order the the actual stores to memory that might - * have modified instructions with the icbi. 
- */ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { - mb(); /* sync */ - allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - icbi((void *)addr); - prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - mb(); /* sync */ - isync(); - return true; - } - - return false; -} - -/** - * invalidate_icache_range() - Flush the icache by issuing icbi across an address range - * @start: the start address - * @stop: the stop address (exclusive) - */ -static void invalidate_icache_range(unsigned long start, unsigned long stop) -{ - unsigned long shift = l1_icache_shift(); - unsigned long bytes = l1_icache_bytes(); - char *addr = (char *)(start & ~(bytes - 1)); - unsigned long size = stop - (unsigned long)addr + (bytes - 1); - unsigned long i; - - for (i = 0; i < size >> shift; i++, addr += bytes) - icbi(addr); - - mb(); /* sync */ - isync(); -} - -/** - * flush_icache_range: Write any modified data cache blocks out to memory - * and invalidate the corresponding blocks in the instruction cache - * - * Generic code will call this after writing memory, before executing from it. - * - * @start: the start address - * @stop: the stop address (exclusive) - */ -void flush_icache_range(unsigned long start, unsigned long stop) -{ - if (flush_coherent_icache(start)) - return; - - clean_dcache_range(start, stop); - - if (IS_ENABLED(CONFIG_44x)) { - /* - * Flash invalidate on 44x because we are passed kmapped - * addresses and this doesn't work for userspace pages due to - * the virtually tagged icache. - */ - iccci((void *)start); - mb(); /* sync */ - isync(); - } else - invalidate_icache_range(start, stop); -} -EXPORT_SYMBOL(flush_icache_range); - -#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) -/** - * flush_dcache_icache_phys() - Flush a page by it's physical address - * @physaddr: the physical address of the page - */ -static void flush_dcache_icache_phys(unsigned long physaddr) -{ - unsigned long bytes = l1_dcache_bytes(); - unsigned long nb = PAGE_SIZE / bytes; - unsigned long addr = physaddr & PAGE_MASK; - unsigned long msr, msr0; - unsigned long loop1 = addr, loop2 = addr; - - msr0 = mfmsr(); - msr = msr0 & ~MSR_DR; - /* - * This must remain as ASM to prevent potential memory accesses - * while the data MMU is disabled - */ - asm volatile( - " mtctr %2;\n" - " mtmsr %3;\n" - " isync;\n" - "0: dcbst 0, %0;\n" - " addi %0, %0, %4;\n" - " bdnz 0b;\n" - " sync;\n" - " mtctr %2;\n" - "1: icbi 0, %1;\n" - " addi %1, %1, %4;\n" - " bdnz 1b;\n" - " sync;\n" - " mtmsr %5;\n" - " isync;\n" - : "+&r" (loop1), "+&r" (loop2) - : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) - : "ctr", "memory"); -} -NOKPROBE_SYMBOL(flush_dcache_icache_phys) -#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) - -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. 
- */ -void flush_dcache_page(struct page *page) -{ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* avoid an atomic op if possible */ - if (test_bit(PG_dcache_clean, &page->flags)) - clear_bit(PG_dcache_clean, &page->flags); -} -EXPORT_SYMBOL(flush_dcache_page); - -static void flush_dcache_icache_hugepage(struct page *page) -{ - int i; - void *start; - - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { - start = kmap_atomic(page+i); - __flush_dcache_icache(start); - kunmap_atomic(start); - } - } -} - -void flush_dcache_icache_page(struct page *page) -{ - - if (PageCompound(page)) - return flush_dcache_icache_hugepage(page); - -#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) - /* On 8xx there is no need to kmap since highmem is not supported */ - __flush_dcache_icache(page_address(page)); -#else - if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { - void *start = kmap_atomic(page); - __flush_dcache_icache(start); - kunmap_atomic(start); - } else { - unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; - - if (flush_coherent_icache(addr)) - return; - flush_dcache_icache_phys(addr); - } -#endif -} -EXPORT_SYMBOL(flush_dcache_icache_page); - -/** - * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * @page: the address of the page to flush - */ -void __flush_dcache_icache(void *p) -{ - unsigned long addr = (unsigned long)p; - - if (flush_coherent_icache(addr)) - return; - - clean_dcache_range(addr, addr + PAGE_SIZE); - - /* - * We don't flush the icache on 44x. Those have a virtual icache and we - * don't have access to the virtual address here (it's not the page - * vaddr but where it's mapped in user space). The flushing of the - * icache on these is handled elsewhere, when a change in the address - * space occurs, before returning to user space. - */ - - if (mmu_has_feature(MMU_FTR_TYPE_44x)) - return; - - invalidate_icache_range(addr, addr + PAGE_SIZE); -} - -void clear_user_page(void *page, unsigned long vaddr, struct page *pg) -{ - clear_page(page); - - /* - * We shouldn't have to do this, but some versions of glibc - * require it (ld.so assumes zero filled pages are icache clean) - * - Anton - */ - flush_dcache_page(pg); -} -EXPORT_SYMBOL(clear_user_page); - -void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, - struct page *pg) -{ - copy_page(vto, vfrom); - - /* - * We should be able to use the following optimisation, however - * there are two problems. - * Firstly a bug in some versions of binutils meant PLT sections - * were not marked executable. - * Secondly the first word in the GOT section is blrl, used - * to establish the GOT address. Until recently the GOT was - * not marked executable. - * - Anton - */ -#if 0 - if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) - return; -#endif - - flush_dcache_page(pg); -} - -void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - unsigned long maddr; - - maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); - flush_icache_range(maddr, maddr + len); - kunmap(page); -} - /* * System memory should not be in /proc/iomem but various tools expect it * (eg kdump). 
-- cgit v1.2.3 From bf26e0bbd2f82b52605cd7c880245eefe67e09f3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:26 +0000 Subject: powerpc/mem: Declare __flush_dcache_icache() static __flush_dcache_icache() is only used in mem.c. Move it before the functions that use it and declare it static. And also fix the name of the parameter in the comment. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3fa903eb5a10b2bc7d99a8c559ffdaa05452d8e0.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cacheflush.h | 1 - arch/powerpc/mm/cacheflush.c | 60 +++++++++++++++++------------------ 2 files changed, 30 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index f63495109f63..9110489ea411 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -40,7 +40,6 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, #define flush_icache_user_page flush_icache_user_page void flush_dcache_icache_page(struct page *page); -void __flush_dcache_icache(void *page); /** * flush_dcache_range(): Write any modified data cache blocks out to memory and diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 40613d2fda37..742d3e0fb12f 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -135,6 +135,36 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @p: the address of the page to flush + */ +static void __flush_dcache_icache(void *p) +{ + unsigned long addr = (unsigned long)p; + + if (flush_coherent_icache(addr)) + return; + + clean_dcache_range(addr, addr + PAGE_SIZE); + + /* + * We don't flush the icache on 44x. Those have a virtual icache and we + * don't have access to the virtual address here (it's not the page + * vaddr but where it's mapped in user space). The flushing of the + * icache on these is handled elsewhere, when a change in the address + * space occurs, before returning to user space. + */ + + if (mmu_has_feature(MMU_FTR_TYPE_44x)) + return; + + invalidate_icache_range(addr, addr + PAGE_SIZE); +} + static void flush_dcache_icache_hugepage(struct page *page) { int i; @@ -178,36 +208,6 @@ void flush_dcache_icache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_icache_page); -/** - * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * @page: the address of the page to flush - */ -void __flush_dcache_icache(void *p) -{ - unsigned long addr = (unsigned long)p; - - if (flush_coherent_icache(addr)) - return; - - clean_dcache_range(addr, addr + PAGE_SIZE); - - /* - * We don't flush the icache on 44x. Those have a virtual icache and we - * don't have access to the virtual address here (it's not the page - * vaddr but where it's mapped in user space). The flushing of the - * icache on these is handled elsewhere, when a change in the address - * space occurs, before returning to user space. 
- */ - - if (mmu_has_feature(MMU_FTR_TYPE_44x)) - return; - - invalidate_icache_range(addr, addr + PAGE_SIZE); -} - void clear_user_page(void *page, unsigned long vaddr, struct page *pg) { clear_page(page); -- cgit v1.2.3 From 131637a17dc97fde3d007ab224e30c7ff4e62f6e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:27 +0000 Subject: powerpc/mem: Remove address argument to flush_coherent_icache() flush_coherent_icache() can use any valid address, as mentioned in the comment. Use PAGE_OFFSET as the base address. This allows removing the user access stuff. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/742b6360ae4f344a1c6ecfadcf3b6645f443fa7a.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 742d3e0fb12f..dc2d39da6f63 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -5,10 +5,9 @@ /** * flush_coherent_icache() - if a CPU has a coherent icache, flush it - * @addr: The base address to use (can be any valid address, the whole cache will be flushed) * Return true if the cache was flushed, false otherwise */ -static inline bool flush_coherent_icache(unsigned long addr) +static inline bool flush_coherent_icache(void) { /* * For a snooping icache, we still need a dummy icbi to purge all the @@ -18,9 +17,7 @@ static inline bool flush_coherent_icache(unsigned long addr) */ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { mb(); /* sync */ - allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - icbi((void *)addr); - prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); + icbi((void *)PAGE_OFFSET); mb(); /* sync */ isync(); return true; @@ -60,7 +57,7 @@ static void invalidate_icache_range(unsigned long start, unsigned long stop) */ void flush_icache_range(unsigned long start, unsigned long stop) { - if (flush_coherent_icache(start)) + if (flush_coherent_icache()) return; clean_dcache_range(start, stop); @@ -146,7 +143,7 @@ static void __flush_dcache_icache(void *p) { unsigned long addr = (unsigned long)p; - if (flush_coherent_icache(addr)) + if (flush_coherent_icache()) return; clean_dcache_range(addr, addr + PAGE_SIZE); @@ -200,7 +197,7 @@ void flush_dcache_icache_page(struct page *page) } else { unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; - if (flush_coherent_icache(addr)) + if (flush_coherent_icache()) return; flush_dcache_icache_phys(addr); } -- cgit v1.2.3 From e618c7aea1f2a2d615a99948f1f5cb4c11b6bf57 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:28 +0000 Subject: powerpc/mem: Call flush_coherent_icache() at higher level flush_coherent_icache() doesn't need the address anymore, so it can be called immediately when entering the public functions and doesn't need to be disseminated among lower level functions. And use page_to_phys() instead of open coding the calculation of phys address to call flush_dcache_icache_phys().
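The two forms compute the same physical address; the helper simply returns the correctly sized type. A sketch of the equivalence (note that on 32-bit platforms with a 64-bit phys_addr_t the open-coded shift in an unsigned long would truncate):

	/* before: open-coded, result limited to unsigned long */
	unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;

	/* after: helper, result is phys_addr_t (may be wider than a pointer) */
	phys_addr_t pa = page_to_phys(page);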
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5f063986e325d2efdd404b8f8c5f4bcbd4eb11a6.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index dc2d39da6f63..811045c50d82 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -143,9 +143,6 @@ static void __flush_dcache_icache(void *p) { unsigned long addr = (unsigned long)p; - if (flush_coherent_icache()) - return; - clean_dcache_range(addr, addr + PAGE_SIZE); /* @@ -182,6 +179,8 @@ static void flush_dcache_icache_hugepage(struct page *page) void flush_dcache_icache_page(struct page *page) { + if (flush_coherent_icache()) + return; if (PageCompound(page)) return flush_dcache_icache_hugepage(page); @@ -195,11 +194,7 @@ void flush_dcache_icache_page(struct page *page) __flush_dcache_icache(start); kunmap_atomic(start); } else { - unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; - - if (flush_coherent_icache()) - return; - flush_dcache_icache_phys(addr); + flush_dcache_icache_phys(page_to_phys(page)); } #endif } -- cgit v1.2.3 From cd97d9e8b5aa45a7f867a10e99f1d6ce0a5deb8b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:29 +0000 Subject: powerpc/mem: Optimise flush_dcache_icache_hugepage() flush_dcache_icache_hugepage() is a static function, with only one caller. That caller calls it when PageCompound() is true, so bugging on !PageCompound() is useless if we can trust the compiler a little. Remove the BUG_ON(!PageCompound()). The number of elements of a page won't change over time, but GCC doesn't know about it, so it gets the value at every iteration. To avoid that, call compound_nr() outside the loop and save it in a local variable. Whether the page is a HIGHMEM page or not doesn't change over time. But GCC doesn't know it so it does the test on every iteration. Do the test outside the loop. When the page is not a HIGHMEM page, page_address() will fallback on lowmem_page_address(), so call lowmem_page_address() directly and don't suffer the call to page_address() on every iteration. 
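The transformation is a classic loop-invariant hoist. In schematic form, with flush_one() standing in for __flush_dcache_icache() plus the kmap handling:

	/* before: compound_nr() and PageHighMem() re-evaluated on every pass */
	for (i = 0; i < compound_nr(page); i++)
		if (!PageHighMem(page))
			flush_one(page + i);

	/* after: both hoisted, the loop body carries no redundant test */
	int nr = compound_nr(page);

	if (!PageHighMem(page))
		for (i = 0; i < nr; i++)
			flush_one(page + i);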
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ab03712b70105fccfceef095aa03007de9295a40.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 811045c50d82..3268a3e55c3f 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -162,14 +162,14 @@ static void __flush_dcache_icache(void *p) static void flush_dcache_icache_hugepage(struct page *page) { int i; + int nr = compound_nr(page); void *start; - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { + if (!PageHighMem(page)) { + for (i = 0; i < nr; i++) + __flush_dcache_icache(lowmem_page_address(page + i)); + } else { + for (i = 0; i < nr; i++) { start = kmap_atomic(page+i); __flush_dcache_icache(start); kunmap_atomic(start); -- cgit v1.2.3 From 52d490437ffb1bab0a63ab7b1a64514d8c17dd4d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:30 +0000 Subject: powerpc/mem: flush_dcache_icache_phys() is for HIGHMEM pages only __flush_dcache_icache() is usable for non HIGHMEM pages on every platform. It is only for HIGHMEM pages that BOOKE needs kmap() and BOOK3S needs flush_dcache_icache_phys(). So make flush_dcache_icache_phys() dependent on CONFIG_HIGHMEM and call it only when it is a HIGHMEM page. We could make flush_dcache_icache_phys() available at all time, but as it is declared NOKPROBE_SYMBOL(), GCC doesn't optimise it out when it is not used. So define a stub for !CONFIG_HIGHMEM in order to remove the #ifdef in flush_dcache_icache_page() and use IS_ENABLED() instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/79ed5d7914f497cd5fcd681ca2f4d50a91719455.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 3268a3e55c3f..2d92cb6bc423 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -76,7 +76,7 @@ void flush_icache_range(unsigned long start, unsigned long stop) } EXPORT_SYMBOL(flush_icache_range); -#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) +#ifdef CONFIG_HIGHMEM /** * flush_dcache_icache_phys() - Flush a page by it's physical address * @physaddr: the physical address of the page @@ -115,7 +115,11 @@ static void flush_dcache_icache_phys(unsigned long physaddr) : "ctr", "memory"); } NOKPROBE_SYMBOL(flush_dcache_icache_phys) -#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) +#else +static void flush_dcache_icache_phys(unsigned long physaddr) +{ +} +#endif /* * This is called when a page has been modified by the kernel. 
@@ -185,18 +189,15 @@ void flush_dcache_icache_page(struct page *page) if (PageCompound(page)) return flush_dcache_icache_hugepage(page); -#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) - /* On 8xx there is no need to kmap since highmem is not supported */ - __flush_dcache_icache(page_address(page)); -#else - if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { + if (!PageHighMem(page)) { + __flush_dcache_icache(lowmem_page_address(page)); + } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { void *start = kmap_atomic(page); __flush_dcache_icache(start); kunmap_atomic(start); } else { flush_dcache_icache_phys(page_to_phys(page)); } -#endif } EXPORT_SYMBOL(flush_dcache_icache_page); -- cgit v1.2.3 From 67b8e6af191a6ed717be548307eb15048f8181d8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:31 +0000 Subject: powerpc/mem: Help GCC realise __flush_dcache_icache() flushes single pages 'And' the given page address with PAGE_MASK to help GCC. With the patch: 00000024 <__flush_dcache_icache>: 24: 54 63 00 26 rlwinm r3,r3,0,0,19 28: 39 40 00 40 li r10,64 2c: 7c 69 1b 78 mr r9,r3 30: 7d 49 03 a6 mtctr r10 34: 7c 00 48 6c dcbst 0,r9 38: 39 29 00 20 addi r9,r9,32 3c: 7c 00 48 6c dcbst 0,r9 40: 39 29 00 20 addi r9,r9,32 44: 42 00 ff f0 bdnz 34 <__flush_dcache_icache+0x10> 48: 7c 00 04 ac hwsync 4c: 39 20 00 40 li r9,64 50: 7d 29 03 a6 mtctr r9 54: 7c 00 1f ac icbi 0,r3 58: 38 63 00 20 addi r3,r3,32 5c: 7c 00 1f ac icbi 0,r3 60: 38 63 00 20 addi r3,r3,32 64: 42 00 ff f0 bdnz 54 <__flush_dcache_icache+0x30> 68: 7c 00 04 ac hwsync 6c: 4c 00 01 2c isync 70: 4e 80 00 20 blr Without the patch: 00000024 <__flush_dcache_icache>: 24: 54 6a 00 34 rlwinm r10,r3,0,0,26 28: 39 23 10 1f addi r9,r3,4127 2c: 7d 2a 48 50 subf r9,r10,r9 30: 55 29 d9 7f rlwinm. r9,r9,27,5,31 34: 41 82 00 94 beq c8 <__flush_dcache_icache+0xa4> 38: 71 28 00 01 andi. r8,r9,1 3c: 38 c9 ff ff addi r6,r9,-1 40: 7d 48 53 78 mr r8,r10 44: 7d 27 4b 78 mr r7,r9 48: 40 82 00 6c bne b4 <__flush_dcache_icache+0x90> 4c: 54 e7 f8 7e rlwinm r7,r7,31,1,31 50: 7c e9 03 a6 mtctr r7 54: 7c 00 40 6c dcbst 0,r8 58: 39 08 00 20 addi r8,r8,32 5c: 7c 00 40 6c dcbst 0,r8 60: 39 08 00 20 addi r8,r8,32 64: 42 00 ff f0 bdnz 54 <__flush_dcache_icache+0x30> 68: 7c 00 04 ac hwsync 6c: 71 28 00 01 andi. 
r8,r9,1 70: 39 09 ff ff addi r8,r9,-1 74: 40 82 00 2c bne a0 <__flush_dcache_icache+0x7c> 78: 55 29 f8 7e rlwinm r9,r9,31,1,31 7c: 7d 29 03 a6 mtctr r9 80: 7c 00 57 ac icbi 0,r10 84: 39 4a 00 20 addi r10,r10,32 88: 7c 00 57 ac icbi 0,r10 8c: 39 4a 00 20 addi r10,r10,32 90: 42 00 ff f0 bdnz 80 <__flush_dcache_icache+0x5c> 94: 7c 00 04 ac hwsync 98: 4c 00 01 2c isync 9c: 4e 80 00 20 blr a0: 7c 00 57 ac icbi 0,r10 a4: 2c 08 00 00 cmpwi r8,0 a8: 39 4a 00 20 addi r10,r10,32 ac: 40 82 ff cc bne 78 <__flush_dcache_icache+0x54> b0: 4b ff ff e4 b 94 <__flush_dcache_icache+0x70> b4: 7c 00 50 6c dcbst 0,r10 b8: 2c 06 00 00 cmpwi r6,0 bc: 39 0a 00 20 addi r8,r10,32 c0: 40 82 ff 8c bne 4c <__flush_dcache_icache+0x28> c4: 4b ff ff a4 b 68 <__flush_dcache_icache+0x44> c8: 7c 00 04 ac hwsync cc: 7c 00 04 ac hwsync d0: 4c 00 01 2c isync d4: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/23030822ea5cd0a122948b10226abe56602dc027.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 2d92cb6bc423..abeef69ed4e4 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -145,7 +145,7 @@ EXPORT_SYMBOL(flush_dcache_page); */ static void __flush_dcache_icache(void *p) { - unsigned long addr = (unsigned long)p; + unsigned long addr = (unsigned long)p & PAGE_MASK; clean_dcache_range(addr, addr + PAGE_SIZE); -- cgit v1.2.3 From 6c96020882b17fb6f4fbf7f8cef8c606460fc14d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:32 +0000 Subject: powerpc/mem: Inline flush_dcache_page() flush_dcache_page() is only a few lines, it is worth inlining. ia64, csky, mips, openrisc and riscv have a similar flush_dcache_page() and inline it. On pmac32_defconfig, we get a small size reduction. On ppc64_defconfig, we get a very small size increase. In both case that's in the noise (less than 0.1%). text data bss dec hex filename 18991155 5934744 1497624 26423523 19330e3 vmlinux64.before 18994829 5936732 1497624 26429185 1934701 vmlinux64.after 9150963 2467502 184548 11803013 b41985 vmlinux32.before 9149689 2467302 184548 11801539 b413c3 vmlinux32.after Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/21c417488b70b7629dae316539fb7bb8bdef4fdd.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cacheflush.h | 14 +++++++++++++- arch/powerpc/mm/cacheflush.c | 15 --------------- 2 files changed, 13 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 9110489ea411..7564dd4fd12b 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -30,7 +30,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) #endif /* CONFIG_PPC_BOOK3S_64 */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. 
+ */ +static inline void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} void flush_icache_range(unsigned long start, unsigned long stop); #define flush_icache_range flush_icache_range diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index abeef69ed4e4..d9eafa077c09 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -121,21 +121,6 @@ static void flush_dcache_icache_phys(unsigned long physaddr) } #endif -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. - */ -void flush_dcache_page(struct page *page) -{ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* avoid an atomic op if possible */ - if (test_bit(PG_dcache_clean, &page->flags)) - clear_bit(PG_dcache_clean, &page->flags); -} -EXPORT_SYMBOL(flush_dcache_page); - /** * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. * Note: this is necessary because the instruction cache does *not* -- cgit v1.2.3 From 7e9ab144c128df7660a2f33c9c6d1422fe798060 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Apr 2021 15:30:33 +0000 Subject: powerpc/mem: Use kmap_local_page() in flushing functions Flushing functions don't rely on preemption being disabled, so use kmap_local_page() instead of kmap_atomic(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b6a880ea0ec7886b51edbb4979c188be549231c0.1617895813.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/cacheflush.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index d9eafa077c09..63363787e000 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -152,16 +152,16 @@ static void flush_dcache_icache_hugepage(struct page *page) { int i; int nr = compound_nr(page); - void *start; if (!PageHighMem(page)) { for (i = 0; i < nr; i++) __flush_dcache_icache(lowmem_page_address(page + i)); } else { for (i = 0; i < nr; i++) { - start = kmap_atomic(page+i); + void *start = kmap_local_page(page + i); + __flush_dcache_icache(start); - kunmap_atomic(start); + kunmap_local(start); } } } @@ -177,9 +177,10 @@ void flush_dcache_icache_page(struct page *page) if (!PageHighMem(page)) { __flush_dcache_icache(lowmem_page_address(page)); } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { - void *start = kmap_atomic(page); + void *start = kmap_local_page(page); + __flush_dcache_icache(start); - kunmap_atomic(start); + kunmap_local(start); } else { flush_dcache_icache_phys(page_to_phys(page)); } @@ -225,9 +226,9 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len) { - unsigned long maddr; + void *maddr; - maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); - flush_icache_range(maddr, maddr + len); - kunmap(page); + maddr = kmap_local_page(page) + (addr & ~PAGE_MASK); + flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len); + kunmap_local(maddr); } -- cgit v1.2.3 From d738ee8d56de38c91610741f672ec5c1ffae76fc Mon Sep 17 00:00:00 
2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:42:02 +1000 Subject: powerpc/64e/interrupt: handle bad_page_fault in C With non-volatile registers saved on interrupt, bad_page_fault can now be called by do_page_fault. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-9-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64e.S | 6 ------ arch/powerpc/mm/fault.c | 5 +---- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 3c222a97f023..7c3654b0d0f4 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -937,12 +937,6 @@ storage_fault_common: ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault - cmpdi r3,0 - bne- 1f - b interrupt_return - mr r4,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault b interrupt_return /* diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 0c0b1c2cfb49..18e588fda43d 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -552,12 +552,9 @@ static long __do_page_fault(struct pt_regs *regs) if (likely(entry)) { instruction_pointer_set(regs, extable_fixup(entry)); return 0; - } else if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { + } else { __bad_page_fault(regs, err); return 0; - } else { - /* 32 and 64e handle the bad page fault in asm */ - return err; } } NOKPROBE_SYMBOL(__do_page_fault); -- cgit v1.2.3 From c45ba4f44f6b9c98a5fc1511d8853ad6843c877b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 16 Mar 2021 20:42:03 +1000 Subject: powerpc: clean up do_page_fault search_exception_tables + __bad_page_fault can be substituted with bad_page_fault, do_page_fault no longer needs to return a value to asm for any sub-architecture, and __bad_page_fault can be static. 
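The substitution works because bad_page_fault() now carries the extable search itself; roughly, as a sketch of the consolidated logic rather than the literal patch:

	void bad_page_fault(struct pt_regs *regs, int sig)
	{
		const struct exception_table_entry *entry;

		/* Are we prepared to handle this fault? */
		entry = search_exception_tables(regs->nip);
		if (entry)
			instruction_pointer_set(regs, extable_fixup(entry));
		else
			__bad_page_fault(regs, sig);
	}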
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210316104206.407354-10-npiggin@gmail.com --- arch/powerpc/include/asm/bug.h | 5 +---- arch/powerpc/include/asm/interrupt.h | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 16 +++++++--------- arch/powerpc/mm/fault.c | 27 ++++++++------------------- 4 files changed, 17 insertions(+), 33 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index d1635ffbb179..0b2162890d8b 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -111,11 +111,8 @@ #ifndef __ASSEMBLY__ struct pt_regs; -long do_page_fault(struct pt_regs *); -long hash__do_page_fault(struct pt_regs *); +void hash__do_page_fault(struct pt_regs *); void bad_page_fault(struct pt_regs *, int); -void __bad_page_fault(struct pt_regs *regs, int sig); -void do_bad_page_fault_segv(struct pt_regs *regs); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index a2f551938e64..b1b9919e0489 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -444,7 +444,7 @@ DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); /* fault.c */ -DECLARE_INTERRUPT_HANDLER_RET(do_page_fault); +DECLARE_INTERRUPT_HANDLER(do_page_fault); DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv); /* process.c */ diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 12de1906e97b..c1dace327e39 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1583,10 +1583,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) { unsigned long dsisr = regs->dsisr; - long err; - if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) - goto page_fault; + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) { + hash__do_page_fault(regs); + return 0; + } /* * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then @@ -1606,13 +1607,10 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) return 0; } - err = __do_hash_fault(regs); - if (err) { -page_fault: - err = hash__do_page_fault(regs); - } + if (__do_hash_fault(regs)) + hash__do_page_fault(regs); - return err; + return 0; } #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 18e588fda43d..5227def84b5e 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -539,36 +539,25 @@ retry: } NOKPROBE_SYMBOL(___do_page_fault); -static long __do_page_fault(struct pt_regs *regs) +static __always_inline void __do_page_fault(struct pt_regs *regs) { - const struct exception_table_entry *entry; long err; err = ___do_page_fault(regs, regs->dar, regs->dsisr); - if (likely(!err)) - return err; - - entry = search_exception_tables(regs->nip); - if (likely(entry)) { - instruction_pointer_set(regs, extable_fixup(entry)); - return 0; - } else { - __bad_page_fault(regs, err); - return 0; - } + if (unlikely(err)) + bad_page_fault(regs, err); } -NOKPROBE_SYMBOL(__do_page_fault); -DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +DEFINE_INTERRUPT_HANDLER(do_page_fault) { - return __do_page_fault(regs); + __do_page_fault(regs); } #ifdef CONFIG_PPC_BOOK3S_64 /* Same as do_page_fault 
but interrupt entry has already run in do_hash_fault */ -long hash__do_page_fault(struct pt_regs *regs) +void hash__do_page_fault(struct pt_regs *regs) { - return __do_page_fault(regs); + __do_page_fault(regs); } NOKPROBE_SYMBOL(hash__do_page_fault); #endif @@ -578,7 +567,7 @@ NOKPROBE_SYMBOL(hash__do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void __bad_page_fault(struct pt_regs *regs, int sig) +static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); -- cgit v1.2.3 From 7098f8f0cf0387443fd8702f24a8a2521d5133f3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 13 Apr 2021 23:54:27 +1000 Subject: powerpc/mm/radix: Make radix__change_memory_range() static The lkp bot pointed out that with W=1 we get: arch/powerpc/mm/book3s64/radix_pgtable.c:183:6: error: no previous prototype for 'radix__change_memory_range' Which is really saying that it could be static, so make it so. Reported-by: kernel test robot Signed-off-by: Michael Ellerman --- arch/powerpc/mm/book3s64/radix_pgtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 55f26c0e389e..50d536ecc89b 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -180,8 +180,8 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, } #ifdef CONFIG_STRICT_KERNEL_RWX -void radix__change_memory_range(unsigned long start, unsigned long end, - unsigned long clear) +static void radix__change_memory_range(unsigned long start, unsigned long end, + unsigned long clear) { unsigned long idx; pgd_t *pgdp; -- cgit v1.2.3 From 7153d4bf0b373428d0393c001019da4d0483fddb Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Wed, 14 Apr 2021 19:00:33 +0800 Subject: powerpc/traps: Enhance readability for trap types Define macros listing the ppc interrupt types in interrupt.h, and replace references to the raw trap hex values with these macros. The hex values are taken from arch/powerpc/kernel/exceptions-64e.S, arch/powerpc/kernel/exceptions-64s.S, arch/powerpc/kernel/head_*.S, arch/powerpc/kernel/head_booke.h and arch/powerpc/include/asm/kvm_asm.h.
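For illustration only (not part of the patch): a standalone C sketch of the pattern this series introduces, with macro values copied from the interrupt.h hunk below. Here pt_regs and TRAP() are simplified stand-ins for the kernel's definitions, so the sketch builds as ordinary userspace C.

/*
 * Sketch: compare TRAP(regs) against named interrupt types instead of
 * bare hex values. Macro values match the Book3S entries added below.
 */
#include <stdio.h>

#define INTERRUPT_SYSTEM_RESET  0x100
#define INTERRUPT_MACHINE_CHECK 0x200
#define INTERRUPT_DATA_STORAGE  0x300
#define INTERRUPT_INST_STORAGE  0x400
#define INTERRUPT_ALIGNMENT     0x600
#define INTERRUPT_PROGRAM       0x700
#define INTERRUPT_DECREMENTER   0x900

struct pt_regs { unsigned long trap; };    /* stand-in, not the kernel's */
#define TRAP(regs) ((regs)->trap & ~0xfUL) /* simplified TRAP() */

static const char *trap_name(const struct pt_regs *regs)
{
	switch (TRAP(regs)) {
	case INTERRUPT_SYSTEM_RESET:  return "system reset";
	case INTERRUPT_MACHINE_CHECK: return "machine check";
	case INTERRUPT_DATA_STORAGE:  return "data storage";
	case INTERRUPT_INST_STORAGE:  return "instruction storage";
	case INTERRUPT_ALIGNMENT:     return "alignment";
	case INTERRUPT_PROGRAM:       return "program check";
	case INTERRUPT_DECREMENTER:   return "decrementer";
	default:                      return "other";
	}
}

int main(void)
{
	struct pt_regs regs = { .trap = 0x700 };

	/* Prints "program check"; the reader no longer has to know 0x700. */
	printf("%s\n", trap_name(&regs));
	return 0;
}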
Signed-off-by: Xiongwei Song [mpe: Resolve conflicts in nmi_disables_ftrace(), fix 40x build] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1618398033-13025-1-git-send-email-sxwjean@me.com --- arch/powerpc/include/asm/interrupt.h | 52 ++++++++++++++++++++++++++++++++--- arch/powerpc/kernel/fadump.c | 2 +- arch/powerpc/kernel/interrupt.c | 2 +- arch/powerpc/kernel/process.c | 4 ++- arch/powerpc/kernel/traps.c | 6 ++-- arch/powerpc/kexec/crash.c | 3 +- arch/powerpc/mm/book3s64/hash_utils.c | 4 +-- arch/powerpc/mm/fault.c | 16 +++++------ arch/powerpc/perf/core-book3s.c | 5 ++-- arch/powerpc/xmon/xmon.c | 20 +++++++++----- 10 files changed, 84 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index b1b9919e0489..ed2c4042c3d1 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -9,6 +9,50 @@ #include #include +/* BookE/4xx */ +#define INTERRUPT_CRITICAL_INPUT 0x100 + +/* BookE */ +#define INTERRUPT_DEBUG 0xd00 +#ifdef CONFIG_BOOKE +#define INTERRUPT_PERFMON 0x260 +#define INTERRUPT_DOORBELL 0x280 +#endif + +/* BookS/4xx/8xx */ +#define INTERRUPT_MACHINE_CHECK 0x200 + +/* BookS/8xx */ +#define INTERRUPT_SYSTEM_RESET 0x100 + +/* BookS */ +#define INTERRUPT_DATA_SEGMENT 0x380 +#define INTERRUPT_INST_SEGMENT 0x480 +#define INTERRUPT_TRACE 0xd00 +#define INTERRUPT_H_DATA_STORAGE 0xe00 +#define INTERRUPT_H_FAC_UNAVAIL 0xf80 +#ifdef CONFIG_PPC_BOOK3S +#define INTERRUPT_DOORBELL 0xa00 +#define INTERRUPT_PERFMON 0xf00 +#endif + +/* BookE/BookS/4xx/8xx */ +#define INTERRUPT_DATA_STORAGE 0x300 +#define INTERRUPT_INST_STORAGE 0x400 +#define INTERRUPT_ALIGNMENT 0x600 +#define INTERRUPT_PROGRAM 0x700 +#define INTERRUPT_SYSCALL 0xc00 + +/* BookE/BookS/44x */ +#define INTERRUPT_FP_UNAVAIL 0x800 + +/* BookE/BookS/44x/8xx */ +#define INTERRUPT_DECREMENTER 0x900 + +#ifndef INTERRUPT_PERFMON +#define INTERRUPT_PERFMON 0x0 +#endif + static inline void nap_adjust_return(struct pt_regs *regs) { #ifdef CONFIG_PPC_970_NAP @@ -65,7 +109,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != 0x700) + if (TRAP(regs) != INTERRUPT_PROGRAM) CT_WARN_ON(ct_state() != CONTEXT_KERNEL); } #endif @@ -131,13 +175,13 @@ static inline bool nmi_disables_ftrace(struct pt_regs *regs) { /* Allow DEC and PMI to be traced when they are soft-NMI */ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { - if (TRAP(regs) == 0x900) + if (TRAP(regs) == INTERRUPT_DECREMENTER) return false; - if (TRAP(regs) == 0xf00) + if (TRAP(regs) == INTERRUPT_PERFMON) return false; } if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { - if (TRAP(regs) == 0x260) + if (TRAP(regs) == INTERRUPT_PERFMON) return false; } diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index eddf362caedc..b55b4c23f3b6 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -728,7 +728,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) * If we came in via system reset, wait a while for the secondary * CPUs to enter. 
*/ - if (TRAP(&(fdh->regs)) == 0x100) { + if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) { msecs = CRASH_TIMEOUT; while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) mdelay(1); diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index b953bb5027e6..e4559f8914eb 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -447,7 +447,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != 0x700) + if (TRAP(regs) != INTERRUPT_PROGRAM) CT_WARN_ON(ct_state() == CONTEXT_USER); kuap = kuap_get_and_assert_locked(); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5269a0d737ed..89e34aa273e2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1467,7 +1467,9 @@ static void __show_regs(struct pt_regs *regs) trap = TRAP(regs); if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); - if (trap == 0x200 || trap == 0x300 || trap == 0x600) { + if (trap == INTERRUPT_MACHINE_CHECK || + trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_ALIGNMENT) { if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); else diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2babed7a6a29..b4ab95c9e94a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -221,7 +221,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, /* * system_reset_excption handles debugger, crash dump, panic, for 0x100 */ - if (TRAP(regs) == 0x100) + if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) return; crash_fadump(regs, "die oops"); @@ -289,7 +289,7 @@ void die(const char *str, struct pt_regs *regs, long err) /* * system_reset_excption handles debugger, crash dump, panic, for 0x100 */ - if (TRAP(regs) != 0x100) { + if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) { if (debugger(regs)) return; } @@ -1691,7 +1691,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) u8 status; bool hv; - hv = (TRAP(regs) == 0xf80); + hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL); if (hv) value = mfspr(SPRN_HFSCR); else diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index c9a889880214..0196d0c211ac 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * The primary CPU waits a while for all secondary CPUs to enter. This is to @@ -336,7 +337,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * If we came in via system reset, wait a while for the secondary * CPUs to enter. 
*/ - if (TRAP(regs) == 0x100) + if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) mdelay(PRIMARY_TIMEOUT); crash_kexec_prepare_cpus(crashing_cpu); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index c1dace327e39..96d9aa164007 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1156,7 +1156,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) /* page is dirty */ if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { - if (trap == 0x400) { + if (trap == INTERRUPT_INST_STORAGE) { flush_dcache_icache_page(page); set_bit(PG_dcache_clean, &page->flags); } else @@ -1556,7 +1556,7 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (TRAP(regs) == 0x400) + if (TRAP(regs) == INTERRUPT_INST_STORAGE) access |= _PAGE_EXEC; err = hash_page_mm(mm, ea, access, TRAP(regs), flags); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 5227def84b5e..34f641d4a2fe 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -197,7 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address, bool is_write) { - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | @@ -391,7 +391,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int flags = FAULT_FLAG_DEFAULT; - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; int is_user = user_mode(regs); int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; @@ -574,20 +574,20 @@ static void __bad_page_fault(struct pt_regs *regs, int sig) /* kernel has accessed a bad area */ switch (TRAP(regs)) { - case 0x300: - case 0x380: - case 0xe00: + case INTERRUPT_DATA_STORAGE: + case INTERRUPT_DATA_SEGMENT: + case INTERRUPT_H_DATA_STORAGE: pr_alert("BUG: %s on %s at 0x%08lx\n", regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : "Unable to handle kernel data access", is_write ? "write" : "read", regs->dar); break; - case 0x400: - case 0x480: + case INTERRUPT_INST_STORAGE: + case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n"); break; - case 0x600: + case INTERRUPT_ALIGNMENT: pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n", regs->dar); break; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b17358e8dc12..3f223b0bf5cb 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include "internal.h" @@ -168,7 +169,7 @@ static bool regs_use_siar(struct pt_regs *regs) * they have not been setup using perf_read_regs() and so regs->result * is something random. */ - return ((TRAP(regs) == 0xf00) && regs->result); + return ((TRAP(regs) == INTERRUPT_PERFMON) && regs->result); } /* @@ -347,7 +348,7 @@ static inline void perf_read_regs(struct pt_regs *regs) * hypervisor samples as well as samples in the kernel with * interrupts off hence the userspace check. 
*/ - if (TRAP(regs) != 0xf00) + if (TRAP(regs) != INTERRUPT_PERFMON) use_siar = 0; else if ((ppmu->flags & PPMU_NO_SIAR)) use_siar = 0; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 361534f67082..a619b9ed8458 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -54,6 +54,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include @@ -605,7 +606,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) * debugger break (IPI). This is similar to * crash_kexec_secondary(). */ - if (TRAP(regs) != 0x100 || !wait_for_other_cpus(ncpus)) + if (TRAP(regs) != INTERRUPT_SYSTEM_RESET || !wait_for_other_cpus(ncpus)) smp_send_debugger_break(); wait_for_other_cpus(ncpus); @@ -615,7 +616,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (!locked_down) { /* for breakpoint or single step, print curr insn */ - if (bp || TRAP(regs) == 0xd00) + if (bp || TRAP(regs) == INTERRUPT_TRACE) ppc_inst_dump(regs->nip, 1, 0); printf("enter ? for help\n"); } @@ -684,7 +685,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) disable_surveillance(); if (!locked_down) { /* for breakpoint or single step, print current insn */ - if (bp || TRAP(regs) == 0xd00) + if (bp || TRAP(regs) == INTERRUPT_TRACE) ppc_inst_dump(regs->nip, 1, 0); printf("enter ? for help\n"); } @@ -1769,9 +1770,12 @@ static void excprint(struct pt_regs *fp) printf(" sp: %lx\n", fp->gpr[1]); printf(" msr: %lx\n", fp->msr); - if (trap == 0x300 || trap == 0x380 || trap == 0x600 || trap == 0x200) { + if (trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_DATA_SEGMENT || + trap == INTERRUPT_ALIGNMENT || + trap == INTERRUPT_MACHINE_CHECK) { printf(" dar: %lx\n", fp->dar); - if (trap != 0x380) + if (trap != INTERRUPT_DATA_SEGMENT) printf(" dsisr: %lx\n", fp->dsisr); } @@ -1785,7 +1789,7 @@ static void excprint(struct pt_regs *fp) current->pid, current->comm); } - if (trap == 0x700) + if (trap == INTERRUPT_PROGRAM) print_bug_trap(fp); printf(linux_banner); @@ -1837,7 +1841,9 @@ static void prregs(struct pt_regs *fp) printf("ctr = "REG" xer = "REG" trap = %4lx\n", fp->ctr, fp->xer, fp->trap); trap = TRAP(fp); - if (trap == 0x300 || trap == 0x380 || trap == 0x600) + if (trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_DATA_SEGMENT || + trap == INTERRUPT_ALIGNMENT) printf("dar = "REG" dsisr = %.8lx\n", fp->dar, fp->dsisr); } -- cgit v1.2.3 From 39352430aaa05fbe4ba710231c70b334513078f2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Apr 2021 13:08:43 +0000 Subject: powerpc: Move copy_inst_from_kernel_nofault() When probe_kernel_read_inst() was created, there was no good place to put it, so a file called lib/inst.c was dedicated to it. Since then, probe_kernel_read_inst() has been renamed copy_inst_from_kernel_nofault(). And mm/maccess.c didn't exist at that time. Today, mm/maccess.c hosts copy_from_kernel_nofault() and related helpers, making it the natural home for this function.
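Before the diff, a hedged usage sketch of what this helper gives its callers: a fault-tolerant read of kernel text, including the suffix word of a PPC64 prefixed instruction. dump_insn_at() is a hypothetical caller invented for illustration; copy_inst_from_kernel_nofault() is taken from the patch below, and ppc_inst_val() is assumed from the kernel tree of this period.

/* Hypothetical caller, e.g. a probe or code-patching path (kernel context). */
#include <linux/printk.h>
#include <asm/inst.h>

static int dump_insn_at(unsigned long addr)
{
	struct ppc_inst inst;
	int err;

	/* Returns an error instead of oopsing if addr is bad or unmapped. */
	err = copy_inst_from_kernel_nofault(&inst, (struct ppc_inst *)addr);
	if (err)
		return err;

	/* ppc_inst_val() yields the first (or only) 32-bit word. */
	pr_info("insn at 0x%lx: 0x%08x\n", addr, ppc_inst_val(inst));
	return 0;
}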
Move copy_inst_from_kernel_nofault() into mm/maccess.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9655d8957313906b77b8db5700a0e33ce06f45e5.1618405715.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/Makefile | 2 +- arch/powerpc/lib/inst.c | 26 -------------------------- arch/powerpc/mm/maccess.c | 21 +++++++++++++++++++++ 3 files changed, 22 insertions(+), 27 deletions(-) delete mode 100644 arch/powerpc/lib/inst.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index d4efc182662a..f2c690ee75d1 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o test_code-patching.o +obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c deleted file mode 100644 index e554d1357f2f..000000000000 --- a/arch/powerpc/lib/inst.c +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2020, IBM Corporation. - */ - -#include -#include -#include -#include - -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) -{ - unsigned int val, suffix; - int err; - - err = copy_from_kernel_nofault(&val, src, sizeof(val)); - if (err) - return err; - if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index fa9a7a718fc6..a3c30a884076 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -3,7 +3,28 @@ #include #include +#include +#include +#include + bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return is_kernel_addr((unsigned long)unsafe_src); } + +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) +{ + unsigned int val, suffix; + int err; + + err = copy_from_kernel_nofault(&val, src, sizeof(val)); + if (err) + return err; + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} -- cgit v1.2.3 From 0f197ddce403af33aa7f15af55644549778a9988 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 22 Apr 2021 01:17:32 +1000 Subject: powerpc/64s: Fix mm_cpumask memory ordering comment The memory ordering comment no longer applies, because mm_ctx_id is no longer used anywhere. At best it has always been difficult to follow. It's better to consider the load on which the slbmte depends, which the MMU depends on before it can start loading TLBs, rather than a store which may or may not have a subsequent dependency chain to the slbmte. So update the comment to use the load of the mm's user context ID. This is much more analogous to the radix ordering too, which is good.
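Condensed, for orientation: the shape of the code path the rewritten comment documents. This is a sketch distilled from switch_mm_irqs_off(), not a literal copy; kernel context is assumed, the radix load is shown concretely while the hash case is noted in a comment, and the inactive-mm and error paths are omitted.

#include <linux/mm_types.h>
#include <linux/cpumask.h>
#include <asm/barrier.h>
#include <asm/reg.h>

/* Sketch of the store -> smp_mb() -> load sequence on the switch side. */
static inline void switch_mm_sketch(struct mm_struct *next, int cpu)
{
	/* Store: publish that this CPU now runs 'next'. */
	cpumask_set_cpu(cpu, mm_cpumask(next));

	/*
	 * The smp_mb() this patch re-documents: order the store above
	 * against the loads below, which the MMU depends on before it
	 * can begin loading translations for 'next'.
	 */
	smp_mb();

	/* Radix: the load the barrier orders, then it reaches SPRN_PID. */
	mtspr(SPRN_PID, READ_ONCE(next->context.id));

	/* Hash: instead, the first slb_cache load in switch_slb(), or the
	 * context load for the VSID hash in the SLB fault handler. */
}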
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210421151733.212858-1-npiggin@gmail.com --- arch/powerpc/mm/mmu_context.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 18f20da0d348..a857af401738 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -43,24 +43,26 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, /* * This full barrier orders the store to the cpumask above vs - * a subsequent operation which allows this CPU to begin loading - * translations for next. + * a subsequent load which allows this CPU/MMU to begin loading + * translations for 'next' from page table PTEs into the TLB. * - * When using the radix MMU that operation is the load of the + * When using the radix MMU, that operation is the load of the * MMU context id, which is then moved to SPRN_PID. * * For the hash MMU it is either the first load from slb_cache - * in switch_slb(), and/or the store of paca->mm_ctx_id in - * copy_mm_to_paca(). + * in switch_slb() to preload the SLBs, or the load of + * get_user_context which loads the context for the VSID hash + * to insert a new SLB, in the SLB fault handler. * * On the other side, the barrier is in mm/tlb-radix.c for - * radix which orders earlier stores to clear the PTEs vs - * the load of mm_cpumask. And pte_xchg which does the same - * thing for hash. + * radix which orders earlier stores to clear the PTEs before + * the load of mm_cpumask to check which CPU TLBs should be + * flushed. For hash, pte_xchg to clear the PTE includes the + * barrier. * - * This full barrier is needed by membarrier when switching - * between processes after store to rq->curr, before user-space - * memory accesses. + * This full barrier is also needed by membarrier when + * switching between processes after store to rq->curr, before + * user-space memory accesses. */ smp_mb(); -- cgit v1.2.3
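To make the forbidden interleaving concrete, here is a standalone C11 store-buffering litmus (a userspace sketch, not kernel code; the thread and variable names are invented). Each thread plays one side of the pairing described above: the switcher publishes its mm_cpumask bit and then loads the "PTE" state, while the flusher clears the "PTE" and then loads the cpumask. With both seq_cst fences standing in for smp_mb(), the outcome where both loads see 0 (the stale-TLB window) cannot occur.

/* build: cc -std=c11 -pthread sb_litmus.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int cpumask_bit;	/* models this CPU's bit in mm_cpumask */
static atomic_int pte_cleared;	/* models the PTE-clearing store */
static int saw_pte, saw_mask;

static void *switcher(void *unused)	/* models switch_mm_irqs_off() */
{
	atomic_store_explicit(&cpumask_bit, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the smp_mb() */
	saw_pte = atomic_load_explicit(&pte_cleared, memory_order_relaxed);
	return NULL;
}

static void *flusher(void *unused)	/* models the TLB-flush side */
{
	atomic_store_explicit(&pte_cleared, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* barrier in TLB flush */
	saw_mask = atomic_load_explicit(&cpumask_bit, memory_order_relaxed);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, switcher, NULL);
	pthread_create(&b, NULL, flusher, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* With both fences, at least one thread must see the other's store. */
	if (!saw_pte && !saw_mask)
		puts("impossible with full barriers: stale TLB window");
	else
		puts("ordering upheld");
	return 0;
}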