From 6c284228eb356a1ec62a704b4d2329711831eaed Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Mon, 3 Jun 2019 08:20:28 +0000
Subject: powerpc: Fix kexec failure on book3s/32

In the old days, _PAGE_EXEC didn't exist on 6xx aka book3s/32.
Therefore, although __mapin_ram_chunk() was already mapping kernel
text with PAGE_KERNEL_TEXT and the rest with PAGE_KERNEL, the entire
memory was executable. Part of the memory (the first 512 kbytes) was
mapped with BATs instead of page tables, but it was also entirely
mapped as executable.

In commit 385e89d5b20f ("powerpc/mm: add exec protection on
powerpc 603"), we started adding exec protection to some 6xx, namely
the 603, for pages mapped via pagetables.

Then, in commit 63b2bc619565 ("powerpc/mm/32s: Use BATs for
STRICT_KERNEL_RWX"), the exec protection was extended to BAT mapped
memory, so that really only the kernel text could be executed.

The problem here is that kexec is based on copying some code into the
upper part of memory and then executing it from there in order to
install a fresh new kernel at its definitive location.

However, the code is position independent, and the first part of it is
just there to deactivate the MMU and jump to the second part. So it is
possible to run this first part in place instead of running the copy.
Once the MMU is off, there is no protection anymore and the second
part of the code will just run as before.

Reported-by: Aaro Koskinen
Fixes: 63b2bc619565 ("powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX")
Cc: stable@vger.kernel.org # v5.1+
Signed-off-by: Christophe Leroy
Tested-by: Aaro Koskinen
Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/kexec.h       | 3 +++
 arch/powerpc/kernel/machine_kexec_32.c | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 4a585cba1787..c68476818753 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -94,6 +94,9 @@ static inline bool kdump_in_progress(void)
 	return crashing_cpu >= 0;
 }
 
+void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer,
+			 unsigned long start_address) __noreturn;
+
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
 
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index affe5dcce7f4..2b160d68db49 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -30,7 +30,6 @@ typedef void (*relocate_new_kernel_t)(
  */
 void default_machine_kexec(struct kimage *image)
 {
-	extern const unsigned char relocate_new_kernel[];
 	extern const unsigned int relocate_new_kernel_size;
 	unsigned long page_list;
 	unsigned long reboot_code_buffer, reboot_code_buffer_phys;
@@ -58,6 +57,9 @@ void default_machine_kexec(struct kimage *image)
 			reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 
 	printk(KERN_INFO "Bye!\n");
+	if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
+		relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
+
 	/* now call it */
 	rnk = (relocate_new_kernel_t) reboot_code_buffer;
 	(*rnk)(page_list, reboot_code_buffer_phys, image->start);
-- cgit v1.2.3


From 33258a1db165cf43a9e6382587ad06e9b7f8187c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Fri, 7 Jun 2019 13:56:35 +1000
Subject: powerpc/64s: Fix THP PMD collapse serialisation

Commit 1b2443a547f9 ("powerpc/book3s64: Avoid multiple endian
conversion in pte helpers") changed the actual bitwise tests in
pte_access_permitted
by using the pte_write() and pte_present() helpers rather than raw
bitwise tests of the _PAGE_WRITE and _PAGE_PRESENT bits.

With this change, pte_present() now returns true for PTEs which are
!_PAGE_PRESENT and _PAGE_INVALID, which is the combination used by
pmdp_invalidate() to synchronize access from lock-free lookups.
pte_access_permitted() is used by pmd_access_permitted(), so allowing
GUP lock-free access to proceed with such PTEs breaks this
synchronisation.

This bug has been observed on a host using the hash page table MMU,
with random crashes and corruption in guests, usually together with
bad PMD messages in the host.

Fix this by adding an explicit check in pmd_access_permitted(), and by
documenting the condition.

The pte_write() change should be okay, and would prevent GUP from
falling back to the slow path when encountering savedwrite PTEs, which
matches what x86 (that does not implement savedwrite) does.

Fixes: 1b2443a547f9 ("powerpc/book3s64: Avoid multiple endian conversion in pte helpers")
Cc: stable@vger.kernel.org # v4.20+
Signed-off-by: Nicholas Piggin
Reviewed-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 30 ++++++++++++++++++++++++++++
 arch/powerpc/mm/book3s64/pgtable.c           |  3 +++
 2 files changed, 33 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 7dede2e34b70..ccf00a8b98c6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -876,6 +876,23 @@ static inline int pmd_present(pmd_t pmd)
 	return false;
 }
 
+static inline int pmd_is_serializing(pmd_t pmd)
+{
+	/*
+	 * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear
+	 * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate).
+	 *
+	 * This condition may also occur when flushing a pmd while flushing
+	 * it (see ptep_modify_prot_start), so callers must ensure this
+	 * case is fine as well.
+	 */
+	if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) ==
+						cpu_to_be64(_PAGE_INVALID))
+		return true;
+
+	return false;
+}
+
 static inline int pmd_bad(pmd_t pmd)
 {
 	if (radix_enabled())
@@ -1092,6 +1109,19 @@ static inline int pmd_protnone(pmd_t pmd)
 #define pmd_access_permitted pmd_access_permitted
 static inline bool pmd_access_permitted(pmd_t pmd, bool write)
 {
+	/*
+	 * pmdp_invalidate sets this combination (which is not caught by
+	 * !pte_present() check in pte_access_permitted), to prevent
+	 * lock-free lookups, as part of the serialize_against_pte_lookup()
+	 * synchronisation.
+	 *
+	 * This also catches the case where the PTE's hardware PRESENT bit is
+	 * cleared while TLB is flushed, which is suboptimal but should not
+	 * be frequent.
+	 */
+	if (pmd_is_serializing(pmd))
+		return false;
+
 	return pte_access_permitted(pmd_pte(pmd), write);
 }
 
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 16bda049187a..ff98b663c83e 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -116,6 +116,9 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 	/*
 	 * This ensures that generic code that rely on IRQ disabling
 	 * to prevent a parallel THP split work as expected.
+	 *
+	 * Marking the entry with _PAGE_INVALID && ~_PAGE_PRESENT requires
+	 * a special case check in pmd_access_permitted.
 	 */
 	serialize_against_pte_lookup(vma->vm_mm);
 	return __pmd(old_pmd);
-- cgit v1.2.3


From a00196a272161338d4b1d66ec69e3d57c6b280e0 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Fri, 7 Jun 2019 13:56:36 +1000
Subject: powerpc/64s: __find_linux_pte() synchronization vs pmdp_invalidate()

The change to pmdp_invalidate() to mark the pmd with _PAGE_INVALID
broke the synchronisation against lock-free lookups:
__find_linux_pte()'s pmd_none() check no longer returns true for such
cases. Fix this by adding a check for this condition as well.

Fixes: da7ad366b497 ("powerpc/mm/book3s: Update pmd_present to look at _PAGE_PRESENT bit")
Cc: stable@vger.kernel.org # v4.20+
Suggested-by: Aneesh Kumar K.V
Signed-off-by: Nicholas Piggin
Reviewed-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/pgtable.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index db4a6253df92..533fc6fa6726 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -372,13 +372,25 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 	pdshift = PMD_SHIFT;
 	pmdp = pmd_offset(&pud, ea);
 	pmd = READ_ONCE(*pmdp);
+
 	/*
-	 * A hugepage collapse is captured by pmd_none, because
-	 * it mark the pmd none and do a hpte invalidate.
+	 * A hugepage collapse is captured by this condition, see
+	 * pmdp_collapse_flush.
 	 */
 	if (pmd_none(pmd))
 		return NULL;
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * A hugepage split is captured by this condition, see
+	 * pmdp_invalidate.
+	 *
+	 * Huge page modification can be caught here too.
+	 */
+	if (pmd_is_serializing(pmd))
+		return NULL;
+#endif
+
 	if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
 		if (is_thp)
 			*is_thp = true;
-- cgit v1.2.3


From c21f5a9ed85ca3e914ca11f421677ae9ae0d04b0 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Mon, 3 Jun 2019 13:00:51 +0000
Subject: powerpc/32s: fix booting with CONFIG_PPC_EARLY_DEBUG_BOOTX

When booting through OF, setup_disp_bat() does nothing because
disp_BAT is not set. By chance, it used to work because the BOOTX
buffer is mapped 1:1 at address 0x81000000 by the bootloader, and
btext_setup_display() sets the virt addr the same as the phys addr.
But since commit 215b823707ce ("powerpc/32s: set up an early static
hash table for KASAN."), a temporary page table overrides the
bootloader mapping.

This 0x81000000 is also problematic with the newly implemented Kernel
Userspace Access Protection (KUAP) because it is within the user
address space.

This patch fixes those issues by setting disp_BAT through a call to
btext_prepare_BAT(), allowing setup_disp_bat() to properly set up BAT3
for early BOOTX screen buffer access.
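
For reference, the intended flow after this patch is roughly as
follows (a simplified sketch, not literal kernel code; the real pieces
live in prom_init.c, btext.c and head_32.S):

	/* prom_init.c: a display was found while running under OF */
	btext_setup_display(width, height, 8, pitch, addr);
	btext_prepare_BAT();	/* new: record the mapping in disp_BAT */

	/*
	 * head_32.S: setup_disp_bat() later finds disp_BAT set and loads
	 * it into BAT3, so the btext buffer stays mapped even after the
	 * early hash table replaces the bootloader's 1:1 mapping.
	 */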
Reported-by: Mathieu Malaterre
Fixes: 215b823707ce ("powerpc/32s: set up an early static hash table for KASAN.")
Signed-off-by: Christophe Leroy
Tested-by: Mathieu Malaterre
Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/btext.h       | 4 ++++
 arch/powerpc/kernel/prom_init.c        | 1 +
 arch/powerpc/kernel/prom_init_check.sh | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/btext.h b/arch/powerpc/include/asm/btext.h
index 3ffad030393c..461b0f193864 100644
--- a/arch/powerpc/include/asm/btext.h
+++ b/arch/powerpc/include/asm/btext.h
@@ -13,7 +13,11 @@ extern void btext_update_display(unsigned long phys, int width, int height,
 				 int depth, int pitch);
 extern void btext_setup_display(int width, int height, int depth, int pitch,
 				unsigned long address);
+#ifdef CONFIG_PPC32
 extern void btext_prepare_BAT(void);
+#else
+static inline void btext_prepare_BAT(void) { }
+#endif
 extern void btext_map(void);
 extern void btext_unmap(void);
 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 00682b8df330..61795c39de21 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2340,6 +2340,7 @@ static void __init prom_check_displays(void)
 			prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
 				    width, height, pitch, addr);
 			btext_setup_display(width, height, 8, pitch, addr);
+			btext_prepare_BAT();
 		}
 #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
 	}
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 4cac45cb5de5..acf63ad8f4ce 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -27,7 +27,7 @@ fi
 WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
 _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
 __secondary_hold_acknowledge __secondary_hold_spinloop __start
-logo_linux_clut224
+logo_linux_clut224 btext_prepare_BAT
 reloc_got2 kernstart_addr memstart_addr linux_banner _stext
 __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
-- cgit v1.2.3


From ca72d88378b2f2444d3ec145dd442d449d3fefbc Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Wed, 12 Jun 2019 23:35:07 +1000
Subject: powerpc/mm/64s/hash: Reallocate context ids on fork

When using the Hash Page Table (HPT) MMU, userspace memory mappings
are managed at two levels. Firstly in the Linux page tables, much like
other architectures, and secondly in the SLB (Segment Lookaside
Buffer) and HPT. It's the SLB and HPT that are actually used by the
hardware to do translations.

As part of the series adding support for 4PB user virtual address
space using the hash MMU, we added support for allocating multiple
"context ids" per process, one for each 512TB chunk of address space.
These are tracked in an array called extended_id in the mm_context_t
of a process that has done a mapping above 512TB.

If such a process forks (i.e. clone(2) without CLONE_VM set), its mm is
copied, including the mm_context_t, and then init_new_context() is
called to reinitialise parts of the mm_context_t as appropriate to
separate the address spaces of the two processes.

The key step in ensuring the two processes have separate address
spaces is to allocate a new context id for the process; this is done
at the beginning of hash__init_new_context(). If we didn't allocate a
new context id then the two processes would share mappings as far as
the SLB and HPT are concerned, even though their Linux page tables
would be separate.
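
For background, the mapping from an effective address to a context id
slot can be sketched as follows (illustrative only; each context
covers a 512TB = 2^49 byte range, and slot 0 aliases ctx->id):

	#define MAX_EA_BITS_PER_CONTEXT	49	/* 512TB per context id */

	/* which extended_id[] slot covers a given effective address */
	static inline int ea_to_context_index(unsigned long ea)
	{
		return ea >> MAX_EA_BITS_PER_CONTEXT;	/* 0 => ctx->id */
	}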
For mappings above 512TB, which use the extended_id array, we
neglected to allocate new context ids on fork, meaning the parent and
child use the same ids and therefore share those mappings even though
they're supposed to be separate.

This can lead to the parent seeing writes done by the child, which is
essentially memory corruption.

There is an additional exposure: if the child process exits, all its
context ids are freed, including the context ids that are still in use
by the parent for mappings above 512TB. One or more of those ids can
then be reallocated to a third process; that process can then
read/write to the parent's mappings above 512TB. Additionally, if the
freed id is used for the third process's primary context id, then the
parent is able to read/write to the third process's mappings *below*
512TB.

All of these are fundamental failures to enforce separation between
processes. The only mitigating factor is that the bug only occurs if a
process creates mappings above 512TB, and most applications still do
not create such mappings.

Only machines using the hash page table MMU are affected, e.g. PowerPC
970 (G5), PA6T, Power5/6/7/8/9. By default Power9 bare metal machines
(powernv) use the Radix MMU and are not affected, unless the machine
has been explicitly booted in HPT mode (using disable_radix on the
kernel command line). KVM guests on Power9 may be affected if the host
or guest is configured to use the HPT MMU. LPARs under PowerVM on
Power9 are affected as they always use the HPT MMU. Kernels built with
PAGE_SIZE=4K are not affected.

The fix is relatively simple: we need to reallocate context ids for
all extended mappings on fork.

Fixes: f384796c40dc ("powerpc/mm: Add support for handling > 512TB address in SLB miss")
Cc: stable@vger.kernel.org # v4.17+
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/mmu_context_book3s64.c | 46 +++++++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index f720c5cc0b5e..8751ae2e2d04 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -55,14 +55,48 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id);
 
 void slb_setup_new_exec(void);
 
+static int realloc_context_ids(mm_context_t *ctx)
+{
+	int i, id;
+
+	/*
+	 * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
+	 * there wasn't one allocated previously (which happens in the exec
+	 * case where ctx is newly allocated).
+	 *
+	 * We have to be a bit careful here. We must keep the existing ids in
+	 * the array, so that we can test if they're non-zero to decide if we
+	 * need to allocate a new one. However in case of error we must free the
+	 * ids we've allocated but *not* any of the existing ones (or risk a
+	 * UAF). That's why we decrement i at the start of the error handling
+	 * loop, to skip the id that we just tested but couldn't reallocate.
+	 */
+	for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
+		if (i == 0 || ctx->extended_id[i]) {
+			id = hash__alloc_context_id();
+			if (id < 0)
+				goto error;
+
+			ctx->extended_id[i] = id;
+		}
+	}
+
+	/* The caller expects us to return id */
+	return ctx->id;
+
+error:
+	for (i--; i >= 0; i--) {
+		if (ctx->extended_id[i])
+			ida_free(&mmu_context_ida, ctx->extended_id[i]);
+	}
+
+	return id;
+}
+
 static int hash__init_new_context(struct mm_struct *mm)
 {
 	int index;
 
-	index = hash__alloc_context_id();
-	if (index < 0)
-		return index;
-
 	/*
 	 * The old code would re-promote on fork, we don't do that when using
 	 * slices as it could cause problem promoting slices that have been
@@ -80,6 +114,10 @@ static int hash__init_new_context(struct mm_struct *mm)
 	if (mm->context.id == 0)
 		slice_init_new_context_exec(mm);
 
+	index = realloc_context_ids(&mm->context);
+	if (index < 0)
+		return index;
+
 	subpage_prot_init_new_context(mm);
 
 	pkey_mm_init(mm);
-- cgit v1.2.3


From b7f8b440f3001cc1775c028f0a783786113c2ae3 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Tue, 11 Jun 2019 15:47:20 +0000
Subject: powerpc/32s: fix initial setup of segment registers on secondary CPU

The patch referenced below moved the loading of segment registers out
of load_up_mmu() in order to do it earlier in the boot sequence.
However, the secondary CPU still needs it to be done when loading up
the MMU.

Reported-by: Erhard F.
Fixes: 215b823707ce ("powerpc/32s: set up an early static hash table for KASAN")
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kernel/head_32.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 755fab9641d6..c82947a3892a 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -757,6 +757,7 @@ __secondary_start:
 	stw	r0,0(r3)
 
 	/* load up the MMU */
+	bl	load_segment_registers
 	bl	load_up_mmu
 
 	/* ptr to phys current thread */
-- cgit v1.2.3


From e8732ffa2e096d433c3f2349b871d43ed0d39f5c Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 13 Jun 2019 13:52:30 +0000
Subject: powerpc/booke: fix fast syscall entry on SMP

Use r10 instead of r9 to calculate the CPU offset, as r9 contains the
value from SRR1, which is used later.

Fixes: 1a4b739bbb4f ("powerpc/32: implement fast entry for syscalls on BOOKE")
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kernel/head_booke.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index bfeb469e8106..9f9e0d109d7d 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -145,9 +145,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 	tophys(r11,r11)
 	addi	r11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
-	lwz	r9,TASK_CPU(r2)
-	slwi	r9,r9,3
-	add	r11,r11,r9
+	lwz	r10, TASK_CPU(r2)
+	slwi	r10, r10, 3
+	add	r11, r11, r10
 #endif
 	lwz	r12,0(r11)
 	mtspr	SPRN_DBCR0,r12
-- cgit v1.2.3


From 82f6e266f8123d7938713c0e10c03aa655b3e68a Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 23 May 2019 08:39:27 +0000
Subject: powerpc/32: fix build failure on book3e with KVM

The build failure was introduced by the commit identified below: a
missed macro expansion resulted in the wrong function name being
called.
  arch/powerpc/kernel/head_fsl_booke.o: In function `SystemCall':
  arch/powerpc/kernel/head_fsl_booke.S:416: undefined reference to
  `kvmppc_handler_BOOKE_INTERRUPT_SYSCALL_SPRN_SRR1'
  Makefile:1052: recipe for target 'vmlinux' failed

The called function should be kvmppc_handler_8_0x01B(). This patch
fixes it.

Reported-by: Paul Mackerras
Fixes: 1a4b739bbb4f ("powerpc/32: implement fast entry for syscalls on BOOKE")
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kernel/head_booke.h     | 4 ++--
 arch/powerpc/kernel/head_fsl_booke.S | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 9f9e0d109d7d..2ae635df9026 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -83,7 +83,7 @@ END_BTB_FLUSH_SECTION
 	SAVE_4GPRS(3, r11);						\
 	SAVE_2GPRS(7, r11)
 
-.macro SYSCALL_ENTRY trapno intno
+.macro SYSCALL_ENTRY trapno intno srr1
 	mfspr	r10, SPRN_SPRG_THREAD
 #ifdef CONFIG_KVM_BOOKE_HV
 BEGIN_FTR_SECTION
@@ -94,7 +94,7 @@ BEGIN_FTR_SECTION
 	mfspr	r11, SPRN_SRR1
 	mtocrf	0x80, r11	/* check MSR[GS] without clobbering reg */
 	bf	3, 1975f
-	b	kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1
+	b	kvmppc_handler_\intno\()_\srr1
1975:
 	mr	r12, r13
 	lwz	r13, THREAD_NORMSAVE(2)(r10)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 6621f230cc37..2b39f42c3676 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -413,7 +413,7 @@ interrupt_base:
 
 	/* System Call Interrupt */
 	START_EXCEPTION(SystemCall)
-	SYSCALL_ENTRY   0xc00 SYSCALL
+	SYSCALL_ENTRY   0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
 
 	/* Auxiliary Processor Unavailable Interrupt */
 	EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
-- cgit v1.2.3


From fabb2efcf0846e28b4910fc20bdc203d3d0170af Mon Sep 17 00:00:00 2001
From: Michael Neuling
Date: Mon, 17 Jun 2019 17:16:18 +1000
Subject: KVM: PPC: Book3S HV: Fix r3 corruption in h_set_dabr()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 option")
screwed up some assembler and corrupted a pointer in r3. This resulted
in crashes like the one below:

  BUG: Kernel NULL pointer dereference at 0x000013bf
  Faulting instruction address: 0xc00000000010b044
  Oops: Kernel access of bad area, sig: 11 [#1]
  LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
  CPU: 8 PID: 1771 Comm: qemu-system-ppc Kdump: loaded Not tainted 5.2.0-rc4+ #3
  NIP:  c00000000010b044 LR: c0080000089dacf4 CTR: c00000000010aff4
  REGS: c00000179b397710 TRAP: 0300   Not tainted  (5.2.0-rc4+)
  MSR:  800000000280b033  CR: 42244842  XER: 00000000
  CFAR: c00000000010aff8 DAR: 00000000000013bf DSISR: 42000000 IRQMASK: 0
  GPR00: c0080000089dd6bc c00000179b3979a0 c008000008a04300 ffffffffffffffff
  GPR04: 0000000000000000 0000000000000003 000000002444b05d c0000017f11c45d0
  ...
  NIP  kvmppc_h_set_dabr+0x50/0x68
  LR   kvmppc_pseries_do_hcall+0xa3c/0xeb0 [kvm_hv]
  Call Trace:
    0xc0000017f11c0000 (unreliable)
    kvmppc_vcpu_run_hv+0x694/0xec0 [kvm_hv]
    kvmppc_vcpu_run+0x34/0x48 [kvm]
    kvm_arch_vcpu_ioctl_run+0x2f4/0x400 [kvm]
    kvm_vcpu_ioctl+0x460/0x850 [kvm]
    do_vfs_ioctl+0xe4/0xb40
    ksys_ioctl+0xc4/0x110
    sys_ioctl+0x28/0x80
    system_call+0x5c/0x70
  Instruction dump:
  4082fff4 4c00012c 38600000 4e800020 e96280c0 896b0000 2c2b0000 3860ffff
  4d820020 50852e74 508516f6 78840724 f8a313c8 7c942ba6 7cbc2ba6

Fix the bug by only changing r3 when we are returning immediately.

Fixes: c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 option")
Signed-off-by: Michael Neuling
Signed-off-by: Suraj Jitindar Singh
Reported-by: Cédric Le Goater
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f9b2620fbecd..5cb8516b209c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2507,8 +2507,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	LOAD_REG_ADDR(r11, dawr_force_enable)
 	lbz	r11, 0(r11)
 	cmpdi	r11, 0
+	bne	3f
 	li	r3, H_HARDWARE
-	beqlr
+	blr
+3:
 	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
 	rlwimi	r5, r4, 2, DAWRX_WT
-- cgit v1.2.3


From 84b028243ef07a3f65c1857343ada2b1022f8bed Mon Sep 17 00:00:00 2001
From: Suraj Jitindar Singh
Date: Mon, 17 Jun 2019 17:16:19 +1000
Subject: KVM: PPC: Book3S HV: Only write DAWR[X] when handling h_set_dawr in
 real mode

The hcall H_SET_DAWR is used by a guest to set the data address
watchpoint register (DAWR). This hcall is handled in the host in
kvmppc_h_set_dawr(), which can be called either in real mode, on the
guest exit path from hcall_try_real_mode() in book3s_hv_rmhandlers.S,
or in virtual mode, when called from kvmppc_pseries_do_hcall() in
book3s_hv.c.

The function kvmppc_h_set_dawr() updates the dawr and dawrx fields in
the vcpu struct accordingly and then also writes the respective values
into the DAWR and DAWRX registers directly.

It is necessary to write the registers directly here when calling the
function in real mode, since the path to re-enter the guest won't do
this. However, when in virtual mode, the host DAWR and DAWRX values
have already been restored, and so writing the registers would
overwrite these. Additionally, there is no reason to write the guest
values here, as these will be read from the vcpu struct and written to
the registers appropriately the next time the vcpu is run. This also
avoids the case when handling h_set_dawr for a nested guest, where the
guest hypervisor isn't able to write the DAWR and DAWRX registers
directly and must rely on the real hypervisor to do this for it when
it calls H_ENTER_NESTED.
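
In C terms, the fixed handler behaves roughly like this (an
illustrative sketch of the assembly change, not literal kernel code):

	long kvmppc_h_set_dawr(struct kvm_vcpu *vcpu, ulong dawr, ulong dawrx)
	{
		/* Always update the vcpu struct; these values are loaded
		 * into the registers the next time the vcpu is run. */
		vcpu->arch.dawr = dawr;
		vcpu->arch.dawrx = dawrx;

		if (!(mfmsr() & MSR_DR)) {
			/* Real mode: the guest re-entry path won't write
			 * the registers for us, so do it directly here. */
			mtspr(SPRN_DAWR, dawr);
			mtspr(SPRN_DAWRX, dawrx);
		}

		return H_SUCCESS;
	}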
Fixes: c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 option")
Signed-off-by: Suraj Jitindar Singh
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5cb8516b209c..bc18366cd1ba 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2517,9 +2517,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	clrrdi	r4, r4, 3
 	std	r4, VCPU_DAWR(r3)
 	std	r5, VCPU_DAWRX(r3)
+	/*
+	 * If we came in through the real mode hcall handler then it is
+	 * necessary to write the registers since the return path won't.
+	 * Otherwise it is sufficient to store them in the vcpu struct as
+	 * they will be loaded next time the vcpu is run.
+	 */
+	mfmsr	r6
+	andi.	r6, r6, MSR_DR		/* in real mode? */
+	bne	4f
 	mtspr	SPRN_DAWR, r4
 	mtspr	SPRN_DAWRX, r5
-	li	r3, 0
+4:	li	r3, 0
 	blr
 
_GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
-- cgit v1.2.3


From 9739ab7eda459f0669ec9807e0d9be5020bab88c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 13 Jun 2019 10:24:46 +0200
Subject: powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac

With the strict dma mask checking introduced with the switch to the
generic DMA direct code, common wifi chips on 32-bit powerbooks
stopped working. Add a 30-bit ZONE_DMA to the 32-bit pmac builds to
allow them to reliably allocate dma coherent memory.

Fixes: 65a21b71f948 ("powerpc/dma: remove dma_nommu_dma_supported")
Reported-by: Aaro Koskinen
Signed-off-by: Christoph Hellwig
Tested-by: Larry Finger
Acked-by: Larry Finger
Tested-by: Aaro Koskinen
Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/page.h         | 7 +++++++
 arch/powerpc/mm/mem.c                   | 3 ++-
 arch/powerpc/platforms/powermac/Kconfig | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index dbc8c0679480..3d013e4696e9 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -323,6 +323,13 @@ struct vm_area_struct;
 #endif /* __ASSEMBLY__ */
 #include <asm/slice.h>
 
+/*
+ * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
+ */
+#ifdef CONFIG_PPC32
+#define ARCH_ZONE_DMA_BITS 30
+#else
 #define ARCH_ZONE_DMA_BITS 31
+#endif
 
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e885fe2aafcc..40bd4153ab09 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -253,7 +253,8 @@ void __init paging_init(void)
 	       (long int)((top_of_ram - total_ram) >> 20));
 
 #ifdef CONFIG_ZONE_DMA
-	max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> PAGE_SHIFT);
+	max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
+				      ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
 #endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index f834a19ed772..c02d8c503b29 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -7,6 +7,7 @@ config PPC_PMAC
 	select PPC_INDIRECT_PCI if PPC32
 	select PPC_MPC106 if PPC32
 	select PPC_NATIVE
+	select ZONE_DMA if PPC32
 	default y
 
 config PPC_PMAC64
-- cgit v1.2.3


From 50087112592016a3fc10b394a55f1f1a1bde6908 Mon Sep 17 00:00:00 2001
From: Suraj Jitindar Singh
Date: Thu, 20 Jun 2019 11:46:49 +1000
Subject: KVM: PPC: Book3S HV: Invalidate ERAT when flushing guest TLB entries

When a guest vcpu moves from one physical thread to another, it is
necessary for the host to perform a tlb flush on the previous core if
another vcpu from the same guest is going to run there. This is
because the guest may use the local form of the tlb invalidation
instruction, meaning stale tlb entries would persist where it
previously ran.

This is handled on guest entry in kvmppc_check_need_tlb_flush(), which
calls flush_guest_tlb() to perform the tlb flush. Previously the
generic radix__local_flush_tlb_lpid_guest() function was used; however
the functionality was reimplemented in flush_guest_tlb() to avoid the
trace_tlbie() call, as the flushing may be done in real mode.

The reimplementation in flush_guest_tlb() was missing an erat
invalidation after flushing the tlb. This led to observable memory
corruption in the guest due to the caching of stale translations. Fix
this by adding the erat invalidation.

Fixes: 70ea13f6e609 ("KVM: PPC: Book3S HV: Flush TLB on secondary radix threads")
Signed-off-by: Suraj Jitindar Singh
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 6035d24f1d1d..a46286f73eec 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -833,6 +833,7 @@ static void flush_guest_tlb(struct kvm *kvm)
 		}
 	}
 	asm volatile("ptesync": : :"memory");
+	asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
 }
 
 void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
-- cgit v1.2.3


From e13e7cd4c0c1cc9984d9b6a8663e10d76b53f2aa Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Sat, 22 Jun 2019 08:55:54 +1000
Subject: powerpc/64s/exception: Fix machine check early corrupting AMR

The early machine check runs in real mode, so locking is unnecessary.
Worse, the windup does not restore AMR, so this can result in a false
KUAP fault after a recoverable machine check hits inside a user copy
operation.

Fix this similarly to HMI by just avoiding the kuap lock in the early
machine check handler (it will be set by the late handler that runs in
virtual mode, if that runs). If the virtual mode handler is reached,
it will lock and restore the AMR.
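
The resulting behaviour can be summarised with a sketch like the
following (illustrative only; the real code is assembly in
exceptions-64s.S, and the helper names here are hypothetical):

	/* Early handler: entered in real mode, translation off. */
	void machine_check_early_prolog(void)
	{
		/*
		 * No kuap_save_amr_and_lock here: the windup path does not
		 * restore AMR, so locking it would leave an interrupted
		 * user copy routine with access unexpectedly blocked,
		 * producing a false KUAP fault.
		 */
	}

	/* Late handler: runs in virtual mode (if reached), and locks
	 * and later restores AMR as usual. */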
Fixes: 890274c2dc4c0 ("powerpc/64s: Implement KUAP for Radix MMU")
Cc: Russell Currey
Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
---
 arch/powerpc/kernel/exceptions-64s.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6b86055e5251..73ba246ca11d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -315,7 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
 	mfspr	r11,SPRN_DSISR		/* Save DSISR */
 	std	r11,_DSISR(r1)
 	std	r9,_CCR(r1)		/* Save CR in stackframe */
-	kuap_save_amr_and_lock r9, r10, cr1
+	/* We don't touch AMR here, we never go to virtual mode */
 	/* Save r9 through r13 from EXMC save area to stack frame. */
 	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
 	mfmsr	r11			/* get MSR value */
-- cgit v1.2.3