From 113fe88eed53af08800f54a03e463636105831e0 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Sat, 11 Jun 2022 18:55:15 +0200
Subject: powerpc: Don't include asm/setup.h in asm/machdep.h

asm/machdep.h doesn't need asm/setup.h. Remove it.

Add it directly in the files that need it.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/3b1dfb19a2c3265fb4abc2bfc7b6eae9261a998b.1654966508.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/pci-common.c | 1 +
 arch/powerpc/kernel/prom.c | 2 +-
 arch/powerpc/kernel/prom_init.c | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)
(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 068410cd54a3..c87999289752 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include <asm/setup.h>
 #include "../../../drivers/pci/pci.h"

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index feae8509b59c..1066b072db35 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -44,7 +44,7 @@
 #include
 #include
 #include
-#include
+#include <asm/setup.h>
 #include
 #include
 #include

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 04694ec423f6..1939683e35ed 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -42,7 +42,7 @@
 #include
 #include
 #include
-#include
+#include <asm/setup.h>
 #include
 #include
-- cgit v1.2.3

From 46d60bdb1283bb0f22d9480e2d6c972623cb4182 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 6 May 2022 11:14:24 +0200
Subject: powerpc: Include asm/firmware.h in all users of firmware_has_feature()

Trying to remove asm/ppc_asm.h from all places that don't need it
leads to several failures linked to firmware_has_feature().

To fix it, include asm/firmware.h in all files using
firmware_has_feature().

All users found with:
git grep -L "firmware\.h" `git grep -l "firmware_has_feature("`

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/11956ec181a034b51a881ac9c059eea72c679a73.1651828453.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/book3s/64/hugetlb.h | 3 +++
 arch/powerpc/include/asm/cputime.h | 1 +
 arch/powerpc/include/asm/interrupt.h | 1 +
 arch/powerpc/include/asm/mman.h | 1 +
 arch/powerpc/include/asm/prom.h | 1 +
 arch/powerpc/kernel/dawr.c | 1 +
 arch/powerpc/kexec/core.c | 1 +
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 1 +
 arch/powerpc/kvm/book3s_hv_nested.c | 1 +
 arch/powerpc/mm/book3s64/hash_pgtable.c | 1 +
 arch/powerpc/mm/book3s64/pkeys.c | 1 +
 arch/powerpc/mm/hugetlbpage.c | 1 +
 arch/powerpc/platforms/pseries/papr_platform_attributes.c | 1 +
 arch/powerpc/platforms/pseries/vas.c | 1 +
 14 files changed, 16 insertions(+)
(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index b37a28f62cf6..aa1c67c8bfc8 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -1,6 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H
 #define _ASM_POWERPC_BOOK3S_64_HUGETLB_H
+
+#include <asm/firmware.h>
+
 /*
  * For radix we want generic code to handle hugetlb. But then if we want
 * both hash and radix to be enabled together we need to workaround the

diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 504f7fe6711a..6d2b27997492 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 typedef u64 __nocast cputime_t;
 typedef u64 __nocast cputime64_t;

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index b14f54d789d2..8069dbc4b8d1 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -69,6 +69,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 #include
 #include
 #include

diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 1b024e64c8ec..17a77d47ed6d 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
 						   unsigned long pkey)

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 5c80152e8f18..6f109b5cb84e 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 /* These includes should be removed once implicit includes are cleaned up. */
 #include

diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 30d4eca88d17..909a05cd2809 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 bool dawr_force_enable;
 EXPORT_SYMBOL_GPL(dawr_force_enable);

diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 9773dd0640f3..cf84bfe9e27e 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 void machine_kexec_mask_interrupts(void)
 {
 	unsigned int i;

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 42851c32ff3b..9d4b3feda3b6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 /*
  * Supported radix tree geometry.
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 0644732d1a25..be8249cc6107 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 static struct patb_entry *pseries_partition_tb;

diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 2e0cad5817ba..ae008b9df0e6 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 #include

diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 753e62ba67af..1d2675ab6711 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 #include
 #include

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index b282af39fcf6..bc84a594ca62 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 bool hugetlb_disabled = false;

diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
index 515150417bb3..526c621b098b 100644
--- a/arch/powerpc/platforms/pseries/papr_platform_attributes.c
+++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
@@ -22,6 +22,7 @@
 #include
 #include
+#include <asm/firmware.h>
 #include "pseries.h"

diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 500a1fc4a1d7..91e7eda0606c 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include <asm/firmware.h>
 #include
 #include "vas.h"
-- cgit v1.2.3

From e93dee186fc95f2058b0c9d2317d8b876b8512db Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 6 May 2022 11:14:25 +0200
Subject: powerpc: Don't include asm/ppc_asm.h in other headers

asm/ppc_asm.h is not needed in any of the headers it is included in.
It is only needed by irq.c. Include it there and remove it from the
other headers.

word-at-a-time.h only needs ex_table.h, so include it instead.
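
For illustration, the failure mode being fixed here is the classic
transitive-include trap. A hypothetical user-space sketch (file and
function names are made up, not taken from the kernel):

	/* a.h */
	#include "b.h"		/* a.h happens to drag b.h in today */

	/* user.c */
	#include "a.h"		/* relies on a.h providing b.h */

	int main(void)
	{
		return b_func();	/* breaks the day a.h stops including b.h;
					 * the robust fix is to #include "b.h" here */
	}
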
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/e2d7b96547037f852c7ed164e4f79e8918c2607a.1651828453.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/io.h | 1 -
 arch/powerpc/include/asm/uaccess.h | 1 -
 arch/powerpc/include/asm/word-at-a-time.h | 2 +-
 arch/powerpc/kernel/irq.c | 1 +
 4 files changed, 2 insertions(+), 3 deletions(-)
(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index c5a5f7c9b231..9b07133c3bd1 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -33,7 +33,6 @@ extern struct pci_dev *isa_bridge_pcidev;
 #include
 #include
 #include
-#include <asm/ppc_asm.h>
 #define SIO_CONFIG_RA 0x398
 #define SIO_CONFIG_RD 0x399

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9b82b38ff867..14a08806f8e8 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -2,7 +2,6 @@
 #ifndef _ARCH_POWERPC_UACCESS_H
 #define _ARCH_POWERPC_UACCESS_H
-#include <asm/ppc_asm.h>
 #include
 #include
 #include

diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index f3f4710d4ff5..46c31fb8748d 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -7,7 +7,7 @@
 #include
 #include
-#include <asm/ppc_asm.h>
+#include <asm/extable.h>
 #ifdef __BIG_ENDIAN__

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index dd09919c3c66..4db27198b002 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -65,6 +65,7 @@
 #include
 #include
 #include
+#include <asm/ppc_asm.h>
 #ifdef CONFIG_PPC64
 #include
-- cgit v1.2.3

From 7d7b28b302085e1ec2815bc9f5205af28394c5db Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Wed, 18 May 2022 09:40:15 +0200
Subject: powerpc/irq: Split irq.c

More than half of irq.c is dedicated to PPC64.
Move PPC64 code out of irq.c into irq_64.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9f1a47de80f78d3dd270a7a72f69f55f581c4054.1652859593.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/irq.c | 421 -------------------------------------- arch/powerpc/kernel/irq_64.c | 476 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 477 insertions(+), 422 deletions(-) create mode 100644 arch/powerpc/kernel/irq_64.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f91f0f29a566..317c984ee9d0 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -63,7 +63,7 @@ obj-y := cputable.o syscalls.o \ hw_breakpoint_constraints.o interrupt.o \ kdebugfs.o stacktrace.o obj-y += ptrace/ -obj-$(CONFIG_PPC64) += setup_64.o \ +obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\ paca.o nvram_64.o note.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32_wrapper.o diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4db27198b002..4abfe6bd6622 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -67,12 +67,6 @@ #include #include -#ifdef CONFIG_PPC64 -#include -#include -#include -#include -#endif #define CREATE_TRACE_POINTS #include #include @@ -89,411 +83,6 @@ u32 tau_interrupts(unsigned long cpu); #endif #endif /* CONFIG_PPC32 */ -#ifdef CONFIG_PPC64 - -int distribute_irqs = 1; - -static inline notrace unsigned long get_irq_happened(void) -{ - unsigned long happened; - - __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); - - return happened; -} - -void replay_soft_interrupts(void) -{ - struct pt_regs regs; - - /* - * Be careful here, calling these interrupt handlers can cause - * softirqs to be raised, which they may run when calling irq_exit, - * which will cause local_irq_enable() to be run, which can then - * recurse into this function. Don't keep any state across - * interrupt handler calls which may change underneath us. - * - * We use local_paca rather than get_paca() to avoid all the - * debug_smp_processor_id() business in this low level function. - */ - - ppc_save_regs(®s); - regs.softe = IRQS_ENABLED; - regs.msr |= MSR_EE; - -again: - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(mfmsr() & MSR_EE); - - /* - * Force the delivery of pending soft-disabled interrupts on PS3. - * Any HV call will have this side effect. - */ - if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp, tmp2; - lv1_get_version_info(&tmp, &tmp2); - } - - /* - * Check if an hypervisor Maintenance interrupt happened. - * This is a higher priority interrupt than the others, so - * replay it first. 
- */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { - local_paca->irq_happened &= ~PACA_IRQ_HMI; - regs.trap = INTERRUPT_HMI; - handle_hmi_exception(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & PACA_IRQ_DEC) { - local_paca->irq_happened &= ~PACA_IRQ_DEC; - regs.trap = INTERRUPT_DECREMENTER; - timer_interrupt(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & PACA_IRQ_EE) { - local_paca->irq_happened &= ~PACA_IRQ_EE; - regs.trap = INTERRUPT_EXTERNAL; - do_IRQ(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - regs.trap = INTERRUPT_DOORBELL; - doorbell_exception(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - /* Book3E does not support soft-masking PMI interrupts */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { - local_paca->irq_happened &= ~PACA_IRQ_PMI; - regs.trap = INTERRUPT_PERFMON; - performance_monitor_exception(®s); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { - /* - * We are responding to the next interrupt, so interrupt-off - * latencies should be reset here. - */ - trace_hardirqs_on(); - trace_hardirqs_off(); - goto again; - } -} - -#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) -static inline void replay_soft_interrupts_irqrestore(void) -{ - unsigned long kuap_state = get_kuap(); - - /* - * Check if anything calls local_irq_enable/restore() when KUAP is - * disabled (user access enabled). We handle that case here by saving - * and re-locking AMR but we shouldn't get here in the first place, - * hence the warning. - */ - kuap_assert_locked(); - - if (kuap_state != AMR_KUAP_BLOCKED) - set_kuap(AMR_KUAP_BLOCKED); - - replay_soft_interrupts(); - - if (kuap_state != AMR_KUAP_BLOCKED) - set_kuap(kuap_state); -} -#else -#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() -#endif - -notrace void arch_local_irq_restore(unsigned long mask) -{ - unsigned char irq_happened; - - /* Write the new soft-enabled value if it is a disable */ - if (mask) { - irq_soft_mask_set(mask); - return; - } - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(in_nmi() || in_hardirq()); - - /* - * After the stb, interrupts are unmasked and there are no interrupts - * pending replay. The restart sequence makes this atomic with - * respect to soft-masked interrupts. If this was just a simple code - * sequence, a soft-masked interrupt could become pending right after - * the comparison and before the stb. - * - * This allows interrupts to be unmasked without hard disabling, and - * also without new hard interrupts coming in ahead of pending ones. 
- */ - asm_volatile_goto( -"1: \n" -" lbz 9,%0(13) \n" -" cmpwi 9,0 \n" -" bne %l[happened] \n" -" stb 9,%1(13) \n" -"2: \n" - RESTART_TABLE(1b, 2b, 1b) - : : "i" (offsetof(struct paca_struct, irq_happened)), - "i" (offsetof(struct paca_struct, irq_soft_mask)) - : "cr0", "r9" - : happened); - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(!(mfmsr() & MSR_EE)); - - return; - -happened: - irq_happened = get_irq_happened(); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(!irq_happened); - - if (irq_happened == PACA_IRQ_HARD_DIS) { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(mfmsr() & MSR_EE); - irq_soft_mask_set(IRQS_ENABLED); - local_paca->irq_happened = 0; - __hard_irq_enable(); - return; - } - - /* Have interrupts to replay, need to hard disable first */ - if (!(irq_happened & PACA_IRQ_HARD_DIS)) { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { - if (!(mfmsr() & MSR_EE)) { - /* - * An interrupt could have come in and cleared - * MSR[EE] and set IRQ_HARD_DIS, so check - * IRQ_HARD_DIS again and warn if it is still - * clear. - */ - irq_happened = get_irq_happened(); - WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); - } - } - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - } else { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { - if (WARN_ON_ONCE(mfmsr() & MSR_EE)) - __hard_irq_disable(); - } - } - - /* - * Disable preempt here, so that the below preempt_enable will - * perform resched if required (a replayed interrupt may set - * need_resched). - */ - preempt_disable(); - irq_soft_mask_set(IRQS_ALL_DISABLED); - trace_hardirqs_off(); - - replay_soft_interrupts_irqrestore(); - local_paca->irq_happened = 0; - - trace_hardirqs_on(); - irq_soft_mask_set(IRQS_ENABLED); - __hard_irq_enable(); - preempt_enable(); -} -EXPORT_SYMBOL(arch_local_irq_restore); - -/* - * This is a helper to use when about to go into idle low-power - * when the latter has the side effect of re-enabling interrupts - * (such as calling H_CEDE under pHyp). - * - * You call this function with interrupts soft-disabled (this is - * already the case when ppc_md.power_save is called). The function - * will return whether to enter power save or just return. - * - * In the former case, it will have notified lockdep of interrupts - * being re-enabled and generally sanitized the lazy irq state, - * and in the latter case it will leave with interrupts hard - * disabled and marked as such, so the local_irq_enable() call - * in arch_cpu_idle() will properly re-enable everything. - */ -bool prep_irq_for_idle(void) -{ - /* - * First we need to hard disable to ensure no interrupt - * occurs before we effectively enter the low power state - */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* - * If anything happened while we were soft-disabled, - * we return now and do not enter the low power state. - */ - if (lazy_irq_pending()) - return false; - - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - - /* - * Mark interrupts as soft-enabled and clear the - * PACA_IRQ_HARD_DIS from the pending mask since we - * are about to hard enable as well as a side effect - * of entering the low power state. - */ - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - irq_soft_mask_set(IRQS_ENABLED); - - /* Tell the caller to enter the low power state */ - return true; -} - -#ifdef CONFIG_PPC_BOOK3S -/* - * This is for idle sequences that return with IRQs off, but the - * idle state itself wakes on interrupt. 
Tell the irq tracer that - * IRQs are enabled for the duration of idle so it does not get long - * off times. Must be paired with fini_irq_for_idle_irqsoff. - */ -bool prep_irq_for_idle_irqsoff(void) -{ - WARN_ON(!irqs_disabled()); - - /* - * First we need to hard disable to ensure no interrupt - * occurs before we effectively enter the low power state - */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* - * If anything happened while we were soft-disabled, - * we return now and do not enter the low power state. - */ - if (lazy_irq_pending()) - return false; - - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - - return true; -} - -/* - * Take the SRR1 wakeup reason, index into this table to find the - * appropriate irq_happened bit. - * - * Sytem reset exceptions taken in idle state also come through here, - * but they are NMI interrupts so do not need to wait for IRQs to be - * restored, and should be taken as early as practical. These are marked - * with 0xff in the table. The Power ISA specifies 0100b as the system - * reset interrupt reason. - */ -#define IRQ_SYSTEM_RESET 0xff - -static const u8 srr1_to_lazyirq[0x10] = { - 0, 0, 0, - PACA_IRQ_DBELL, - IRQ_SYSTEM_RESET, - PACA_IRQ_DBELL, - PACA_IRQ_DEC, - 0, - PACA_IRQ_EE, - PACA_IRQ_EE, - PACA_IRQ_HMI, - 0, 0, 0, 0, 0 }; - -void replay_system_reset(void) -{ - struct pt_regs regs; - - ppc_save_regs(®s); - regs.trap = 0x100; - get_paca()->in_nmi = 1; - system_reset_exception(®s); - get_paca()->in_nmi = 0; -} -EXPORT_SYMBOL_GPL(replay_system_reset); - -void irq_set_pending_from_srr1(unsigned long srr1) -{ - unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; - u8 reason = srr1_to_lazyirq[idx]; - - /* - * Take the system reset now, which is immediately after registers - * are restored from idle. It's an NMI, so interrupts need not be - * re-enabled before it is taken. - */ - if (unlikely(reason == IRQ_SYSTEM_RESET)) { - replay_system_reset(); - return; - } - - if (reason == PACA_IRQ_DBELL) { - /* - * When doorbell triggers a system reset wakeup, the message - * is not cleared, so if the doorbell interrupt is replayed - * and the IPI handled, the doorbell interrupt would still - * fire when EE is enabled. - * - * To avoid taking the superfluous doorbell interrupt, - * execute a msgclr here before the interrupt is replayed. - */ - ppc_msgclr(PPC_DBELL_MSGTYPE); - } - - /* - * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, - * so this can be called unconditionally with the SRR1 wake - * reason as returned by the idle code, which uses 0 to mean no - * interrupt. - * - * If a future CPU was to designate this as an interrupt reason, - * then a new index for no interrupt must be assigned. - */ - local_paca->irq_happened |= reason; -} -#endif /* CONFIG_PPC_BOOK3S */ - -/* - * Force a replay of the external interrupt handler on this CPU. - */ -void force_external_irq_replay(void) -{ - /* - * This must only be called with interrupts soft-disabled, - * the replay will happen when re-enabling. - */ - WARN_ON(!arch_irqs_disabled()); - - /* - * Interrupts must always be hard disabled before irq_happened is - * modified (to prevent lost update in case of interrupt between - * load and store). 
- */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* Indicate in the PACA that we have an interrupt to replay */ - local_paca->irq_happened |= PACA_IRQ_EE; -} - -#endif /* CONFIG_PPC64 */ - int arch_show_interrupts(struct seq_file *p, int prec) { int j; @@ -795,13 +384,3 @@ int irq_choose_cpu(const struct cpumask *mask) return hard_smp_processor_id(); } #endif - -#ifdef CONFIG_PPC64 -static int __init setup_noirqdistrib(char *str) -{ - distribute_irqs = 0; - return 1; -} - -__setup("noirqdistrib", setup_noirqdistrib); -#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c new file mode 100644 index 000000000000..56dd7d94e661 --- /dev/null +++ b/arch/powerpc/kernel/irq_64.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Derived from arch/i386/kernel/irq.c + * Copyright (C) 1992 Linus Torvalds + * Adapted from arch/i386 by Gary Thomas + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Updated and modified by Cort Dougan + * Copyright (C) 1996-2001 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int distribute_irqs = 1; + +static inline notrace unsigned long get_irq_happened(void) +{ + unsigned long happened; + + __asm__ __volatile__("lbz %0,%1(13)" + : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); + + return happened; +} + +void replay_soft_interrupts(void) +{ + struct pt_regs regs; + + /* + * Be careful here, calling these interrupt handlers can cause + * softirqs to be raised, which they may run when calling irq_exit, + * which will cause local_irq_enable() to be run, which can then + * recurse into this function. Don't keep any state across + * interrupt handler calls which may change underneath us. + * + * We use local_paca rather than get_paca() to avoid all the + * debug_smp_processor_id() business in this low level function. + */ + + ppc_save_regs(®s); + regs.softe = IRQS_ENABLED; + regs.msr |= MSR_EE; + +again: + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + /* + * Force the delivery of pending soft-disabled interrupts on PS3. + * Any HV call will have this side effect. + */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); + } + + /* + * Check if an hypervisor Maintenance interrupt happened. + * This is a higher priority interrupt than the others, so + * replay it first. 
+ */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { + local_paca->irq_happened &= ~PACA_IRQ_HMI; + regs.trap = INTERRUPT_HMI; + handle_hmi_exception(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (local_paca->irq_happened & PACA_IRQ_DEC) { + local_paca->irq_happened &= ~PACA_IRQ_DEC; + regs.trap = INTERRUPT_DECREMENTER; + timer_interrupt(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (local_paca->irq_happened & PACA_IRQ_EE) { + local_paca->irq_happened &= ~PACA_IRQ_EE; + regs.trap = INTERRUPT_EXTERNAL; + do_IRQ(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { + local_paca->irq_happened &= ~PACA_IRQ_DBELL; + regs.trap = INTERRUPT_DOORBELL; + doorbell_exception(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + /* Book3E does not support soft-masking PMI interrupts */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { + local_paca->irq_happened &= ~PACA_IRQ_PMI; + regs.trap = INTERRUPT_PERFMON; + performance_monitor_exception(®s); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { + /* + * We are responding to the next interrupt, so interrupt-off + * latencies should be reset here. + */ + trace_hardirqs_on(); + trace_hardirqs_off(); + goto again; + } +} + +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) +static inline void replay_soft_interrupts_irqrestore(void) +{ + unsigned long kuap_state = get_kuap(); + + /* + * Check if anything calls local_irq_enable/restore() when KUAP is + * disabled (user access enabled). We handle that case here by saving + * and re-locking AMR but we shouldn't get here in the first place, + * hence the warning. + */ + kuap_assert_locked(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(AMR_KUAP_BLOCKED); + + replay_soft_interrupts(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(kuap_state); +} +#else +#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#endif + +notrace void arch_local_irq_restore(unsigned long mask) +{ + unsigned char irq_happened; + + /* Write the new soft-enabled value if it is a disable */ + if (mask) { + irq_soft_mask_set(mask); + return; + } + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(in_nmi() || in_hardirq()); + + /* + * After the stb, interrupts are unmasked and there are no interrupts + * pending replay. The restart sequence makes this atomic with + * respect to soft-masked interrupts. If this was just a simple code + * sequence, a soft-masked interrupt could become pending right after + * the comparison and before the stb. + * + * This allows interrupts to be unmasked without hard disabling, and + * also without new hard interrupts coming in ahead of pending ones. 
+ */ + asm_volatile_goto( +"1: \n" +" lbz 9,%0(13) \n" +" cmpwi 9,0 \n" +" bne %l[happened] \n" +" stb 9,%1(13) \n" +"2: \n" + RESTART_TABLE(1b, 2b, 1b) + : : "i" (offsetof(struct paca_struct, irq_happened)), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "cr0", "r9" + : happened); + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + + return; + +happened: + irq_happened = get_irq_happened(); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!irq_happened); + + if (irq_happened == PACA_IRQ_HARD_DIS) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + irq_soft_mask_set(IRQS_ENABLED); + local_paca->irq_happened = 0; + __hard_irq_enable(); + return; + } + + /* Have interrupts to replay, need to hard disable first */ + if (!(irq_happened & PACA_IRQ_HARD_DIS)) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (!(mfmsr() & MSR_EE)) { + /* + * An interrupt could have come in and cleared + * MSR[EE] and set IRQ_HARD_DIS, so check + * IRQ_HARD_DIS again and warn if it is still + * clear. + */ + irq_happened = get_irq_happened(); + WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); + } + } + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + } else { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } + } + + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). + */ + preempt_disable(); + irq_soft_mask_set(IRQS_ALL_DISABLED); + trace_hardirqs_off(); + + replay_soft_interrupts_irqrestore(); + local_paca->irq_happened = 0; + + trace_hardirqs_on(); + irq_soft_mask_set(IRQS_ENABLED); + __hard_irq_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(arch_local_irq_restore); + +/* + * This is a helper to use when about to go into idle low-power + * when the latter has the side effect of re-enabling interrupts + * (such as calling H_CEDE under pHyp). + * + * You call this function with interrupts soft-disabled (this is + * already the case when ppc_md.power_save is called). The function + * will return whether to enter power save or just return. + * + * In the former case, it will have notified lockdep of interrupts + * being re-enabled and generally sanitized the lazy irq state, + * and in the latter case it will leave with interrupts hard + * disabled and marked as such, so the local_irq_enable() call + * in arch_cpu_idle() will properly re-enable everything. + */ +bool prep_irq_for_idle(void) +{ + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + /* + * Mark interrupts as soft-enabled and clear the + * PACA_IRQ_HARD_DIS from the pending mask since we + * are about to hard enable as well as a side effect + * of entering the low power state. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + irq_soft_mask_set(IRQS_ENABLED); + + /* Tell the caller to enter the low power state */ + return true; +} + +#ifdef CONFIG_PPC_BOOK3S +/* + * This is for idle sequences that return with IRQs off, but the + * idle state itself wakes on interrupt. 
Tell the irq tracer that + * IRQs are enabled for the duration of idle so it does not get long + * off times. Must be paired with fini_irq_for_idle_irqsoff. + */ +bool prep_irq_for_idle_irqsoff(void) +{ + WARN_ON(!irqs_disabled()); + + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + return true; +} + +/* + * Take the SRR1 wakeup reason, index into this table to find the + * appropriate irq_happened bit. + * + * Sytem reset exceptions taken in idle state also come through here, + * but they are NMI interrupts so do not need to wait for IRQs to be + * restored, and should be taken as early as practical. These are marked + * with 0xff in the table. The Power ISA specifies 0100b as the system + * reset interrupt reason. + */ +#define IRQ_SYSTEM_RESET 0xff + +static const u8 srr1_to_lazyirq[0x10] = { + 0, 0, 0, + PACA_IRQ_DBELL, + IRQ_SYSTEM_RESET, + PACA_IRQ_DBELL, + PACA_IRQ_DEC, + 0, + PACA_IRQ_EE, + PACA_IRQ_EE, + PACA_IRQ_HMI, + 0, 0, 0, 0, 0 }; + +void replay_system_reset(void) +{ + struct pt_regs regs; + + ppc_save_regs(®s); + regs.trap = 0x100; + get_paca()->in_nmi = 1; + system_reset_exception(®s); + get_paca()->in_nmi = 0; +} +EXPORT_SYMBOL_GPL(replay_system_reset); + +void irq_set_pending_from_srr1(unsigned long srr1) +{ + unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; + u8 reason = srr1_to_lazyirq[idx]; + + /* + * Take the system reset now, which is immediately after registers + * are restored from idle. It's an NMI, so interrupts need not be + * re-enabled before it is taken. + */ + if (unlikely(reason == IRQ_SYSTEM_RESET)) { + replay_system_reset(); + return; + } + + if (reason == PACA_IRQ_DBELL) { + /* + * When doorbell triggers a system reset wakeup, the message + * is not cleared, so if the doorbell interrupt is replayed + * and the IPI handled, the doorbell interrupt would still + * fire when EE is enabled. + * + * To avoid taking the superfluous doorbell interrupt, + * execute a msgclr here before the interrupt is replayed. + */ + ppc_msgclr(PPC_DBELL_MSGTYPE); + } + + /* + * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, + * so this can be called unconditionally with the SRR1 wake + * reason as returned by the idle code, which uses 0 to mean no + * interrupt. + * + * If a future CPU was to designate this as an interrupt reason, + * then a new index for no interrupt must be assigned. + */ + local_paca->irq_happened |= reason; +} +#endif /* CONFIG_PPC_BOOK3S */ + +/* + * Force a replay of the external interrupt handler on this CPU. + */ +void force_external_irq_replay(void) +{ + /* + * This must only be called with interrupts soft-disabled, + * the replay will happen when re-enabling. + */ + WARN_ON(!arch_irqs_disabled()); + + /* + * Interrupts must always be hard disabled before irq_happened is + * modified (to prevent lost update in case of interrupt between + * load and store). 
+ */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* Indicate in the PACA that we have an interrupt to replay */ + local_paca->irq_happened |= PACA_IRQ_EE; +} + +static int __init setup_noirqdistrib(char *str) +{ + distribute_irqs = 0; + return 1; +} + +__setup("noirqdistrib", setup_noirqdistrib); -- cgit v1.2.3 From 98552307e3a72d480e72744bf5da2a865822f496 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 May 2022 09:40:16 +0200 Subject: powerpc/irq64: Remove get_irq_happened() No need to open code the read of local_paca->irq_happened in assembly, we have READ_ONCE() for doing the same. Replace get_irq_happened() by READ_ONCE(local_paca->irq_happened). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/af511b53e4eb51f8fbc51eda7f5597175e68dce6.1652859593.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/irq_64.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 56dd7d94e661..01645e03e9f0 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -68,16 +68,6 @@ int distribute_irqs = 1; -static inline notrace unsigned long get_irq_happened(void) -{ - unsigned long happened; - - __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); - - return happened; -} - void replay_soft_interrupts(void) { struct pt_regs regs; @@ -234,7 +224,7 @@ notrace void arch_local_irq_restore(unsigned long mask) return; happened: - irq_happened = get_irq_happened(); + irq_happened = READ_ONCE(local_paca->irq_happened); if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!irq_happened); @@ -257,7 +247,7 @@ happened: * IRQ_HARD_DIS again and warn if it is still * clear. 
*/ - irq_happened = get_irq_happened(); + irq_happened = READ_ONCE(local_paca->irq_happened); WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); } } -- cgit v1.2.3 From 41f20d6db2b64677225bb0b97df956241c353ef8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 3 Jun 2022 15:08:14 +0200 Subject: powerpc/irq: Increase stack_overflow detection limit when KASAN is enabled When KASAN is enabled, as shown by the Oops below, the 2k limit is not enough to allow stack dump after a stack overflow detection when CONFIG_DEBUG_STACKOVERFLOW is selected: do_IRQ: stack overflow: 1984 CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1 Call Trace: Oops: Kernel stack overflow, sig: 11 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac Modules linked in: sr_mod cdrom radeon(+) ohci_pci(+) hwmon i2c_algo_bit drm_ttm_helper ttm drm_dp_helper snd_aoa_i2sbus snd_aoa_soundbus snd_pcm ehci_pci snd_timer ohci_hcd snd ssb ehci_hcd 8250_pci soundcore drm_kms_helper pcmcia 8250 pcmcia_core syscopyarea usbcore sysfillrect 8250_base sysimgblt serial_mctrl_gpio fb_sys_fops usb_common pkcs8_key_parser fuse drm drm_panel_orientation_quirks configfs CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1 NIP: c02e5558 LR: c07eb3bc CTR: c07f46a8 REGS: e7fe9f50 TRAP: 0000 Not tainted (5.18.0-gentoo-PMacG4) MSR: 00001032 CR: 44a14824 XER: 20000000 GPR00: c07eb3bc eaa1c000 c26baea0 eaa1c0a0 00000008 00000000 c07eb3bc eaa1c010 GPR08: eaa1c0a8 04f3f3f3 f1f1f1f1 c07f4c84 44a14824 0080f7e4 00000005 00000010 GPR16: 00000025 eaa1c154 eaa1c158 c0dbad64 00000020 fd543810 eaa1c0a0 eaa1c29e GPR24: c0dbad44 c0db8740 05ffffff fd543802 eaa1c150 c0c9a3c0 eaa1c0a0 c0c9a3c0 NIP [c02e5558] kasan_check_range+0xc/0x2b4 LR [c07eb3bc] format_decode+0x80/0x604 Call Trace: [eaa1c000] [c07eb3bc] format_decode+0x80/0x604 (unreliable) [eaa1c070] [c07f4dac] vsnprintf+0x128/0x938 [eaa1c110] [c07f5788] sprintf+0xa0/0xc0 [eaa1c180] [c0154c1c] __sprint_symbol.constprop.0+0x170/0x198 [eaa1c230] [c07ee71c] symbol_string+0xf8/0x260 [eaa1c430] [c07f46d0] pointer+0x15c/0x710 [eaa1c4b0] [c07f4fbc] vsnprintf+0x338/0x938 [eaa1c550] [c00e8fa0] vprintk_store+0x2a8/0x678 [eaa1c690] [c00e94e4] vprintk_emit+0x174/0x378 [eaa1c6d0] [c00ea008] _printk+0x9c/0xc0 [eaa1c750] [c000ca94] show_stack+0x21c/0x260 [eaa1c7a0] [c07d0bd4] dump_stack_lvl+0x60/0x90 [eaa1c7c0] [c0009234] __do_IRQ+0x170/0x174 [eaa1c800] [c0009258] do_IRQ+0x20/0x34 [eaa1c820] [c00045b4] HardwareInterrupt_virt+0x108/0x10c ... An investigation shows that on PPC32, calling dump_stack() requires more than 1k when KASAN is not selected and a bit more than 2k bytes when KASAN is selected. On PPC64 the registers are twice the size of PPC32 registers, so the need should be approximately twice the need on PPC32. In the meantime we have THREAD_SIZE which is twice larger on PPC64 than PPC32, and twice larger when KASAN is selected. So we can easily use the value of THREAD_SIZE to set the limit. On PPC32, THREAD_SIZE is 8k without KASAN and 16k with KASAN. On PPC64, THREAD_SIZE is 16k without KASAN. To be on the safe side, leave 2k on PPC32 without KASAN, 4k with KASAN, and 4k on PPC64 without KASAN. It means the limit should be one fourth of THREAD_SIZE. 
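
For illustration, the resulting margins can be checked with a few lines
of arithmetic, using the THREAD_SIZE values quoted above (a sketch, not
part of the patch):

	#include <stdio.h>

	int main(void)
	{
		/* THREAD_SIZE values quoted in the message above */
		unsigned long size[] = { 8192, 16384, 16384 };
		const char *cfg[] = { "PPC32", "PPC32+KASAN", "PPC64" };

		/* the new check fires when less than THREAD_SIZE / 4 is free */
		for (int i = 0; i < 3; i++)
			printf("%-12s THREAD_SIZE=%5lu -> limit %4lu bytes\n",
			       cfg[i], size[i], size[i] / 4);
		return 0;
	}

This prints limits of 2048, 4096 and 4096 bytes, matching the 2k/4k/4k
margins chosen above.
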
Reported-by: Erhard Furtner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e8b4eb82a126c3c6c352173a544fe94609ff660b.1654261687.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4abfe6bd6622..bf7078b8a765 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -194,8 +194,8 @@ static inline void check_stack_overflow(void) sp = current_stack_pointer & (THREAD_SIZE - 1); - /* check for stack overflow: is there less than 2KB free? */ - if (unlikely(sp < 2048)) { + /* check for stack overflow: is there less than 1/4th free? */ + if (unlikely(sp < THREAD_SIZE / 4)) { pr_err("do_IRQ: stack overflow: %ld\n", sp); dump_stack(); } -- cgit v1.2.3 From 051bd351a2ef9c69753dc9cf6bd396986f27778c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 Jun 2022 12:16:40 +0200 Subject: powerpc/irq: Make __do_irq() static Since commit 48cf12d88969 ("powerpc/irq: Inline call_do_irq() and call_do_softirq()"), __do_irq() is not used outside irq.c Reorder functions and make __do_irq() static and drop the declaration in irq.h. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/adbe1c8315ec2d63259f41468e82e51677bb1eda.1654769775.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/irq.h | 1 - arch/powerpc/kernel/irq.c | 46 +++++++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 24 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 13f0409dd617..5c1516a5ba8f 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -54,7 +54,6 @@ extern void *softirq_ctx[NR_CPUS]; void __do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); -extern void __do_irq(struct pt_regs *regs); int irq_choose_cpu(const struct cpumask *mask); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index bf7078b8a765..037db84026a1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -220,31 +220,9 @@ static __always_inline void call_do_softirq(const void *sp) ); } -static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) -{ - register unsigned long r3 asm("r3") = (unsigned long)regs; - - /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ - asm volatile ( - PPC_STLU " %%r1, %[offset](%[sp]) ;" - "mr %%r1, %[sp] ;" - "bl %[callee] ;" - PPC_LL " %%r1, 0(%%r1) ;" - : // Outputs - "+r" (r3) - : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), - [callee] "i" (__do_irq) - : // Clobbers - "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", - "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", - "r11", "r12" - ); -} - DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq); -void __do_irq(struct pt_regs *regs) +static void __do_irq(struct pt_regs *regs) { unsigned int irq; @@ -270,6 +248,28 @@ void __do_irq(struct pt_regs *regs) trace_irq_exit(regs); } +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) +{ + register unsigned long r3 asm("r3") = (unsigned long)regs; + + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. 
*/ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + "+r" (r3) + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_irq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + void __do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); -- cgit v1.2.3 From e90855be9e90e4a046d2be817a31fae6637415a4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 Jun 2022 12:16:41 +0200 Subject: powerpc/irq: Perform stack_overflow detection after switching to IRQ stack When KASAN is enabled, as shown by the Oops below, the 2k limit is not enough to allow stack dump after a stack overflow detection when CONFIG_DEBUG_STACKOVERFLOW is selected: do_IRQ: stack overflow: 1984 CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1 Call Trace: Oops: Kernel stack overflow, sig: 11 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac Modules linked in: sr_mod cdrom radeon(+) ohci_pci(+) hwmon i2c_algo_bit drm_ttm_helper ttm drm_dp_helper snd_aoa_i2sbus snd_aoa_soundbus snd_pcm ehci_pci snd_timer ohci_hcd snd ssb ehci_hcd 8250_pci soundcore drm_kms_helper pcmcia 8250 pcmcia_core syscopyarea usbcore sysfillrect 8250_base sysimgblt serial_mctrl_gpio fb_sys_fops usb_common pkcs8_key_parser fuse drm drm_panel_orientation_quirks configfs CPU: 0 PID: 126 Comm: systemd-udevd Not tainted 5.18.0-gentoo-PMacG4 #1 NIP: c02e5558 LR: c07eb3bc CTR: c07f46a8 REGS: e7fe9f50 TRAP: 0000 Not tainted (5.18.0-gentoo-PMacG4) MSR: 00001032 CR: 44a14824 XER: 20000000 GPR00: c07eb3bc eaa1c000 c26baea0 eaa1c0a0 00000008 00000000 c07eb3bc eaa1c010 GPR08: eaa1c0a8 04f3f3f3 f1f1f1f1 c07f4c84 44a14824 0080f7e4 00000005 00000010 GPR16: 00000025 eaa1c154 eaa1c158 c0dbad64 00000020 fd543810 eaa1c0a0 eaa1c29e GPR24: c0dbad44 c0db8740 05ffffff fd543802 eaa1c150 c0c9a3c0 eaa1c0a0 c0c9a3c0 NIP [c02e5558] kasan_check_range+0xc/0x2b4 LR [c07eb3bc] format_decode+0x80/0x604 Call Trace: [eaa1c000] [c07eb3bc] format_decode+0x80/0x604 (unreliable) [eaa1c070] [c07f4dac] vsnprintf+0x128/0x938 [eaa1c110] [c07f5788] sprintf+0xa0/0xc0 [eaa1c180] [c0154c1c] __sprint_symbol.constprop.0+0x170/0x198 [eaa1c230] [c07ee71c] symbol_string+0xf8/0x260 [eaa1c430] [c07f46d0] pointer+0x15c/0x710 [eaa1c4b0] [c07f4fbc] vsnprintf+0x338/0x938 [eaa1c550] [c00e8fa0] vprintk_store+0x2a8/0x678 [eaa1c690] [c00e94e4] vprintk_emit+0x174/0x378 [eaa1c6d0] [c00ea008] _printk+0x9c/0xc0 [eaa1c750] [c000ca94] show_stack+0x21c/0x260 [eaa1c7a0] [c07d0bd4] dump_stack_lvl+0x60/0x90 [eaa1c7c0] [c0009234] __do_IRQ+0x170/0x174 [eaa1c800] [c0009258] do_IRQ+0x20/0x34 [eaa1c820] [c00045b4] HardwareInterrupt_virt+0x108/0x10c ... As the detection is asynchronously performed at IRQs, we could be long after the limit has been crossed, so increasing the limit would not solve the problem completely. In order to be sure that there is enough stack space for the stack dump, do it after the switch to the IRQ stack. That way it is sure that the stack is large enough, unless the IRQ stack has been overfilled in which case the end of life is close. 
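
In rough outline, the reworked flow looks like this (a simplified
sketch of the diff below; the real code switches stacks in inline
assembly, and already_on_irq_stack() is a made-up stand-in for the
cursp == irqsp || cursp == sirqsp test):

	/* The overflow check now runs while already on the IRQ stack,
	 * but inspects the interrupted stack pointer (oldsp), so
	 * dump_stack() always has a full IRQ stack to work with. */
	static void __do_irq(struct pt_regs *regs, unsigned long oldsp)
	{
		check_stack_overflow(oldsp);
		/* ... query the PIC and handle the interrupt ... */
	}

	void __do_IRQ(struct pt_regs *regs)
	{
		if (already_on_irq_stack())
			__do_irq(regs, current_stack_pointer);
		else
			call_do_irq(regs, irqsp);	/* passes the old r1 in r4 */
	}
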
Reported-by: Erhard Furtner Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c215d714329f475b431a6193369035aadfc0d182.1654769775.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/irq.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 037db84026a1..56d165c186c7 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -185,14 +185,12 @@ u64 arch_irq_stat_cpu(unsigned int cpu) return sum; } -static inline void check_stack_overflow(void) +static inline void check_stack_overflow(unsigned long sp) { - long sp; - if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW)) return; - sp = current_stack_pointer & (THREAD_SIZE - 1); + sp &= THREAD_SIZE - 1; /* check for stack overflow: is there less than 1/4th free? */ if (unlikely(sp < THREAD_SIZE / 4)) { @@ -222,12 +220,14 @@ static __always_inline void call_do_softirq(const void *sp) DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq); -static void __do_irq(struct pt_regs *regs) +static void __do_irq(struct pt_regs *regs, unsigned long oldsp) { unsigned int irq; trace_irq_entry(regs); + check_stack_overflow(oldsp); + /* * Query the platform PIC for the interrupt & ack it. * @@ -255,6 +255,7 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ asm volatile ( PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r4, %%r1 ;" "mr %%r1, %[sp] ;" "bl %[callee] ;" PPC_LL " %%r1, 0(%%r1) ;" @@ -280,11 +281,9 @@ void __do_IRQ(struct pt_regs *regs) irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; - check_stack_overflow(); - /* Already there ? */ if (unlikely(cursp == irqsp || cursp == sirqsp)) { - __do_irq(regs); + __do_irq(regs, current_stack_pointer); set_irq_regs(old_regs); return; } -- cgit v1.2.3 From 78f1c24abd16952d383f34eefbb0af7bb53f9b0b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 Jun 2022 12:16:42 +0200 Subject: powerpc/irq: Simplify __do_irq() Remove duplicated code by implementing a proper if/else. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5a3b21311191f1240850db6ab29b19ac7885fe03.1654769775.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/irq.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 56d165c186c7..d50a18888bd9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -281,14 +281,11 @@ void __do_IRQ(struct pt_regs *regs) irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; - /* Already there ? */ - if (unlikely(cursp == irqsp || cursp == sirqsp)) { + /* Already there ? 
If not switch stack and call */ + if (unlikely(cursp == irqsp || cursp == sirqsp)) __do_irq(regs, current_stack_pointer); - set_irq_regs(old_regs); - return; - } - /* Switch stack and call */ - call_do_irq(regs, irqsp); + else + call_do_irq(regs, irqsp); set_irq_regs(old_regs); } -- cgit v1.2.3 From 3adfb457b84bd6de4e78a99814038fbd7205f253 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:55 +0200 Subject: powerpc/64e: Remove MMU_FTR_USE_TLBRSRV and MMU_FTR_USE_PAIRED_MAS Commit fb5a515704d7 ("powerpc: Remove platforms/wsp and associated pieces") removed the last CPU having features MMU_FTRS_A2 and commit cd68098bcedd ("powerpc: Clean up MMU_FTRS_A2 and MMU_FTR_TYPE_3E") removed MMU_FTRS_A2 which was the last user of MMU_FTR_USE_TLBRSRV and MMU_FTR_USE_PAIRED_MAS. Remove all code that relies on MMU_FTR_USE_TLBRSRV and MMU_FTR_USE_PAIRED_MAS. With this change done, TLB miss can happen before the mmu feature fixups. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cfd5a0ecdb1598da968832e1bddf7431ec267200.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu.h | 12 ------ arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/mm/nohash/book3e_hugetlbpage.c | 30 ++++--------- arch/powerpc/mm/nohash/tlb_low_64e.S | 66 ----------------------------- 4 files changed, 8 insertions(+), 101 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 5f41565a1e5d..860d0290ca4d 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -96,15 +96,6 @@ */ #define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000) -/* Enable use of TLB reservation. Processor should support tlbsrx. - * instruction and MAS0[WQ]. - */ -#define MMU_FTR_USE_TLBRSRV ASM_CONST(0x00800000) - -/* Use paired MAS registers (MAS7||MAS3, etc.) - */ -#define MMU_FTR_USE_PAIRED_MAS ASM_CONST(0x01000000) - /* Doesn't support the B bit (1T segment) in SLBIE */ #define MMU_FTR_NO_SLBIE_B ASM_CONST(0x02000000) @@ -180,9 +171,6 @@ enum { #ifdef CONFIG_PPC_83xx MMU_FTR_NEED_DTLB_SW_LRU | #endif -#ifdef CONFIG_PPC_BOOK3E_64 - MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | -#endif #ifdef CONFIG_PPC_BOOK3S_64 MMU_FTR_KERNEL_RO | #ifdef CONFIG_PPC_64S_HASH_MMU diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5761f08dae95..2b2d0b0fbb30 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -113,7 +113,6 @@ void __init setup_tlb_core_data(void) * Should we panic instead? */ WARN_ONCE(smt_enabled_at_boot >= 2 && - !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && book3e_htw_mode != PPC_HTW_E6500, "%s: unsupported MMU configuration\n", __func__); } diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c index 307ca919d393..c7d4b317a823 100644 --- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c +++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c @@ -103,21 +103,11 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) int found = 0; mtspr(SPRN_MAS6, pid << 16); - if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { - asm volatile( - "li %0,0\n" - "tlbsx. 
0,%1\n" - "bne 1f\n" - "li %0,1\n" - "1:\n" - : "=&r"(found) : "r"(ea)); - } else { - asm volatile( - "tlbsx 0,%1\n" - "mfspr %0,0x271\n" - "srwi %0,%0,31\n" - : "=&r"(found) : "r"(ea)); - } + asm volatile( + "tlbsx 0,%1\n" + "mfspr %0,0x271\n" + "srwi %0,%0,31\n" + : "=&r"(found) : "r"(ea)); return found; } @@ -169,13 +159,9 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte) mtspr(SPRN_MAS1, mas1); mtspr(SPRN_MAS2, mas2); - if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { - mtspr(SPRN_MAS7_MAS3, mas7_3); - } else { - if (mmu_has_feature(MMU_FTR_BIG_PHYS)) - mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); - mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); - } + if (mmu_has_feature(MMU_FTR_BIG_PHYS)) + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); + mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); asm volatile ("tlbwe"); diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 9e9ab3803fb2..a59485c549a7 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -152,16 +152,7 @@ tlb_miss_common_bolted: clrrdi r15,r15,3 beq tlb_miss_fault_bolted /* No PGDIR, bail */ -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) - ldx r14,r14,r15 /* grab pgd entry */ - beq tlb_miss_done_bolted /* tlb exists already, bail */ -MMU_FTR_SECTION_ELSE ldx r14,r14,r15 /* grab pgd entry */ -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 @@ -674,16 +665,7 @@ normal_tlb_miss: clrrdi r14,r14,3 or r10,r15,r14 -BEGIN_MMU_FTR_SECTION - /* Set the TLB reservation and search for existing entry. Then load - * the entry. - */ - PPC_TLBSRX_DOT(0,R16) ld r14,0(r10) - beq normal_tlb_miss_done -MMU_FTR_SECTION_ELSE - ld r14,0(r10) -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) finish_normal_tlb_miss: /* Check if required permissions are met */ @@ -727,13 +709,9 @@ finish_normal_tlb_miss: li r11,MAS3_SW|MAS3_UW andc r15,r15,r11 1: -BEGIN_MMU_FTR_SECTION srdi r16,r15,32 mtspr SPRN_MAS3,r15 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r15 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe @@ -809,13 +787,6 @@ virt_page_table_tlb_miss: #else 1: #endif -BEGIN_MMU_FTR_SECTION - /* Search if we already have a TLB entry for that virtual address, and - * if we do, bail out. - */ - PPC_TLBSRX_DOT(0,R16) - beq virt_page_table_tlb_miss_done -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Now, we need to walk the page tables. First check if we are in * range. @@ -866,41 +837,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) clrldi r11,r15,4 /* remove region ID from RPN */ ori r10,r11,1 /* Or-in SR */ -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe -BEGIN_MMU_FTR_SECTION -virt_page_table_tlb_miss_done: - - /* We have overridden MAS2:EPN but currently our primary TLB miss - * handler will always restore it so that should not be an issue, - * if we ever optimize the primary handler to not write MAS2 on - * some cases, we'll have to restore MAS2:EPN here based on the - * original fault's DEAR. If we do that we have to modify the - * ITLB miss handler to also store SRR0 in the exception frame - * as DEAR. - * - * However, one nasty thing we did is we cleared the reservation - * (well, potentially we did). 
We do a trick here thus if we - * are not a level 0 exception (we interrupted the TLB miss) we - * offset the return address by -4 in order to replay the tlbsrx - * instruction there - */ - subf r10,r13,r12 - cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE - bne- 1f - ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) - addi r10,r11,-4 - std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13) -1: -END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Return to caller, normal case */ TLB_MISS_EPILOG_SUCCESS rfi @@ -1115,13 +1057,9 @@ htw_tlb_miss: */ ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe @@ -1202,13 +1140,9 @@ tlb_load_linear: clrldi r10,r10,4 /* clear region bits */ ori r10,r10,MAS3_SR|MAS3_SW|MAS3_SX -BEGIN_MMU_FTR_SECTION srdi r16,r10,32 mtspr SPRN_MAS3,r10 mtspr SPRN_MAS7,r16 -MMU_FTR_SECTION_ELSE - mtspr SPRN_MAS7_MAS3,r10 -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) tlbwe -- cgit v1.2.3 From c7b9ed7c34a9f5dbf8222d63e3e313cef9f3150b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 28 Jun 2022 16:48:59 +0200 Subject: powerpc/64e: KASAN Full support for BOOK3E/64 We now have memory organised in a way that allows implementing KASAN. Unlike book3s/64, book3e always has translation active so the only thing needed to use KASAN is to setup an early zero shadow mapping just after setting a stack pointer and before calling early_setup(). The memory layout is now as follows +------------------------+ Kernel virtual map end (0xc000200000000000) | | | 16TB of KASAN map | | | +------------------------+ Kernel KASAN shadow map start | | | 16TB of IO map | | | +------------------------+ Kernel IO map start | | | 16TB of vmemmap | | | +------------------------+ Kernel vmemmap start | | | 16TB of vmap | | | +------------------------+ Kernel virt start (0xc000100000000000) | | | 64TB of linear mem | | | +------------------------+ Kernel linear (0xc.....) 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0bef8beda27baf71e3b9e8b13e620fba6e19499b.1656427701.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 + arch/powerpc/Kconfig.debug | 3 +- arch/powerpc/include/asm/kasan.h | 13 +++- arch/powerpc/kernel/head_64.S | 3 + arch/powerpc/mm/kasan/Makefile | 1 + arch/powerpc/mm/kasan/init_book3e_64.c | 133 +++++++++++++++++++++++++++++++++ arch/powerpc/mm/kasan/init_book3s_64.c | 2 + arch/powerpc/platforms/Kconfig.cputype | 1 - 8 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/mm/kasan/init_book3e_64.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c2ce2e60c8f0..92e0cad7752d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -193,6 +193,7 @@ config PPC select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN if PPC_RADIX_MMU + select HAVE_ARCH_KASAN if PPC_BOOK3E_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KFENCE if PPC_BOOK3S_32 || PPC_8xx || 40x select HAVE_ARCH_KGDB @@ -254,6 +255,7 @@ config PPC select IOMMU_HELPER if PPC64 select IRQ_DOMAIN select IRQ_FORCED_THREADING + select KASAN_VMALLOC if KASAN && MODULES select MMU_GATHER_PAGE_SIZE select MMU_GATHER_RCU_TABLE_FREE select MODULES_USE_ELF_RELA diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 9f363c143d86..6a8855d45af7 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -375,4 +375,5 @@ config KASAN_SHADOW_OFFSET hex depends on KASAN default 0xe0000000 if PPC32 - default 0xa80e000000000000 if PPC64 + default 0xa80e000000000000 if PPC_BOOK3S_64 + default 0xa8001c0000000000 if PPC_BOOK3E_64 diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index a6be4025cba2..92a968202ba7 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -19,7 +19,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32) #define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) #else #define KASAN_KERN_START PAGE_OFFSET @@ -39,6 +39,17 @@ * c00e000000000000 << 3 + a80e000000000000 = c00fc00000000000 */ #define KASAN_SHADOW_END 0xc00fc00000000000UL + +#else + +/* + * The shadow ends before the highest accessible address + * because we don't need a shadow for the shadow. + * But it doesn't hurt to have a shadow for the shadow, + * keep shadow end aligned eases things. + */ +#define KASAN_SHADOW_END 0xc000200000000000UL + #endif #ifdef CONFIG_KASAN diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index d3eea633d11a..cf2c08902c05 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -965,6 +965,9 @@ start_here_multiplatform: * and SLB setup before we turn on relocation. 
*/ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif /* Restore parameters passed from prom_init/kexec */ mr r3,r31 LOAD_REG_ADDR(r12, DOTSYM(early_setup)) diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile index 4999aadb1867..699eeffd9f55 100644 --- a/arch/powerpc/mm/kasan/Makefile +++ b/arch/powerpc/mm/kasan/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_PPC32) += init_32.o obj-$(CONFIG_PPC_8xx) += 8xx.o obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o obj-$(CONFIG_PPC_BOOK3S_64) += init_book3s_64.o +obj-$(CONFIG_PPC_BOOK3E_64) += init_book3e_64.o diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c new file mode 100644 index 000000000000..11519e88dc6b --- /dev/null +++ b/arch/powerpc/mm/kasan/init_book3e_64.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KASAN for 64-bit Book3e powerpc + * + * Copyright 2022, Christophe Leroy, CS GROUP France + */ + +#define DISABLE_BRANCH_PROFILING + +#include +#include +#include +#include + +#include + +static inline bool kasan_pud_table(p4d_t p4d) +{ + return p4d_page(p4d) == virt_to_page(lm_alias(kasan_early_shadow_pud)); +} + +static inline bool kasan_pmd_table(pud_t pud) +{ + return pud_page(pud) == virt_to_page(lm_alias(kasan_early_shadow_pmd)); +} + +static inline bool kasan_pte_table(pmd_t pmd) +{ + return pmd_page(pmd) == virt_to_page(lm_alias(kasan_early_shadow_pte)); +} + +static int __init kasan_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = pgd_offset_k(ea); + p4dp = p4d_offset(pgdp, ea); + if (kasan_pud_table(*p4dp)) { + pudp = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); + memcpy(pudp, kasan_early_shadow_pud, PUD_TABLE_SIZE); + p4d_populate(&init_mm, p4dp, pudp); + } + pudp = pud_offset(p4dp, ea); + if (kasan_pmd_table(*pudp)) { + pmdp = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); + memcpy(pmdp, kasan_early_shadow_pmd, PMD_TABLE_SIZE); + pud_populate(&init_mm, pudp, pmdp); + } + pmdp = pmd_offset(pudp, ea); + if (kasan_pte_table(*pmdp)) { + ptep = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); + memcpy(ptep, kasan_early_shadow_pte, PTE_TABLE_SIZE); + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + ptep = pte_offset_kernel(pmdp, ea); + + __set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot), 0); + + return 0; +} + +static void __init kasan_init_phys_region(void *start, void *end) +{ + unsigned long k_start, k_end, k_cur; + void *va; + + if (start >= end) + return; + + k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE); + k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE); + + va = memblock_alloc(k_end - k_start, PAGE_SIZE); + for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE) + kasan_map_kernel_page(k_cur, __pa(va), PAGE_KERNEL); +} + +void __init kasan_early_init(void) +{ + int i; + unsigned long addr; + pgd_t *pgd = pgd_offset_k(KASAN_SHADOW_START); + pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL); + + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); + + for (i = 0; i < PTRS_PER_PTE; i++) + __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, + &kasan_early_shadow_pte[i], zero_pte, 0); + + for (i = 0; i < PTRS_PER_PMD; i++) + pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i], + kasan_early_shadow_pte); + + for (i = 0; i < PTRS_PER_PUD; i++) + pud_populate(&init_mm, 
&kasan_early_shadow_pud[i], + kasan_early_shadow_pmd); + + for (addr = KASAN_SHADOW_START; addr != KASAN_SHADOW_END; addr += PGDIR_SIZE) + p4d_populate(&init_mm, p4d_offset(pgd++, addr), kasan_early_shadow_pud); +} + +void __init kasan_init(void) +{ + phys_addr_t start, end; + u64 i; + pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO); + + for_each_mem_range(i, &start, &end) + kasan_init_phys_region((void *)start, (void *)end); + + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) + kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE); + + for (i = 0; i < PTRS_PER_PTE; i++) + __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, + &kasan_early_shadow_pte[i], zero_pte, 0); + + flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + + /* Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KASAN init done\n"); +} + +void __init kasan_late_init(void) { } diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c index 0da5566d6b84..9300d641cf9a 100644 --- a/arch/powerpc/mm/kasan/init_book3s_64.c +++ b/arch/powerpc/mm/kasan/init_book3s_64.c @@ -99,4 +99,6 @@ void __init kasan_init(void) pr_info("KASAN init done\n"); } +void __init kasan_early_init(void) { } + void __init kasan_late_init(void) { } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9e2df4b66478..383ed4fe6013 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -2,7 +2,6 @@ config PPC32 bool default y if !PPC64 - select KASAN_VMALLOC if KASAN && MODULES config PPC64 bool "64-bit kernel" -- cgit v1.2.3 From 3f8ed993be3cf154a91d9ab5e80470c6c755adbe Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:51 -0300 Subject: KVM: PPC: Book3S HV: Add a new config for P8 debug timing Turn the existing Kconfig KVM_BOOK3S_HV_EXIT_TIMING into KVM_BOOK3S_HV_P8_TIMING in preparation for the addition of a new config for P9 timings. This applies only to P8 code, the generic timing code is still kept under KVM_BOOK3S_HV_EXIT_TIMING. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-3-farosas@linux.ibm.com --- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/Kconfig | 6 +++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 24 ++++++++++++------------ 3 files changed, 18 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index eec536aef83a..8c10f536e478 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -379,7 +379,7 @@ int main(void) OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2); OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3); #endif -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry); OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr); OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index ddd88179110a..73f8277df7d1 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -128,7 +128,11 @@ config KVM_BOOK3S_64_PR and system calls on the host. 
config KVM_BOOK3S_HV_EXIT_TIMING - bool "Detailed timing for hypervisor real-mode code" + bool + +config KVM_BOOK3S_HV_P8_TIMING + bool "Detailed timing for hypervisor real-mode code (for POWER8)" + select KVM_BOOK3S_HV_EXIT_TIMING depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS help Calculate time taken for each vcpu in the real-mode guest entry, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 0fc0e68d20d0..7ded202bf995 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -237,14 +237,14 @@ kvm_novcpu_wakeup: cmpdi r4, 0 beq kvmppc_primary_no_guest -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMENTRY bl kvmhv_start_timing #endif b kvmppc_got_guest kvm_novcpu_exit: -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING ld r4, HSTATE_KVM_VCPU(r13) cmpdi r4, 0 beq 13f @@ -523,7 +523,7 @@ kvmppc_hv_entry: li r6, KVM_GUEST_MODE_HOST_HV stb r6, HSTATE_IN_GUEST(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Store initial timestamp */ cmpdi r4, 0 beq 1f @@ -894,7 +894,7 @@ fast_guest_return: li r9, KVM_GUEST_MODE_GUEST_HV stb r9, HSTATE_IN_GUEST(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Accumulate timing */ addi r3, r4, VCPU_TB_GUEST bl kvmhv_accumulate_time @@ -945,7 +945,7 @@ secondary_too_late: cmpdi r4, 0 beq 11f stw r12, VCPU_TRAP(r4) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif @@ -959,7 +959,7 @@ hdec_soon: li r12, BOOK3S_INTERRUPT_HV_DECREMENTER 12: stw r12, VCPU_TRAP(r4) mr r9, r4 -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif @@ -1056,7 +1056,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) li r0, MSR_RI mtmsrd r0, 1 -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r9, VCPU_TB_RMINTR mr r4, r9 bl kvmhv_accumulate_time @@ -1135,7 +1135,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r9, VCPU_TB_RMEXIT mr r4, r9 bl kvmhv_accumulate_time @@ -1495,7 +1495,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_LPCR,r8 isync -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Finish timing, if we have a vcpu */ ld r4, HSTATE_KVM_VCPU(r13) cmpdi r4, 0 @@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) ld r4, HSTATE_KVM_VCPU(r13) std r3, VCPU_DEC_EXPIRES(r4) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING ld r4, HSTATE_KVM_VCPU(r13) addi r3, r4, VCPU_TB_CEDE bl kvmhv_accumulate_time @@ -2221,7 +2221,7 @@ kvm_end_cede: /* get vcpu pointer */ ld r4, HSTATE_KVM_VCPU(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMINTR bl kvmhv_accumulate_time #endif @@ -2961,7 +2961,7 @@ kvmppc_fix_pmao: isync blr -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* * Start timing an activity * r3 = pointer to time accumulation struct, r4 = vcpu -- cgit v1.2.3 From 2a83afe72a2b5760155c2dd840c776aee292dc90 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 31 May 2022 16:59:36 +1000 Subject: powerpc/64: Drop ppc_inst_as_str() The 
ppc_inst_as_str() macro tries to make printing variable length, aka "prefixed", instructions convenient. It mostly succeeds, but it does hide an on-stack buffer, which triggers stack protector. More problematically it doesn't compile at all with GCC 12, with -Wdangling-pointer, due to the fact that it returns the char buffer declared inside the macro: arch/powerpc/kernel/trace/ftrace.c: In function '__ftrace_modify_call': ./include/linux/printk.h:475:44: error: using a dangling pointer to '__str' [-Werror=dangling-pointer=] 475 | #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) ... arch/powerpc/kernel/trace/ftrace.c:567:17: note: in expansion of macro 'pr_err' 567 | pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); | ^~~~~~ ./arch/powerpc/include/asm/inst.h:156:14: note: '__str' declared here 156 | char __str[PPC_INST_STR_LEN]; \ | ^~~~~ This could be fixed by having the caller declare the buffer, but in some places there'd need to be two buffers. In all cases where ppc_inst_as_str() is used the output is not really meant for user consumption, it's almost always indicative of a kernel bug. A simpler solution is to just print the value as an unsigned long. For normal instructions the output is identical. For prefixed instructions the value is printed as a single 64-bit quantity, whereas previously the low half was printed first. But that is good enough for debug output, especially as prefixed instructions will be rare in kernel code in practice. Old: c000000000111170 60420000 ori r2,r2,0 c000000000111174 04100001 e580fb00 .long 0xe580fb0004100001 New: c00000000010f90c 60420000 ori r2,r2,0 c00000000010f910 e580fb0004100001 .long 0xe580fb0004100001 Reported-by: Bagas Sanjaya Reported-by: Petr Mladek Signed-off-by: Michael Ellerman Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20220531065936.3674348-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/inst.h | 19 ------------------- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/trace/ftrace.c | 24 +++++++++++++----------- arch/powerpc/lib/test_emulate_step.c | 6 +++--- arch/powerpc/xmon/xmon.c | 2 +- 5 files changed, 18 insertions(+), 35 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index b49aae9f6f27..684d3f453282 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -139,25 +139,6 @@ static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x) *(u64 *)ptr = ppc_inst_as_ulong(x); } -#define PPC_INST_STR_LEN sizeof("00000000 00000000") - -static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x) -{ - if (ppc_inst_prefixed(x)) - sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x)); - else - sprintf(str, "%08x", ppc_inst_val(x)); - - return str; -} - -#define ppc_inst_as_str(x) \ -({ \ - char __str[PPC_INST_STR_LEN]; \ - __ppc_inst_as_str(__str, x); \ - __str; \ -}) - static inline int __copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) { unsigned int val, suffix; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 1c97c0f177ae..912d4f8a13be 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -269,7 +269,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... but, its still * good to catch them, just in case... 
*/ - printk("Can't step on instruction %s\n", ppc_inst_as_str(insn)); + printk("Can't step on instruction %08lx\n", ppc_inst_as_ulong(insn)); BUG(); } else { /* diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2a893e06e4f1..cab67b5120b9 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -69,8 +69,8 @@ ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) /* Make sure it is what we expect it to be */ if (!ppc_inst_equal(replaced, old)) { - pr_err("%p: replaced (%s) != old (%s)", - (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old)); + pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip, + ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old)); return -EINVAL; } @@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; } @@ -159,8 +159,8 @@ __ftrace_make_nop(struct module *mod, /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { - pr_err("Unexpected instruction %s around bl _mcount\n", - ppc_inst_as_str(op)); + pr_err("Unexpected instruction %08lx around bl _mcount\n", + ppc_inst_as_ulong(op)); return -EINVAL; } } else if (IS_ENABLED(CONFIG_PPC64)) { @@ -174,7 +174,8 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); + pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC, + ppc_inst_as_ulong(op)); return -EINVAL; } } @@ -312,7 +313,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; } @@ -416,8 +417,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { - pr_err("Unexpected call sequence at %p: %s %s\n", - ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1])); + pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip, + ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1])); return -EINVAL; } @@ -486,7 +487,8 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { - pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); + pr_err("Unexpected call sequence at %p: %08lx\n", + ip, ppc_inst_as_ulong(op)); return -EINVAL; } @@ -564,7 +566,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; } diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 4f141daafcff..f2e47be05e8c 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -1616,11 +1616,11 @@ static int __init emulate_compute_instr(struct pt_regs *regs, if (analysed != 1 || GETTYPE(op.type) != COMPUTE) { if (negative) return 
-EFAULT; - pr_info("emulation failed, instruction = %s\n", ppc_inst_as_str(instr)); + pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } if (analysed == 1 && negative) - pr_info("negative test failed, instruction = %s\n", ppc_inst_as_str(instr)); + pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); if (!negative) emulate_update_regs(regs, &op); return 0; @@ -1637,7 +1637,7 @@ static int __init execute_compute_instr(struct pt_regs *regs, /* Patch the NOP with the actual instruction */ patch_instruction_site(&patch__exec_instr, instr); if (exec_instr(regs)) { - pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr)); + pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr)); return -EFAULT; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 3d9782ea3fa7..f80c714f1d49 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3047,7 +3047,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %s", adr, ppc_inst_as_str(inst)); + printf(REG" %08lx", adr, ppc_inst_as_ulong(inst)); printf("\t"); if (!ppc_inst_prefixed(inst)) dump_func(ppc_inst_val(inst), adr); -- cgit v1.2.3 From 2b461880c20777d317b4ad24ef040918860133ca Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 18 Jul 2022 19:51:58 +1000 Subject: powerpc: Fix all occurences of duplicate words Since commit 87c78b612f4f ("powerpc: Fix all occurences of "the the"") fixed "the the", there's now a steady stream of patches fixing other duplicate words. Just fix them all at once, to save the overhead of dealing with individual patches for each case. This leaves a few cases of "that that", which in some contexts is correct. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220718095158.326606-1-mpe@ellerman.id.au --- arch/powerpc/crypto/aes-spe-glue.c | 2 +- arch/powerpc/kernel/btext.c | 2 +- arch/powerpc/kernel/eeh_driver.c | 2 +- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kernel/pci-common.c | 2 +- arch/powerpc/kernel/pci_dn.c | 2 +- arch/powerpc/kernel/ptrace/ptrace-vsx.c | 2 +- arch/powerpc/kernel/trace/ftrace.c | 6 +++--- arch/powerpc/kernel/watchdog.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 2 +- arch/powerpc/platforms/4xx/cpm.c | 2 +- arch/powerpc/platforms/powernv/vas-fault.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- 15 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index e8dfe9fb0266..efab78a3a8f6 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -28,7 +28,7 @@ * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32 * bit unit (SU2). One of these can be a memory access that is executed via * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per - * 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data + * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data * will need an estimated maximum of 20,000 cycles. Headroom for cache misses * included. Even with the low end model clocked at 667 MHz this equals to a * critical time window of less than 30us. 
The value has been chosen to diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 8f69bb07e500..2769889219bf 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -73,7 +73,7 @@ static inline void rmci_maybe_off(void) * the display during identify_machine() and MMU_Init() * * The display is mapped to virtual address 0xD0000000, rather - * than 1:1, because some some CHRP machines put the frame buffer + * than 1:1, because some CHRP machines put the frame buffer * in the region starting at 0xC0000000 (PAGE_OFFSET). * This mapping is temporary and will disappear as soon as the * setup done by MMU_Init() is applied. diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 260273e56431..f279295179bd 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -750,7 +750,7 @@ static void eeh_pe_cleanup(struct eeh_pe *pe) * @pdev: pci_dev to check * * This function may return a false positive if we can't determine the slot's - * presence state. This might happen for for PCIe slots if the PE containing + * presence state. This might happen for PCIe slots if the PE containing * the upstream bridge is also frozen, or the bridge is part of the same PE * as the device. * diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b66dd6f775a4..3d0dc133a9ae 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2779,7 +2779,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: - * - If it was a decrementer interrupt, we bump the dec to max and and return. + * - If it was a decrementer interrupt, we bump the dec to max and return. * - If it was a doorbell we return immediately since doorbells are edge * triggered and won't automatically refire. * - If it was a HMI we return immediately since we handled it in realmode diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index c87999289752..cbb912ee92fa 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1088,7 +1088,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) */ pci_read_bridge_bases(bus); - /* Now fixup the bus bus */ + /* Now fixup the bus */ pcibios_setup_bus_self(bus); } EXPORT_SYMBOL(pcibios_fixup_bus); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 938ab8838ab5..7a35fc25a304 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -259,7 +259,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev) if (edev) { /* * We allocate pci_dn's for the totalvfs count, - * but only only the vfs that were activated + * but only the vfs that were activated * have a configured PE. */ if (edev->pe) diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c index 1da4303128ef..7df08004c47d 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-vsx.c +++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c @@ -71,7 +71,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset, } /* - * Currently to set and and get all the vsx state, you need to call + * Currently to set and get all the vsx state, you need to call * the fp and VMX calls as well. This only get/sets the lower 32 * 128bit VSX registers. 
*/ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index cab67b5120b9..cb158c32b50b 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -125,7 +125,7 @@ __ftrace_make_nop(struct module *mod, return -EFAULT; } - /* Make sure that that this is still a 24bit jump */ + /* Make sure that this is still a 24bit jump */ if (!is_bl_op(op)) { pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; @@ -311,7 +311,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; } - /* Make sure that that this is still a 24bit jump */ + /* Make sure that this is still a 24bit jump */ if (!is_bl_op(op)) { pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; @@ -564,7 +564,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return -EFAULT; } - /* Make sure that that this is still a 24bit jump */ + /* Make sure that this is still a 24bit jump */ if (!is_bl_op(op)) { pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); return -EINVAL; diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 7d28b9553654..dd882d00845d 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -353,7 +353,7 @@ static void watchdog_timer_interrupt(int cpu) if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) { /* * Something has called printk from NMI context. It might be - * stuck, so this this triggers a flush that will get that + * stuck, so this triggers a flush that will get that * printk output to the console. * * See wd_lockup_ipi. diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c68883170a82..529b6c6895bf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5665,7 +5665,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) else kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); - /* invalidate the entry (what do do on error from the above ?) */ + /* invalidate the entry (what to do on error from the above ?) */ pimap->mapped[i].r_hwirq = 0; /* diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index fc92613dc2bf..363a9447d63d 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -408,7 +408,7 @@ repeat: ssize); if (ret == -1) { /* - * Try to to keep bolted entries in primary. + * Try to keep bolted entries in primary. * Remove non bolted entries and try insert again */ ret = mmu_hash_ops.hpte_remove(hpteg); diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index 1d3bc35ee1a7..182e12855c27 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -63,7 +63,7 @@ static unsigned int cpm_set(unsigned int cpm_reg, unsigned int mask) * known as class 1, 2 and 3. For class 1 units, they are * unconditionally put to sleep when the corresponding CPM bit is * set. For class 2 and 3 units this is not case; if they can be - * put to to sleep, they will. Here we do not verify, we just + * put to sleep, they will. Here we do not verify, we just * set them and expect them to eventually go off when they can. 
*/ value = dcr_read(cpm.dcr_host, cpm.dcr_offset[cpm_reg]); diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index c1bfad56447d..2b47d5a86328 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -77,7 +77,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) /* * VAS can interrupt with multiple page faults. So process all * valid CRBs within fault FIFO until reaches invalid CRB. - * We use CCW[0] and pswid to validate validate CRBs: + * We use CCW[0] and pswid to validate CRBs: * * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 * OS sets this bit to 1 after reading CRB. diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1b0c901a6f3b..8e40ccac0f44 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -71,7 +71,7 @@ static void pseries_pcibios_bus_add_device(struct pci_dev *pdev) if (pdev->is_virtfn) { /* * FIXME: This really should be handled by choosing the right - * parent PE in in pseries_eeh_init_edev(). + * parent PE in pseries_eeh_init_edev(). */ struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 69f21d30394b..20f6ed813bff 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -29,7 +29,7 @@ (1ul << ND_CMD_SET_CONFIG_DATA) | \ (1ul << ND_CMD_CALL)) -/* DIMM health bitmap bitmap indicators */ +/* DIMM health bitmap indicators */ /* SCM device is unable to persist memory contents */ #define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) /* SCM device failed to persist memory contents */ -- cgit v1.2.3 From f5e74e836097d1004077390717d4bd95d4a2c27a Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 13 Jul 2022 17:47:28 +0200 Subject: powerpc/watchdog: introduce a NMI watchdog's factor Introduce a factor which would apply to the NMI watchdog timeout. This factor is a percentage added to the watchdog_tresh value. The value is set under the watchdog_mutex protection and lockup_detector_reconfigure() is called to recompute wd_panic_timeout_tb. Once the factor is set, it remains until it is set back to 0, which means no impact. 
Signed-off-by: Laurent Dufour Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220713154729.80789-4-ldufour@linux.ibm.com --- arch/powerpc/include/asm/nmi.h | 2 ++ arch/powerpc/kernel/watchdog.c | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index ea0e487f87b1..c3c7adef74de 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -5,8 +5,10 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); long soft_nmi_interrupt(struct pt_regs *regs); +void watchdog_nmi_set_timeout_pct(u64 pct); #else static inline void arch_touch_nmi_watchdog(void) {} +static inline void watchdog_nmi_set_timeout_pct(u64 pct) {} #endif #ifdef CONFIG_NMI_IPI diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index dd882d00845d..dbcc4a793f0b 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -91,6 +91,10 @@ static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; static u64 wd_smp_last_reset_tb; +#ifdef CONFIG_PPC_PSERIES +static u64 wd_timeout_pct; +#endif + /* * Try to take the exclusive watchdog action / NMI IPI / printing lock. * wd_smp_lock must be held. If this fails, we should return and wait @@ -527,7 +531,13 @@ static int stop_watchdog_on_cpu(unsigned int cpu) static void watchdog_calc_timeouts(void) { - wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq; + u64 threshold = watchdog_thresh; + +#ifdef CONFIG_PPC_PSERIES + threshold += (READ_ONCE(wd_timeout_pct) * threshold) / 100; +#endif + + wd_panic_timeout_tb = threshold * ppc_tb_freq; /* Have the SMP detector trigger a bit later */ wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2; @@ -570,3 +580,12 @@ int __init watchdog_nmi_probe(void) } return 0; } + +#ifdef CONFIG_PPC_PSERIES +void watchdog_nmi_set_timeout_pct(u64 pct) +{ + pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct); + WRITE_ONCE(wd_timeout_pct, pct); + lockup_detector_reconfigure(); +} +#endif -- cgit v1.2.3 From a2954a7e47b60bb8d8c51f1b4d55af7585a4108a Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:04 +0200 Subject: powerpc/pci: Hide pci_device_from_OF_node() for non-powermac code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function pci_device_from_OF_node() is used only in powermac code. So hide it from all other platforms. 
Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-2-pali@kernel.org --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/pci_32.c | 2 ++ arch/powerpc/kernel/pci_64.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index c85f901227c9..98156932a1f5 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -170,8 +170,10 @@ static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus) return bus->sysdata; } +#ifdef CONFIG_PPC_PMAC extern int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn); +#endif #ifndef CONFIG_PPC64 extern void pci_create_OF_bus_map(void); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 5a174936c9a0..c3b91fb62a71 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -154,6 +154,7 @@ pcibios_make_OF_bus_map(void) } +#ifdef CONFIG_PPC_PMAC /* * Returns the PCI device matching a given OF node */ @@ -193,6 +194,7 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) return -ENODEV; } EXPORT_SYMBOL(pci_device_from_OF_node); +#endif /* We create the "pci-OF-bus-map" property now so it appears in the * /proc device tree diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 19b03ddf5631..0c7cfb9fab04 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -286,6 +286,7 @@ int pcibus_to_node(struct pci_bus *bus) EXPORT_SYMBOL(pcibus_to_node); #endif +#ifdef CONFIG_PPC_PMAC int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) { if (!PCI_DN(np)) @@ -294,3 +295,4 @@ int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) *devfn = PCI_DN(np)->devfn; return 0; } +#endif -- cgit v1.2.3 From 407a767182d3dc87176aabddd7c109b36a5727e9 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:05 +0200 Subject: powerpc/pci: Make pcibios_make_OF_bus_map() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function pcibios_make_OF_bus_map() is used only in pci_32.c. So make it static. Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-3-pali@kernel.org --- arch/powerpc/kernel/pci_32.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index c3b91fb62a71..df981294df29 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -36,8 +36,6 @@ int pcibios_assign_bus_offset = 1; EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(pci_dram_offset); -void __init pcibios_make_OF_bus_map(void); - static void fixup_cpc710_pci64(struct pci_dev* dev); static u8* pci_to_OF_bus_map; @@ -108,7 +106,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus) } } -void __init +static void __init pcibios_make_OF_bus_map(void) { int i; -- cgit v1.2.3 From 704544588735b2e1115212dbba1c210b3687ff7a Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:06 +0200 Subject: powerpc/pci: Hide pci_create_OF_bus_map() for non-chrp code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function pci_create_OF_bus_map() is used only in chrp code. So hide it from all other platforms. 
Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-4-pali@kernel.org --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/pci_32.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 98156932a1f5..e18c95f4e1d4 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -176,7 +176,9 @@ extern int pci_device_from_OF_node(struct device_node *node, #endif #ifndef CONFIG_PPC64 +#ifdef CONFIG_PPC_CHRP extern void pci_create_OF_bus_map(void); +#endif #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index df981294df29..3291af89cea4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -194,6 +194,7 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) EXPORT_SYMBOL(pci_device_from_OF_node); #endif +#ifdef CONFIG_PPC_CHRP /* We create the "pci-OF-bus-map" property now so it appears in the * /proc device tree */ @@ -218,6 +219,7 @@ pci_create_OF_bus_map(void) of_node_put(dn); } } +#endif void pcibios_setup_phb_io_space(struct pci_controller *hose) { -- cgit v1.2.3 From 7f102d61983275ab68b5ac2715afa35c5f4ffd86 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:07 +0200 Subject: powerpc/pci: Disable filling pci-OF-bus-map for non-chrp/powermac MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creating or filling pci-OF-bus-map property in the device-tree is deprecated since May 2006 [1] and was used only in old platforms like PowerMac. Currently kernel code handles it only for chrp and powermac code. So completely disable filling pci-OF-bus-map property for non-chrp and non-powermac platforms. [1] - https://lore.kernel.org/linuxppc-dev/1148016268.13249.14.camel@localhost.localdomain/ Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-5-pali@kernel.org --- arch/powerpc/kernel/pci_32.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 3291af89cea4..2f7284b68f06 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -37,15 +37,12 @@ EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(pci_dram_offset); static void fixup_cpc710_pci64(struct pci_dev* dev); -static u8* pci_to_OF_bus_map; /* By default, we don't re-assign bus numbers. We do this only on * some pmacs */ static int pci_assign_all_buses; -static int pci_bus_count; - /* This will remain NULL for now, until isa-bridge.c is made common * to both 32-bit and 64-bit. */ @@ -65,6 +62,11 @@ fixup_cpc710_pci64(struct pci_dev* dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64); +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) + +static u8* pci_to_OF_bus_map; +static int pci_bus_count; + /* * Functions below are used on OpenFirmware machines. 
*/ @@ -221,6 +223,8 @@ pci_create_OF_bus_map(void) } #endif +#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) */ + void pcibios_setup_phb_io_space(struct pci_controller *hose) { unsigned long io_offset; @@ -252,6 +256,8 @@ static int __init pcibios_init(void) if (pci_assign_all_buses || next_busno <= hose->last_busno) next_busno = hose->last_busno + pcibios_assign_bus_offset; } + +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) pci_bus_count = next_busno; /* OpenFirmware based machines need a map of OF bus @@ -260,6 +266,7 @@ static int __init pcibios_init(void) */ if (pci_assign_all_buses) pcibios_make_OF_bus_map(); +#endif /* Call common code to handle resource allocation */ pcibios_resource_survey(); -- cgit v1.2.3 From 5663568130825458a2a8535ccef0db9a1bf7be82 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:43:08 +0200 Subject: powerpc/pci: Add config option for using all 256 PCI buses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default on PPC32 PCI bus numbers are unique across all PCI domains. So a system could have only 256 PCI buses independently of available PCI domains. This is due to filling DT property pci-OF-bus-map which does not support a multi-domain setup. On all powerpc platforms except chrp and powermac there is no DT property pci-OF-bus-map anymore and therefore it is possible on non-chrp/powermac platforms to avoid this limitation of maximum number of 256 PCI buses in a system even on multi-domain setup. But avoiding this limitation would mean that all PCI and PCIe devices would be present on completely different BDF addresses as every PCI domain starts numbering PCI bueses from zero (instead of the last bus number of previous enumerated PCI domain). Such change could break existing software which expects fixed PCI bus numbers. So add a new config option CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT which enables this change. By default it is disabled. It causes the initial value of hose->first_busno to be zero. Signed-off-by: Pali Rohár [mpe: Minor change log wording] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706104308.5390-6-pali@kernel.org --- arch/powerpc/Kconfig | 11 +++++++++++ arch/powerpc/kernel/pci_32.c | 6 ++++++ 2 files changed, 17 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c76d499e2aa3..04499f22d06b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -375,6 +375,17 @@ config PPC_DCR depends on PPC_DCR_NATIVE || PPC_DCR_MMIO default y +config PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT + depends on PPC32 + depends on !PPC_PMAC && !PPC_CHRP + bool "Assign PCI bus numbers from zero individually for each PCI domain" + help + By default on PPC32 were PCI bus numbers unique across all PCI domains. + So system could have only 256 PCI buses independently of available + PCI domains. When this option is enabled then PCI bus numbers are + PCI domain dependent and each PCI controller on own domain can have + 256 PCI buses, like it is on other Linux architectures. 
+ config PPC_OF_PLATFORM_PCI bool depends on PCI diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 2f7284b68f06..433965bf37b4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -239,7 +239,9 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose) static int __init pcibios_init(void) { struct pci_controller *hose, *tmp; +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT int next_busno = 0; +#endif printk(KERN_INFO "PCI: Probing PCI hardware\n"); @@ -248,13 +250,17 @@ static int __init pcibios_init(void) /* Scan all of the recorded PCI controllers. */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT if (pci_assign_all_buses) hose->first_busno = next_busno; +#endif hose->last_busno = 0xff; pcibios_scan_phb(hose); pci_bus_add_devices(hose->bus); +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT if (pci_assign_all_buses || next_busno <= hose->last_busno) next_busno = hose->last_busno + pcibios_assign_bus_offset; +#endif } #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) -- cgit v1.2.3 From 28f07fab26319dacc5675ae01dfc84d82122c59b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 20 May 2022 22:36:49 +1000 Subject: powerpc/vdso: Fix __kernel_sync_dicache sequence with coherent icache Processors with coherent icache require the sequence sync ; icbi ; isync to entire store->execute coherency. icbi (to any address) must be executed to ensure isync flushes the pipeline. See "POWER9 Processor User's Manual, 4.6.2.2 Instruction Cache Block Invalidate (icbi)" for details. __kernel_sync_dicache is missing icbi for the coherent icache path. Add it. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220520123649.258440-1-npiggin@gmail.com --- arch/powerpc/kernel/vdso/cacheflush.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index d4e43ab2d5df..0085ae464dac 100644 --- a/arch/powerpc/kernel/vdso/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -91,6 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) 3: crclr cr0*4+so sync + icbi 0,r1 isync li r3,0 blr -- cgit v1.2.3 From f57261e69825b332f22480b37a33e03d066c0da2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 11 Jul 2022 13:06:52 +1000 Subject: powerpc/mce: use early_cpu_to_node() in mce_init() cpu_to_node() is not yet available (setup_arch() is called before setup_per_cpu_areas() by start_kernel()). 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220711030653.150950-1-npiggin@gmail.com --- arch/powerpc/kernel/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 18173199b79d..6c5d30fba766 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -756,7 +756,7 @@ void __init mce_init(void) mce_info = memblock_alloc_try_nid(sizeof(*mce_info), __alignof__(*mce_info), MEMBLOCK_LOW_LIMIT, - limit, cpu_to_node(i)); + limit, early_cpu_to_node(i)); if (!mce_info) goto err; paca_ptrs[i]->mce_info = mce_info; -- cgit v1.2.3 From 2a0fb3c155c97c75176e557d61f8e66c1bd9b735 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 14 Jun 2022 12:34:09 +0200 Subject: powerpc/32: Set an IBAT covering up to _einittext during init Always set an IBAT covering up to _einittext during init because when CONFIG_MODULES is not selected there is no reason to have an exception handler for kernel instruction TLB misses. It implies DBAT and IBAT are now totaly independent, IBATs are set by setibat() and DBAT by setbat(). This allows to revert commit 9bb162fa26ed ("powerpc/603: Fix boot failure with DEBUG_PAGEALLOC and KFENCE") Reported-by: Maxime Bizon Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ce7f04a39593934d9b1ee68c69144ccd3d4da4a1.1655202804.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 4 ++-- arch/powerpc/mm/book3s32/mmu.c | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 6c739beb938c..519b60695167 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -418,14 +418,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +#ifdef CONFIG_MODULES lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +#ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 1794132db31e..a96b73006dfb 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -159,7 +159,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; + unsigned long size; + size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET); + setibat(0, PAGE_OFFSET, 0, size, PAGE_KERNEL_X); if (debug_pagealloc_enabled_or_kfence()) { pr_debug_once("Read-Write memory mapped without BATs\n"); @@ -245,10 +248,9 @@ void mmu_mark_rodata_ro(void) } /* - * Set up one of the I/D BAT (block address translation) register pairs. + * Set up one of the D BAT (block address translation) register pairs. * The parameters are not checked; in particular size must be a power * of 2 between 128k and 256M. 
- * On 603+, only set IBAT when _PAGE_EXEC is set */ void __init setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot) @@ -284,10 +286,6 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* G bit must be zero in IBATs */ flags &= ~_PAGE_EXEC; } - if (flags & _PAGE_EXEC) - bat[0] = bat[1]; - else - bat[0].batu = bat[0].batl = 0; bat_addrs[index].start = virt; bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; -- cgit v1.2.3 From 62ccae78820b25a0ac64bb0f648388ec834fcb3c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 7 Jul 2022 16:37:18 +0200 Subject: powerpc: Remove remaining parts of oprofile Commit 9850b6c69356 ("arch: powerpc: Remove oprofile") removed oprofile. Remove all remaining parts of it. Signed-off-by: Christophe Leroy Acked-by: Viresh Kumar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/298432fe1a14c0a415760011d72c3f0999efd5e2.1657204631.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 3 -- arch/powerpc/kernel/cputable.c | 67 +------------------------------ arch/powerpc/kernel/dt_cpu_ftrs.c | 4 -- arch/powerpc/platforms/cell/spufs/spufs.h | 2 +- arch/powerpc/platforms/ps3/Kconfig | 2 +- 5 files changed, 4 insertions(+), 74 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 549eb6dd146f..ae8c3e13cfce 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -70,9 +70,6 @@ struct cpu_spec { /* Used to restore cpu setup on secondary processors and at resume */ cpu_restore_t cpu_restore; - /* Used by oprofile userspace to select the right counters */ - char *oprofile_cpu_type; - /* Name of processor class, for the ELF AT_PLATFORM entry */ char *platform; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index a5dbfccd2047..d8e42ef750f1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -149,7 +149,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", .platform = "ppc970", }, { /* PPC970FX */ @@ -166,7 +165,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", .platform = "ppc970", }, { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ @@ -183,7 +181,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970MP", .platform = "ppc970", }, { /* PPC970MP */ @@ -200,7 +197,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970MP, .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970MP", .platform = "ppc970", }, { /* PPC970GX */ @@ -216,7 +212,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 8, .pmc_type = PPC_PMC_IBM, .cpu_setup = __setup_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", .platform = "ppc970", }, { /* Power5 GR */ @@ -230,7 +225,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power5", .platform = "power5", }, { /* Power5++ */ @@ -243,7 +237,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize 
= 128, .dcache_bsize = 128, .num_pmcs = 6, - .oprofile_cpu_type = "ppc64/power5++", .platform = "power5+", }, { /* Power5 GS */ @@ -257,7 +250,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power5+", .platform = "power5+", }, { /* POWER6 in P5+ mode; 2.04-compliant processor */ @@ -269,7 +261,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .platform = "power5+", }, { /* Power6 */ @@ -284,7 +275,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power6", .platform = "power6x", }, { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ @@ -296,7 +286,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .platform = "power6", }, { /* 2.06-compliant processor, i.e. Power7 "architected" mode */ @@ -309,7 +298,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -325,7 +313,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER8, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -341,7 +328,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER9, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .platform = "power9", @@ -356,7 +342,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .mmu_features = MMU_FTRS_POWER10, .icache_bsize = 128, .dcache_bsize = 128, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power10, .cpu_restore = __restore_cpu_power10, .platform = "power10", @@ -373,7 +358,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power7", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -391,7 +375,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power7", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .machine_check_early = __machine_check_early_realmode_p7, @@ -409,7 +392,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -427,7 +409,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ 
-445,7 +426,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .machine_check_early = __machine_check_early_realmode_p8, @@ -463,7 +443,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -481,7 +460,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -499,7 +477,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -517,7 +494,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .machine_check_early = __machine_check_early_realmode_p9, @@ -535,7 +511,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power10", .cpu_setup = __setup_cpu_power10, .cpu_restore = __restore_cpu_power10, .machine_check_early = __machine_check_early_realmode_p10, @@ -554,7 +529,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 4, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/cell-be", .platform = "ppc-cell-be", }, { /* PA Semi PA6T */ @@ -570,7 +544,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_PA6T, .cpu_setup = __setup_cpu_pa6t, .cpu_restore = __restore_cpu_pa6t, - .oprofile_cpu_type = "ppc64/pa6t", .platform = "pa6t", }, { /* default match */ @@ -734,7 +707,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 745/755 */ .pvr_mask = 0xfffff000, @@ -765,7 +737,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 750FX rev 2.0 must disable HID0[DPM] */ .pvr_mask = 0xffffffff, @@ -781,7 +752,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 750FX (All revs except 2.0) */ .pvr_mask = 0xffff0000, @@ -797,7 +767,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750fx, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 750GX */ .pvr_mask = 0xffff0000, @@ -813,7 +782,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750fx, .machine_check = machine_check_generic, .platform = "ppc750", - .oprofile_cpu_type = "ppc/750", }, { /* 740/750 (L2CR bit need fixup for 740) */ .pvr_mask = 0xffff0000, @@ -891,7 +859,6 @@ 
static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -908,7 +875,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -925,7 +891,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -942,7 +907,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -959,7 +923,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -976,7 +939,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -993,7 +955,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1010,7 +971,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1026,7 +986,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1043,7 +1002,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1060,7 +1018,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .num_pmcs = 6, .pmc_type = PPC_PMC_G4, .cpu_setup = __setup_cpu_745x, - .oprofile_cpu_type = "ppc/7450", .machine_check = machine_check_generic, .platform = "ppc7450", }, @@ -1172,7 +1129,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_603, .machine_check = machine_check_83xx, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e300", .platform = "ppc603", }, { /* e300c4 (e300c1, plus one IU) */ @@ -1188,7 +1144,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_603, .machine_check = machine_check_83xx, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e300", .platform = "ppc603", }, #endif @@ -1884,7 +1839,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", .cpu_setup = __setup_cpu_e500v1, .machine_check = machine_check_e500, .platform = "ppc8540", @@ -1903,7 +1857,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", .cpu_setup = 
__setup_cpu_e500v2, .machine_check = machine_check_e500, .platform = "ppc8548", @@ -1922,7 +1875,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500mc", .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500mc, .platform = "ppce500mc", @@ -1943,7 +1895,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500mc", .cpu_setup = __setup_cpu_e5500, #ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e5500, @@ -1965,7 +1916,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 6, - .oprofile_cpu_type = "ppc/e6500", .cpu_setup = __setup_cpu_e6500, #ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e6500, @@ -2033,23 +1983,10 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, t->pmc_type = old.pmc_type; /* - * If we have passed through this logic once before and - * have pulled the default case because the real PVR was - * not found inside cpu_specs[], then we are possibly - * running in compatibility mode. In that case, let the - * oprofiler know which set of compatibility counters to - * pull from by making sure the oprofile_cpu_type string - * is set to that of compatibility mode. If the - * oprofile_cpu_type already has a value, then we are - * possibly overriding a real PVR with a logical one, - * and, in that case, keep the current value for - * oprofile_cpu_type. Furthermore, let's ensure that the + * Let's ensure that the * fix for the PMAO bug is enabled on compatibility mode. */ - if (old.oprofile_cpu_type != NULL) { - t->oprofile_cpu_type = old.oprofile_cpu_type; - t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; - } + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 2ad365c21afa..fc800a9fb2c4 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -102,7 +102,6 @@ static struct cpu_spec __initdata base_cpu_spec = { .dcache_bsize = 32, /* cache info init. */ .num_pmcs = 0, .pmc_type = PPC_PMC_DEFAULT, - .oprofile_cpu_type = NULL, .cpu_setup = NULL, .cpu_restore = __restore_cpu_cpufeatures, .machine_check_early = NULL, @@ -387,7 +386,6 @@ static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power8"; return 1; } @@ -423,7 +421,6 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power9"; return 1; } @@ -449,7 +446,6 @@ static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power10"; return 1; } diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index afc1d6604d12..23c6799cfa5a 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -76,7 +76,7 @@ struct spu_context { struct address_space *mss; /* 'mss' area mappings. */ struct address_space *psmap; /* 'psmap' area mappings. 
*/ struct mutex mapping_lock; - u64 object_id; /* user space pointer for oprofile */ + u64 object_id; /* user space pointer for GNU Debugger */ enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; struct mutex state_mutex; diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig index 610682caabc4..a44869e5ea70 100644 --- a/arch/powerpc/platforms/ps3/Kconfig +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -165,7 +165,7 @@ config PS3_LPM If you intend to use the advanced performance monitoring and profiling support of the Cell processor with programs like - oprofile and perfmon2, then say Y or M, otherwise say N. + perfmon2, then say Y or M, otherwise say N. config PS3GELIC_UDBG bool "PS3 udbg output via UDP broadcasts on Ethernet" -- cgit v1.2.3 From 4d5c5bad51935482437528f7fa4dffdcb3330d8b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 8 Jul 2022 09:11:07 +0200 Subject: powerpc: Remove asm/prom.h from asm/mpc52xx.h and asm/pci.h asm/pci.h and asm/mpc52xx.h don't need asm/prom.h Declare struct device_node locally to avoid including of.h Signed-off-by: Christophe Leroy [mpe: Add missing include of prom.h to of_rtc.c] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cf5243343e2364c2b40f22ee5ad9a6e2453d1121.1657264228.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mpc52xx.h | 3 ++- arch/powerpc/include/asm/pci.h | 1 - arch/powerpc/kernel/prom.c | 1 + arch/powerpc/sysdev/of_rtc.c | 2 ++ 4 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h index ddd80aae1e32..5ea16a71c2f0 100644 --- a/arch/powerpc/include/asm/mpc52xx.h +++ b/arch/powerpc/include/asm/mpc52xx.h @@ -15,7 +15,6 @@ #ifndef __ASSEMBLY__ #include -#include #include #endif /* __ASSEMBLY__ */ @@ -268,6 +267,8 @@ struct mpc52xx_intr { #ifndef __ASSEMBLY__ +struct device_node; + /* mpc52xx_common.c */ extern void mpc5200_setup_xlb_arbiter(void); extern void mpc52xx_declare_of_platform_devices(void); diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 915d6ee4b40a..0f182074cdb7 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -14,7 +14,6 @@ #include #include -#include #include /* Return values for pci_controller_ops.probe_mode function */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 1066b072db35..0a68c99da573 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -54,6 +54,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c index 1f408d34a6a7..420f949b7485 100644 --- a/arch/powerpc/sysdev/of_rtc.c +++ b/arch/powerpc/sysdev/of_rtc.c @@ -11,6 +11,8 @@ #include #include +#include + static __initdata struct { const char *compatible; char *plat_name; -- cgit v1.2.3 From c7255058b5430b5c42932383bd8887d591e7973a Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 30 Jun 2022 12:19:42 +0530 Subject: powerpc/crash: save cpu register data in crash_smp_send_stop() During kdump, two sets of NMI IPIs are sent to the secondary CPUs if the 'crash_kexec_post_notifiers' option is set: the first set to stop the CPUs, and the second set to collect register data. Instead, capture the register data for the secondary CPUs while stopping them. Also, fall back to smp_send_stop() in case the function gets called without kdump configured.
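In short, the reworked stop path becomes a single pass that either hands off to kdump or degrades to a plain stop. A condensed sketch of the resulting function (simplified from the smp.c hunk below; the one-shot guard is kept for context):

	void crash_smp_send_stop(void)
	{
		static bool stopped = false;

		/* May be reached more than once on the crash path; only act once. */
		if (stopped)
			return;
		stopped = true;

	#ifdef CONFIG_KEXEC_CORE
		if (kexec_crash_image) {
			/* Stop secondaries and capture their registers in one pass. */
			crash_kexec_prepare();
			return;
		}
	#endif

		/* No kdump kernel loaded: plain stop, no register capture. */
		smp_send_stop();
	}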
Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220630064942.192283-1-hbathini@linux.ibm.com --- arch/powerpc/include/asm/kexec.h | 1 + arch/powerpc/kernel/smp.c | 29 +++++---------- arch/powerpc/kexec/crash.c | 77 ++++++++++++++++++++++++---------------- 3 files changed, 57 insertions(+), 50 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 2aefe14e1442..cce69101205e 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -83,6 +83,7 @@ extern void default_machine_crash_shutdown(struct pt_regs *regs); extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); +extern void crash_kexec_prepare(void); extern void crash_kexec_secondary(struct pt_regs *regs); int __init overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index bcefab484ea6..6b850c157a62 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,6 @@ #endif #include #include -#include #include #include #include @@ -619,20 +619,6 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif -#ifdef CONFIG_NMI_IPI -static void crash_stop_this_cpu(struct pt_regs *regs) -#else -static void crash_stop_this_cpu(void *dummy) -#endif -{ - /* - * Just busy wait here and avoid marking CPU as offline to ensure - * register data is captured appropriately. - */ - while (1) - cpu_relax(); -} - void crash_smp_send_stop(void) { static bool stopped = false; @@ -651,11 +637,14 @@ void crash_smp_send_stop(void) stopped = true; -#ifdef CONFIG_NMI_IPI - smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); -#else - smp_call_function(crash_stop_this_cpu, NULL, 0); -#endif /* CONFIG_NMI_IPI */ +#ifdef CONFIG_KEXEC_CORE + if (kexec_crash_image) { + crash_kexec_prepare(); + return; + } +#endif + + smp_send_stop(); } #ifdef CONFIG_NMI_IPI diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 80f54723cf6d..252724ed666a 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -40,6 +40,14 @@ #define REAL_MODE_TIMEOUT 10000 static int time_to_dump; + +/* + * In case of system reset, secondary CPUs enter crash_kexec_secondary without + * having to send an IPI explicitly. So, indicate if the crash is via + * system reset to avoid sending another IPI. + */ +static int is_via_system_reset; + /* * crash_wake_offline should be set to 1 by platforms that intend to wake * up offline cpus prior to jumping to a kdump kernel. Currently powernv @@ -101,7 +109,7 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -static void crash_kexec_prepare_cpus(int cpu) +static void crash_kexec_prepare_cpus(void) { unsigned int msecs; volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ @@ -113,7 +121,15 @@ static void crash_kexec_prepare_cpus(int cpu) if (crash_wake_offline) ncpus = num_present_cpus() - 1; - crash_send_ipi(crash_ipi_callback); + /* + * If we came in via system reset, secondaries enter via crash_kexec_secondary(). + * So, wait a while for the secondary CPUs to enter for that case. + * Else, send IPI to all other CPUs.
+ */ + if (is_via_system_reset) + mdelay(PRIMARY_TIMEOUT); + else + crash_send_ipi(crash_ipi_callback); smp_wmb(); again: @@ -202,7 +218,7 @@ void crash_kexec_secondary(struct pt_regs *regs) #else /* ! CONFIG_SMP */ -static void crash_kexec_prepare_cpus(int cpu) +static void crash_kexec_prepare_cpus(void) { /* * move the secondaries to us so that we can copy @@ -248,6 +264,32 @@ noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu) static inline void crash_kexec_wait_realmode(int cpu) {} #endif /* CONFIG_SMP && CONFIG_PPC64 */ +void crash_kexec_prepare(void) +{ + /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ + printk_deferred_enter(); + + /* + * This function is only called after the system + * has panicked or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means stopping other cpus in + * an SMP system. + * The kernel is broken so disable interrupts. + */ + hard_irq_disable(); + + /* + * Make a note of crashing cpu. Will be used in machine_kexec + * such that another IPI will not be sent. + */ + crashing_cpu = smp_processor_id(); + + crash_kexec_prepare_cpus(); +} + /* * Register a function to be called on shutdown. Only use this if you * can't reset your device in the second kernel. @@ -311,35 +353,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs) unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ - printk_deferred_enter(); - - /* - * This function is only called after the system - * has panicked or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means stopping other cpus in - * an SMP system. - * The kernel is broken so disable interrupts. - */ - hard_irq_disable(); - - /* - * Make a note of crashing cpu. Will be used in machine_kexec - * such that another IPI will not be sent. - */ - crashing_cpu = smp_processor_id(); - - /* - * If we came in via system reset, wait a while for the secondary - * CPUs to enter. - */ if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) - mdelay(PRIMARY_TIMEOUT); + is_via_system_reset = 1; - crash_kexec_prepare_cpus(crashing_cpu); + crash_smp_send_stop(); crash_save_cpu(regs, crashing_cpu); -- cgit v1.2.3 From d80f6de9d601c30b53c17f00cb7cfe3169f2ddad Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 14 Jul 2022 18:11:19 +1000 Subject: powerpc/iommu: Fix iommu_table_in_use for a small default DMA window case The existing iommu_table_in_use() helper checks if the kernel is using any of the TCEs. There are some reserved TCEs: 1) the very first one, if the DMA window starts from 0, to avoid having a zero but still valid DMA handle; 2) it_reserved_start..it_reserved_end, to exclude the MMIO32 window in case the default window spans across it - this is the default for the first DMA window on PowerNV. When 1) is the case and 2) is not, the helper does not skip 1) and returns the wrong status. This only seems to occur when passing through a PCI device to a nested guest (not something we support really well), so it has not been seen before. This fixes the bug by adding a special case for no MMIO32 reservation.
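For illustration, the fixed helper now short-circuits before the reserved-range arithmetic. A condensed sketch of the resulting logic (taken from the hunk below, with the scans around a present reserved range elided):

	bool iommu_table_in_use(struct iommu_table *tbl)
	{
		unsigned long start = 0;

		/* ignore reserved bit0 when the DMA window starts at 0 (case 1) */
		if (tbl->it_offset == 0)
			start = 1;

		/* no reserved MMIO32 region (case 2 absent): one scan that still honours 'start' */
		if (!tbl->it_reserved_start && !tbl->it_reserved_end)
			return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size;

		/* otherwise scan below and above the reserved range, as before */
		...
	}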
Fixes: 3c33066a2190 ("powerpc/kernel/iommu: Add new iommu_table_in_use() helper") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220714081119.3714605-1-aik@ozlabs.ru --- arch/powerpc/kernel/iommu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 7e56ddb3e0b9..caebe1431596 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -775,6 +775,11 @@ bool iommu_table_in_use(struct iommu_table *tbl) /* ignore reserved bit0 */ if (tbl->it_offset == 0) start = 1; + + /* Simple case with no reserved MMIO32 region */ + if (!tbl->it_reserved_start && !tbl->it_reserved_end) + return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size; + end = tbl->it_reserved_start - tbl->it_offset; if (find_next_bit(tbl->it_map, end, start) != end) return true; -- cgit v1.2.3 From 0fe1e96fef0a5c53b4c0d1500d356f3906000f81 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 6 Jul 2022 12:21:48 +0200 Subject: powerpc/pci: Prefer PCI domain assignment via DT 'linux,pci-domain' and alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Other Linux architectures use the DT property 'linux,pci-domain' to specify the fixed PCI domain of a PCI controller described in the Device-Tree. And a lot of Freescale powerpc boards define a numbered pci alias in the Device-Tree for every PCIe controller, where the number specifies the preferred PCI domain. So prefer the DT property 'linux,pci-domain' (via the function of_get_pci_domain_nr()) and the DT pci alias (via the function of_alias_get_id()) on the powerpc architecture when assigning a PCI domain to a PCI controller. Fixes: 63a72284b159 ("powerpc/pci: Assign fixed PHB number based on device-tree properties") Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220706102148.5060-2-pali@kernel.org --- arch/powerpc/kernel/pci-common.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index cbb912ee92fa..8ce36aba42da 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -75,16 +75,30 @@ void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; - u32 prop_32; u64 prop; /* * Try fixed PHB numbering first, by checking archs and reading - * the respective device-tree properties. Firstly, try powernv by - * reading "ibm,opal-phbid", only present in OPAL environment. + * the respective device-tree properties. Firstly, try reading + * standard "linux,pci-domain", then try reading "ibm,opal-phbid" + * (only present in powernv OPAL environment), then try the device-tree + * alias and as the last resort use the lower bits of the "reg" property.
*/ - ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + ret = of_get_pci_domain_nr(dn); + if (ret >= 0) { + prop = ret; + ret = 0; + } + if (ret) + ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + if (ret) + ret = of_alias_get_id(dn, "pci"); + if (ret >= 0) { + prop = ret; + ret = 0; + } if (ret) { + u32 prop_32; ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); prop = prop_32; } @@ -96,10 +110,7 @@ static int get_phb_number(struct device_node *dn) if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) return phb_id; - /* - * If not pseries nor powernv, or if fixed PHB numbering tried to add - * the same PHB number twice, then fallback to dynamic PHB numbering. - */ + /* If everything fails then fallback to dynamic PHB numbering. */ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); BUG_ON(phb_id >= MAX_PHBS); set_bit(phb_id, phb_bitmap); -- cgit v1.2.3 From fcdb758ce113c5d1b2b7034a058a9c472e42415e Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Fri, 17 Jun 2022 14:28:05 +1000 Subject: powerpc: make facility_unavailable_exception 64s The facility unavailable exception is only available on ppc book3s machines so use CONFIG_PPC_BOOK3S_64 rather than CONFIG_PPC64. tm_unavailable is only called from facility_unavailable_exception so can also be under this Kconfig symbol. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220617042805.426231-1-rashmica@linux.ibm.com --- arch/powerpc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3aaa50e5c72f..dadfcef5d6db 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1676,7 +1676,7 @@ DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 static void tm_unavailable(struct pt_regs *regs) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -- cgit v1.2.3 From e4787e71ae2de3f60bc04fe09d1be4ef628b6c68 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Fri, 17 Jun 2022 14:31:35 +1000 Subject: powerpc/signal: Update comment for clarity The comment being referred to was deleted in commit af1bbc3dd3d5 ("powerpc: Remove UP only lazy floating point and vector optimisations"). Add a bit more detail so it's clear why we need to clear the FP/VEC/VSX bits here. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220617043135.426897-1-rashmica@linux.ibm.com --- arch/powerpc/kernel/signal_64.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 472596a109e2..86bb5bb4c143 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -377,9 +377,12 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ unsafe_get_user(set->sig[0], &sc->oldmask, efault_out); /* - * Force reload of FP/VEC. - * This has to be done before copying stuff into tsk->thread.fpr/vr - * for the reasons explained in the previous comment. + * Force reload of FP/VEC/VSX so userspace sees any changes. + * Clear these bits from the user process' MSR before copying into the + * thread struct. 
If we are rescheduled or preempted and another task + * uses FP/VEC/VSX, and this process has the MSR bits set, then the + * context switch code will save the current CPU state into the + * thread_struct - possibly overwriting the data we are updating here. */ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); -- cgit v1.2.3 From 1547db7d1f4481c1f3ec731f3edc724ef3026ede Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 1 Jul 2022 16:24:34 +0800 Subject: powerpc: Move system_call_exception() to syscall.c This is a lead-up patch to enable syscall stack randomization, which uses alloca() and makes the compiler add unconditional stack canaries on syscall entry. In order to avoid triggering needless checks and slowing down the entry path, the feature needs to disable stack protector at the compilation unit level as there is no general way to control stack protector coverage with a function attribute. So move system_call_exception() to syscall.c to avoid affecting other functions in interrupt.c. Suggested-by: Michael Ellerman Signed-off-by: Xiu Jianfeng Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220701082435.126596-2-xiujianfeng@huawei.com --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/interrupt.c | 161 ------------------------------------- arch/powerpc/kernel/syscall.c | 173 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 174 insertions(+), 162 deletions(-) create mode 100644 arch/powerpc/kernel/syscall.c (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index de954fe106c5..0963d39464c8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -62,7 +62,7 @@ obj-y := cputable.o syscalls.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o firmware.o \ hw_breakpoint_constraints.o interrupt.o \ - kdebugfs.o stacktrace.o + kdebugfs.o stacktrace.o syscall.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\ paca.o nvram_64.o note.o diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 784ea3289c84..0e75cb03244a 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -24,8 +24,6 @@ unsigned long global_dbcr0[NR_CPUS]; #endif -typedef long (*syscall_fn)(long, long, long, long, long, long); - #ifdef CONFIG_PPC_BOOK3S_64 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); static inline bool exit_must_hard_disable(void) @@ -73,165 +71,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) return true; } -/* Has to run notrace because it is entered not completely "reconciled" */ -notrace long system_call_exception(long r3, long r4, long r5, - long r6, long r7, long r8, - unsigned long r0, struct pt_regs *regs) -{ - syscall_fn f; - - kuap_lock(); - - regs->orig_gpr3 = r3; - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); - - trace_hardirqs_off(); /* finish reconciling */ - - CT_WARN_ON(ct_state() == CONTEXT_KERNEL); - user_exit_irqoff(); - - BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(arch_irq_disabled_regs(regs)); - -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_PKEY)) { - unsigned long amr, iamr; - bool flush_needed = false; - /* - * When entering from userspace we mostly have the AMR/IAMR - * different from kernel default values. Hence don't compare. 
- */ - amr = mfspr(SPRN_AMR); - iamr = mfspr(SPRN_IAMR); - regs->amr = amr; - regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - flush_needed = true; - } - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { - mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); - flush_needed = true; - } - if (flush_needed) - isync(); - } else -#endif - kuap_assert_locked(); - - booke_restore_dbcr0(); - - account_cpu_user_entry(); - - account_stolen_time(); - - /* - * This is not required for the syscall exit path, but makes the - * stack frame look nicer. If this was initialised in the first stack - * frame, or if the unwinder was taught the first stack frame always - * returns to user with IRQS_ENABLED, this store could be avoided! - */ - irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); - - /* - * If system call is called with TM active, set _TIF_RESTOREALL to - * prevent RFSCV being used to return to userspace, because POWER9 - * TM implementation has problems with this instruction returning to - * transactional state. Final register values are not relevant because - * the transaction will be aborted upon return anyway. Or in the case - * of unsupported_scv SIGILL fault, the return state does not much - * matter because it's an edge case. - */ - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) - set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); - - /* - * If the system call was made with a transaction active, doom it and - * return without performing the system call. Unless it was an - * unsupported scv vector, in which case it's treated like an illegal - * instruction. - */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && - !trap_is_unsupported_scv(regs)) { - /* Enable TM in the kernel, and disable EE (for scv) */ - hard_irq_disable(); - mtmsr(mfmsr() | MSR_TM); - - /* tabort, this dooms the transaction, nothing else */ - asm volatile(".long 0x7c00071d | ((%0) << 16)" - :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); - - /* - * Userspace will never see the return value. Execution will - * resume after the tbegin. of the aborted transaction with the - * checkpointed register state. A context switch could occur - * or signal delivered to the process before resuming the - * doomed transaction context, but that should all be handled - * as expected. - */ - return -ENOSYS; - } -#endif // CONFIG_PPC_TRANSACTIONAL_MEM - - local_irq_enable(); - - if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - /* - * We use the return value of do_syscall_trace_enter() as the - * syscall number. If the syscall was rejected for any reason - * do_syscall_trace_enter() returns an invalid syscall number - * and the test against NR_syscalls will fail and the return - * value to be used is in regs->gpr[3]. - */ - r0 = do_syscall_trace_enter(regs); - if (unlikely(r0 >= NR_syscalls)) - return regs->gpr[3]; - r3 = regs->gpr[3]; - r4 = regs->gpr[4]; - r5 = regs->gpr[5]; - r6 = regs->gpr[6]; - r7 = regs->gpr[7]; - r8 = regs->gpr[8]; - - } else if (unlikely(r0 >= NR_syscalls)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - return -ENOSYS; - } - - /* May be faster to do array_index_nospec? 
*/ - barrier_nospec(); - - if (unlikely(is_compat_task())) { - f = (void *)compat_sys_call_table[r0]; - - r3 &= 0x00000000ffffffffULL; - r4 &= 0x00000000ffffffffULL; - r5 &= 0x00000000ffffffffULL; - r6 &= 0x00000000ffffffffULL; - r7 &= 0x00000000ffffffffULL; - r8 &= 0x00000000ffffffffULL; - - } else { - f = (void *)sys_call_table[r0]; - } - - return f(r3, r4, r5, r6, r7, r8); -} - static notrace void booke_load_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c new file mode 100644 index 000000000000..4d5689eeaf25 --- /dev/null +++ b/arch/powerpc/kernel/syscall.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include +#include +#include +#include +#include +#include + + +typedef long (*syscall_fn)(long, long, long, long, long, long); + +/* Has to run notrace because it is entered not completely "reconciled" */ +notrace long system_call_exception(long r3, long r4, long r5, + long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs) +{ + syscall_fn f; + + kuap_lock(); + + regs->orig_gpr3 = r3; + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + + trace_hardirqs_off(); /* finish reconciling */ + + CT_WARN_ON(ct_state() == CONTEXT_KERNEL); + user_exit_irqoff(); + + BUG_ON(regs_is_unrecoverable(regs)); + BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(arch_irq_disabled_regs(regs)); + +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_PKEY)) { + unsigned long amr, iamr; + bool flush_needed = false; + /* + * When entering from userspace we mostly have the AMR/IAMR + * different from kernel default values. Hence don't compare. + */ + amr = mfspr(SPRN_AMR); + iamr = mfspr(SPRN_IAMR); + regs->amr = amr; + regs->iamr = iamr; + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + flush_needed = true; + } + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + flush_needed = true; + } + if (flush_needed) + isync(); + } else +#endif + kuap_assert_locked(); + + booke_restore_dbcr0(); + + account_cpu_user_entry(); + + account_stolen_time(); + + /* + * This is not required for the syscall exit path, but makes the + * stack frame look nicer. If this was initialised in the first stack + * frame, or if the unwinder was taught the first stack frame always + * returns to user with IRQS_ENABLED, this store could be avoided! + */ + irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); + + /* + * If system call is called with TM active, set _TIF_RESTOREALL to + * prevent RFSCV being used to return to userspace, because POWER9 + * TM implementation has problems with this instruction returning to + * transactional state. Final register values are not relevant because + * the transaction will be aborted upon return anyway. Or in the case + * of unsupported_scv SIGILL fault, the return state does not much + * matter because it's an edge case. + */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) + set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); + + /* + * If the system call was made with a transaction active, doom it and + * return without performing the system call. Unless it was an + * unsupported scv vector, in which case it's treated like an illegal + * instruction. 
+ */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && + !trap_is_unsupported_scv(regs)) { + /* Enable TM in the kernel, and disable EE (for scv) */ + hard_irq_disable(); + mtmsr(mfmsr() | MSR_TM); + + /* tabort, this dooms the transaction, nothing else */ + asm volatile(".long 0x7c00071d | ((%0) << 16)" + :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); + + /* + * Userspace will never see the return value. Execution will + * resume after the tbegin. of the aborted transaction with the + * checkpointed register state. A context switch could occur + * or signal delivered to the process before resuming the + * doomed transaction context, but that should all be handled + * as expected. + */ + return -ENOSYS; + } +#endif // CONFIG_PPC_TRANSACTIONAL_MEM + + local_irq_enable(); + + if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(trap_is_unsupported_scv(regs))) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } + /* + * We use the return value of do_syscall_trace_enter() as the + * syscall number. If the syscall was rejected for any reason + * do_syscall_trace_enter() returns an invalid syscall number + * and the test against NR_syscalls will fail and the return + * value to be used is in regs->gpr[3]. + */ + r0 = do_syscall_trace_enter(regs); + if (unlikely(r0 >= NR_syscalls)) + return regs->gpr[3]; + r3 = regs->gpr[3]; + r4 = regs->gpr[4]; + r5 = regs->gpr[5]; + r6 = regs->gpr[6]; + r7 = regs->gpr[7]; + r8 = regs->gpr[8]; + + } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(trap_is_unsupported_scv(regs))) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } + return -ENOSYS; + } + + /* May be faster to do array_index_nospec? */ + barrier_nospec(); + + if (unlikely(is_compat_task())) { + f = (void *)compat_sys_call_table[r0]; + + r3 &= 0x00000000ffffffffULL; + r4 &= 0x00000000ffffffffULL; + r5 &= 0x00000000ffffffffULL; + r6 &= 0x00000000ffffffffULL; + r7 &= 0x00000000ffffffffULL; + r8 &= 0x00000000ffffffffULL; + + } else { + f = (void *)sys_call_table[r0]; + } + + return f(r3, r4, r5, r6, r7, r8); +} -- cgit v1.2.3 From f4a0318f278d98d9492916722e85f258c2221f88 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 1 Jul 2022 16:24:35 +0800 Subject: powerpc: add support for syscall stack randomization Add support for adding a random offset to the stack while handling syscalls. This patch uses mftb() instead of get_random_int() for better performance. In order to avoid unconditional stack canaries on syscall entry (due to the use of alloca()), also disable stack protector to avoid triggering needless checks and slowing down the entry path. As there is no general way to control stack protector coverage with a function attribute, this must be disabled at the compilation unit level. 
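The resulting shape of the entry path is small and symmetric: consume the previously chosen offset on entry, contribute fresh entropy on exit. A minimal sketch of the syscall.c change below (only the randomization calls are shown):

	notrace long system_call_exception(long r3, long r4, long r5,
					   long r6, long r7, long r8,
					   unsigned long r0, struct pt_regs *regs)
	{
		long ret;

		kuap_lock();
		/* apply the offset chosen on the previous syscall's exit */
		add_random_kstack_offset();
		...
		ret = f(r3, r4, r5, r6, r7, r8);
		/* a cheap timebase read feeds entropy for the next entry */
		choose_random_kstack_offset(mftb());
		return ret;
	}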
Signed-off-by: Xiu Jianfeng Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220701082435.126596-3-xiujianfeng@huawei.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/Makefile | 7 +++++++ arch/powerpc/kernel/syscall.c | 19 ++++++++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 04499f22d06b..2daeaab21240 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -196,6 +196,7 @@ config PPC select HAVE_ARCH_KASAN if PPC_BOOK3E_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KFENCE if PPC_BOOK3S_32 || PPC_8xx || 40x + select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0963d39464c8..06d2d1f78f71 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -54,6 +54,13 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif +#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET +# Remove stack protector to avoid triggering unneeded stack canary +# checks due to randomize_kstack_offset. +CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong +CFLAGS_syscall.o += -fno-stack-protector +#endif + obj-y := cputable.o syscalls.o \ irq.o align.o signal_$(BITS).o pmc.o vdso.o \ process.o systbl.o idle.o \ diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index 4d5689eeaf25..81ace9e8b72b 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -18,10 +19,12 @@ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs) { + long ret; syscall_fn f; kuap_lock(); + add_random_kstack_offset(); regs->orig_gpr3 = r3; if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) @@ -169,5 +172,19 @@ notrace long system_call_exception(long r3, long r4, long r5, f = (void *)sys_call_table[r0]; } - return f(r3, r4, r5, r6, r7, r8); + ret = f(r3, r4, r5, r6, r7, r8); + + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * so the maximum stack offset is 1k bytes (10 bits). + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints: the powerpc architecture + * may have two kinds of stack alignment (16-bytes and 8-bytes). + * + * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3]. + */ + choose_random_kstack_offset(mftb()); + + return ret; } -- cgit v1.2.3 From ca829e05d3d4f728810cc5e4b468d9ebc7745eb3 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Tue, 26 Jul 2022 09:57:47 +0800 Subject: powerpc/64: Init jump labels before parse_early_param() On 64-bit, calling jump_label_init() in setup_feature_keys() is too late because static keys may be used in subroutines of parse_early_param() which is again a subroutine of early_init_devtree(). For example booting with "threadirqs": static_key_enable_cpuslocked(): static key '0xc000000002953260' used before call to jump_label_init() WARNING: CPU: 0 PID: 0 at kernel/jump_label.c:166 static_key_enable_cpuslocked+0xfc/0x120 ...
NIP static_key_enable_cpuslocked+0xfc/0x120 LR static_key_enable_cpuslocked+0xf8/0x120 Call Trace: static_key_enable_cpuslocked+0xf8/0x120 (unreliable) static_key_enable+0x30/0x50 setup_forced_irqthreads+0x28/0x40 do_early_param+0xa0/0x108 parse_args+0x290/0x4e0 parse_early_options+0x48/0x5c parse_early_param+0x58/0x84 early_init_devtree+0xd4/0x518 early_setup+0xb4/0x214 So call jump_label_init() just before parse_early_param() in early_init_devtree(). Suggested-by: Michael Ellerman Signed-off-by: Zhouyi Zhou [mpe: Add call trace to change log and minor wording edits.] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220726015747.11754-1-zhouzhouyi@gmail.com --- arch/powerpc/kernel/prom.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 0a68c99da573..a730b951b64b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -752,6 +752,13 @@ void __init early_init_devtree(void *params) early_init_dt_scan_root(); early_init_dt_scan_memory_ppc(); + /* + * As generic code authors expect to be able to use static keys + * in early_param() handlers, we initialize the static keys just + * before parsing early params (it's fine to call jump_label_init() + * more than once). + */ + jump_label_init(); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ -- cgit v1.2.3 From f4b39e88b42d13366b831270306326b5c20971ca Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 2 Aug 2022 20:38:32 +1000 Subject: powerpc/pci: Fix PHB numbering when using opal-phbid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent change to the PHB numbering logic has a logic error in the handling of "ibm,opal-phbid". When an "ibm,opal-phbid" property is present, &prop is written to and ret is set to zero. The following call to of_alias_get_id() is skipped because ret == 0. But then the if (ret >= 0) is true, and the body of that if statement sets prop = ret which throws away the value that was just read from "ibm,opal-phbid". Fix the logic by only doing the ret >= 0 check in the of_alias_get_id() case. Fixes: 0fe1e96fef0a ("powerpc/pci: Prefer PCI domain assignment via DT 'linux,pci-domain' and alias") Reviewed-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220802105723.1055178-1-mpe@ellerman.id.au --- arch/powerpc/kernel/pci-common.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 8ce36aba42da..bdd3332200c5 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -91,11 +91,13 @@ static int get_phb_number(struct device_node *dn) } if (ret) ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); - if (ret) + + if (ret) { ret = of_alias_get_id(dn, "pci"); - if (ret >= 0) { - prop = ret; - ret = 0; + if (ret >= 0) { + prop = ret; + ret = 0; + } } if (ret) { u32 prop_32; -- cgit v1.2.3
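For reference, the PHB number probing order that results from this fix, condensed from get_phb_number() as it stands after both patches (the fallback to dynamic numbering is omitted):

	/* 1. linux,pci-domain  2. ibm,opal-phbid  3. pci alias  4. low bits of "reg" */
	ret = of_get_pci_domain_nr(dn);
	if (ret >= 0) {
		prop = ret;
		ret = 0;
	}
	if (ret)
		ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);

	if (ret) {
		ret = of_alias_get_id(dn, "pci");
		if (ret >= 0) {
			/* remap only the alias result; a phbid already in 'prop' stays intact */
			prop = ret;
			ret = 0;
		}
	}
	if (ret) {
		u32 prop_32;
		ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
		prop = prop_32;
	}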