From 0382a35bef70ecc074db67192ff8d37737d02b21 Mon Sep 17 00:00:00 2001
From: Pali Rohár
Date: Sat, 20 Aug 2022 13:51:13 +0200
Subject: powerpc/pci: Enable PCI domains in /proc when PCI bus numbers are not unique
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On 32-bit powerpc systems with multiple PCIe controllers and multiple PCI
domains, where the same PCI bus numbers are used in more than one domain,
a kernel compiled with CONFIG_PROC_FS=y and
CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT=y prints a "proc_dir_entry
'pci/01' already registered" error message:

  proc_dir_entry 'pci/01' already registered
  WARNING: CPU: 0 PID: 1 at fs/proc/generic.c:377 proc_register+0x1a8/0x1ac
  ...
  NIP proc_register+0x1a8/0x1ac
  LR proc_register+0x1a8/0x1ac
  Call Trace:
    proc_register+0x1a8/0x1ac (unreliable)
    _proc_mkdir+0x78/0xa4
    pci_proc_attach_device+0x11c/0x168
    pci_proc_init+0x80/0x98
    do_one_initcall+0x80/0x284
    kernel_init_freeable+0x1f4/0x2a0
    kernel_init+0x24/0x150
    ret_from_kernel_thread+0x5c/0x64

This regression started appearing after commit 566356813082 ("powerpc/pci:
Add config option for using all 256 PCI buses") when a PCIe card is
connected in every mPCIe slot, so that PCI bus 1 is populated for every
PCIe controller / PCI domain.

The reason is that the PCI procfs code expects pci_proc_domain() to return
true for domain-dependent buses whenever PCI bus numbers are not unique
across all PCI domains.

Fix this issue by setting the PCI_ENABLE_PROC_DOMAINS and
PCI_COMPAT_DOMAIN_0 flags in the 32-bit powerpc code when
CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT is enabled. The same approach is
already used by the 64-bit powerpc code (where PCI bus numbers are always
domain dependent).
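For reference, this is roughly how the two flags are consumed on the procfs
side. The sketch below is paraphrased from arch/powerpc/kernel/pci-common.c
and drivers/pci/proc.c and may differ from the exact code; it is
illustration only, not part of this patch:

  /* Illustrative sketch, not part of this patch. */
  int pci_proc_domain(struct pci_bus *bus)
  {
          if (!pci_has_flag(PCI_ENABLE_PROC_DOMAINS))
                  return 0;                       /* plain "pci/<bus>" names */
          if (pci_has_flag(PCI_COMPAT_DOMAIN_0))
                  return pci_domain_nr(bus) != 0; /* domain 0 keeps "pci/<bus>" */
          return 1;                               /* "pci/<domain>:<bus>" names */
  }

  /* pci_proc_attach_device() then builds the /proc/bus/pci directory name: */
  if (pci_proc_domain(dev->bus))
          sprintf(name, "%04x:%02x", pci_domain_nr(dev->bus), dev->bus->number);
  else
          sprintf(name, "%02x", dev->bus->number);

With both flags set, the duplicated bus-1 directories become e.g.
pci/0001:01 and pci/0002:01 instead of colliding on pci/01, while domain 0
keeps the old pci/01 name for userspace (such as video drivers) that
expects it.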
Fixes: 566356813082 ("powerpc/pci: Add config option for using all 256 PCI buses")
Signed-off-by: Pali Rohár
[mpe: Trim change log oops message]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20220820115113.30581-1-pali@kernel.org
---
 arch/powerpc/kernel/pci_32.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 433965bf37b4..855b59892c5c 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -245,6 +245,15 @@ static int __init pcibios_init(void)
 
         printk(KERN_INFO "PCI: Probing PCI hardware\n");
 
+#ifdef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
+       /*
+        * Enable PCI domains in /proc when PCI bus numbers are not unique
+        * across all PCI domains to prevent conflicts. And keep PCI domain 0
+        * backward compatible in /proc for video cards.
+        */
+       pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
+#endif
+
         if (pci_has_flag(PCI_REASSIGN_ALL_BUS))
                 pci_assign_all_buses = 1;
 
--
cgit v1.2.3

From c7acee3d2f128a38b68fb7af85dbbd91bfd0b4ad Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Sun, 21 Aug 2022 01:51:29 +0900
Subject: powerpc: align syscall table for ppc32

Christophe Leroy reported that commit 7b4537199a4a ("kbuild: link symbol
CRCs at final link, removing CONFIG_MODULE_REL_CRCS") broke
mpc85xx_defconfig + CONFIG_RELOCATABLE=y:

    LD      vmlinux
    SYSMAP  System.map
    SORTTAB vmlinux
    CHKREL  vmlinux
  WARNING: 451 bad relocations
  c0b312a9 R_PPC_UADDR32     .head.text-0x3ff9ed54
  c0b312ad R_PPC_UADDR32     .head.text-0x3ffac224
  c0b312b1 R_PPC_UADDR32     .head.text-0x3ffb09f4
  c0b312b5 R_PPC_UADDR32     .head.text-0x3fe184dc
  c0b312b9 R_PPC_UADDR32     .head.text-0x3fe183a8
  ...

The compiler emits a bunch of R_PPC_UADDR32 relocations, which are not
supported by arch/powerpc/kernel/reloc_32.S. The reason is that an
unaligned symbol exists:

  $ powerpc-linux-gnu-nm -n vmlinux
  ...
  c0b31258 d spe_aligninfo
  c0b31298 d __func__.0
  c0b312a9 D sys_call_table
  c0b319b8 d __func__.0

Commit 7b4537199a4a is not the root cause. Even before that commit, the
same issue can be reproduced with mpc85xx_defconfig + CONFIG_RELOCATABLE=y
+ CONFIG_MODVERSIONS=n. It is just that nobody noticed, because when
CONFIG_MODVERSIONS is enabled a __crc_* symbol inserted before
sys_call_table was hiding the misalignment.

Adding alignment to the syscall table for ppc32 fixes the issue.

Cc: stable@vger.kernel.org
Reported-by: Christophe Leroy
Signed-off-by: Masahiro Yamada
Tested-by: Christophe Leroy
[mpe: Trim change log discussion, add Cc stable]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/lkml/38605f6a-a568-f884-f06f-ea4da5b214f0@csgroup.eu/
Link: https://lore.kernel.org/r/20220820165129.1147589-1-masahiroy@kernel.org
---
 arch/powerpc/kernel/systbl.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index cb3358886203..6c1db3b6de2d 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -18,6 +18,7 @@
         .p2align        3
 #define __SYSCALL(nr, entry)    .8byte entry
 #else
+       .p2align        2
 #define __SYSCALL(nr, entry)    .long entry
 #endif
 
--
cgit v1.2.3

From 91926d8b7e71aaf5f84f0cf208fc5a8b7a761050 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Tue, 23 Aug 2022 21:59:52 +1000
Subject: powerpc/rtas: Fix RTAS MSR[HV] handling for Cell

The semi-recent changes to MSR handling when entering RTAS (firmware)
cause crashes on IBM Cell machines. An example trace:

  kernel tried to execute user page (2fff01a8) - exploit attempt? (uid: 0)
  BUG: Unable to handle kernel instruction fetch
  Faulting instruction address: 0x2fff01a8
  Oops: Kernel access of bad area, sig: 11 [#1]
  BE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=4 NUMA Cell
  Modules linked in:
  CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 6.0.0-rc2-00433-gede0a8d3307a #207
  NIP:  000000002fff01a8 LR: 0000000000032608 CTR: 0000000000000000
  REGS: c0000000015236b0 TRAP: 0400   Tainted: G W (6.0.0-rc2-00433-gede0a8d3307a)
  MSR:  0000000008001002  CR: 00000000  XER: 20000000
  ...
  NIP 0x2fff01a8
  LR  0x32608
  Call Trace:
    0xc00000000143c5f8 (unreliable)
    .rtas_call+0x224/0x320
    .rtas_get_boot_time+0x70/0x150
    .read_persistent_clock64+0x114/0x140
    .read_persistent_wall_and_boot_offset+0x24/0x80
    .timekeeping_init+0x40/0x29c
    .start_kernel+0x674/0x8f0
    start_here_common+0x1c/0x50

Unlike PAPR platforms, where RTAS is only used in guests, IBM Cell
machines run Linux with MSR[HV] set but also use RTAS, provided by SLOF.

Fix it by copying the MSR[HV] bit from the MSR value we've just read using
mfmsr into the value used for RTAS.

It seems like we could also fix it using an #ifdef CELL to set MSR[HV],
but that doesn't work because it's possible to build a single kernel image
that runs on both Cell native and pseries.
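For readers not fluent in the rotate-and-insert instructions used in the
hunk below, the fix amounts to the following in C terms. saved_msr is a
name used here purely for illustration; it stands for the MSR value that
__enter_rtas read earlier with mfmsr and still holds in r6:

  /* Illustrative sketch only; the actual change is the asm below. */
  unsigned long hv       = saved_msr & MSR_HV;   /* MSR_HV == 1ul << MSR_HV_LG */
  unsigned long rtas_msr = MSR_ME | MSR_RI;      /* base MSR used for RTAS */

  rtas_msr |= hv;   /* keep HV set for RTAS iff the kernel itself runs in HV mode */

The extrdi extracts the single MSR[HV] bit from the saved MSR, and the
insrdi inserts it at the same position in the freshly built RTAS MSR, so
the value is unchanged on non-HV (pseries guest) systems.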
Fixes: b6b1c3ce06ca ("powerpc/rtas: Keep MSR[RI] set when calling RTAS")
Cc: stable@vger.kernel.org # v5.19+
Signed-off-by: Michael Ellerman
Reviewed-by: Jordan Niethe
Link: https://lore.kernel.org/r/20220823115952.1203106-2-mpe@ellerman.id.au
---
 arch/powerpc/kernel/rtas_entry.S | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/rtas_entry.S b/arch/powerpc/kernel/rtas_entry.S
index 9a434d42e660..6ce95ddadbcd 100644
--- a/arch/powerpc/kernel/rtas_entry.S
+++ b/arch/powerpc/kernel/rtas_entry.S
@@ -109,8 +109,12 @@ __enter_rtas:
          * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S]
          * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if
          * MSR[S] is set, it will remain when entering RTAS.
+        * If we're in HV mode, RTAS must also run in HV mode, so extract MSR_HV
+        * from the saved MSR value and insert into the value RTAS will use.
          */
+       extrdi  r0, r6, 1, 63 - MSR_HV_LG
         LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI)
+       insrdi  r6, r0, 1, 63 - MSR_HV_LG
 
         li      r0,0
         mtmsrd  r0,1 /* disable RI before using SRR0/1 */
--
cgit v1.2.3

From 8cbb2b50ee2dcb082675237eaaa48fe8479f8aa5 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Thu, 25 Aug 2022 10:25:05 +0200
Subject: asm-generic: Conditionally enable do_softirq_own_stack() via Kconfig.

Remove the CONFIG_PREEMPT_RT symbol from the ifdef around
do_softirq_own_stack() and move the condition to Kconfig instead. Enable
softirq stacks based on SOFTIRQ_ON_OWN_STACK, which depends on
HAVE_SOFTIRQ_ON_OWN_STACK and defaults to !PREEMPT_RT. This ensures that
softirq stacks are not used on PREEMPT_RT and avoids a 'select' statement
on an option which has a 'depends' statement.
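To make the effect of the new symbol concrete, here is a rough sketch
(illustration only; the real conversions are in the hunks below) of how
the generic header and an architecture's implementation pair up once
SOFTIRQ_ON_OWN_STACK exists:

  /* include/asm-generic/softirq_stack.h, roughly: */
  #ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
  void do_softirq_own_stack(void);              /* provided by the architecture */
  #else
  static inline void do_softirq_own_stack(void)
  {
          __do_softirq();                       /* run softirqs on the current stack */
  }
  #endif

  /* An architecture that selects HAVE_SOFTIRQ_ON_OWN_STACK guards its
   * implementation with the derived symbol, e.g. on powerpc: */
  #ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
  void do_softirq_own_stack(void)
  {
          call_do_softirq(softirq_ctx[smp_processor_id()]);
  }
  #endif

Because SOFTIRQ_ON_OWN_STACK is defined as HAVE_SOFTIRQ_ON_OWN_STACK &&
!PREEMPT_RT, PREEMPT_RT kernels automatically fall back to the inline stub
without any architecture code having to mention PREEMPT_RT.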
Link: https://lore.kernel.org/YvN5E%2FPrHfUhggr7@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Arnd Bergmann
---
 arch/Kconfig                          | 3 +++
 arch/arm/kernel/irq.c                 | 2 +-
 arch/parisc/kernel/irq.c              | 2 +-
 arch/powerpc/kernel/irq.c             | 4 ++--
 arch/s390/include/asm/softirq_stack.h | 2 +-
 arch/sh/kernel/irq.c                  | 2 +-
 arch/sparc/kernel/irq_64.c            | 2 +-
 arch/x86/include/asm/irq_stack.h      | 2 +-
 arch/x86/kernel/irq_32.c              | 2 +-
 include/asm-generic/softirq_stack.h   | 2 +-
 10 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/Kconfig b/arch/Kconfig
index 5dbf11a5ba4e..8b311e400ec1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -923,6 +923,9 @@ config HAVE_SOFTIRQ_ON_OWN_STACK
           Architecture provides a function to run __do_softirq() on a
           separate stack.
 
+config SOFTIRQ_ON_OWN_STACK
+       def_bool HAVE_SOFTIRQ_ON_OWN_STACK && !PREEMPT_RT
+
 config ALTERNATE_USER_ADDRESS_SPACE
         bool
         help
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 034cb48c9eeb..fe28fc1f759d 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -70,7 +70,7 @@ static void __init init_irq_stacks(void)
         }
 }
 
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 static void ____do_softirq(void *arg)
 {
         __do_softirq();
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index fbb882cb8dbb..b05055f3ba4b 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -480,7 +480,7 @@ static void execute_on_irq_stack(void *func, unsigned long param1)
         *irq_stack_in_use = 1;
 }
 
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 void do_softirq_own_stack(void)
 {
         execute_on_irq_stack(__do_softirq, 0);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 0f17268c1f0b..9ede61a5a469 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -199,7 +199,7 @@ static inline void check_stack_overflow(unsigned long sp)
         }
 }
 
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 static __always_inline void call_do_softirq(const void *sp)
 {
         /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
@@ -335,7 +335,7 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly;
 void *softirq_ctx[NR_CPUS] __read_mostly;
 void *hardirq_ctx[NR_CPUS] __read_mostly;
 
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 void do_softirq_own_stack(void)
 {
         call_do_softirq(softirq_ctx[smp_processor_id()]);
diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h
index af68d6c1d584..1ac5115d3115 100644
--- a/arch/s390/include/asm/softirq_stack.h
+++ b/arch/s390/include/asm/softirq_stack.h
@@ -5,7 +5,7 @@
 #include <asm/lowcore.h>
 #include <asm/stacktrace.h>
 
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 static inline void do_softirq_own_stack(void)
 {
         call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq);
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 909276738078..4e6835de54cf 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -149,7 +149,7 @@ void irq_ctx_exit(int cpu)
         hardirq_ctx[cpu] = NULL;
 }
 
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 void do_softirq_own_stack(void)
 {
         struct thread_info *curctx;
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 41fa1be980a3..72da2e10e255 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -855,7 +855,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
         set_irq_regs(old_regs);
 }
 
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 void do_softirq_own_stack(void)
 {
         void *orig_sp, *sp = softirq_stack[smp_processor_id()];
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 63f818aedf77..147cb8fdda92 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -203,7 +203,7 @@
                               IRQ_CONSTRAINTS, regs, vector);          \
 }
 
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 /*
  * Macro to invoke __do_softirq on the irq stack. This is only called from
  * task context when bottom halves are about to be reenabled and soft
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index e5dd6da78713..01833ebf5e8e 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -132,7 +132,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
         return 0;
 }
 
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 void do_softirq_own_stack(void)
 {
         struct irq_stack *irqstk;
diff --git a/include/asm-generic/softirq_stack.h b/include/asm-generic/softirq_stack.h
index d3e2d81656e0..2a67aed9ac52 100644
--- a/include/asm-generic/softirq_stack.h
+++ b/include/asm-generic/softirq_stack.h
@@ -2,7 +2,7 @@
 #ifndef __ASM_GENERIC_SOFTIRQ_STACK_H
 #define __ASM_GENERIC_SOFTIRQ_STACK_H
 
-#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT)
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 void do_softirq_own_stack(void);
 #else
 static inline void do_softirq_own_stack(void)
--
cgit v1.2.3