diff options
Diffstat (limited to 'arch/powerpc/kernel')
43 files changed, 855 insertions, 2743 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a7ca8fe62368..157b0147921f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,9 +5,6 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -# Disable clang warning for using setjmp without setjmp.h header -CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) - ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) endif @@ -22,6 +19,8 @@ CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) +CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_prom_init.o += -ffreestanding ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code @@ -39,7 +38,6 @@ KASAN_SANITIZE_btext.o := n ifdef CONFIG_KASAN CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING -CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif @@ -78,9 +76,8 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),) -obj-y += fadump.o -endif +obj-$(CONFIG_FA_DUMP) += fadump.o +obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o ifdef CONFIG_PPC32 obj-$(CONFIG_E500) += idle_e500.o endif @@ -126,14 +123,6 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ - machine_kexec_$(BITS).o -obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o -ifdef CONFIG_HAVE_IMA_KEXEC -ifdef CONFIG_IMA -obj-y += ima_kexec.o -endif -endif obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o @@ -161,16 +150,13 @@ ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),) obj-y += ucall.o endif +obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o ima_arch.o secvar-ops.o +obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o + # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n KCOV_INSTRUMENT_prom_init.o := n UBSAN_SANITIZE_prom_init.o := n -GCOV_PROFILE_machine_kexec_64.o := n -KCOV_INSTRUMENT_machine_kexec_64.o := n -UBSAN_SANITIZE_machine_kexec_64.o := n -GCOV_PROFILE_machine_kexec_32.o := n -KCOV_INSTRUMENT_machine_kexec_32.o := n -UBSAN_SANITIZE_machine_kexec_32.o := n GCOV_PROFILE_kprobes.o := n KCOV_INSTRUMENT_kprobes.o := n UBSAN_SANITIZE_kprobes.o := n diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 484f54dab247..f22bd6d1fe93 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -385,7 +385,8 @@ int main(void) OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32); OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec); OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); - OFFSET(STAMP_XTIME, vdso_data, stamp_xtime); + OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec); + OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec); OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); @@ -393,20 +394,15 @@ int main(void) OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size); #ifdef CONFIG_PPC64 OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64); - OFFSET(TVAL64_TV_SEC, timeval, tv_sec); - OFFSET(TVAL64_TV_USEC, timeval, tv_usec); + OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec); + OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec); +#endif + OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec); + OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec); OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec); OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec); - OFFSET(TSPC64_TV_SEC, timespec, tv_sec); - OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec); OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec); OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec); -#else - OFFSET(TVAL32_TV_SEC, timeval, tv_sec); - OFFSET(TVAL32_TV_USEC, timeval, tv_usec); - OFFSET(TSPC32_TV_SEC, timespec, tv_sec); - OFFSET(TSPC32_TV_NSEC, timespec, tv_nsec); -#endif /* timeval/timezone offsets for use by vdso */ OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest); OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime); diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 2b4f3ec0acf7..1d308780e0d3 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -231,7 +231,7 @@ _GLOBAL(__setup_cpu_e5500) blr #endif -/* flush L1 date cache, it can apply to e500v2, e500mc and e5500 */ +/* flush L1 data cache, it can apply to e500v2, e500mc and e5500 */ _GLOBAL(flush_dcache_L1) mfmsr r10 wrteei 0 diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c deleted file mode 100644 index d488311efab1..000000000000 --- a/arch/powerpc/kernel/crash.c +++ /dev/null @@ -1,374 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Architecture specific (PPC64) functions for kexec based crash dumps. - * - * Copyright (C) 2005, IBM Corp. - * - * Created by: Haren Myneni - */ - -#include <linux/kernel.h> -#include <linux/smp.h> -#include <linux/reboot.h> -#include <linux/kexec.h> -#include <linux/export.h> -#include <linux/crash_dump.h> -#include <linux/delay.h> -#include <linux/irq.h> -#include <linux/types.h> - -#include <asm/processor.h> -#include <asm/machdep.h> -#include <asm/kexec.h> -#include <asm/prom.h> -#include <asm/smp.h> -#include <asm/setjmp.h> -#include <asm/debug.h> - -/* - * The primary CPU waits a while for all secondary CPUs to enter. This is to - * avoid sending an IPI if the secondary CPUs are entering - * crash_kexec_secondary on their own (eg via a system reset). - * - * The secondary timeout has to be longer than the primary. Both timeouts are - * in milliseconds. - */ -#define PRIMARY_TIMEOUT 500 -#define SECONDARY_TIMEOUT 1000 - -#define IPI_TIMEOUT 10000 -#define REAL_MODE_TIMEOUT 10000 - -static int time_to_dump; -/* - * crash_wake_offline should be set to 1 by platforms that intend to wake - * up offline cpus prior to jumping to a kdump kernel. Currently powernv - * sets it to 1, since we want to avoid things from happening when an - * offline CPU wakes up due to something like an HMI (malfunction error), - * which propagates to all threads. - */ -int crash_wake_offline; - -#define CRASH_HANDLER_MAX 3 -/* List of shutdown handles */ -static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX]; -static DEFINE_SPINLOCK(crash_handlers_lock); - -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; -static int crash_shutdown_cpu = -1; - -static int handle_fault(struct pt_regs *regs) -{ - if (crash_shutdown_cpu == smp_processor_id()) - longjmp(crash_shutdown_buf, 1); - return 0; -} - -#ifdef CONFIG_SMP - -static atomic_t cpus_in_crash; -void crash_ipi_callback(struct pt_regs *regs) -{ - static cpumask_t cpus_state_saved = CPU_MASK_NONE; - - int cpu = smp_processor_id(); - - hard_irq_disable(); - if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { - crash_save_cpu(regs, cpu); - cpumask_set_cpu(cpu, &cpus_state_saved); - } - - atomic_inc(&cpus_in_crash); - smp_mb__after_atomic(); - - /* - * Starting the kdump boot. - * This barrier is needed to make sure that all CPUs are stopped. - */ - while (!time_to_dump) - cpu_relax(); - - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 1); - -#ifdef CONFIG_PPC64 - kexec_smp_wait(); -#else - for (;;); /* FIXME */ -#endif - - /* NOTREACHED */ -} - -static void crash_kexec_prepare_cpus(int cpu) -{ - unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - int tries = 0; - int (*old_handler)(struct pt_regs *regs); - - printk(KERN_EMERG "Sending IPI to other CPUs\n"); - - if (crash_wake_offline) - ncpus = num_present_cpus() - 1; - - crash_send_ipi(crash_ipi_callback); - smp_wmb(); - -again: - /* - * FIXME: Until we will have the way to stop other CPUs reliably, - * the crash CPU will send an IPI and wait for other CPUs to - * respond. - */ - msecs = IPI_TIMEOUT; - while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) - mdelay(1); - - /* Would it be better to replace the trap vector here? */ - - if (atomic_read(&cpus_in_crash) >= ncpus) { - printk(KERN_EMERG "IPI complete\n"); - return; - } - - printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", - ncpus - atomic_read(&cpus_in_crash)); - - /* - * If we have a panic timeout set then we can't wait indefinitely - * for someone to activate system reset. We also give up on the - * second time through if system reset fail to work. - */ - if ((panic_timeout > 0) || (tries > 0)) - return; - - /* - * A system reset will cause all CPUs to take an 0x100 exception. - * The primary CPU returns here via setjmp, and the secondary - * CPUs reexecute the crash_kexec_secondary path. - */ - old_handler = __debugger; - __debugger = handle_fault; - crash_shutdown_cpu = smp_processor_id(); - - if (setjmp(crash_shutdown_buf) == 0) { - printk(KERN_EMERG "Activate system reset (dumprestart) " - "to stop other cpu(s)\n"); - - /* - * A system reset will force all CPUs to execute the - * crash code again. We need to reset cpus_in_crash so we - * wait for everyone to do this. - */ - atomic_set(&cpus_in_crash, 0); - smp_mb(); - - while (atomic_read(&cpus_in_crash) < ncpus) - cpu_relax(); - } - - crash_shutdown_cpu = -1; - __debugger = old_handler; - - tries++; - goto again; -} - -/* - * This function will be called by secondary cpus. - */ -void crash_kexec_secondary(struct pt_regs *regs) -{ - unsigned long flags; - int msecs = SECONDARY_TIMEOUT; - - local_irq_save(flags); - - /* Wait for the primary crash CPU to signal its progress */ - while (crashing_cpu < 0) { - if (--msecs < 0) { - /* No response, kdump image may not have been loaded */ - local_irq_restore(flags); - return; - } - - mdelay(1); - } - - crash_ipi_callback(regs); -} - -#else /* ! CONFIG_SMP */ - -static void crash_kexec_prepare_cpus(int cpu) -{ - /* - * move the secondaries to us so that we can copy - * the new kernel 0-0x100 safely - * - * do this if kexec in setup.c ? - */ -#ifdef CONFIG_PPC64 - smp_release_cpus(); -#else - /* FIXME */ -#endif -} - -void crash_kexec_secondary(struct pt_regs *regs) -{ -} -#endif /* CONFIG_SMP */ - -/* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#if defined(CONFIG_SMP) && defined(CONFIG_PPC64) -static void __maybe_unused crash_kexec_wait_realmode(int cpu) -{ - unsigned int msecs; - int i; - - msecs = REAL_MODE_TIMEOUT; - for (i=0; i < nr_cpu_ids && msecs > 0; i++) { - if (i == cpu) - continue; - - while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) { - barrier(); - if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) - break; - msecs--; - mdelay(1); - } - } - mb(); -} -#else -static inline void crash_kexec_wait_realmode(int cpu) {} -#endif /* CONFIG_SMP && CONFIG_PPC64 */ - -/* - * Register a function to be called on shutdown. Only use this if you - * can't reset your device in the second kernel. - */ -int crash_shutdown_register(crash_shutdown_t handler) -{ - unsigned int i, rc; - - spin_lock(&crash_handlers_lock); - for (i = 0 ; i < CRASH_HANDLER_MAX; i++) - if (!crash_shutdown_handles[i]) { - /* Insert handle at first empty entry */ - crash_shutdown_handles[i] = handler; - rc = 0; - break; - } - - if (i == CRASH_HANDLER_MAX) { - printk(KERN_ERR "Crash shutdown handles full, " - "not registered.\n"); - rc = 1; - } - - spin_unlock(&crash_handlers_lock); - return rc; -} -EXPORT_SYMBOL(crash_shutdown_register); - -int crash_shutdown_unregister(crash_shutdown_t handler) -{ - unsigned int i, rc; - - spin_lock(&crash_handlers_lock); - for (i = 0 ; i < CRASH_HANDLER_MAX; i++) - if (crash_shutdown_handles[i] == handler) - break; - - if (i == CRASH_HANDLER_MAX) { - printk(KERN_ERR "Crash shutdown handle not found\n"); - rc = 1; - } else { - /* Shift handles down */ - for (; i < (CRASH_HANDLER_MAX - 1); i++) - crash_shutdown_handles[i] = - crash_shutdown_handles[i+1]; - /* - * Reset last entry to NULL now that it has been shifted down, - * this will allow new handles to be added here. - */ - crash_shutdown_handles[i] = NULL; - rc = 0; - } - - spin_unlock(&crash_handlers_lock); - return rc; -} -EXPORT_SYMBOL(crash_shutdown_unregister); - -void default_machine_crash_shutdown(struct pt_regs *regs) -{ - unsigned int i; - int (*old_handler)(struct pt_regs *regs); - - /* - * This function is only called after the system - * has panicked or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means stopping other cpus in - * an SMP system. - * The kernel is broken so disable interrupts. - */ - hard_irq_disable(); - - /* - * Make a note of crashing cpu. Will be used in machine_kexec - * such that another IPI will not be sent. - */ - crashing_cpu = smp_processor_id(); - - /* - * If we came in via system reset, wait a while for the secondary - * CPUs to enter. - */ - if (TRAP(regs) == 0x100) - mdelay(PRIMARY_TIMEOUT); - - crash_kexec_prepare_cpus(crashing_cpu); - - crash_save_cpu(regs, crashing_cpu); - - time_to_dump = 1; - - crash_kexec_wait_realmode(crashing_cpu); - - machine_kexec_mask_interrupts(); - - /* - * Call registered shutdown routines safely. Swap out - * __debugger_fault_handler, and replace on exit. - */ - old_handler = __debugger_fault_handler; - __debugger_fault_handler = handle_fault; - crash_shutdown_cpu = smp_processor_id(); - for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) { - if (setjmp(crash_shutdown_buf) == 0) { - /* - * Insert syncs and delay to ensure - * instructions in the dangerous region don't - * leak away from this protected region. - */ - asm volatile("sync; isync"); - /* dangerous region */ - crash_shutdown_handles[i](); - asm volatile("sync; isync"); - } - } - crash_shutdown_cpu = -1; - __debugger_fault_handler = old_handler; - - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); -} diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index 5f66b95b6858..cc14aa6c4a1b 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -30,10 +30,10 @@ int set_dawr(struct arch_hw_breakpoint *brk) * DAWR length is stored in field MDR bits 48:53. Matches range in * doublewords (64 bits) baised by -1 eg. 0b000000=1DW and * 0b111111=64DW. - * brk->len is in bytes. + * brk->hw_len is in bytes. * This aligns up to double word size, shifts and does the bias. */ - mrd = ((brk->len + 7) >> 3) - 1; + mrd = ((brk->hw_len + 7) >> 3) - 1; dawrx |= (mrd & 0x3f) << (63 - 53); if (ppc_md.set_dawr) @@ -54,7 +54,7 @@ static ssize_t dawr_write_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - struct arch_hw_breakpoint null_brk = {0, 0, 0}; + struct arch_hw_breakpoint null_brk = {0}; size_t rc; /* Send error to user if they hypervisor won't allow us to write DAWR */ diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c index 3482118ffe76..ef2ad4945904 100644 --- a/arch/powerpc/kernel/early_32.c +++ b/arch/powerpc/kernel/early_32.c @@ -19,10 +19,13 @@ */ notrace unsigned long __init early_init(unsigned long dt_ptr) { - unsigned long offset = reloc_offset(); + unsigned long kva, offset = reloc_offset(); + + kva = *PTRRELOC(&kernstart_virt_addr); /* First zero the BSS */ - memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start); + if (kva == KERNELBASE) + memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start); /* * Identify the CPU type and fix up code sections @@ -32,5 +35,5 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) apply_feature_fixups(); - return KERNELBASE + offset; + return kva + offset; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index d9279d0ee9f5..3dd1a422fc29 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -1,25 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * PCI Error Recovery Driver for RPA-compliant PPC64 platform. * Copyright IBM Corp. 2004 2005 * Copyright Linas Vepstas <linas@linas.org> 2004, 2005 * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com> */ #include <linux/delay.h> @@ -897,12 +881,12 @@ void eeh_handle_normal_event(struct eeh_pe *pe) /* Log the event */ if (pe->type & EEH_PE_PHB) { - pr_err("EEH: PHB#%x failure detected, location: %s\n", + pr_err("EEH: Recovering PHB#%x, location: %s\n", pe->phb->global_number, eeh_pe_loc_get(pe)); } else { struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb); - pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", + pr_err("EEH: Recovering PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); pr_err("EEH: PE location: %s, PHB location: %s\n", eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 3fa04dda1737..ab44d965a53c 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -1,25 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Sysfs entries for PCI Error Recovery for PAPR-compliant platform. * Copyright IBM Corporation 2007 * Copyright Linas Vepstas <linas@austin.ibm.com> 2007 * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com> */ #include <linux/pci.h> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6467bdab8d40..3fd3ef352e3f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -537,6 +537,7 @@ flush_count_cache: /* Save LR into r9 */ mflr r9 + // Flush the link stack .rept 64 bl .+4 .endr @@ -546,6 +547,11 @@ flush_count_cache: .balign 32 /* Restore LR */ 1: mtlr r9 + + // If we're just flushing the link stack, return here +3: nop + patch_site 3b patch__flush_link_stack_return + li r9,0x7fff mtctr r9 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 829950b96d29..e4076e3c072d 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1346,16 +1346,6 @@ skpinv: addi r6,r6,1 /* Increment */ sync isync -/* - * The mapping only needs to be cache-coherent on SMP, except on - * Freescale e500mc derivatives where it's also needed for coherent DMA. - */ -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) -#define M_IF_NEEDED MAS2_M -#else -#define M_IF_NEEDED 0 -#endif - /* 6. Setup KERNELBASE mapping in TLB[0] * * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in @@ -1368,7 +1358,7 @@ skpinv: addi r6,r6,1 /* Increment */ ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l mtspr SPRN_MAS1,r6 - LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_NEEDED) + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | MAS2_M_IF_NEEDED) mtspr SPRN_MAS2,r6 rlwinm r5,r5,0,0,25 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d0018dd17e0a..46508b148e16 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -514,7 +514,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) * If stack=0, then the stack is already set in r1, and r1 is saved in r10. * PPR save and CPU accounting is not done for the !stack case (XXX why not?) */ -.macro INT_COMMON vec, area, stack, kaup, reconcile, dar, dsisr +.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr .if \stack andi. r10,r12,MSR_PR /* See if coming from user */ mr r10,r1 /* Save r1 */ @@ -533,7 +533,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) std r10,GPR1(r1) /* save r1 in stackframe */ .if \stack - .if \kaup + .if \kuap kuap_save_amr_and_lock r9, r10, cr1, cr0 .endif beq 101f /* if from kernel mode */ @@ -541,7 +541,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) SAVE_PPR(\area, r9) 101: .else - .if \kaup + .if \kuap kuap_save_amr_and_lock r9, r10, cr1 .endif .endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index ed59855430b9..ff0114aeba9b 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1466,16 +1466,15 @@ static void fadump_init_files(void) */ int __init setup_fadump(void) { - if (!fw_dump.fadump_enabled) - return 0; - - if (!fw_dump.fadump_supported) { - printk(KERN_ERR "Firmware-assisted dump is not supported on" - " this hardware\n"); + if (!fw_dump.fadump_supported) return 0; - } + fadump_init_files(); fadump_show_config(); + + if (!fw_dump.fadump_enabled) + return 1; + /* * If dump data is available then see if it is valid and prepare for * saving it to the disk. @@ -1492,8 +1491,6 @@ int __init setup_fadump(void) else if (fw_dump.reserve_dump_area_size) fw_dump.ops->fadump_init_mem_struct(&fw_dump); - fadump_init_files(); - return 1; } subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index ea065282b303..8bccce6544b5 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -153,35 +153,24 @@ skpinv: addi r6,r6,1 /* Increment */ tlbivax 0,r9 TLBSYNC -/* - * The mapping only needs to be cache-coherent on SMP, except on - * Freescale e500mc derivatives where it's also needed for coherent DMA. - */ -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) -#define M_IF_NEEDED MAS2_M -#else -#define M_IF_NEEDED 0 -#endif - #if defined(ENTRY_MAPPING_BOOT_SETUP) -/* 6. Setup KERNELBASE mapping in TLB1[0] */ +/* 6. Setup kernstart_virt_addr mapping in TLB1[0] */ lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */ mtspr SPRN_MAS0,r6 lis r6,(MAS1_VALID|MAS1_IPROT)@h ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l mtspr SPRN_MAS1,r6 - lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@h - ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@l + lis r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h + ori r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l + and r6,r6,r20 + ori r6,r6,MAS2_M_IF_NEEDED@l mtspr SPRN_MAS2,r6 mtspr SPRN_MAS3,r8 tlbwe -/* 7. Jump to KERNELBASE mapping */ - lis r6,(KERNELBASE & ~0xfff)@h - ori r6,r6,(KERNELBASE & ~0xfff)@l - rlwinm r7,r25,0,0x03ffffff - add r6,r7,r6 +/* 7. Jump to kernstart_virt_addr mapping */ + mr r6,r20 #elif defined(ENTRY_MAPPING_KEXEC_SETUP) /* diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index adf0505dbe02..838d9d4650c7 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -155,6 +155,8 @@ _ENTRY(_start); */ _ENTRY(__early_start) + LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr) + lwz r20,0(r20) #define ENTRY_MAPPING_BOOT_SETUP #include "fsl_booke_entry_mapping.S" @@ -277,8 +279,8 @@ set_ivor: ori r6, r6, swapper_pg_dir@l lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - lis r4, KERNELBASE@h - ori r4, r4, KERNELBASE@l + lis r3, kernstart_virt_addr@ha + lwz r4, kernstart_virt_addr@l(r3) stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */ stw r6, 0(r5) @@ -1067,7 +1069,12 @@ __secondary_start: mr r5,r25 /* phys kernel start */ rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */ subf r4,r5,r4 /* memstart_addr - phys kernel start */ - li r5,0 /* no device tree */ + lis r7,KERNELBASE@h + ori r7,r7,KERNELBASE@l + cmpw r20,r7 /* if kernstart_virt_addr != KERNELBASE, randomized */ + beq 2f + li r4,0 +2: li r5,0 /* no device tree */ li r6,0 /* not boot cpu */ bl restore_to_as0 @@ -1115,6 +1122,54 @@ __secondary_hold_acknowledge: #endif /* + * Create a 64M tlb by address and entry + * r3 - entry + * r4 - virtual address + * r5/r6 - physical address + */ +_GLOBAL(create_kaslr_tlb_entry) + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */ + mtspr SPRN_MAS0,r7 /* Write MAS0 */ + + lis r3,(MAS1_VALID|MAS1_IPROT)@h + ori r3,r3,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l + mtspr SPRN_MAS1,r3 /* Write MAS1 */ + + lis r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h + ori r3,r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l + and r3,r3,r4 + ori r3,r3,MAS2_M_IF_NEEDED@l + mtspr SPRN_MAS2,r3 /* Write MAS2(EPN) */ + +#ifdef CONFIG_PHYS_64BIT + ori r8,r6,(MAS3_SW|MAS3_SR|MAS3_SX) + mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */ + mtspr SPRN_MAS7,r5 +#else + ori r8,r5,(MAS3_SW|MAS3_SR|MAS3_SX) + mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */ +#endif + + tlbwe /* Write TLB */ + isync + sync + blr + +/* + * Return to the start of the relocated kernel and run again + * r3 - virtual address of fdt + * r4 - entry of the kernel + */ +_GLOBAL(reloc_kernel_entry) + mfmsr r7 + rlwinm r7, r7, 0, ~(MSR_IS | MSR_DS) + + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r7 + rfi + +/* * Create a tlb entry with the same effective and physical address as * the tlb entry used by the current running code. But set the TS to 1. * Then switch to the address space 1. It will return with the r3 set to diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 1007ec36b4cb..58ce3d37c2a3 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -127,15 +127,58 @@ int arch_bp_generic_fields(int type, int *gen_bp_type) } /* + * Watchpoint match range is always doubleword(8 bytes) aligned on + * powerpc. If the given range is crossing doubleword boundary, we + * need to increase the length such that next doubleword also get + * covered. Ex, + * + * address len = 6 bytes + * |=========. + * |------------v--|------v--------| + * | | | | | | | | | | | | | | | | | + * |---------------|---------------| + * <---8 bytes---> + * + * In this case, we should configure hw as: + * start_addr = address & ~HW_BREAKPOINT_ALIGN + * len = 16 bytes + * + * @start_addr and @end_addr are inclusive. + */ +static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) +{ + u16 max_len = DABR_MAX_LEN; + u16 hw_len; + unsigned long start_addr, end_addr; + + start_addr = hw->address & ~HW_BREAKPOINT_ALIGN; + end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN; + hw_len = end_addr - start_addr + 1; + + if (dawr_enabled()) { + max_len = DAWR_MAX_LEN; + /* DAWR region can't cross 512 bytes boundary */ + if ((start_addr >> 9) != (end_addr >> 9)) + return -EINVAL; + } + + if (hw_len > max_len) + return -EINVAL; + + hw->hw_len = hw_len; + return 0; +} + +/* * Validate the arch-specific HW Breakpoint register settings */ int hw_breakpoint_arch_parse(struct perf_event *bp, const struct perf_event_attr *attr, struct arch_hw_breakpoint *hw) { - int ret = -EINVAL, length_max; + int ret = -EINVAL; - if (!bp) + if (!bp || !attr->bp_len) return ret; hw->type = HW_BRK_TYPE_TRANSLATE; @@ -155,26 +198,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, hw->address = attr->bp_addr; hw->len = attr->bp_len; - /* - * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8) - * and breakpoint addresses are aligned to nearest double-word - * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the - * 'symbolsize' should satisfy the check below. - */ if (!ppc_breakpoint_available()) return -ENODEV; - length_max = 8; /* DABR */ - if (dawr_enabled()) { - length_max = 512 ; /* 64 doublewords */ - /* DAWR region can't cross 512 boundary */ - if ((attr->bp_addr >> 9) != - ((attr->bp_addr + attr->bp_len - 1) >> 9)) - return -EINVAL; - } - if (hw->len > - (length_max - (hw->address & HW_BREAKPOINT_ALIGN))) - return -EINVAL; - return 0; + + return hw_breakpoint_validate_len(hw); } /* @@ -195,33 +222,49 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) tsk->thread.last_hit_ubp = NULL; } -static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr) +static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info) { - int ret, type; - struct instruction_op op; + return ((info->address <= dar) && (dar - info->address < info->len)); +} - ret = analyse_instr(&op, regs, instr); - type = GETTYPE(op.type); - return (!ret && (type == LARX || type == STCX)); +static bool +dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info) +{ + return ((dar <= info->address + info->len - 1) && + (dar + size - 1 >= info->address)); } /* * Handle debug exception notifications. */ static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp, - unsigned long addr) + struct arch_hw_breakpoint *info) { unsigned int instr = 0; + int ret, type, size; + struct instruction_op op; + unsigned long addr = info->address; if (__get_user_inatomic(instr, (unsigned int *)regs->nip)) goto fail; - if (is_larx_stcx_instr(regs, instr)) { + ret = analyse_instr(&op, regs, instr); + type = GETTYPE(op.type); + size = GETSIZE(op.type); + + if (!ret && (type == LARX || type == STCX)) { printk_ratelimited("Breakpoint hit on instruction that can't be emulated." " Breakpoint at 0x%lx will be disabled.\n", addr); goto disable; } + /* + * If it's extraneous event, we still need to emulate/single- + * step the instruction, but we don't generate an event. + */ + if (size && !dar_range_overlaps(regs->dar, size, info)) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + /* Do not emulate user-space instructions, instead single-step them */ if (user_mode(regs)) { current->thread.last_hit_ubp = bp; @@ -253,7 +296,6 @@ int hw_breakpoint_handler(struct die_args *args) struct perf_event *bp; struct pt_regs *regs = args->regs; struct arch_hw_breakpoint *info; - unsigned long dar = regs->dar; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); @@ -285,19 +327,14 @@ int hw_breakpoint_handler(struct die_args *args) goto out; } - /* - * Verify if dar lies within the address range occupied by the symbol - * being watched to filter extraneous exceptions. If it doesn't, - * we still need to single-step the instruction, but we don't - * generate an event. - */ info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (!((bp->attr.bp_addr <= dar) && - (dar - bp->attr.bp_addr < bp->attr.bp_len))) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - - if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info->address)) - goto out; + if (IS_ENABLED(CONFIG_PPC_8xx)) { + if (!dar_within_range(regs->dar, info)) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + } else { + if (!stepping_handler(regs, bp, info)) + goto out; + } /* * As a policy, the callback is invoked in a 'trigger-after-execute' diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c new file mode 100644 index 000000000000..e34116255ced --- /dev/null +++ b/arch/powerpc/kernel/ima_arch.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + */ + +#include <linux/ima.h> +#include <asm/secure_boot.h> + +bool arch_ima_get_secureboot(void) +{ + return is_ppc_secureboot_enabled(); +} + +/* + * The "secure_rules" are enabled only on "secureboot" enabled systems. + * These rules verify the file signatures against known good values. + * The "appraise_type=imasig|modsig" option allows the known good signature + * to be stored as an xattr or as an appended signature. + * + * To avoid duplicate signature verification as much as possible, the IMA + * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE + * is not enabled. + */ +static const char *const secure_rules[] = { + "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", +#ifndef CONFIG_MODULE_SIG_FORCE + "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", +#endif + NULL +}; + +/* + * The "trusted_rules" are enabled only on "trustedboot" enabled systems. + * These rules add the kexec kernel image and kernel modules file hashes to + * the IMA measurement list. + */ +static const char *const trusted_rules[] = { + "measure func=KEXEC_KERNEL_CHECK", + "measure func=MODULE_CHECK", + NULL +}; + +/* + * The "secure_and_trusted_rules" contains rules for both the secure boot and + * trusted boot. The "template=ima-modsig" option includes the appended + * signature, when available, in the IMA measurement list. + */ +static const char *const secure_and_trusted_rules[] = { + "measure func=KEXEC_KERNEL_CHECK template=ima-modsig", + "measure func=MODULE_CHECK template=ima-modsig", + "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", +#ifndef CONFIG_MODULE_SIG_FORCE + "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", +#endif + NULL +}; + +/* + * Returns the relevant IMA arch-specific policies based on the system secure + * boot state. + */ +const char *const *arch_get_ima_policy(void) +{ + if (is_ppc_secureboot_enabled()) { + if (IS_ENABLED(CONFIG_MODULE_SIG)) + set_module_sig_enforced(); + + if (is_ppc_trustedboot_enabled()) + return secure_and_trusted_rules; + else + return secure_rules; + } else if (is_ppc_trustedboot_enabled()) { + return trusted_rules; + } + + return NULL; +} diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c deleted file mode 100644 index 720e50e490b6..000000000000 --- a/arch/powerpc/kernel/ima_kexec.c +++ /dev/null @@ -1,219 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2016 IBM Corporation - * - * Authors: - * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> - */ - -#include <linux/slab.h> -#include <linux/kexec.h> -#include <linux/of.h> -#include <linux/memblock.h> -#include <linux/libfdt.h> - -static int get_addr_size_cells(int *addr_cells, int *size_cells) -{ - struct device_node *root; - - root = of_find_node_by_path("/"); - if (!root) - return -EINVAL; - - *addr_cells = of_n_addr_cells(root); - *size_cells = of_n_size_cells(root); - - of_node_put(root); - - return 0; -} - -static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr, - size_t *size) -{ - int ret, addr_cells, size_cells; - - ret = get_addr_size_cells(&addr_cells, &size_cells); - if (ret) - return ret; - - if (len < 4 * (addr_cells + size_cells)) - return -ENOENT; - - *addr = of_read_number(prop, addr_cells); - *size = of_read_number(prop + 4 * addr_cells, size_cells); - - return 0; -} - -/** - * ima_get_kexec_buffer - get IMA buffer from the previous kernel - * @addr: On successful return, set to point to the buffer contents. - * @size: On successful return, set to the buffer size. - * - * Return: 0 on success, negative errno on error. - */ -int ima_get_kexec_buffer(void **addr, size_t *size) -{ - int ret, len; - unsigned long tmp_addr; - size_t tmp_size; - const void *prop; - - prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len); - if (!prop) - return -ENOENT; - - ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size); - if (ret) - return ret; - - *addr = __va(tmp_addr); - *size = tmp_size; - - return 0; -} - -/** - * ima_free_kexec_buffer - free memory used by the IMA buffer - */ -int ima_free_kexec_buffer(void) -{ - int ret; - unsigned long addr; - size_t size; - struct property *prop; - - prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL); - if (!prop) - return -ENOENT; - - ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size); - if (ret) - return ret; - - ret = of_remove_property(of_chosen, prop); - if (ret) - return ret; - - return memblock_free(addr, size); - -} - -/** - * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt - * - * The IMA measurement buffer is of no use to a subsequent kernel, so we always - * remove it from the device tree. - */ -void remove_ima_buffer(void *fdt, int chosen_node) -{ - int ret, len; - unsigned long addr; - size_t size; - const void *prop; - - prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len); - if (!prop) - return; - - ret = do_get_kexec_buffer(prop, len, &addr, &size); - fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer"); - if (ret) - return; - - ret = delete_fdt_mem_rsv(fdt, addr, size); - if (!ret) - pr_debug("Removed old IMA buffer reservation.\n"); -} - -#ifdef CONFIG_IMA_KEXEC -/** - * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer - * - * Architectures should use this function to pass on the IMA buffer - * information to the next kernel. - * - * Return: 0 on success, negative errno on error. - */ -int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, - size_t size) -{ - image->arch.ima_buffer_addr = load_addr; - image->arch.ima_buffer_size = size; - - return 0; -} - -static int write_number(void *p, u64 value, int cells) -{ - if (cells == 1) { - u32 tmp; - - if (value > U32_MAX) - return -EINVAL; - - tmp = cpu_to_be32(value); - memcpy(p, &tmp, sizeof(tmp)); - } else if (cells == 2) { - u64 tmp; - - tmp = cpu_to_be64(value); - memcpy(p, &tmp, sizeof(tmp)); - } else - return -EINVAL; - - return 0; -} - -/** - * setup_ima_buffer - add IMA buffer information to the fdt - * @image: kexec image being loaded. - * @fdt: Flattened device tree for the next kernel. - * @chosen_node: Offset to the chosen node. - * - * Return: 0 on success, or negative errno on error. - */ -int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node) -{ - int ret, addr_cells, size_cells, entry_size; - u8 value[16]; - - remove_ima_buffer(fdt, chosen_node); - if (!image->arch.ima_buffer_size) - return 0; - - ret = get_addr_size_cells(&addr_cells, &size_cells); - if (ret) - return ret; - - entry_size = 4 * (addr_cells + size_cells); - - if (entry_size > sizeof(value)) - return -EINVAL; - - ret = write_number(value, image->arch.ima_buffer_addr, addr_cells); - if (ret) - return ret; - - ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size, - size_cells); - if (ret) - return ret; - - ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value, - entry_size); - if (ret < 0) - return -EINVAL; - - ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr, - image->arch.ima_buffer_size); - if (ret) - return -EINVAL; - - pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n", - image->arch.ima_buffer_addr, image->arch.ima_buffer_size); - - return 0; -} -#endif /* CONFIG_IMA_KEXEC */ diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c deleted file mode 100644 index 3072fd6dbe94..000000000000 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Load ELF vmlinux file for the kexec_file_load syscall. - * - * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) - * Copyright (C) 2004 IBM Corp. - * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) - * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) - * Copyright (C) 2016 IBM Corporation - * - * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c. - * Heavily modified for the kernel by - * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>. - */ - -#define pr_fmt(fmt) "kexec_elf: " fmt - -#include <linux/elf.h> -#include <linux/kexec.h> -#include <linux/libfdt.h> -#include <linux/module.h> -#include <linux/of_fdt.h> -#include <linux/slab.h> -#include <linux/types.h> - -static void *elf64_load(struct kimage *image, char *kernel_buf, - unsigned long kernel_len, char *initrd, - unsigned long initrd_len, char *cmdline, - unsigned long cmdline_len) -{ - int ret; - unsigned int fdt_size; - unsigned long kernel_load_addr; - unsigned long initrd_load_addr = 0, fdt_load_addr; - void *fdt; - const void *slave_code; - struct elfhdr ehdr; - struct kexec_elf_info elf_info; - struct kexec_buf kbuf = { .image = image, .buf_min = 0, - .buf_max = ppc64_rma_size }; - struct kexec_buf pbuf = { .image = image, .buf_min = 0, - .buf_max = ppc64_rma_size, .top_down = true, - .mem = KEXEC_BUF_MEM_UNKNOWN }; - - ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); - if (ret) - goto out; - - ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); - if (ret) - goto out; - - pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr); - - ret = kexec_load_purgatory(image, &pbuf); - if (ret) { - pr_err("Loading purgatory failed.\n"); - goto out; - } - - pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem); - - if (initrd != NULL) { - kbuf.buffer = initrd; - kbuf.bufsz = kbuf.memsz = initrd_len; - kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = false; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_add_buffer(&kbuf); - if (ret) - goto out; - initrd_load_addr = kbuf.mem; - - pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); - } - - fdt_size = fdt_totalsize(initial_boot_params) * 2; - fdt = kmalloc(fdt_size, GFP_KERNEL); - if (!fdt) { - pr_err("Not enough memory for the device tree.\n"); - ret = -ENOMEM; - goto out; - } - ret = fdt_open_into(initial_boot_params, fdt, fdt_size); - if (ret < 0) { - pr_err("Error setting up the new device tree.\n"); - ret = -EINVAL; - goto out; - } - - ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); - if (ret) - goto out; - - fdt_pack(fdt); - - kbuf.buffer = fdt; - kbuf.bufsz = kbuf.memsz = fdt_size; - kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = true; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_add_buffer(&kbuf); - if (ret) - goto out; - fdt_load_addr = kbuf.mem; - - pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); - - slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; - ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, - fdt_load_addr); - if (ret) - pr_err("Error setting up the purgatory.\n"); - -out: - kexec_free_elf_info(&elf_info); - - /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ - return ret ? ERR_PTR(ret) : fdt; -} - -const struct kexec_file_ops kexec_elf64_ops = { - .probe = kexec_elf_probe, - .load = elf64_load, -}; diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c deleted file mode 100644 index c4ed328a7b96..000000000000 --- a/arch/powerpc/kernel/machine_kexec.c +++ /dev/null @@ -1,279 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Code to handle transition of Linux booting another kernel. - * - * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> - * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz - * Copyright (C) 2005 IBM Corporation. - */ - -#include <linux/kexec.h> -#include <linux/reboot.h> -#include <linux/threads.h> -#include <linux/memblock.h> -#include <linux/of.h> -#include <linux/irq.h> -#include <linux/ftrace.h> - -#include <asm/kdump.h> -#include <asm/machdep.h> -#include <asm/pgalloc.h> -#include <asm/prom.h> -#include <asm/sections.h> - -void machine_kexec_mask_interrupts(void) { - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - -void machine_crash_shutdown(struct pt_regs *regs) -{ - default_machine_crash_shutdown(regs); -} - -/* - * Do what every setup is needed on image and the - * reboot code buffer to allow us to avoid allocations - * later. - */ -int machine_kexec_prepare(struct kimage *image) -{ - if (ppc_md.machine_kexec_prepare) - return ppc_md.machine_kexec_prepare(image); - else - return default_machine_kexec_prepare(image); -} - -void machine_kexec_cleanup(struct kimage *image) -{ -} - -void arch_crash_save_vmcoreinfo(void) -{ - -#ifdef CONFIG_NEED_MULTIPLE_NODES - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif -#ifndef CONFIG_NEED_MULTIPLE_NODES - VMCOREINFO_SYMBOL(contig_page_data); -#endif -#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP) - VMCOREINFO_SYMBOL(vmemmap_list); - VMCOREINFO_SYMBOL(mmu_vmemmap_psize); - VMCOREINFO_SYMBOL(mmu_psize_defs); - VMCOREINFO_STRUCT_SIZE(vmemmap_backing); - VMCOREINFO_OFFSET(vmemmap_backing, list); - VMCOREINFO_OFFSET(vmemmap_backing, phys); - VMCOREINFO_OFFSET(vmemmap_backing, virt_addr); - VMCOREINFO_STRUCT_SIZE(mmu_psize_def); - VMCOREINFO_OFFSET(mmu_psize_def, shift); -#endif -} - -/* - * Do not allocate memory (or fail in any way) in machine_kexec(). - * We are past the point of no return, committed to rebooting now. - */ -void machine_kexec(struct kimage *image) -{ - int save_ftrace_enabled; - - save_ftrace_enabled = __ftrace_enabled_save(); - this_cpu_disable_ftrace(); - - if (ppc_md.machine_kexec) - ppc_md.machine_kexec(image); - else - default_machine_kexec(image); - - this_cpu_enable_ftrace(); - __ftrace_enabled_restore(save_ftrace_enabled); - - /* Fall back to normal restart if we're still alive. */ - machine_restart(NULL); - for(;;); -} - -void __init reserve_crashkernel(void) -{ - unsigned long long crash_size, crash_base; - int ret; - - /* use common parsing */ - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &crash_size, &crash_base); - if (ret == 0 && crash_size > 0) { - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - } - - if (crashk_res.end == crashk_res.start) { - crashk_res.start = crashk_res.end = 0; - return; - } - - /* We might have got these values via the command line or the - * device tree, either way sanitise them now. */ - - crash_size = resource_size(&crashk_res); - -#ifndef CONFIG_NONSTATIC_KERNEL - if (crashk_res.start != KDUMP_KERNELBASE) - printk("Crash kernel location must be 0x%x\n", - KDUMP_KERNELBASE); - - crashk_res.start = KDUMP_KERNELBASE; -#else - if (!crashk_res.start) { -#ifdef CONFIG_PPC64 - /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. - */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); -#else - crashk_res.start = KDUMP_KERNELBASE; -#endif - } - - crash_base = PAGE_ALIGN(crashk_res.start); - if (crash_base != crashk_res.start) { - printk("Crash kernel base must be aligned to 0x%lx\n", - PAGE_SIZE); - crashk_res.start = crash_base; - } - -#endif - crash_size = PAGE_ALIGN(crash_size); - crashk_res.end = crashk_res.start + crash_size - 1; - - /* The crash region must not overlap the current kernel */ - if (overlaps_crashkernel(__pa(_stext), _end - _stext)) { - printk(KERN_WARNING - "Crash kernel can not overlap current kernel\n"); - crashk_res.start = crashk_res.end = 0; - return; - } - - /* Crash kernel trumps memory limit */ - if (memory_limit && memory_limit <= crashk_res.end) { - memory_limit = crashk_res.end + 1; - printk("Adjusted memory limit for crashkernel, now 0x%llx\n", - memory_limit); - } - - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crashk_res.start >> 20), - (unsigned long)(memblock_phys_mem_size() >> 20)); - - if (!memblock_is_region_memory(crashk_res.start, crash_size) || - memblock_reserve(crashk_res.start, crash_size)) { - pr_err("Failed to reserve memory for crashkernel!\n"); - crashk_res.start = crashk_res.end = 0; - return; - } -} - -int overlaps_crashkernel(unsigned long start, unsigned long size) -{ - return (start + size) > crashk_res.start && start <= crashk_res.end; -} - -/* Values we need to export to the second kernel via the device tree. */ -static phys_addr_t kernel_end; -static phys_addr_t crashk_base; -static phys_addr_t crashk_size; -static unsigned long long mem_limit; - -static struct property kernel_end_prop = { - .name = "linux,kernel-end", - .length = sizeof(phys_addr_t), - .value = &kernel_end, -}; - -static struct property crashk_base_prop = { - .name = "linux,crashkernel-base", - .length = sizeof(phys_addr_t), - .value = &crashk_base -}; - -static struct property crashk_size_prop = { - .name = "linux,crashkernel-size", - .length = sizeof(phys_addr_t), - .value = &crashk_size, -}; - -static struct property memory_limit_prop = { - .name = "linux,memory-limit", - .length = sizeof(unsigned long long), - .value = &mem_limit, -}; - -#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG) - -static void __init export_crashk_values(struct device_node *node) -{ - /* There might be existing crash kernel properties, but we can't - * be sure what's in them, so remove them. */ - of_remove_property(node, of_find_property(node, - "linux,crashkernel-base", NULL)); - of_remove_property(node, of_find_property(node, - "linux,crashkernel-size", NULL)); - - if (crashk_res.start != 0) { - crashk_base = cpu_to_be_ulong(crashk_res.start), - of_add_property(node, &crashk_base_prop); - crashk_size = cpu_to_be_ulong(resource_size(&crashk_res)); - of_add_property(node, &crashk_size_prop); - } - - /* - * memory_limit is required by the kexec-tools to limit the - * crash regions to the actual memory used. - */ - mem_limit = cpu_to_be_ulong(memory_limit); - of_update_property(node, &memory_limit_prop); -} - -static int __init kexec_setup(void) -{ - struct device_node *node; - - node = of_find_node_by_path("/chosen"); - if (!node) - return -ENOENT; - - /* remove any stale properties so ours can be found */ - of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL)); - - /* information needed by userspace when using default_machine_kexec */ - kernel_end = cpu_to_be_ulong(__pa(_end)); - of_add_property(node, &kernel_end_prop); - - export_crashk_values(node); - - of_node_put(node); - return 0; -} -late_initcall(kexec_setup); diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c deleted file mode 100644 index bf9f1f906d64..000000000000 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * PPC32 code to handle Linux booting another kernel. - * - * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> - * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz - * Copyright (C) 2005 IBM Corporation. - */ - -#include <linux/kexec.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <asm/cacheflush.h> -#include <asm/hw_irq.h> -#include <asm/io.h> - -typedef void (*relocate_new_kernel_t)( - unsigned long indirection_page, - unsigned long reboot_code_buffer, - unsigned long start_address) __noreturn; - -/* - * This is a generic machine_kexec function suitable at least for - * non-OpenFirmware embedded platforms. - * It merely copies the image relocation code to the control page and - * jumps to it. - * A platform specific function may just call this one. - */ -void default_machine_kexec(struct kimage *image) -{ - extern const unsigned int relocate_new_kernel_size; - unsigned long page_list; - unsigned long reboot_code_buffer, reboot_code_buffer_phys; - relocate_new_kernel_t rnk; - - /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); - - /* mask each interrupt so we are in a more sane state for the - * kexec kernel */ - machine_kexec_mask_interrupts(); - - page_list = image->head; - - /* we need both effective and real address here */ - reboot_code_buffer = - (unsigned long)page_address(image->control_code_page); - reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer); - - /* copy our kernel relocation code to the control code page */ - memcpy((void *)reboot_code_buffer, relocate_new_kernel, - relocate_new_kernel_size); - - flush_icache_range(reboot_code_buffer, - reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); - printk(KERN_INFO "Bye!\n"); - - if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x)) - relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start); - - /* now call it */ - rnk = (relocate_new_kernel_t) reboot_code_buffer; - (*rnk)(page_list, reboot_code_buffer_phys, image->start); -} - -int default_machine_kexec_prepare(struct kimage *image) -{ - return 0; -} diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c deleted file mode 100644 index 04a7cba58eff..000000000000 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ /dev/null @@ -1,417 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * PPC64 code to handle Linux booting another kernel. - * - * Copyright (C) 2004-2005, IBM Corp. - * - * Created by: Milton D Miller II - */ - - -#include <linux/kexec.h> -#include <linux/smp.h> -#include <linux/thread_info.h> -#include <linux/init_task.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/cpu.h> -#include <linux/hardirq.h> - -#include <asm/page.h> -#include <asm/current.h> -#include <asm/machdep.h> -#include <asm/cacheflush.h> -#include <asm/firmware.h> -#include <asm/paca.h> -#include <asm/mmu.h> -#include <asm/sections.h> /* _end */ -#include <asm/prom.h> -#include <asm/smp.h> -#include <asm/hw_breakpoint.h> -#include <asm/asm-prototypes.h> -#include <asm/svm.h> -#include <asm/ultravisor.h> - -int default_machine_kexec_prepare(struct kimage *image) -{ - int i; - unsigned long begin, end; /* limits of segment */ - unsigned long low, high; /* limits of blocked memory range */ - struct device_node *node; - const unsigned long *basep; - const unsigned int *sizep; - - /* - * Since we use the kernel fault handlers and paging code to - * handle the virtual mode, we must make sure no destination - * overlaps kernel static data or bss. - */ - for (i = 0; i < image->nr_segments; i++) - if (image->segment[i].mem < __pa(_end)) - return -ETXTBSY; - - /* We also should not overwrite the tce tables */ - for_each_node_by_type(node, "pci") { - basep = of_get_property(node, "linux,tce-base", NULL); - sizep = of_get_property(node, "linux,tce-size", NULL); - if (basep == NULL || sizep == NULL) - continue; - - low = *basep; - high = low + (*sizep); - - for (i = 0; i < image->nr_segments; i++) { - begin = image->segment[i].mem; - end = begin + image->segment[i].memsz; - - if ((begin < high) && (end > low)) - return -ETXTBSY; - } - } - - return 0; -} - -static void copy_segments(unsigned long ind) -{ - unsigned long entry; - unsigned long *ptr; - void *dest; - void *addr; - - /* - * We rely on kexec_load to create a lists that properly - * initializes these pointers before they are used. - * We will still crash if the list is wrong, but at least - * the compiler will be quiet. - */ - ptr = NULL; - dest = NULL; - - for (entry = ind; !(entry & IND_DONE); entry = *ptr++) { - addr = __va(entry & PAGE_MASK); - - switch (entry & IND_FLAGS) { - case IND_DESTINATION: - dest = addr; - break; - case IND_INDIRECTION: - ptr = addr; - break; - case IND_SOURCE: - copy_page(dest, addr); - dest += PAGE_SIZE; - } - } -} - -void kexec_copy_flush(struct kimage *image) -{ - long i, nr_segments = image->nr_segments; - struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; - - /* save the ranges on the stack to efficiently flush the icache */ - memcpy(ranges, image->segment, sizeof(ranges)); - - /* - * After this call we may not use anything allocated in dynamic - * memory, including *image. - * - * Only globals and the stack are allowed. - */ - copy_segments(image->head); - - /* - * we need to clear the icache for all dest pages sometime, - * including ones that were in place on the original copy - */ - for (i = 0; i < nr_segments; i++) - flush_icache_range((unsigned long)__va(ranges[i].mem), - (unsigned long)__va(ranges[i].mem + ranges[i].memsz)); -} - -#ifdef CONFIG_SMP - -static int kexec_all_irq_disabled = 0; - -static void kexec_smp_down(void *arg) -{ - local_irq_disable(); - hard_irq_disable(); - - mb(); /* make sure our irqs are disabled before we say they are */ - get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; - while(kexec_all_irq_disabled == 0) - cpu_relax(); - mb(); /* make sure all irqs are disabled before this */ - hw_breakpoint_disable(); - /* - * Now every CPU has IRQs off, we can clear out any pending - * IPIs and be sure that no more will come in after this. - */ - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(0, 1); - - kexec_smp_wait(); - /* NOTREACHED */ -} - -static void kexec_prepare_cpus_wait(int wait_state) -{ - int my_cpu, i, notified=-1; - - hw_breakpoint_disable(); - my_cpu = get_cpu(); - /* Make sure each CPU has at least made it to the state we need. - * - * FIXME: There is a (slim) chance of a problem if not all of the CPUs - * are correctly onlined. If somehow we start a CPU on boot with RTAS - * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in - * time, the boot CPU will timeout. If it does eventually execute - * stuff, the secondary will start up (paca_ptrs[]->cpu_start was - * written) and get into a peculiar state. - * If the platform supports smp_ops->take_timebase(), the secondary CPU - * will probably be spinning in there. If not (i.e. pseries), the - * secondary will continue on and try to online itself/idle/etc. If it - * survives that, we need to find these - * possible-but-not-online-but-should-be CPUs and chaperone them into - * kexec_smp_wait(). - */ - for_each_online_cpu(i) { - if (i == my_cpu) - continue; - - while (paca_ptrs[i]->kexec_state < wait_state) { - barrier(); - if (i != notified) { - printk(KERN_INFO "kexec: waiting for cpu %d " - "(physical %d) to enter %i state\n", - i, paca_ptrs[i]->hw_cpu_id, wait_state); - notified = i; - } - } - } - mb(); -} - -/* - * We need to make sure each present CPU is online. The next kernel will scan - * the device tree and assume primary threads are online and query secondary - * threads via RTAS to online them if required. If we don't online primary - * threads, they will be stuck. However, we also online secondary threads as we - * may be using 'cede offline'. In this case RTAS doesn't see the secondary - * threads as offline -- and again, these CPUs will be stuck. - * - * So, we online all CPUs that should be running, including secondary threads. - */ -static void wake_offline_cpus(void) -{ - int cpu = 0; - - for_each_present_cpu(cpu) { - if (!cpu_online(cpu)) { - printk(KERN_INFO "kexec: Waking offline cpu %d.\n", - cpu); - WARN_ON(cpu_up(cpu)); - } - } -} - -static void kexec_prepare_cpus(void) -{ - wake_offline_cpus(); - smp_call_function(kexec_smp_down, NULL, /* wait */0); - local_irq_disable(); - hard_irq_disable(); - - mb(); /* make sure IRQs are disabled before we say they are */ - get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; - - kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF); - /* we are sure every CPU has IRQs off at this point */ - kexec_all_irq_disabled = 1; - - /* - * Before removing MMU mappings make sure all CPUs have entered real - * mode: - */ - kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE); - - /* after we tell the others to go down */ - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(0, 0); - - put_cpu(); -} - -#else /* ! SMP */ - -static void kexec_prepare_cpus(void) -{ - /* - * move the secondarys to us so that we can copy - * the new kernel 0-0x100 safely - * - * do this if kexec in setup.c ? - * - * We need to release the cpus if we are ever going from an - * UP to an SMP kernel. - */ - smp_release_cpus(); - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(0, 0); - local_irq_disable(); - hard_irq_disable(); -} - -#endif /* SMP */ - -/* - * kexec thread structure and stack. - * - * We need to make sure that this is 16384-byte aligned due to the - * way process stacks are handled. It also must be statically allocated - * or allocated as part of the kimage, because everything else may be - * overwritten when we copy the kexec image. We piggyback on the - * "init_task" linker section here to statically allocate a stack. - * - * We could use a smaller stack if we don't care about anything using - * current, but that audit has not been performed. - */ -static union thread_union kexec_stack __init_task_data = - { }; - -/* - * For similar reasons to the stack above, the kexecing CPU needs to be on a - * static PACA; we switch to kexec_paca. - */ -struct paca_struct kexec_paca; - -/* Our assembly helper, in misc_64.S */ -extern void kexec_sequence(void *newstack, unsigned long start, - void *image, void *control, - void (*clear_all)(void), - bool copy_with_mmu_off) __noreturn; - -/* too late to fail here */ -void default_machine_kexec(struct kimage *image) -{ - bool copy_with_mmu_off; - - /* prepare control code if any */ - - /* - * If the kexec boot is the normal one, need to shutdown other cpus - * into our wait loop and quiesce interrupts. - * Otherwise, in the case of crashed mode (crashing_cpu >= 0), - * stopping other CPUs and collecting their pt_regs is done before - * using debugger IPI. - */ - - if (!kdump_in_progress()) - kexec_prepare_cpus(); - - printk("kexec: Starting switchover sequence.\n"); - - /* switch to a staticly allocated stack. Based on irq stack code. - * We setup preempt_count to avoid using VMX in memcpy. - * XXX: the task struct will likely be invalid once we do the copy! - */ - current_thread_info()->flags = 0; - current_thread_info()->preempt_count = HARDIRQ_OFFSET; - - /* We need a static PACA, too; copy this CPU's PACA over and switch to - * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using - * non-static data. - */ - memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); - kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; -#ifdef CONFIG_PPC_PSERIES - kexec_paca.lppaca_ptr = NULL; -#endif - - if (is_secure_guest() && !(image->preserve_context || - image->type == KEXEC_TYPE_CRASH)) { - uv_unshare_all_pages(); - printk("kexec: Unshared all shared pages.\n"); - } - - paca_ptrs[kexec_paca.paca_index] = &kexec_paca; - - setup_paca(&kexec_paca); - - /* - * The lppaca should be unregistered at this point so the HV won't - * touch it. In the case of a crash, none of the lppacas are - * unregistered so there is not much we can do about it here. - */ - - /* - * On Book3S, the copy must happen with the MMU off if we are either - * using Radix page tables or we are not in an LPAR since we can - * overwrite the page tables while copying. - * - * In an LPAR, we keep the MMU on otherwise we can't access beyond - * the RMA. On BookE there is no real MMU off mode, so we have to - * keep it enabled as well (but then we have bolted TLB entries). - */ -#ifdef CONFIG_PPC_BOOK3E - copy_with_mmu_off = false; -#else - copy_with_mmu_off = radix_enabled() || - !(firmware_has_feature(FW_FEATURE_LPAR) || - firmware_has_feature(FW_FEATURE_PS3_LV1)); -#endif - - /* Some things are best done in assembly. Finding globals with - * a toc is easier in C, so pass in what we can. - */ - kexec_sequence(&kexec_stack, image->start, image, - page_address(image->control_code_page), - mmu_cleanup_all, copy_with_mmu_off); - /* NOTREACHED */ -} - -#ifdef CONFIG_PPC_BOOK3S_64 -/* Values we need to export to the second kernel via the device tree. */ -static unsigned long htab_base; -static unsigned long htab_size; - -static struct property htab_base_prop = { - .name = "linux,htab-base", - .length = sizeof(unsigned long), - .value = &htab_base, -}; - -static struct property htab_size_prop = { - .name = "linux,htab-size", - .length = sizeof(unsigned long), - .value = &htab_size, -}; - -static int __init export_htab_values(void) -{ - struct device_node *node; - - /* On machines with no htab htab_address is NULL */ - if (!htab_address) - return -ENODEV; - - node = of_find_node_by_path("/chosen"); - if (!node) - return -ENODEV; - - /* remove any stale propertys so ours can be found */ - of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL)); - of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL)); - - htab_base = cpu_to_be64(__pa(htab_address)); - of_add_property(node, &htab_base_prop); - htab_size = cpu_to_be64(htab_size_bytes); - of_add_property(node, &htab_size_prop); - - of_node_put(node); - return 0; -} -late_initcall(export_htab_values); -#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c deleted file mode 100644 index 143c91724617..000000000000 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ /dev/null @@ -1,254 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ppc64 code to implement the kexec_file_load syscall - * - * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) - * Copyright (C) 2004 IBM Corp. - * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation - * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) - * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) - * Copyright (C) 2016 IBM Corporation - * - * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c. - * Heavily modified for the kernel by - * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>. - */ - -#include <linux/slab.h> -#include <linux/kexec.h> -#include <linux/of_fdt.h> -#include <linux/libfdt.h> -#include <asm/ima.h> - -#define SLAVE_CODE_SIZE 256 - -const struct kexec_file_ops * const kexec_file_loaders[] = { - &kexec_elf64_ops, - NULL -}; - -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len) -{ - /* We don't support crash kernels yet. */ - if (image->type == KEXEC_TYPE_CRASH) - return -EOPNOTSUPP; - - return kexec_image_probe_default(image, buf, buf_len); -} - -/** - * setup_purgatory - initialize the purgatory's global variables - * @image: kexec image. - * @slave_code: Slave code for the purgatory. - * @fdt: Flattened device tree for the next kernel. - * @kernel_load_addr: Address where the kernel is loaded. - * @fdt_load_addr: Address where the flattened device tree is loaded. - * - * Return: 0 on success, or negative errno on error. - */ -int setup_purgatory(struct kimage *image, const void *slave_code, - const void *fdt, unsigned long kernel_load_addr, - unsigned long fdt_load_addr) -{ - unsigned int *slave_code_buf, master_entry; - int ret; - - slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL); - if (!slave_code_buf) - return -ENOMEM; - - /* Get the slave code from the new kernel and put it in purgatory. */ - ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", - slave_code_buf, SLAVE_CODE_SIZE, - true); - if (ret) { - kfree(slave_code_buf); - return ret; - } - - master_entry = slave_code_buf[0]; - memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE); - slave_code_buf[0] = master_entry; - ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", - slave_code_buf, SLAVE_CODE_SIZE, - false); - kfree(slave_code_buf); - - ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr, - sizeof(kernel_load_addr), false); - if (ret) - return ret; - ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr, - sizeof(fdt_load_addr), false); - if (ret) - return ret; - - return 0; -} - -/** - * delete_fdt_mem_rsv - delete memory reservation with given address and size - * - * Return: 0 on success, or negative errno on error. - */ -int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) -{ - int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); - - for (i = 0; i < num_rsvs; i++) { - uint64_t rsv_start, rsv_size; - - ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size); - if (ret) { - pr_err("Malformed device tree.\n"); - return -EINVAL; - } - - if (rsv_start == start && rsv_size == size) { - ret = fdt_del_mem_rsv(fdt, i); - if (ret) { - pr_err("Error deleting device tree reservation.\n"); - return -EINVAL; - } - - return 0; - } - } - - return -ENOENT; -} - -/* - * setup_new_fdt - modify /chosen and memory reservation for the next kernel - * @image: kexec image being loaded. - * @fdt: Flattened device tree for the next kernel. - * @initrd_load_addr: Address where the next initrd will be loaded. - * @initrd_len: Size of the next initrd, or 0 if there will be none. - * @cmdline: Command line for the next kernel, or NULL if there will - * be none. - * - * Return: 0 on success, or negative errno on error. - */ -int setup_new_fdt(const struct kimage *image, void *fdt, - unsigned long initrd_load_addr, unsigned long initrd_len, - const char *cmdline) -{ - int ret, chosen_node; - const void *prop; - - /* Remove memory reservation for the current device tree. */ - ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params), - fdt_totalsize(initial_boot_params)); - if (ret == 0) - pr_debug("Removed old device tree reservation.\n"); - else if (ret != -ENOENT) - return ret; - - chosen_node = fdt_path_offset(fdt, "/chosen"); - if (chosen_node == -FDT_ERR_NOTFOUND) { - chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), - "chosen"); - if (chosen_node < 0) { - pr_err("Error creating /chosen.\n"); - return -EINVAL; - } - } else if (chosen_node < 0) { - pr_err("Malformed device tree: error reading /chosen.\n"); - return -EINVAL; - } - - /* Did we boot using an initrd? */ - prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); - if (prop) { - uint64_t tmp_start, tmp_end, tmp_size; - - tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); - - prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); - if (!prop) { - pr_err("Malformed device tree.\n"); - return -EINVAL; - } - tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); - - /* - * kexec reserves exact initrd size, while firmware may - * reserve a multiple of PAGE_SIZE, so check for both. - */ - tmp_size = tmp_end - tmp_start; - ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size); - if (ret == -ENOENT) - ret = delete_fdt_mem_rsv(fdt, tmp_start, - round_up(tmp_size, PAGE_SIZE)); - if (ret == 0) - pr_debug("Removed old initrd reservation.\n"); - else if (ret != -ENOENT) - return ret; - - /* If there's no new initrd, delete the old initrd's info. */ - if (initrd_len == 0) { - ret = fdt_delprop(fdt, chosen_node, - "linux,initrd-start"); - if (ret) { - pr_err("Error deleting linux,initrd-start.\n"); - return -EINVAL; - } - - ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end"); - if (ret) { - pr_err("Error deleting linux,initrd-end.\n"); - return -EINVAL; - } - } - } - - if (initrd_len) { - ret = fdt_setprop_u64(fdt, chosen_node, - "linux,initrd-start", - initrd_load_addr); - if (ret < 0) - goto err; - - /* initrd-end is the first address after the initrd image. */ - ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", - initrd_load_addr + initrd_len); - if (ret < 0) - goto err; - - ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); - if (ret) { - pr_err("Error reserving initrd memory: %s\n", - fdt_strerror(ret)); - return -EINVAL; - } - } - - if (cmdline != NULL) { - ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); - if (ret < 0) - goto err; - } else { - ret = fdt_delprop(fdt, chosen_node, "bootargs"); - if (ret && ret != -FDT_ERR_NOTFOUND) { - pr_err("Error deleting bootargs.\n"); - return -EINVAL; - } - } - - ret = setup_ima_buffer(image, fdt, chosen_node); - if (ret) { - pr_err("Error setting up the new device tree.\n"); - return ret; - } - - ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); - if (ret) - goto err; - - return 0; - -err: - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; -} diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 82df4b09e79f..d80212be8698 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -6,11 +6,6 @@ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) * and Paul Mackerras. * - * kexec bits: - * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> - * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz - * PPC44x port. Copyright (C) 2011, IBM Corporation - * Author: Suzuki Poulose <suzuki@in.ibm.com> */ #include <linux/sys.h> @@ -25,7 +20,6 @@ #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/processor.h> -#include <asm/kexec.h> #include <asm/bug.h> #include <asm/ptrace.h> #include <asm/export.h> @@ -317,126 +311,6 @@ EXPORT_SYMBOL(flush_instruction_cache) #endif /* CONFIG_PPC_8xx */ /* - * Write any modified data cache blocks out to memory - * and invalidate the corresponding instruction cache blocks. - * This is a no-op on the 601. - * - * flush_icache_range(unsigned long start, unsigned long stop) - */ -_GLOBAL(flush_icache_range) -#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200) - PURGE_PREFETCHED_INS - blr /* for 601 and e200, do nothing */ -#else - rlwinm r3,r3,0,0,31 - L1_CACHE_SHIFT - subf r4,r3,r4 - addi r4,r4,L1_CACHE_BYTES - 1 - srwi. r4,r4,L1_CACHE_SHIFT - beqlr - mtctr r4 - mr r6,r3 -1: dcbst 0,r3 - addi r3,r3,L1_CACHE_BYTES - bdnz 1b - sync /* wait for dcbst's to get to ram */ -#ifndef CONFIG_44x - mtctr r4 -2: icbi 0,r6 - addi r6,r6,L1_CACHE_BYTES - bdnz 2b -#else - /* Flash invalidate on 44x because we are passed kmapped addresses and - this doesn't work for userspace pages due to the virtually tagged - icache. Sigh. */ - iccci 0, r0 -#endif - sync /* additional sync needed on g4 */ - isync - blr -#endif -_ASM_NOKPROBE_SYMBOL(flush_icache_range) -EXPORT_SYMBOL(flush_icache_range) - -/* - * Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * This is a no-op on the 601 and e200 which have a unified cache. - * - * void __flush_dcache_icache(void *page) - */ -_GLOBAL(__flush_dcache_icache) -#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200) - PURGE_PREFETCHED_INS - blr -#else - rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ - li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ - mtctr r4 - mr r6,r3 -0: dcbst 0,r3 /* Write line to ram */ - addi r3,r3,L1_CACHE_BYTES - bdnz 0b - sync -#ifdef CONFIG_44x - /* We don't flush the icache on 44x. Those have a virtual icache - * and we don't have access to the virtual address here (it's - * not the page vaddr but where it's mapped in user space). The - * flushing of the icache on these is handled elsewhere, when - * a change in the address space occurs, before returning to - * user space - */ -BEGIN_MMU_FTR_SECTION - blr -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) -#endif /* CONFIG_44x */ - mtctr r4 -1: icbi 0,r6 - addi r6,r6,L1_CACHE_BYTES - bdnz 1b - sync - isync - blr -#endif - -#ifndef CONFIG_BOOKE -/* - * Flush a particular page from the data cache to RAM, identified - * by its physical address. We turn off the MMU so we can just use - * the physical address (this may be a highmem page without a kernel - * mapping). - * - * void __flush_dcache_icache_phys(unsigned long physaddr) - */ -_GLOBAL(__flush_dcache_icache_phys) -#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200) - PURGE_PREFETCHED_INS - blr /* for 601 and e200, do nothing */ -#else - mfmsr r10 - rlwinm r0,r10,0,28,26 /* clear DR */ - mtmsr r0 - isync - rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ - li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ - mtctr r4 - mr r6,r3 -0: dcbst 0,r3 /* Write line to ram */ - addi r3,r3,L1_CACHE_BYTES - bdnz 0b - sync - mtctr r4 -1: icbi 0,r6 - addi r6,r6,L1_CACHE_BYTES - bdnz 1b - sync - mtmsr r10 /* restore DR */ - isync - blr -#endif -#endif /* CONFIG_BOOKE */ - -/* * Copy a whole page. We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of * the destination into cache). This requires that the destination @@ -614,488 +488,3 @@ _GLOBAL(start_secondary_resume) */ _GLOBAL(__main) blr - -#ifdef CONFIG_KEXEC_CORE - /* - * Must be relocatable PIC code callable as a C function. - */ - .globl relocate_new_kernel -relocate_new_kernel: - /* r3 = page_list */ - /* r4 = reboot_code_buffer */ - /* r5 = start_address */ - -#ifdef CONFIG_FSL_BOOKE - - mr r29, r3 - mr r30, r4 - mr r31, r5 - -#define ENTRY_MAPPING_KEXEC_SETUP -#include "fsl_booke_entry_mapping.S" -#undef ENTRY_MAPPING_KEXEC_SETUP - - mr r3, r29 - mr r4, r30 - mr r5, r31 - - li r0, 0 -#elif defined(CONFIG_44x) - - /* Save our parameters */ - mr r29, r3 - mr r30, r4 - mr r31, r5 - -#ifdef CONFIG_PPC_47x - /* Check for 47x cores */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmplwi cr0,r3,PVR_476FPE@h - beq setup_map_47x - cmplwi cr0,r3,PVR_476@h - beq setup_map_47x - cmplwi cr0,r3,PVR_476_ISS@h - beq setup_map_47x -#endif /* CONFIG_PPC_47x */ - -/* - * Code for setting up 1:1 mapping for PPC440x for KEXEC - * - * We cannot switch off the MMU on PPC44x. - * So we: - * 1) Invalidate all the mappings except the one we are running from. - * 2) Create a tmp mapping for our code in the other address space(TS) and - * jump to it. Invalidate the entry we started in. - * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS. - * 4) Jump to the 1:1 mapping in original TS. - * 5) Invalidate the tmp mapping. - * - * - Based on the kexec support code for FSL BookE - * - */ - - /* - * Load the PID with kernel PID (0). - * Also load our MSR_IS and TID to MMUCR for TLB search. - */ - li r3, 0 - mtspr SPRN_PID, r3 - mfmsr r4 - andi. r4,r4,MSR_IS@l - beq wmmucr - oris r3,r3,PPC44x_MMUCR_STS@h -wmmucr: - mtspr SPRN_MMUCR,r3 - sync - - /* - * Invalidate all the TLB entries except the current entry - * where we are running from - */ - bl 0f /* Find our address */ -0: mflr r5 /* Make it accessible */ - tlbsx r23,0,r5 /* Find entry we are in */ - li r4,0 /* Start at TLB entry 0 */ - li r3,0 /* Set PAGEID inval value */ -1: cmpw r23,r4 /* Is this our entry? */ - beq skip /* If so, skip the inval */ - tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ -skip: - addi r4,r4,1 /* Increment */ - cmpwi r4,64 /* Are we done? */ - bne 1b /* If not, repeat */ - isync - - /* Create a temp mapping and jump to it */ - andi. r6, r23, 1 /* Find the index to use */ - addi r24, r6, 1 /* r24 will contain 1 or 2 */ - - mfmsr r9 /* get the MSR */ - rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */ - xori r7, r5, 1 /* Use the other address space */ - - /* Read the current mapping entries */ - tlbre r3, r23, PPC44x_TLB_PAGEID - tlbre r4, r23, PPC44x_TLB_XLAT - tlbre r5, r23, PPC44x_TLB_ATTRIB - - /* Save our current XLAT entry */ - mr r25, r4 - - /* Extract the TLB PageSize */ - li r10, 1 /* r10 will hold PageSize */ - rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */ - - /* XXX: As of now we use 256M, 4K pages */ - cmpwi r11, PPC44x_TLB_256M - bne tlb_4k - rotlwi r10, r10, 28 /* r10 = 256M */ - b write_out -tlb_4k: - cmpwi r11, PPC44x_TLB_4K - bne default - rotlwi r10, r10, 12 /* r10 = 4K */ - b write_out -default: - rotlwi r10, r10, 10 /* r10 = 1K */ - -write_out: - /* - * Write out the tmp 1:1 mapping for this code in other address space - * Fixup EPN = RPN , TS=other address space - */ - insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */ - - /* Write out the tmp mapping entries */ - tlbwe r3, r24, PPC44x_TLB_PAGEID - tlbwe r4, r24, PPC44x_TLB_XLAT - tlbwe r5, r24, PPC44x_TLB_ATTRIB - - subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */ - not r10, r11 /* Mask for PageNum */ - - /* Switch to other address space in MSR */ - insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - - bl 1f -1: mflr r8 - addi r8, r8, (2f-1b) /* Find the target offset */ - - /* Jump to the tmp mapping */ - mtspr SPRN_SRR0, r8 - mtspr SPRN_SRR1, r9 - rfi - -2: - /* Invalidate the entry we were executing from */ - li r3, 0 - tlbwe r3, r23, PPC44x_TLB_PAGEID - - /* attribute fields. rwx for SUPERVISOR mode */ - li r5, 0 - ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) - - /* Create 1:1 mapping in 256M pages */ - xori r7, r7, 1 /* Revert back to Original TS */ - - li r8, 0 /* PageNumber */ - li r6, 3 /* TLB Index, start at 3 */ - -next_tlb: - rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */ - mr r4, r3 /* RPN = EPN */ - ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */ - insrwi r3, r7, 1, 23 /* Set TS from r7 */ - - tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */ - tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */ - tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */ - - addi r8, r8, 1 /* Increment PN */ - addi r6, r6, 1 /* Increment TLB Index */ - cmpwi r8, 8 /* Are we done ? */ - bne next_tlb - isync - - /* Jump to the new mapping 1:1 */ - li r9,0 - insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - - bl 1f -1: mflr r8 - and r8, r8, r11 /* Get our offset within page */ - addi r8, r8, (2f-1b) - - and r5, r25, r10 /* Get our target PageNum */ - or r8, r8, r5 /* Target jump address */ - - mtspr SPRN_SRR0, r8 - mtspr SPRN_SRR1, r9 - rfi -2: - /* Invalidate the tmp entry we used */ - li r3, 0 - tlbwe r3, r24, PPC44x_TLB_PAGEID - sync - b ppc44x_map_done - -#ifdef CONFIG_PPC_47x - - /* 1:1 mapping for 47x */ - -setup_map_47x: - - /* - * Load the kernel pid (0) to PID and also to MMUCR[TID]. - * Also set the MSR IS->MMUCR STS - */ - li r3, 0 - mtspr SPRN_PID, r3 /* Set PID */ - mfmsr r4 /* Get MSR */ - andi. r4, r4, MSR_IS@l /* TS=1? */ - beq 1f /* If not, leave STS=0 */ - oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */ -1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */ - sync - - /* Find the entry we are running from */ - bl 2f -2: mflr r23 - tlbsx r23, 0, r23 - tlbre r24, r23, 0 /* TLB Word 0 */ - tlbre r25, r23, 1 /* TLB Word 1 */ - tlbre r26, r23, 2 /* TLB Word 2 */ - - - /* - * Invalidates all the tlb entries by writing to 256 RPNs(r4) - * of 4k page size in all 4 ways (0-3 in r3). - * This would invalidate the entire UTLB including the one we are - * running from. However the shadow TLB entries would help us - * to continue the execution, until we flush them (rfi/isync). - */ - addis r3, 0, 0x8000 /* specify the way */ - addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */ - addi r5, 0, 0 - b clear_utlb_entry - - /* Align the loop to speed things up. from head_44x.S */ - .align 6 - -clear_utlb_entry: - - tlbwe r4, r3, 0 - tlbwe r5, r3, 1 - tlbwe r5, r3, 2 - addis r3, r3, 0x2000 /* Increment the way */ - cmpwi r3, 0 - bne clear_utlb_entry - addis r3, 0, 0x8000 - addis r4, r4, 0x100 /* Increment the EPN */ - cmpwi r4, 0 - bne clear_utlb_entry - - /* Create the entries in the other address space */ - mfmsr r5 - rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */ - xori r7, r7, 1 /* r7 = !TS */ - - insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */ - - /* - * write out the TLB entries for the tmp mapping - * Use way '0' so that we could easily invalidate it later. - */ - lis r3, 0x8000 /* Way '0' */ - - tlbwe r24, r3, 0 - tlbwe r25, r3, 1 - tlbwe r26, r3, 2 - - /* Update the msr to the new TS */ - insrwi r5, r7, 1, 26 - - bl 1f -1: mflr r6 - addi r6, r6, (2f-1b) - - mtspr SPRN_SRR0, r6 - mtspr SPRN_SRR1, r5 - rfi - - /* - * Now we are in the tmp address space. - * Create a 1:1 mapping for 0-2GiB in the original TS. - */ -2: - li r3, 0 - li r4, 0 /* TLB Word 0 */ - li r5, 0 /* TLB Word 1 */ - li r6, 0 - ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */ - - li r8, 0 /* PageIndex */ - - xori r7, r7, 1 /* revert back to original TS */ - -write_utlb: - rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */ - /* ERPN = 0 as we don't use memory above 2G */ - - mr r4, r5 /* EPN = RPN */ - ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M) - insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */ - - tlbwe r4, r3, 0 /* Write out the entries */ - tlbwe r5, r3, 1 - tlbwe r6, r3, 2 - addi r8, r8, 1 - cmpwi r8, 8 /* Have we completed ? */ - bne write_utlb - - /* make sure we complete the TLB write up */ - isync - - /* - * Prepare to jump to the 1:1 mapping. - * 1) Extract page size of the tmp mapping - * DSIZ = TLB_Word0[22:27] - * 2) Calculate the physical address of the address - * to jump to. - */ - rlwinm r10, r24, 0, 22, 27 - - cmpwi r10, PPC47x_TLB0_4K - bne 0f - li r10, 0x1000 /* r10 = 4k */ - bl 1f - -0: - /* Defaults to 256M */ - lis r10, 0x1000 - - bl 1f -1: mflr r4 - addi r4, r4, (2f-1b) /* virtual address of 2f */ - - subi r11, r10, 1 /* offsetmask = Pagesize - 1 */ - not r10, r11 /* Pagemask = ~(offsetmask) */ - - and r5, r25, r10 /* Physical page */ - and r6, r4, r11 /* offset within the current page */ - - or r5, r5, r6 /* Physical address for 2f */ - - /* Switch the TS in MSR to the original one */ - mfmsr r8 - insrwi r8, r7, 1, 26 - - mtspr SPRN_SRR1, r8 - mtspr SPRN_SRR0, r5 - rfi - -2: - /* Invalidate the tmp mapping */ - lis r3, 0x8000 /* Way '0' */ - - clrrwi r24, r24, 12 /* Clear the valid bit */ - tlbwe r24, r3, 0 - tlbwe r25, r3, 1 - tlbwe r26, r3, 2 - - /* Make sure we complete the TLB write and flush the shadow TLB */ - isync - -#endif - -ppc44x_map_done: - - - /* Restore the parameters */ - mr r3, r29 - mr r4, r30 - mr r5, r31 - - li r0, 0 -#else - li r0, 0 - - /* - * Set Machine Status Register to a known status, - * switch the MMU off and jump to 1: in a single step. - */ - - mr r8, r0 - ori r8, r8, MSR_RI|MSR_ME - mtspr SPRN_SRR1, r8 - addi r8, r4, 1f - relocate_new_kernel - mtspr SPRN_SRR0, r8 - sync - rfi - -1: -#endif - /* from this point address translation is turned off */ - /* and interrupts are disabled */ - - /* set a new stack at the bottom of our page... */ - /* (not really needed now) */ - addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */ - stw r0, 0(r1) - - /* Do the copies */ - li r6, 0 /* checksum */ - mr r0, r3 - b 1f - -0: /* top, read another word for the indirection page */ - lwzu r0, 4(r3) - -1: - /* is it a destination page? (r8) */ - rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */ - beq 2f - - rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */ - b 0b - -2: /* is it an indirection page? (r3) */ - rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */ - beq 2f - - rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */ - subi r3, r3, 4 - b 0b - -2: /* are we done? */ - rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */ - beq 2f - b 3f - -2: /* is it a source page? (r9) */ - rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */ - beq 0b - - rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */ - - li r7, PAGE_SIZE / 4 - mtctr r7 - subi r9, r9, 4 - subi r8, r8, 4 -9: - lwzu r0, 4(r9) /* do the copy */ - xor r6, r6, r0 - stwu r0, 4(r8) - dcbst 0, r8 - sync - icbi 0, r8 - bdnz 9b - - addi r9, r9, 4 - addi r8, r8, 4 - b 0b - -3: - - /* To be certain of avoiding problems with self-modifying code - * execute a serializing instruction here. - */ - isync - sync - - mfspr r3, SPRN_PIR /* current core we are running on */ - mr r4, r5 /* load physical address of chunk called */ - - /* jump to the entry point, usually the setup routine */ - mtlr r5 - blrl - -1: b 1b - -relocate_new_kernel_end: - - .globl relocate_new_kernel_size -relocate_new_kernel_size: - .long relocate_new_kernel_end - relocate_new_kernel -#endif diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index b55a7b4cb543..1864605eca29 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -49,108 +49,6 @@ _GLOBAL(call_do_irq) mtlr r0 blr - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - -/* - * Write any modified data cache blocks out to memory - * and invalidate the corresponding instruction cache blocks. - * - * flush_icache_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start through stop-1 inclusive - */ - -_GLOBAL_TOC(flush_icache_range) -BEGIN_FTR_SECTION - PURGE_PREFETCHED_INS - blr -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - * and in some cases i-cache and d-cache line sizes differ from - * each other. - */ - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -1: dcbst 0,r6 - add r6,r6,r7 - bdnz 1b - sync - -/* Now invalidate the instruction cache */ - - lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 - lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -2: icbi 0,r6 - add r6,r6,r7 - bdnz 2b - isync - blr -_ASM_NOKPROBE_SYMBOL(flush_icache_range) -EXPORT_SYMBOL(flush_icache_range) - -/* - * Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * void __flush_dcache_icache(void *page) - */ -_GLOBAL(__flush_dcache_icache) -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - */ - -BEGIN_FTR_SECTION - PURGE_PREFETCHED_INS - blr -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) - -/* Flush the dcache */ - ld r7,PPC64_CACHES@toc(r2) - clrrdi r3,r3,PAGE_SHIFT /* Page align */ - lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */ - lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */ - mr r6,r3 - mtctr r4 -0: dcbst 0,r6 - add r6,r6,r5 - bdnz 0b - sync - -/* Now invalidate the icache */ - - lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */ - lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */ - mtctr r4 -1: icbi 0,r3 - add r3,r3,r5 - bdnz 1b - isync - blr - _GLOBAL(__bswapdi2) EXPORT_SYMBOL(__bswapdi2) srdi r8,r3,32 @@ -432,18 +330,13 @@ kexec_create_tlb: rlwimi r9,r10,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r9) */ /* Set up a temp identity mapping v:0 to p:0 and return to it. */ -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) -#define M_IF_NEEDED MAS2_M -#else -#define M_IF_NEEDED 0 -#endif mtspr SPRN_MAS0,r9 lis r9,(MAS1_VALID|MAS1_IPROT)@h ori r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l mtspr SPRN_MAS1,r9 - LOAD_REG_IMMEDIATE(r9, 0x0 | M_IF_NEEDED) + LOAD_REG_IMMEDIATE(r9, 0x0 | MAS2_M_IF_NEEDED) mtspr SPRN_MAS2,r9 LOAD_REG_IMMEDIATE(r9, 0x0 | MAS3_SR | MAS3_SW | MAS3_SX) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 639ceae7da9d..4df94b6e2f32 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -715,6 +715,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread) { thread->hw_brk.address = 0; thread->hw_brk.type = 0; + thread->hw_brk.len = 0; + thread->hw_brk.hw_len = 0; if (ppc_breakpoint_available()) set_breakpoint(&thread->hw_brk); } @@ -816,6 +818,7 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a, return false; if (a->len != b->len) return false; + /* no need to check hw_len. it's calculated from address and len */ return true; } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a4e7762dd286..577345382b23 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -303,16 +303,24 @@ static char __init *prom_strstr(const char *s1, const char *s2) return NULL; } -static size_t __init prom_strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = prom_strlen(src); +static size_t __init prom_strlcat(char *dest, const char *src, size_t count) +{ + size_t dsize = prom_strlen(dest); + size_t len = prom_strlen(src); + size_t res = dsize + len; + + /* This would be a bug */ + if (dsize >= count) + return count; + + dest += dsize; + count -= dsize; + if (len >= count) + len = count-1; + memcpy(dest, src, len); + dest[len] = 0; + return res; - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; } #ifdef CONFIG_PPC_PSERIES @@ -764,10 +772,14 @@ static void __init early_cmdline_parse(void) prom_cmd_line[0] = 0; p = prom_cmd_line; - if ((long)prom.chosen > 0) + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0) l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1); - if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */ - prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line)); + + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0') + prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE, + sizeof(prom_cmd_line)); + prom_printf("command line: %s\n", prom_cmd_line); #ifdef CONFIG_PPC64 @@ -1053,7 +1065,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .reserved2 = 0, .reserved3 = 0, .subprocessors = 1, - .byte22 = OV5_FEAT(OV5_DRMEM_V2), + .byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO), .intarch = 0, .mmu = 0, .hash_ext = 0, @@ -3249,7 +3261,20 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt) /* Switch to secure mode. */ prom_printf("Switching to secure mode.\n"); + /* + * The ultravisor will do an integrity check of the kernel image but we + * relocated it so the check will fail. Restore the original image by + * relocating it back to the kernel virtual base address. + */ + if (IS_ENABLED(CONFIG_RELOCATABLE)) + relocate(KERNELBASE); + ret = enter_secure_mode(kbase, fdt); + + /* Relocate the kernel again. */ + if (IS_ENABLED(CONFIG_RELOCATABLE)) + relocate(kbase); + if (ret != U_SUCCESS) { prom_printf("Returned %d from switching to secure mode.\n", ret); prom_rtas_os_term("Switch to secure mode failed.\n"); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 78bab17b1396..b183ab9c5107 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -26,7 +26,8 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start logo_linux_clut224 btext_prepare_BAT reloc_got2 kernstart_addr memstart_addr linux_banner _stext -__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." +__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC. +relocate" NM="$1" OBJ="$2" diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 8c92febf5f44..25c0424e8868 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2425,7 +2425,8 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, return -EIO; hw_brk.address = data & (~HW_BRK_TYPE_DABR); hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; - hw_brk.len = 8; + hw_brk.len = DABR_MAX_LEN; + hw_brk.hw_len = DABR_MAX_LEN; set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); #ifdef CONFIG_HAVE_HW_BREAKPOINT bp = thread->ptrace_bps[0]; @@ -2439,6 +2440,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, if (bp) { attr = bp->attr; attr.bp_addr = hw_brk.address; + attr.bp_len = DABR_MAX_LEN; arch_bp_generic_fields(hw_brk.type, &attr.bp_type); /* Enable breakpoint */ @@ -2456,7 +2458,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); attr.bp_addr = hw_brk.address; - attr.bp_len = 8; + attr.bp_len = DABR_MAX_LEN; arch_bp_generic_fields(hw_brk.type, &attr.bp_type); @@ -2880,18 +2882,14 @@ static long ppc_set_hwdebug(struct task_struct *child, if ((unsigned long)bp_info->addr >= TASK_SIZE) return -EIO; - brk.address = bp_info->addr & ~7UL; + brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; brk.type = HW_BRK_TYPE_TRANSLATE; - brk.len = 8; + brk.len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) brk.type |= HW_BRK_TYPE_READ; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) brk.type |= HW_BRK_TYPE_WRITE; #ifdef CONFIG_HAVE_HW_BREAKPOINT - /* - * Check if the request is for 'range' breakpoints. We can - * support it if range < 8 bytes. - */ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) len = bp_info->addr2 - bp_info->addr; else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) @@ -2904,7 +2902,7 @@ static long ppc_set_hwdebug(struct task_struct *child, /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); - attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN; + attr.bp_addr = (unsigned long)bp_info->addr; attr.bp_len = len; arch_bp_generic_fields(brk.type, &attr.bp_type); @@ -3361,6 +3359,12 @@ void do_syscall_trace_leave(struct pt_regs *regs) user_enter(); } +void __init pt_regs_check(void); + +/* + * Dummy function, its purpose is to break the build if struct pt_regs and + * struct user_pt_regs don't match. + */ void __init pt_regs_check(void) { BUILD_BUG_ON(offsetof(struct pt_regs, gpr) != @@ -3398,4 +3402,67 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, result)); BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); + + // Now check that the pt_regs offsets match the uapi #defines + #define CHECK_REG(_pt, _reg) \ + BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \ + sizeof(unsigned long))); + + CHECK_REG(PT_R0, gpr[0]); + CHECK_REG(PT_R1, gpr[1]); + CHECK_REG(PT_R2, gpr[2]); + CHECK_REG(PT_R3, gpr[3]); + CHECK_REG(PT_R4, gpr[4]); + CHECK_REG(PT_R5, gpr[5]); + CHECK_REG(PT_R6, gpr[6]); + CHECK_REG(PT_R7, gpr[7]); + CHECK_REG(PT_R8, gpr[8]); + CHECK_REG(PT_R9, gpr[9]); + CHECK_REG(PT_R10, gpr[10]); + CHECK_REG(PT_R11, gpr[11]); + CHECK_REG(PT_R12, gpr[12]); + CHECK_REG(PT_R13, gpr[13]); + CHECK_REG(PT_R14, gpr[14]); + CHECK_REG(PT_R15, gpr[15]); + CHECK_REG(PT_R16, gpr[16]); + CHECK_REG(PT_R17, gpr[17]); + CHECK_REG(PT_R18, gpr[18]); + CHECK_REG(PT_R19, gpr[19]); + CHECK_REG(PT_R20, gpr[20]); + CHECK_REG(PT_R21, gpr[21]); + CHECK_REG(PT_R22, gpr[22]); + CHECK_REG(PT_R23, gpr[23]); + CHECK_REG(PT_R24, gpr[24]); + CHECK_REG(PT_R25, gpr[25]); + CHECK_REG(PT_R26, gpr[26]); + CHECK_REG(PT_R27, gpr[27]); + CHECK_REG(PT_R28, gpr[28]); + CHECK_REG(PT_R29, gpr[29]); + CHECK_REG(PT_R30, gpr[30]); + CHECK_REG(PT_R31, gpr[31]); + CHECK_REG(PT_NIP, nip); + CHECK_REG(PT_MSR, msr); + CHECK_REG(PT_ORIG_R3, orig_gpr3); + CHECK_REG(PT_CTR, ctr); + CHECK_REG(PT_LNK, link); + CHECK_REG(PT_XER, xer); + CHECK_REG(PT_CCR, ccr); +#ifdef CONFIG_PPC64 + CHECK_REG(PT_SOFTE, softe); +#else + CHECK_REG(PT_MQ, mq); +#endif + CHECK_REG(PT_TRAP, trap); + CHECK_REG(PT_DAR, dar); + CHECK_REG(PT_DSISR, dsisr); + CHECK_REG(PT_RESULT, result); + #undef CHECK_REG + + BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long)); + + /* + * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the + * real registers. + */ + BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); } diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c new file mode 100644 index 000000000000..4b982324d368 --- /dev/null +++ b/arch/powerpc/kernel/secure_boot.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + */ +#include <linux/types.h> +#include <linux/of.h> +#include <asm/secure_boot.h> + +static struct device_node *get_ppc_fw_sb_node(void) +{ + static const struct of_device_id ids[] = { + { .compatible = "ibm,secureboot", }, + { .compatible = "ibm,secureboot-v1", }, + { .compatible = "ibm,secureboot-v2", }, + {}, + }; + + return of_find_matching_node(NULL, ids); +} + +bool is_ppc_secureboot_enabled(void) +{ + struct device_node *node; + bool enabled = false; + + node = get_ppc_fw_sb_node(); + enabled = of_property_read_bool(node, "os-secureboot-enforcing"); + + of_node_put(node); + + pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); + + return enabled; +} + +bool is_ppc_trustedboot_enabled(void) +{ + struct device_node *node; + bool enabled = false; + + node = get_ppc_fw_sb_node(); + enabled = of_property_read_bool(node, "trusted-enabled"); + + of_node_put(node); + + pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); + + return enabled; +} diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 7cfcb294b11c..bd70f5be1c27 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -16,7 +16,7 @@ #include <asm/setup.h> -unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; +u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; enum count_cache_flush_type { COUNT_CACHE_FLUSH_NONE = 0x1, @@ -24,6 +24,7 @@ enum count_cache_flush_type { COUNT_CACHE_FLUSH_HW = 0x4, }; static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; +static bool link_stack_flush_enabled; bool barrier_nospec_enabled; static bool no_nospec; @@ -94,13 +95,14 @@ static int barrier_nospec_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec, - barrier_nospec_get, barrier_nospec_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get, + barrier_nospec_set, "%llu\n"); static __init int barrier_nospec_debugfs_init(void) { - debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL, - &fops_barrier_nospec); + debugfs_create_file_unsafe("barrier_nospec", 0600, + powerpc_debugfs_root, NULL, + &fops_barrier_nospec); return 0; } device_initcall(barrier_nospec_debugfs_init); @@ -108,7 +110,7 @@ device_initcall(barrier_nospec_debugfs_init); static __init int security_feature_debugfs_init(void) { debugfs_create_x64("security_features", 0400, powerpc_debugfs_root, - (u64 *)&powerpc_security_features); + &powerpc_security_features); return 0; } device_initcall(security_feature_debugfs_init); @@ -141,32 +143,33 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); - if (rfi_flush || thread_priv) { + if (rfi_flush) { struct seq_buf s; seq_buf_init(&s, buf, PAGE_SIZE - 1); - seq_buf_printf(&s, "Mitigation: "); - - if (rfi_flush) - seq_buf_printf(&s, "RFI Flush"); - - if (rfi_flush && thread_priv) - seq_buf_printf(&s, ", "); - + seq_buf_printf(&s, "Mitigation: RFI Flush"); if (thread_priv) - seq_buf_printf(&s, "L1D private per thread"); + seq_buf_printf(&s, ", L1D private per thread"); seq_buf_printf(&s, "\n"); return s.len; } + if (thread_priv) + return sprintf(buf, "Vulnerable: L1D private per thread\n"); + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) return sprintf(buf, "Not affected\n"); return sprintf(buf, "Vulnerable\n"); } + +ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_meltdown(dev, attr, buf); +} #endif ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) @@ -212,11 +215,19 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (ccd) seq_buf_printf(&s, "Indirect branch cache disabled"); + + if (link_stack_flush_enabled) + seq_buf_printf(&s, ", Software link stack flush"); + } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { seq_buf_printf(&s, "Mitigation: Software count cache flush"); if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) seq_buf_printf(&s, " (hardware accelerated)"); + + if (link_stack_flush_enabled) + seq_buf_printf(&s, ", Software link stack flush"); + } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { @@ -367,28 +378,61 @@ static int stf_barrier_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, + "%llu\n"); static __init int stf_barrier_debugfs_init(void) { - debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier); + debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root, + NULL, &fops_stf_barrier); return 0; } device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +static void no_count_cache_flush(void) +{ + count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; + pr_info("count-cache-flush: software flush disabled.\n"); +} + static void toggle_count_cache_flush(bool enable) { - if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) && + !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) + enable = false; + + if (!enable) { patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); - count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; - pr_info("count-cache-flush: software flush disabled.\n"); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP); +#endif + pr_info("link-stack-flush: software flush disabled.\n"); + link_stack_flush_enabled = false; + no_count_cache_flush(); return; } + // This enables the branch from _switch to flush_count_cache patch_branch_site(&patch__call_flush_count_cache, (u64)&flush_count_cache, BRANCH_SET_LINK); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + // This enables the branch from guest_exit_cont to kvm_flush_link_stack + patch_branch_site(&patch__call_kvm_flush_link_stack, + (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); +#endif + + pr_info("link-stack-flush: software flush enabled.\n"); + link_stack_flush_enabled = true; + + // If we just need to flush the link stack, patch an early return + if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR); + no_count_cache_flush(); + return; + } + if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { count_cache_flush_type = COUNT_CACHE_FLUSH_SW; pr_info("count-cache-flush: full software flush sequence enabled.\n"); @@ -407,11 +451,20 @@ void setup_count_cache_flush(void) if (no_spectrev2 || cpu_mitigations_off()) { if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) || security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED)) - pr_warn("Spectre v2 mitigations not under software control, can't disable\n"); + pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n"); enable = false; } + /* + * There's no firmware feature flag/hypervisor bit to tell us we need to + * flush the link stack on context switch. So we set it here if we see + * either of the Spectre v2 mitigations that aim to protect userspace. + */ + if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) || + security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) + security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); + toggle_count_cache_flush(enable); } @@ -442,13 +495,14 @@ static int count_cache_flush_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, - count_cache_flush_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, + count_cache_flush_set, "%llu\n"); static __init int count_cache_flush_debugfs_init(void) { - debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root, - NULL, &fops_count_cache_flush); + debugfs_create_file_unsafe("count_cache_flush", 0600, + powerpc_debugfs_root, NULL, + &fops_count_cache_flush); return 0; } device_initcall(count_cache_flush_debugfs_init); diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c new file mode 100644 index 000000000000..6a29777d6a2d --- /dev/null +++ b/arch/powerpc/kernel/secvar-ops.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + * + * This file initializes secvar operations for PowerPC Secureboot + */ + +#include <linux/cache.h> +#include <asm/secvar.h> + +const struct secvar_operations *secvar_ops __ro_after_init; + +void set_secvar_ops(const struct secvar_operations *ops) +{ + secvar_ops = ops; +} diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c new file mode 100644 index 000000000000..a0a78aba2083 --- /dev/null +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2019 IBM Corporation <nayna@linux.ibm.com> + * + * This code exposes secure variables to user via sysfs + */ + +#define pr_fmt(fmt) "secvar-sysfs: "fmt + +#include <linux/slab.h> +#include <linux/compat.h> +#include <linux/string.h> +#include <linux/of.h> +#include <asm/secvar.h> + +#define NAME_MAX_SIZE 1024 + +static struct kobject *secvar_kobj; +static struct kset *secvar_kset; + +static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + ssize_t rc = 0; + struct device_node *node; + const char *format; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!of_device_is_available(node)) + return -ENODEV; + + rc = of_property_read_string(node, "format", &format); + if (rc) + return rc; + + rc = sprintf(buf, "%s\n", format); + + of_node_put(node); + + return rc; +} + + +static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + uint64_t dsize; + int rc; + + rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); + if (rc) { + pr_err("Error retrieving %s variable size %d\n", kobj->name, + rc); + return rc; + } + + return sprintf(buf, "%llu\n", dsize); +} + +static ssize_t data_read(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + uint64_t dsize; + char *data; + int rc; + + rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); + if (rc) { + pr_err("Error getting %s variable size %d\n", kobj->name, rc); + return rc; + } + pr_debug("dsize is %llu\n", dsize); + + data = kzalloc(dsize, GFP_KERNEL); + if (!data) + return -ENOMEM; + + rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, data, &dsize); + if (rc) { + pr_err("Error getting %s variable %d\n", kobj->name, rc); + goto data_fail; + } + + rc = memory_read_from_buffer(buf, count, &off, data, dsize); + +data_fail: + kfree(data); + return rc; +} + +static ssize_t update_write(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + int rc; + + pr_debug("count is %ld\n", count); + rc = secvar_ops->set(kobj->name, strlen(kobj->name) + 1, buf, count); + if (rc) { + pr_err("Error setting the %s variable %d\n", kobj->name, rc); + return rc; + } + + return count; +} + +static struct kobj_attribute format_attr = __ATTR_RO(format); + +static struct kobj_attribute size_attr = __ATTR_RO(size); + +static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0); + +static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0); + +static struct bin_attribute *secvar_bin_attrs[] = { + &data_attr, + &update_attr, + NULL, +}; + +static struct attribute *secvar_attrs[] = { + &size_attr.attr, + NULL, +}; + +static const struct attribute_group secvar_attr_group = { + .attrs = secvar_attrs, + .bin_attrs = secvar_bin_attrs, +}; +__ATTRIBUTE_GROUPS(secvar_attr); + +static struct kobj_type secvar_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = secvar_attr_groups, +}; + +static int update_kobj_size(void) +{ + + struct device_node *node; + u64 varsize; + int rc = 0; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } + + rc = of_property_read_u64(node, "max-var-size", &varsize); + if (rc) + goto out; + + data_attr.size = varsize; + update_attr.size = varsize; + +out: + of_node_put(node); + + return rc; +} + +static int secvar_sysfs_load(void) +{ + char *name; + uint64_t namesize = 0; + struct kobject *kobj; + int rc; + + name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL); + if (!name) + return -ENOMEM; + + do { + rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE); + if (rc) { + if (rc != -ENOENT) + pr_err("error getting secvar from firmware %d\n", + rc); + break; + } + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) { + rc = -ENOMEM; + break; + } + + kobject_init(kobj, &secvar_ktype); + + rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); + if (rc) { + pr_warn("kobject_add error %d for attribute: %s\n", rc, + name); + kobject_put(kobj); + kobj = NULL; + } + + if (kobj) + kobject_uevent(kobj, KOBJ_ADD); + + } while (!rc); + + kfree(name); + return rc; +} + +static int secvar_sysfs_init(void) +{ + int rc; + + if (!secvar_ops) { + pr_warn("secvar: failed to retrieve secvar operations.\n"); + return -ENODEV; + } + + secvar_kobj = kobject_create_and_add("secvar", firmware_kobj); + if (!secvar_kobj) { + pr_err("secvar: Failed to create firmware kobj\n"); + return -ENOMEM; + } + + rc = sysfs_create_file(secvar_kobj, &format_attr.attr); + if (rc) { + kobject_put(secvar_kobj); + return -ENOMEM; + } + + secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj); + if (!secvar_kset) { + pr_err("secvar: sysfs kobject registration failed.\n"); + kobject_put(secvar_kobj); + return -ENOMEM; + } + + rc = update_kobj_size(); + if (rc) { + pr_err("Cannot read the size of the attribute\n"); + return rc; + } + + secvar_sysfs_load(); + + return 0; +} + +late_initcall(secvar_sysfs_init); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 25aaa3903000..488f1eecc0de 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -715,8 +715,28 @@ static struct notifier_block ppc_panic_block = { .priority = INT_MIN /* may not return; must be done last */ }; +/* + * Dump out kernel offset information on panic. + */ +static int dump_kernel_offset(struct notifier_block *self, unsigned long v, + void *p) +{ + pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n", + kaslr_offset(), KERNELBASE); + + return 0; +} + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + void __init setup_panic(void) { + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + /* PPC64 always does a hard irq disable in its panic handler */ if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic) return; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a7541edf0cdb..dcffe927f5b9 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -44,6 +44,7 @@ #include <asm/asm-prototypes.h> #include <asm/kdump.h> #include <asm/feature-fixups.h> +#include <asm/early_ioremap.h> #include "setup.h" @@ -80,6 +81,8 @@ notrace void __init machine_init(u64 dt_ptr) /* Configure static keys first, now that we're relocated. */ setup_feature_keys(); + early_ioremap_setup(); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 44b4c432a273..6104917a282d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -65,15 +65,10 @@ #include <asm/hw_irq.h> #include <asm/feature-fixups.h> #include <asm/kup.h> +#include <asm/early_ioremap.h> #include "setup.h" -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - int spinning_secondaries; u64 ppc64_pft_size; @@ -305,7 +300,7 @@ void __init early_setup(unsigned long dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr); + udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr); /* * Do early initialization using the flattened device @@ -338,6 +333,8 @@ void __init early_setup(unsigned long dt_ptr) apply_feature_fixups(); setup_feature_keys(); + early_ioremap_setup(); + /* Initialize the hash table or TLB handling */ early_init_mmu(); @@ -362,11 +359,11 @@ void __init early_setup(unsigned long dt_ptr) */ this_cpu_enable_ftrace(); - DBG(" <- early_setup()\n"); + udbg_printf(" <- %s()\n", __func__); #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX /* - * This needs to be done *last* (after the above DBG() even) + * This needs to be done *last* (after the above udbg_printf() even) * * Right after we return from this function, we turn on the MMU * which means the real-mode access trick that btext does will @@ -436,8 +433,6 @@ void smp_release_cpus(void) if (!use_spinloop()) return; - DBG(" -> smp_release_cpus()\n"); - /* All secondary cpus are spinning on a common spinloop, release them * all now so they can start to spin on their individual paca * spinloops. For non SMP kernels, the secondary cpus never get out @@ -456,9 +451,7 @@ void smp_release_cpus(void) break; udelay(1); } - DBG("spinning_secondaries = %d\n", spinning_secondaries); - - DBG(" <- smp_release_cpus()\n"); + pr_debug("spinning_secondaries = %d\n", spinning_secondaries); } #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */ @@ -551,8 +544,6 @@ void __init initialize_cache_info(void) struct device_node *cpu = NULL, *l2, *l3 = NULL; u32 pvr; - DBG(" -> initialize_cache_info()\n"); - /* * All shipping POWER8 machines have a firmware bug that * puts incorrect information in the device-tree. This will @@ -576,10 +567,10 @@ void __init initialize_cache_info(void) */ if (cpu) { if (!parse_cache_info(cpu, false, &ppc64_caches.l1d)) - DBG("Argh, can't find dcache properties !\n"); + pr_warn("Argh, can't find dcache properties !\n"); if (!parse_cache_info(cpu, true, &ppc64_caches.l1i)) - DBG("Argh, can't find icache properties !\n"); + pr_warn("Argh, can't find icache properties !\n"); /* * Try to find the L2 and L3 if any. Assume they are @@ -604,8 +595,6 @@ void __init initialize_cache_info(void) cur_cpu_spec->dcache_bsize = dcache_bsize; cur_cpu_spec->icache_bsize = icache_bsize; - - DBG(" <- initialize_cache_info()\n"); } /* diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 3bfb3888e897..078608ec2e92 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -79,7 +79,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, * sys_select() with the appropriate args. -- Cort */ int -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) +ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp) { if ( (unsigned long)n >= 4096 ) { @@ -89,7 +89,7 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s || __get_user(inp, ((fd_set __user * __user *)(buffer+1))) || __get_user(outp, ((fd_set __user * __user *)(buffer+2))) || __get_user(exp, ((fd_set __user * __user *)(buffer+3))) - || __get_user(tvp, ((struct timeval __user * __user *)(buffer+4)))) + || __get_user(tvp, ((struct __kernel_old_timeval __user * __user *)(buffer+4)))) return -EFAULT; } return sys_select(n, inp, outp, exp, tvp); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 694522308cd5..2d13cea13954 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -232,7 +232,7 @@ static u64 scan_dispatch_log(u64 stop_tb) * Accumulate stolen time by scanning the dispatch trace log. * Called on entry from user mode. */ -void accumulate_stolen_time(void) +void notrace accumulate_stolen_time(void) { u64 sst, ust; unsigned long save_irq_soft_mask = irq_soft_mask_return(); @@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk, return stime; } -void vtime_account_system(struct task_struct *tsk) +void vtime_account_kernel(struct task_struct *tsk) { unsigned long stime, stime_scaled, steal_time; struct cpu_accounting_data *acct = get_accounting(tsk); @@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk) #endif } } -EXPORT_SYMBOL_GPL(vtime_account_system); +EXPORT_SYMBOL_GPL(vtime_account_kernel); void vtime_account_idle(struct task_struct *tsk) { @@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk, /* * Account the whole cputime accumulated in the paca * Must be called with interrupts disabled. - * Assumes that vtime_account_system/idle() has been called + * Assumes that vtime_account_kernel/idle() has been called * recently (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ @@ -885,7 +885,7 @@ static notrace u64 timebase_read(struct clocksource *cs) void update_vsyscall(struct timekeeper *tk) { - struct timespec xt; + struct timespec64 xt; struct clocksource *clock = tk->tkr_mono.clock; u32 mult = tk->tkr_mono.mult; u32 shift = tk->tkr_mono.shift; @@ -957,7 +957,8 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->tb_to_xs = new_tb_to_xs; vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec; vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; - vdso_data->stamp_xtime = xt; + vdso_data->stamp_xtime_sec = xt.tv_sec; + vdso_data->stamp_xtime_nsec = xt.tv_nsec; vdso_data->stamp_sec_fraction = frac_sec; smp_wmb(); ++(vdso_data->tb_update_count); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 82f43535e686..014ff0701f24 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -250,15 +250,22 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, } NOKPROBE_SYMBOL(oops_end); +static char *get_mmu_str(void) +{ + if (early_radix_enabled()) + return " MMU=Radix"; + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return " MMU=Hash"; + return ""; +} + static int __die(const char *str, struct pt_regs *regs, long err) { printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); - printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n", + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", - PAGE_SIZE / 1024, - early_radix_enabled() ? " MMU=Radix" : "", - early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "", + PAGE_SIZE / 1024, get_mmu_str(), IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index a384e7c8b01c..01595e8cafe7 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -120,13 +120,15 @@ int udbg_write(const char *s, int n) #define UDBG_BUFSIZE 256 void udbg_printf(const char *fmt, ...) { - char buf[UDBG_BUFSIZE]; - va_list args; + if (udbg_putc) { + char buf[UDBG_BUFSIZE]; + va_list args; - va_start(args, fmt); - vsnprintf(buf, UDBG_BUFSIZE, fmt, args); - udbg_puts(buf); - va_end(args); + va_start(args, fmt); + vsnprintf(buf, UDBG_BUFSIZE, fmt, args); + udbg_puts(buf); + va_end(args); + } } void __init udbg_progress(char *s, unsigned short hex) diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index becd9f8767ed..c8e6902cb01b 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -15,10 +15,8 @@ /* Offset for the low 32-bit part of a field of long type */ #ifdef CONFIG_PPC64 #define LOPART 4 -#define TSPEC_TV_SEC TSPC64_TV_SEC+LOPART #else #define LOPART 0 -#define TSPEC_TV_SEC TSPC32_TV_SEC #endif .text @@ -192,7 +190,7 @@ V_FUNCTION_BEGIN(__kernel_time) bl __get_datapage@local mr r9, r3 /* datapage ptr in r9 */ - lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9) + lwz r3,STAMP_XTIME_SEC+LOPART(r9) cmplwi r11,0 /* check if t is NULL */ beq 2f @@ -268,7 +266,7 @@ __do_get_tspec: * as a 32.32 fixed-point number in r3 and r4. * Load & add the xtime stamp. */ - lwz r5,STAMP_XTIME+TSPEC_TV_SEC(r9) + lwz r5,STAMP_XTIME_SEC+LOPART(r9) lwz r6,STAMP_SEC_FRAC(r9) addc r4,r4,r6 adde r3,r3,r5 diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 3f92561a64c4..526f5ba2593e 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -35,7 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 @@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 07bfe33fe874..1f24e411af80 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -116,8 +116,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE * too */ - ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3) - ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3) + ld r4,STAMP_XTIME_SEC(r3) + ld r5,STAMP_XTIME_NSEC(r3) bne cr6,75f /* CLOCK_MONOTONIC_COARSE */ @@ -220,7 +220,7 @@ V_FUNCTION_BEGIN(__kernel_time) mr r11,r3 /* r11 holds t */ bl V_LOCAL_FUNC(__get_datapage) - ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3) + ld r4,STAMP_XTIME_SEC(r3) cmpldi r11,0 /* check if t is NULL */ beq 2f @@ -265,7 +265,7 @@ V_FUNCTION_BEGIN(__do_get_tspec) mulhdu r6,r6,r5 /* in units of 2^-32 seconds */ /* Add stamp since epoch */ - ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3) + ld r4,STAMP_XTIME_SEC(r3) lwz r5,STAMP_SEC_FRAC(r3) or r0,r4,r5 or r0,r0,r6 diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 060a1acd7c6d..8834220036a5 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -6,6 +6,8 @@ #endif #define BSS_FIRST_SECTIONS *(.bss.prominit) +#define EMITS_PT_NOTE +#define RO_EXCEPTION_TABLE_ALIGN 0 #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> @@ -18,22 +20,8 @@ ENTRY(_stext) PHDRS { - kernel PT_LOAD FLAGS(7); /* RWX */ - notes PT_NOTE FLAGS(0); - dummy PT_NOTE FLAGS(0); - - /* binutils < 2.18 has a bug that makes it misbehave when taking an - ELF file with all segments at load address 0 as input. This - happens when running "strip" on vmlinux, because of the AT() magic - in this linker script. People using GCC >= 4.2 won't run into - this problem, because the "build-id" support will put some data - into the "notes" segment (at a non-zero load address). - - To work around this, we force some data into both the "dummy" - segment and the kernel segment, so the dummy segment will get a - non-zero load address. It's not enough to always create the - "notes" segment, since if nothing gets assigned to it, its load - address will be zero. */ + text PT_LOAD FLAGS(7); /* RWX */ + note PT_NOTE FLAGS(0); } #ifdef CONFIG_PPC64 @@ -77,7 +65,7 @@ SECTIONS #else /* !CONFIG_PPC64 */ HEAD_TEXT #endif - } :kernel + } :text __head_end = .; @@ -126,7 +114,7 @@ SECTIONS __got2_end = .; #endif /* CONFIG_PPC32 */ - } :kernel + } :text . = ALIGN(ETEXT_ALIGN_SIZE); _etext = .; @@ -175,17 +163,6 @@ SECTIONS __stop__btb_flush_fixup = .; } #endif - EXCEPTION_TABLE(0) - - NOTES :kernel :notes - - /* The dummy segment contents for the bug workaround mentioned above - near PHDRS. */ - .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) { - LONG(0) - LONG(0) - LONG(0) - } :kernel :dummy /* * Init sections discarded at runtime @@ -200,7 +177,7 @@ SECTIONS #ifdef CONFIG_PPC64 *(.tramp.ftrace.init); #endif - } :kernel + } :text /* .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table |