author     Linus Torvalds <torvalds@linux-foundation.org>  2017-05-05 11:36:44 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-05-05 11:36:44 -0700
commit     7246f60068840847bdcf595be5f0b5ca632736e0 (patch)
tree       fd9a963a03c2655f3ba9d1ced3c87a2775f5b166 /arch/powerpc/kernel
parent     e579dde654fc2c6b0d3e4b77a9a4b2d2405c510e (diff)
parent     700b7eadd5625d22b8235fb21259b3d7d564c000 (diff)
download   linux-7246f60068840847bdcf595be5f0b5ca632736e0.tar.bz2
Merge tag 'powerpc-4.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"Highlights include:
- Larger virtual address space on 64-bit server CPUs. By default we
use a 128TB virtual address space, but a process can request access
to the full 512TB by passing a hint to mmap().
- Support for the new Power9 "XIVE" interrupt controller.
- TLB flushing optimisations for the radix MMU on Power9.
- Support for CAPI cards on Power9, using the "Coherent Accelerator
Interface Architecture 2.0".
- The ability to configure the mmap randomisation limits at build and
runtime.
- Several small fixes and cleanups to the kprobes code, as well as
support for KPROBES_ON_FTRACE.
- Major improvements to handling of system reset interrupts,
correctly treating them as NMIs, giving them a dedicated stack and
using a new hypervisor call to trigger them, all of which should
aid debugging and robustness.
- Many fixes and other minor enhancements.
Thanks to: Alastair D'Silva, Alexey Kardashevskiy, Alistair Popple,
Andrew Donnellan, Aneesh Kumar K.V, Anshuman Khandual, Anton
Blanchard, Balbir Singh, Ben Hutchings, Benjamin Herrenschmidt,
Bhupesh Sharma, Chris Packham, Christian Zigotzky, Christophe Leroy,
Christophe Lombard, Daniel Axtens, David Gibson, Gautham R. Shenoy,
Gavin Shan, Geert Uytterhoeven, Guilherme G. Piccoli, Hamish Martin,
Hari Bathini, Kees Cook, Laurent Dufour, Madhavan Srinivasan, Mahesh J
Salgaonkar, Mahesh Salgaonkar, Masami Hiramatsu, Matt Brown, Matthew
R. Ochs, Michael Neuling, Naveen N. Rao, Nicholas Piggin, Oliver
O'Halloran, Pan Xinhui, Paul Mackerras, Rashmica Gupta, Russell
Currey, Sukadev Bhattiprolu, Thadeu Lima de Souza Cascardo, Tobin C.
Harding, Tyrel Datwyler, Uma Krishnan, Vaibhav Jain, Vipin K Parashar,
Yang Shi"
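The first highlight deserves a concrete illustration. The expanded 512TB space is opt-in per mapping: by default mmap() keeps placing mappings below the 128TB boundary, and only an address hint above it lets the kernel use the full range. A minimal userspace sketch of that behaviour (the 192TB hint value is an arbitrary example, not anything the kernel requires):

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            /* A hint above 128TB asks the kernel to consider the full 512TB. */
            void *hint = (void *)(192UL << 40);     /* 192TB, arbitrary */
            void *p = mmap(hint, 1UL << 20, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    perror("mmap");
            else
                    printf("mapped at %p\n", p);
            return 0;
    }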
* tag 'powerpc-4.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (214 commits)
powerpc/64s: Power9 has no LPCR[VRMASD] field so don't set it
powerpc/powernv: Fix TCE kill on NVLink2
powerpc/mm/radix: Drop support for CPUs without lockless tlbie
powerpc/book3s/mce: Move add_taint() later in virtual mode
powerpc/sysfs: Move #ifdef CONFIG_HOTPLUG_CPU out of the function body
powerpc/smp: Document irq enable/disable after migrating IRQs
powerpc/mpc52xx: Don't select user-visible RTAS_PROC
powerpc/powernv: Document cxl dependency on special case in pnv_eeh_reset()
powerpc/eeh: Clean up and document event handling functions
powerpc/eeh: Avoid use after free in eeh_handle_special_event()
cxl: Mask slice error interrupts after first occurrence
cxl: Route eeh events to all drivers in cxl_pci_error_detected()
cxl: Force context lock during EEH flow
powerpc/64: Allow CONFIG_RELOCATABLE if COMPILE_TEST
powerpc/xmon: Teach xmon oops about radix vectors
powerpc/mm/hash: Fix off-by-one in comment about kernel contexts ids
powerpc/pseries: Enable VFIO
powerpc/powernv: Fix iommu table size calculation hook for small tables
powerpc/powernv: Check kzalloc() return value in pnv_pci_table_alloc
powerpc: Add arch/powerpc/tools directory
...
Diffstat (limited to 'arch/powerpc/kernel')
39 files changed, 2090 insertions, 1296 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 811f441a125f..b9db46ae545b 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -25,8 +25,6 @@ CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-# do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
 # timers used by tracing
 CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
 endif
@@ -97,6 +95,7 @@ obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_OPTPROBES)		+= optprobes.o optprobes_head.o
+obj-$(CONFIG_KPROBES_ON_FTRACE)	+= kprobes-ftrace.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
@@ -118,10 +117,7 @@ obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
 obj-$(CONFIG_PPC_IO_WORKAROUNDS)	+= io-workarounds.o
 
-obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
-obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
-obj-$(CONFIG_TRACING)		+= trace_clock.o
+obj-y				+= trace/
 
 ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
 obj-y				+= iomap.o
@@ -142,14 +138,14 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
 # Disable GCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_prom_init.o := n
 UBSAN_SANITIZE_prom_init.o := n
-GCOV_PROFILE_ftrace.o := n
-UBSAN_SANITIZE_ftrace.o := n
 GCOV_PROFILE_machine_kexec_64.o := n
 UBSAN_SANITIZE_machine_kexec_64.o := n
 GCOV_PROFILE_machine_kexec_32.o := n
 UBSAN_SANITIZE_machine_kexec_32.o := n
 GCOV_PROFILE_kprobes.o := n
 UBSAN_SANITIZE_kprobes.o := n
+GCOV_PROFILE_kprobes-ftrace.o := n
+UBSAN_SANITIZE_kprobes-ftrace.o := n
 UBSAN_SANITIZE_vdso.o := n
 
 extra-$(CONFIG_PPC_FPU)	+= fpu.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4367e7df51a1..439c257dec4a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,6 +185,7 @@ int main(void)
 #ifdef CONFIG_PPC_MM_SLICES
 	OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
 	OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
+	DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit));
 	DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
 #endif /* CONFIG_PPC_MM_SLICES */
 #endif
@@ -219,6 +220,7 @@ int main(void)
 	OFFSET(PACA_EXGEN, paca_struct, exgen);
 	OFFSET(PACA_EXMC, paca_struct, exmc);
 	OFFSET(PACA_EXSLB, paca_struct, exslb);
+	OFFSET(PACA_EXNMI, paca_struct, exnmi);
 	OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
 	OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
 	OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
@@ -232,7 +234,9 @@ int main(void)
 	OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
 #ifdef CONFIG_PPC_BOOK3S_64
 	OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
+	OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
 	OFFSET(PACA_IN_MCE, paca_struct, in_mce);
+	OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
 #endif
 	OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
 	OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
@@ -399,8 +403,8 @@ int main(void)
 	DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
 #endif
 
-#ifdef MAX_PGD_TABLE_SIZE
-	DEFINE(PGD_TABLE_SIZE, MAX_PGD_TABLE_SIZE);
+#ifdef CONFIG_PPC_BOOK3S_64
+	DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE)));
 #else
 	DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
 #endif
@@ -727,6 +731,7 @@ int main(void)
 	OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
 	OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
 	OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
+	OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
 #endif
 
 	DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 7fe8c79e6937..10cb2896b2ae 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -29,7 +29,8 @@ _GLOBAL(__setup_cpu_power7)
 	li	r0,0
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
-	bl	__init_LPCR
+	li	r4,(LPCR_LPES1 >> LPCR_LPES_SH)
+	bl	__init_LPCR_ISA206
 	bl	__init_tlb_power7
 	mtlr	r11
 	blr
@@ -42,7 +43,8 @@ _GLOBAL(__restore_cpu_power7)
 	li	r0,0
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
-	bl	__init_LPCR
+	li	r4,(LPCR_LPES1 >> LPCR_LPES_SH)
+	bl	__init_LPCR_ISA206
 	bl	__init_tlb_power7
 	mtlr	r11
 	blr
@@ -59,7 +61,8 @@ _GLOBAL(__setup_cpu_power8)
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
 	ori	r3, r3, LPCR_PECEDH
-	bl	__init_LPCR
+	li	r4,0 /* LPES = 0 */
+	bl	__init_LPCR_ISA206
 	bl	__init_HFSCR
 	bl	__init_tlb_power8
 	bl	__init_PMU_HV
@@ -80,7 +83,8 @@ _GLOBAL(__restore_cpu_power8)
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
 	ori	r3, r3, LPCR_PECEDH
-	bl	__init_LPCR
+	li	r4,0 /* LPES = 0 */
+	bl	__init_LPCR_ISA206
 	bl	__init_HFSCR
 	bl	__init_tlb_power8
 	bl	__init_PMU_HV
@@ -99,11 +103,12 @@ _GLOBAL(__setup_cpu_power9)
 	mtspr	SPRN_PSSCR,r0
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
-	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
 	or	r3, r3, r4
 	LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
 	andc	r3, r3, r4
-	bl	__init_LPCR
+	li	r4,0 /* LPES = 0 */
+	bl	__init_LPCR_ISA300
 	bl	__init_HFSCR
 	bl	__init_tlb_power9
 	bl	__init_PMU_HV
@@ -122,11 +127,12 @@ _GLOBAL(__restore_cpu_power9)
 	mtspr	SPRN_PSSCR,r0
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
-	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
 	or	r3, r3, r4
 	LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR)
 	andc	r3, r3, r4
-	bl	__init_LPCR
+	li	r4,0 /* LPES = 0 */
+	bl	__init_LPCR_ISA300
 	bl	__init_HFSCR
 	bl	__init_tlb_power9
 	bl	__init_PMU_HV
@@ -144,9 +150,9 @@ __init_hvmode_206:
 	std	r5,CPU_SPEC_FEATURES(r4)
 	blr
 
-__init_LPCR:
+__init_LPCR_ISA206:
 	/* Setup a sane LPCR:
-	 *   Called with initial LPCR in R3
+	 *   Called with initial LPCR in R3 and desired LPES 2-bit value in R4
 	 *
 	 *   LPES = 0b01 (HSRR0/1 used for 0x500)
 	 *   PECE = 0b111
@@ -157,16 +163,18 @@ __init_LPCR:
 	 *
 	 * Other bits untouched for now
 	 */
-	li	r5,1
-	rldimi	r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
+	li	r5,0x10
+	rldimi	r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
+
+	/* POWER9 has no VRMASD */
+__init_LPCR_ISA300:
+	rldimi	r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
 	ori	r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
 	li	r5,4
 	rldimi	r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3
 	clrrdi	r3,r3,1		/* clear HDICE */
 	li	r5,4
 	rldimi	r3,r5, LPCR_VC_SH, 0
-	li	r5,0x10
-	rldimi	r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
 	mtspr	SPRN_LPCR,r3
 	isync
 	blr
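The cpu_setup_power.S change above splits __init_LPCR into an ISA 2.06 entry point (which still programs VRMASD and falls through) and an ISA 3.00 entry point, with the desired LPES value now passed in r4 instead of being hard-coded. As a hedged C restatement of the field manipulation (insert_field() is a made-up helper standing in for the rldimi instructions, not kernel code):

    /* Conceptual sketch only; the real code is the assembly above. */
    lpcr = insert_field(lpcr, LPCR_VRMASD_SH, 5, 0x10); /* ISA 2.06 only: POWER9 has no VRMASD */
    /* __init_LPCR_ISA300 starts here */
    lpcr = insert_field(lpcr, LPCR_LPES_SH, 2, lpes);   /* LPES now supplied by the caller */
    lpcr |= LPCR_PECE0 | LPCR_PECE1 | LPCR_PECE2;
    lpcr = insert_field(lpcr, LPCR_DPFD_SH, 3, 4);
    lpcr &= ~LPCR_HDICE;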
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 2128f3a96c32..b6fe883b1016 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -20,18 +20,60 @@
 #include <asm/kvm_ppc.h>
 
 #ifdef CONFIG_SMP
-void doorbell_setup_this_cpu(void)
+
+/*
+ * Doorbells must only be used if CPU_FTR_DBELL is available.
+ * msgsnd is used in HV, and msgsndp is used in !HV.
+ *
+ * These should be used by platform code that is aware of restrictions.
+ * Other arch code should use ->cause_ipi.
+ *
+ * doorbell_global_ipi() sends a dbell to any target CPU.
+ * Must be used only by architectures that address msgsnd target
+ * by PIR/get_hard_smp_processor_id.
+ */
+void doorbell_global_ipi(int cpu)
 {
-	unsigned long tag = mfspr(SPRN_DOORBELL_CPUTAG) & PPC_DBELL_TAG_MASK;
+	u32 tag = get_hard_smp_processor_id(cpu);
 
-	smp_muxed_ipi_set_data(smp_processor_id(), tag);
+	kvmppc_set_host_ipi(cpu, 1);
+	/* Order previous accesses vs. msgsnd, which is treated as a store */
+	ppc_msgsnd_sync();
+	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
 }
 
-void doorbell_cause_ipi(int cpu, unsigned long data)
+/*
+ * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
+ * Must be used only by architectures that address msgsnd target
+ * by TIR/cpu_thread_in_core.
+ */
+void doorbell_core_ipi(int cpu)
 {
+	u32 tag = cpu_thread_in_core(cpu);
+
+	kvmppc_set_host_ipi(cpu, 1);
 	/* Order previous accesses vs. msgsnd, which is treated as a store */
-	mb();
-	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, data);
+	ppc_msgsnd_sync();
+	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+}
+
+/*
+ * Attempt to cause a core doorbell if destination is on the same core.
+ * Returns 1 on success, 0 on failure.
+ */
+int doorbell_try_core_ipi(int cpu)
+{
+	int this_cpu = get_cpu();
+	int ret = 0;
+
+	if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
+		doorbell_core_ipi(cpu);
+		ret = 1;
+	}
+
+	put_cpu();
+
+	return ret;
 }
 
 void doorbell_exception(struct pt_regs *regs)
@@ -40,12 +82,14 @@ void doorbell_exception(struct pt_regs *regs)
 
 	irq_enter();
 
+	ppc_msgsync();
+
 	may_hard_irq_enable();
 
 	kvmppc_set_host_ipi(smp_processor_id(), 0);
 	__this_cpu_inc(irq_stat.doorbell_irqs);
 
-	smp_ipi_demux();
+	smp_ipi_demux_relaxed(); /* already performed the barrier */
 
 	irq_exit();
 	set_irq_regs(old_regs);
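The dbell.c helpers above replace the old doorbell_cause_ipi() with explicit global (PIR-addressed msgsnd) and core-local (TIR-addressed msgsndp) variants. A hedged sketch of how platform SMP code might use the new try-core helper, falling back to its interrupt controller (ic_cause_ipi() is a hypothetical stand-in for the platform's ->cause_ipi path):

    static void my_platform_cause_ipi(int cpu)
    {
            /* msgsndp can only reach hardware threads on the same core */
            if (doorbell_try_core_ipi(cpu))
                    return;

            ic_cause_ipi(cpu);      /* hypothetical controller fallback */
    }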
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9de7f79e702b..63992b2d8e15 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -22,7 +22,6 @@
  */
 
 #include <linux/delay.h>
-#include <linux/debugfs.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -37,7 +36,7 @@
 #include <linux/of.h>
 
 #include <linux/atomic.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
 #include <asm/eeh.h>
 #include <asm/eeh_event.h>
 #include <asm/io.h>
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index b94887165a10..c405c79e50cd 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -724,7 +724,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
  */
 #define MAX_WAIT_FOR_RECOVERY 300
 
-static void eeh_handle_normal_event(struct eeh_pe *pe)
+/**
+ * eeh_handle_normal_event - Handle EEH events on a specific PE
+ * @pe: EEH PE
+ *
+ * Attempts to recover the given PE.  If recovery fails or the PE has failed
+ * too many times, remove the PE.
+ *
+ * Returns true if @pe should no longer be used, else false.
+ */
+static bool eeh_handle_normal_event(struct eeh_pe *pe)
 {
 	struct pci_bus *frozen_bus;
 	struct eeh_dev *edev, *tmp;
@@ -736,13 +745,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 	if (!frozen_bus) {
 		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
 			__func__, pe->phb->global_number, pe->addr);
-		return;
+		return false;
 	}
 
 	eeh_pe_update_time_stamp(pe);
 	pe->freeze_count++;
-	if (pe->freeze_count > eeh_max_freezes)
-		goto excess_failures;
+	if (pe->freeze_count > eeh_max_freezes) {
+		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
+		       "last hour and has been permanently disabled.\n",
+		       pe->phb->global_number, pe->addr,
+		       pe->freeze_count);
+		goto hard_fail;
+	}
 	pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
 		pe->freeze_count);
 
@@ -870,27 +884,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 	pr_info("EEH: Notify device driver to resume\n");
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
-	return;
+	return false;
 
-excess_failures:
+hard_fail:
 	/*
 	 * About 90% of all real-life EEH failures in the field
 	 * are due to poorly seated PCI cards. Only 10% or so are
 	 * due to actual, failed cards.
 	 */
-	pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
-	       "last hour and has been permanently disabled.\n"
-	       "Please try reseating or replacing it.\n",
-		pe->phb->global_number, pe->addr,
-		pe->freeze_count);
-	goto perm_error;
-
-hard_fail:
 	pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
 	       "Please try reseating or replacing it\n",
 		pe->phb->global_number, pe->addr);
 
-perm_error:
 	eeh_slot_error_detail(pe, EEH_LOG_PERM);
 
 	/* Notify all devices that they're about to go down. */
@@ -915,10 +920,21 @@ perm_error:
 		pci_lock_rescan_remove();
 		pci_hp_remove_devices(frozen_bus);
 		pci_unlock_rescan_remove();
+
+		/* The passed PE should no longer be used */
+		return true;
 	}
+	return false;
 }
 
+/**
+ * eeh_handle_special_event - Handle EEH events without a specific failing PE
+ *
+ * Called when an EEH event is detected but can't be narrowed down to a
+ * specific PE.  Iterates through possible failures and handles them as
+ * necessary.
+ */
 static void eeh_handle_special_event(void)
 {
 	struct eeh_pe *pe, *phb_pe;
@@ -982,7 +998,14 @@ static void eeh_handle_special_event(void)
 		 */
 		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
 		    rc == EEH_NEXT_ERR_FENCED_PHB) {
-			eeh_handle_normal_event(pe);
+			/*
+			 * eeh_handle_normal_event() can make the PE stale if it
+			 * determines that the PE cannot possibly be recovered.
+			 * Don't modify the PE state if that's the case.
+			 */
+			if (eeh_handle_normal_event(pe))
+				continue;
+
 			eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 		} else {
 			pci_lock_rescan_remove();
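The eeh_driver.c rework above changes eeh_handle_normal_event() to report whether the PE it was handed has been removed, so callers can avoid touching freed memory. Reduced to a hedged sketch, the caller contract is:

    /* Sketch of the ownership contract, not verbatim kernel source. */
    if (eeh_handle_normal_event(pe))
            continue;       /* PE is stale: any further dereference is use-after-free */
    eeh_pe_state_clear(pe, EEH_PE_RECOVERING);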
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index a38600949f3a..8587059ad848 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -31,7 +31,6 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
-#include <asm/ftrace.h>
 #include <asm/ptrace.h>
 #include <asm/export.h>
 
@@ -1315,109 +1314,3 @@ machine_check_in_rtas:
 	/* XXX load up BATs and panic */
 
 #endif /* CONFIG_PPC_RTAS */
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-	/*
-	 * It is required that _mcount on PPC32 must preserve the
-	 * link register. But we have r0 to play with. We use r0
-	 * to push the return address back to the caller of mcount
-	 * into the ctr register, restore the link register and
-	 * then jump back using the ctr register.
-	 */
-	mflr	r0
-	mtctr	r0
-	lwz	r0, 4(r1)
-	mtlr	r0
-	bctr
-
-_GLOBAL(ftrace_caller)
-	MCOUNT_SAVE_FRAME
-	/* r3 ends up with link register */
-	subi	r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
-	bl	ftrace_stub
-	nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
-	b	ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
-	MCOUNT_RESTORE_FRAME
-	/* old link register ends up in ctr reg */
-	bctr
-#else
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-
-	MCOUNT_SAVE_FRAME
-
-	subi	r3, r3, MCOUNT_INSN_SIZE
-	LOAD_REG_ADDR(r5, ftrace_trace_function)
-	lwz	r5,0(r5)
-
-	mtctr	r5
-	bctrl
-	nop
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	b	ftrace_graph_caller
-#endif
-	MCOUNT_RESTORE_FRAME
-	bctr
-#endif
-EXPORT_SYMBOL(_mcount)
-
-_GLOBAL(ftrace_stub)
-	blr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
-	/* load r4 with local address */
-	lwz	r4, 44(r1)
-	subi	r4, r4, MCOUNT_INSN_SIZE
-
-	/* Grab the LR out of the caller stack frame */
-	lwz	r3,52(r1)
-
-	bl	prepare_ftrace_return
-	nop
-
-	/*
-	 * prepare_ftrace_return gives us the address we divert to.
-	 * Change the LR in the callers stack frame to this.
-	 */
-	stw	r3,52(r1)
-
-	MCOUNT_RESTORE_FRAME
-	/* old link register ends up in ctr reg */
-	bctr
-
-_GLOBAL(return_to_handler)
-	/* need to save return values */
-	stwu	r1, -32(r1)
-	stw	r3, 20(r1)
-	stw	r4, 16(r1)
-	stw	r31, 12(r1)
-	mr	r31, r1
-
-	bl	ftrace_return_to_handler
-	nop
-
-	/* return value has real return address */
-	mtlr	r3
-
-	lwz	r3, 20(r1)
-	lwz	r4, 16(r1)
-	lwz	r31,12(r1)
-	lwz	r1, 0(r1)
-
-	/* Jump back to real return address */
-	blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 767ef6d68c9e..bfbad08a1207 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -20,7 +20,6 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
-#include <linux/magic.h>
 #include <asm/unistd.h>
 #include <asm/processor.h>
 #include <asm/page.h>
@@ -33,7 +32,6 @@
 #include <asm/bug.h>
 #include <asm/ptrace.h>
 #include <asm/irqflags.h>
-#include <asm/ftrace.h>
 #include <asm/hw_irq.h>
 #include <asm/context_tracking.h>
 #include <asm/tm.h>
@@ -1173,381 +1171,3 @@ _GLOBAL(enter_prom)
 	ld	r0,16(r1)
 	mtlr    r0
 	blr
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-EXPORT_SYMBOL(_mcount)
-	mflr	r12
-	mtctr	r12
-	mtlr	r0
-	bctr
-
-#ifndef CC_USING_MPROFILE_KERNEL
-_GLOBAL_TOC(ftrace_caller)
-	/* Taken from output of objdump from lib64/glibc */
-	mflr	r3
-	ld	r11, 0(r1)
-	stdu	r1, -112(r1)
-	std	r3, 128(r1)
-	ld	r4, 16(r11)
-	subi	r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
-	bl	ftrace_stub
-	nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
-	b	ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
-	ld	r0, 128(r1)
-	mtlr	r0
-	addi	r1, r1, 112
-
-#else /* CC_USING_MPROFILE_KERNEL */
-/*
- *
- * ftrace_caller() is the function that replaces _mcount() when ftrace is
- * active.
- *
- * We arrive here after a function A calls function B, and we are the trace
- * function for B. When we enter r1 points to A's stack frame, B has not yet
- * had a chance to allocate one yet.
- *
- * Additionally r2 may point either to the TOC for A, or B, depending on
- * whether B did a TOC setup sequence before calling us.
- *
- * On entry the LR points back to the _mcount() call site, and r0 holds the
- * saved LR as it was on entry to B, ie. the original return address at the
- * call site in A.
- *
- * Our job is to save the register state into a struct pt_regs (on the stack)
- * and then arrange for the ftrace function to be called.
- */
-_GLOBAL(ftrace_caller)
-	/* Save the original return address in A's stack frame */
-	std	r0,LRSAVE(r1)
-
-	/* Create our stack frame + pt_regs */
-	stdu	r1,-SWITCH_FRAME_SIZE(r1)
-
-	/* Save all gprs to pt_regs */
-	SAVE_8GPRS(0,r1)
-	SAVE_8GPRS(8,r1)
-	SAVE_8GPRS(16,r1)
-	SAVE_8GPRS(24,r1)
-
-	/* Load special regs for save below */
-	mfmsr   r8
-	mfctr   r9
-	mfxer   r10
-	mfcr	r11
-
-	/* Get the _mcount() call site out of LR */
-	mflr	r7
-	/* Save it as pt_regs->nip & pt_regs->link */
-	std     r7, _NIP(r1)
-	std     r7, _LINK(r1)
-
-	/* Save callee's TOC in the ABI compliant location */
-	std	r2, 24(r1)
-	ld	r2,PACATOC(r13)	/* get kernel TOC in r2 */
-
-	addis	r3,r2,function_trace_op@toc@ha
-	addi	r3,r3,function_trace_op@toc@l
-	ld	r5,0(r3)
-
-#ifdef CONFIG_LIVEPATCH
-	mr	r14,r7		/* remember old NIP */
-#endif
-	/* Calculate ip from nip-4 into r3 for call below */
-	subi    r3, r7, MCOUNT_INSN_SIZE
-
-	/* Put the original return address in r4 as parent_ip */
-	mr	r4, r0
-
-	/* Save special regs */
-	std     r8, _MSR(r1)
-	std     r9, _CTR(r1)
-	std     r10, _XER(r1)
-	std     r11, _CCR(r1)
-
-	/* Load &pt_regs in r6 for call below */
-	addi    r6, r1 ,STACK_FRAME_OVERHEAD
-
-	/* ftrace_call(r3, r4, r5, r6) */
-.globl ftrace_call
-ftrace_call:
-	bl	ftrace_stub
-	nop
-
-	/* Load ctr with the possibly modified NIP */
-	ld	r3, _NIP(r1)
-	mtctr	r3
-#ifdef CONFIG_LIVEPATCH
-	cmpd	r14,r3		/* has NIP been altered? */
-#endif
-
-	/* Restore gprs */
-	REST_8GPRS(0,r1)
-	REST_8GPRS(8,r1)
-	REST_8GPRS(16,r1)
-	REST_8GPRS(24,r1)
-
-	/* Restore callee's TOC */
-	ld	r2, 24(r1)
-
-	/* Pop our stack frame */
-	addi r1, r1, SWITCH_FRAME_SIZE
-
-	/* Restore original LR for return to B */
-	ld	r0, LRSAVE(r1)
-	mtlr	r0
-
-#ifdef CONFIG_LIVEPATCH
-	/* Based on the cmpd above, if the NIP was altered handle livepatch */
-	bne-	livepatch_handler
-#endif
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	stdu	r1, -112(r1)
-.globl ftrace_graph_call
-ftrace_graph_call:
-	b	ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-	addi	r1, r1, 112
-#endif
-
-	ld	r0,LRSAVE(r1)	/* restore callee's lr at _mcount site */
-	mtlr	r0
-	bctr			/* jump after _mcount site */
-#endif /* CC_USING_MPROFILE_KERNEL */
-
-_GLOBAL(ftrace_stub)
-	blr
-
-#ifdef CONFIG_LIVEPATCH
-	/*
-	 * This function runs in the mcount context, between two functions. As
-	 * such it can only clobber registers which are volatile and used in
-	 * function linkage.
-	 *
-	 * We get here when a function A, calls another function B, but B has
-	 * been live patched with a new function C.
-	 *
-	 * On entry:
-	 *  - we have no stack frame and can not allocate one
-	 *  - LR points back to the original caller (in A)
-	 *  - CTR holds the new NIP in C
-	 *  - r0 & r12 are free
-	 *
-	 * r0 can't be used as the base register for a DS-form load or store, so
-	 * we temporarily shuffle r1 (stack pointer) into r0 and then put it back.
-	 */
-livepatch_handler:
-	CURRENT_THREAD_INFO(r12, r1)
-
-	/* Save stack pointer into r0 */
-	mr	r0, r1
-
-	/* Allocate 3 x 8 bytes */
-	ld	r1, TI_livepatch_sp(r12)
-	addi	r1, r1, 24
-	std	r1, TI_livepatch_sp(r12)
-
-	/* Save toc & real LR on livepatch stack */
-	std	r2,  -24(r1)
-	mflr	r12
-	std	r12, -16(r1)
-
-	/* Store stack end marker */
-	lis     r12, STACK_END_MAGIC@h
-	ori     r12, r12, STACK_END_MAGIC@l
-	std	r12, -8(r1)
-
-	/* Restore real stack pointer */
-	mr	r1, r0
-
-	/* Put ctr in r12 for global entry and branch there */
-	mfctr	r12
-	bctrl
-
-	/*
-	 * Now we are returning from the patched function to the original
-	 * caller A. We are free to use r0 and r12, and we can use r2 until we
-	 * restore it.
-	 */
-
-	CURRENT_THREAD_INFO(r12, r1)
-
-	/* Save stack pointer into r0 */
-	mr	r0, r1
-
-	ld	r1, TI_livepatch_sp(r12)
-
-	/* Check stack marker hasn't been trashed */
-	lis     r2,  STACK_END_MAGIC@h
-	ori     r2,  r2, STACK_END_MAGIC@l
-	ld	r12, -8(r1)
-1:	tdne	r12, r2
-	EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
-
-	/* Restore LR & toc from livepatch stack */
-	ld	r12, -16(r1)
-	mtlr	r12
-	ld	r2,  -24(r1)
-
-	/* Pop livepatch stack frame */
-	CURRENT_THREAD_INFO(r12, r0)
-	subi	r1, r1, 24
-	std	r1, TI_livepatch_sp(r12)
-
-	/* Restore real stack pointer */
-	mr	r1, r0
-
-	/* Return to original caller of live patched function */
-	blr
-#endif
-
-
-#else
-_GLOBAL_TOC(_mcount)
-EXPORT_SYMBOL(_mcount)
-	/* Taken from output of objdump from lib64/glibc */
-	mflr	r3
-	ld	r11, 0(r1)
-	stdu	r1, -112(r1)
-	std	r3, 128(r1)
-	ld	r4, 16(r11)
-
-	subi	r3, r3, MCOUNT_INSN_SIZE
-	LOAD_REG_ADDR(r5,ftrace_trace_function)
-	ld	r5,0(r5)
-	ld	r5,0(r5)
-	mtctr	r5
-	bctrl
-	nop
-
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	b	ftrace_graph_caller
-#endif
-	ld	r0, 128(r1)
-	mtlr	r0
-	addi	r1, r1, 112
-_GLOBAL(ftrace_stub)
-	blr
-
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-#ifndef CC_USING_MPROFILE_KERNEL
-_GLOBAL(ftrace_graph_caller)
-	/* load r4 with local address */
-	ld	r4, 128(r1)
-	subi	r4, r4, MCOUNT_INSN_SIZE
-
-	/* Grab the LR out of the caller stack frame */
-	ld	r11, 112(r1)
-	ld	r3, 16(r11)
-
-	bl	prepare_ftrace_return
-	nop
-
-	/*
-	 * prepare_ftrace_return gives us the address we divert to.
-	 * Change the LR in the callers stack frame to this.
-	 */
-	ld	r11, 112(r1)
-	std	r3, 16(r11)
-
-	ld	r0, 128(r1)
-	mtlr	r0
-	addi	r1, r1, 112
-	blr
-
-#else /* CC_USING_MPROFILE_KERNEL */
-_GLOBAL(ftrace_graph_caller)
-	/* with -mprofile-kernel, parameter regs are still alive at _mcount */
-	std	r10, 104(r1)
-	std	r9, 96(r1)
-	std	r8, 88(r1)
-	std	r7, 80(r1)
-	std	r6, 72(r1)
-	std	r5, 64(r1)
-	std	r4, 56(r1)
-	std	r3, 48(r1)
-
-	/* Save callee's TOC in the ABI compliant location */
-	std	r2, 24(r1)
-	ld	r2, PACATOC(r13)	/* get kernel TOC in r2 */
-
-	mfctr	r4		/* ftrace_caller has moved local addr here */
-	std	r4, 40(r1)
-	mflr	r3		/* ftrace_caller has restored LR from stack */
-	subi	r4, r4, MCOUNT_INSN_SIZE
-
-	bl	prepare_ftrace_return
-	nop
-
-	/*
-	 * prepare_ftrace_return gives us the address we divert to.
-	 * Change the LR to this.
-	 */
-	mtlr	r3
-
-	ld	r0, 40(r1)
-	mtctr	r0
-	ld	r10, 104(r1)
-	ld	r9, 96(r1)
-	ld	r8, 88(r1)
-	ld	r7, 80(r1)
-	ld	r6, 72(r1)
-	ld	r5, 64(r1)
-	ld	r4, 56(r1)
-	ld	r3, 48(r1)
-
-	/* Restore callee's TOC */
-	ld	r2, 24(r1)
-
-	addi	r1, r1, 112
-	mflr	r0
-	std	r0, LRSAVE(r1)
-	bctr
-#endif /* CC_USING_MPROFILE_KERNEL */
-
-_GLOBAL(return_to_handler)
-	/* need to save return values */
-	std	r4,  -32(r1)
-	std	r3,  -24(r1)
-	/* save TOC */
-	std	r2,  -16(r1)
-	std	r31, -8(r1)
-	mr	r31, r1
-	stdu	r1, -112(r1)
-
-	/*
-	 * We might be called from a module.
-	 * Switch to our TOC to run inside the core kernel.
-	 */
-	ld	r2, PACATOC(r13)
-
-	bl	ftrace_return_to_handler
-	nop
-
-	/* return value has real return address */
-	mtlr	r3
-
-	ld	r1, 0(r1)
-	ld	r4,  -32(r1)
-	ld	r3,  -24(r1)
-	ld	r2,  -16(r1)
-	ld	r31, -8(r1)
-
-	/* Jump back to real return address */
-	blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6353019966e6..a9312b52fe6f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -116,9 +116,11 @@ EXC_VIRT_NONE(0x4000, 0x100)
 
 EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
 	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	clrrdi	r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
-	EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
+	/*
+	 * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
+	 * being used, so a nested NMI exception would corrupt it.
+	 */
+	EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, EXC_STD,
 				 IDLETEST, 0x100)
 
 EXC_REAL_END(system_reset, 0x100, 0x100)
@@ -126,34 +128,37 @@ EXC_VIRT_NONE(0x4100, 0x100)
 
 #ifdef CONFIG_PPC_P7_NAP
 EXC_COMMON_BEGIN(system_reset_idle_common)
-BEGIN_FTR_SECTION
-	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-	bl	pnv_restore_hyp_resource
+	b	pnv_powersave_wakeup
+#endif
 
-	li	r0,PNV_THREAD_RUNNING
-	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
+EXC_COMMON_BEGIN(system_reset_common)
+	/*
+	 * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
+	 * to recover, but nested NMI will notice in_nmi and not recover
+	 * because of the use of the NMI stack. in_nmi reentrancy is tested in
+	 * system_reset_exception.
+	 */
+	lhz	r10,PACA_IN_NMI(r13)
+	addi	r10,r10,1
+	sth	r10,PACA_IN_NMI(r13)
+	li	r10,MSR_RI
+	mtmsrd	r10,1
 
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	BRANCH_TO_KVM(r10, kvm_start_guest)
-1:
-#endif
+	mr	r10,r1
+	ld	r1,PACA_NMI_EMERG_SP(r13)
+	subi	r1,r1,INT_FRAME_SIZE
+	EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
+			system_reset, system_reset_exception,
+			ADD_NVGPRS;ADD_RECONCILE)
 
-	/* Return SRR1 from power7_nap() */
-	mfspr	r3,SPRN_SRR1
-	blt	cr3,2f
-	b	pnv_wakeup_loss
-2:	b	pnv_wakeup_noloss
-#endif
+	/*
+	 * The stack is no longer in use, decrement in_nmi.
+	 */
+	lhz	r10,PACA_IN_NMI(r13)
+	subi	r10,r10,1
+	sth	r10,PACA_IN_NMI(r13)
 
-EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
+	b	ret_from_except
 
 #ifdef CONFIG_PPC_PSERIES
 /*
@@ -161,8 +166,9 @@ EXC_COMMON_BEGIN(system_reset_common)
  */
 TRAMP_REAL_BEGIN(system_reset_fwnmi)
 	SET_SCRATCH0(r13)		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
-				 NOTEST, 0x100)
+	/* See comment at system_reset exception */
+	EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common,
+						EXC_STD, NOTEST, 0x100)
 
 #endif /* CONFIG_PPC_PSERIES */
 
@@ -172,14 +178,6 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
 	 * vector
 	 */
 	SET_SCRATCH0(r13)		/* save r13 */
-	/*
-	 * Running native on arch 2.06 or later, we may wakeup from winkle
-	 * inside machine check. If yes, then last bit of HSPRG0 would be set
-	 * to 1. Hence clear it unconditionally.
-	 */
-	GET_PACA(r13)
-	clrrdi	r13,r13,1
-	SET_PACA(r13)
 	EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
 	b	machine_check_powernv_early
@@ -212,6 +210,12 @@ BEGIN_FTR_SECTION
 	 * NOTE: We are here with MSR_ME=0 (off), which means we risk a
 	 * checkstop if we get another machine check exception before we do
 	 * rfid with MSR_ME=1.
+	 *
+	 * This interrupt can wake directly from idle. If that is the case,
+	 * the machine check is handled then the idle wakeup code is called
+	 * to restore state. In that case, the POWER9 DD1 idle PACA workaround
+	 * is not applied in the early machine check code, which will cause
+	 * bugs.
 	 */
 	mr	r11,r1			/* Save r1 */
 	lhz	r10,PACA_IN_MCE(r13)
@@ -268,20 +272,11 @@ machine_check_fwnmi:
 machine_check_pSeries_0:
 	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
 	/*
-	 * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the
-	 * difference that MSR_RI is not enabled, because PACA_EXMC is being
-	 * used, so nested machine check corrupts it. machine_check_common
-	 * enables MSR_RI.
+	 * MSR_RI is not enabled, because PACA_EXMC is being used, so a
+	 * nested machine check corrupts it. machine_check_common enables
+	 * MSR_RI.
 	 */
-	ld	r10,PACAKMSR(r13)
-	xori	r10,r10,MSR_RI
-	mfspr	r11,SPRN_SRR0
-	LOAD_HANDLER(r12, machine_check_common)
-	mtspr	SPRN_SRR0,r12
-	mfspr	r12,SPRN_SRR1
-	mtspr	SPRN_SRR1,r10
-	rfid
-	b	.	/* prevent speculative execution */
+	EXCEPTION_PROLOG_PSERIES_1_NORI(machine_check_common, EXC_STD)
 
 TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
 
@@ -340,6 +335,37 @@ EXC_COMMON_BEGIN(machine_check_common)
 	/* restore original r1. */			\
 	ld	r1,GPR1(r1)
 
+#ifdef CONFIG_PPC_P7_NAP
+/*
+ * This is an idle wakeup. Low level machine check has already been
+ * done. Queue the event then call the idle code to do the wake up.
+ */
+EXC_COMMON_BEGIN(machine_check_idle_common)
+	bl	machine_check_queue_event
+
+	/*
+	 * We have not used any non-volatile GPRs here, and as a rule
+	 * most exception code including machine check does not.
+	 * Therefore PACA_NAPSTATELOST does not need to be set. Idle
+	 * wakeup will restore volatile registers.
+	 *
+	 * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce.
+	 *
+	 * Then decrement MCE nesting after finishing with the stack.
+	 */
+	ld	r3,_MSR(r1)
+
+	lhz	r11,PACA_IN_MCE(r13)
+	subi	r11,r11,1
+	sth	r11,PACA_IN_MCE(r13)
+
+	/* Turn off the RI bit because SRR1 is used by idle wakeup code. */
+	/* Recoverability could be improved by reducing the use of SRR1. */
+	li	r11,0
+	mtmsrd	r11,1
+
+	b	pnv_powersave_wakeup_mce
+#endif
 /*
  * Handle machine check early in real mode. We come here with
  * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
@@ -352,6 +378,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	bl	machine_check_early
 	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
+
 #ifdef CONFIG_PPC_P7_NAP
 	/*
 	 * Check if thread was in power saving mode. We come here when any
@@ -362,48 +389,14 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 *
 	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
-	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
-	beq	4f			/* No, it wasn;t */
-	/* Thread was in power saving mode. Go back to nap again. */
-	cmpwi	r11,2
-	blt	3f
-	/* Supervisor/Hypervisor state loss */
-	li	r0,1
-	stb	r0,PACA_NAPSTATELOST(r13)
-3:	bl	machine_check_queue_event
-	MACHINE_CHECK_HANDLER_WINDUP
-	GET_PACA(r13)
-	ld	r1,PACAR1(r13)
-	/*
-	 * Check what idle state this CPU was in and go back to same mode
-	 * again.
-	 */
-	lbz	r3,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	r3,PNV_THREAD_NAP
-	bgt	10f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
-	/* No return */
-10:
-	cmpwi	r3,PNV_THREAD_SLEEP
-	bgt	2f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
-	/* No return */
-
-2:
-	/*
-	 * Go back to winkle. Please note that this thread was woken up in
-	 * machine check from winkle and have not restored the per-subcore
-	 * state. Hence before going back to winkle, set last bit of HSPRG0
-	 * to 1. This will make sure that if this thread gets woken up
-	 * again at reset vector 0x100 then it will get chance to restore
-	 * the subcore state.
-	 */
-	ori	r13,r13,1
-	SET_PACA(r13)
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-	/* No return */
+	BEGIN_FTR_SECTION
+	rlwinm.	r11,r12,47-31,30,31
+	beq-	4f
+	BRANCH_TO_COMMON(r10, machine_check_idle_common)
 4:
+	END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
+
 	/*
 	 * Check if we are coming from hypervisor userspace. If yes then we
 	 * continue in host kernel in V mode to deliver the MC event.
@@ -968,17 +961,12 @@ EXC_VIRT_NONE(0x4e60, 0x20)
 TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
 TRAMP_REAL_BEGIN(hmi_exception_early)
 	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60)
-	mr	r10,r1			/* Save r1			*/
-	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack		*/
+	mr	r10,r1			/* Save r1 */
+	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack for realmode */
 	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame		*/
-	std	r9,_CCR(r1)		/* save CR in stackframe	*/
 	mfspr	r11,SPRN_HSRR0		/* Save HSRR0 */
-	std	r11,_NIP(r1)		/* save HSRR0 in stackframe	*/
-	mfspr	r12,SPRN_HSRR1		/* Save SRR1 */
-	std	r12,_MSR(r1)		/* save SRR1 in stackframe	*/
-	std	r10,0(r1)		/* make stack chain pointer	*/
-	std	r0,GPR0(r1)		/* save r0 in stackframe	*/
-	std	r10,GPR1(r1)		/* save r1 in stackframe	*/
+	mfspr	r12,SPRN_HSRR1		/* Save HSRR1 */
+	EXCEPTION_PROLOG_COMMON_1()
 	EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
 	EXCEPTION_PROLOG_COMMON_3(0xe60)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
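The system reset path above is easier to see in pseudo-C: the per-CPU NMI nesting count is bumped before MSR_RI is re-enabled, so a nested NMI that arrives while the dedicated stack is live can detect re-entry instead of silently corrupting it. A hedged restatement (these helpers are illustrative, not real kernel functions):

    /* Pseudo-C for the assembly above. */
    paca->in_nmi++;          /* a nested NMI sees in_nmi > 1 and won't reuse the stack */
    enable_msr_ri();         /* from here on, SLB/MCE faults are recoverable */
    r1 = paca->nmi_emergency_sp - INT_FRAME_SIZE;
    system_reset_exception(regs);
    paca->in_nmi--;          /* NMI stack no longer in use */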
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8ff0dd4e77a7..243dbef7e926 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -30,17 +30,16 @@
 #include <linux/string.h>
 #include <linux/memblock.h>
 #include <linux/delay.h>
-#include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/crash_dump.h>
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
 
+#include <asm/debugfs.h>
 #include <asm/page.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/fadump.h>
-#include <asm/debug.h>
 #include <asm/setup.h>
 
 static struct fw_dump fw_dump;
@@ -319,15 +318,34 @@ int __init fadump_reserve_mem(void)
 		pr_debug("fadumphdr_addr = %p\n",
 				(void *) fw_dump.fadumphdr_addr);
 	} else {
-		/* Reserve the memory at the top of memory. */
 		size = get_fadump_area_size();
-		base = memory_boundary - size;
-		memblock_reserve(base, size);
-		printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
-				"for firmware-assisted dump\n",
-				(unsigned long)(size >> 20),
-				(unsigned long)(base >> 20));
+
+		/*
+		 * Reserve memory at an offset closer to bottom of the RAM to
+		 * minimize the impact of memory hot-remove operation. We can't
+		 * use memblock_find_in_range() here since it doesn't allocate
+		 * from bottom to top.
+		 */
+		for (base = fw_dump.boot_memory_size;
+		     base <= (memory_boundary - size);
+		     base += size) {
+			if (memblock_is_region_memory(base, size) &&
+			    !memblock_is_region_reserved(base, size))
+				break;
+		}
+		if ((base > (memory_boundary - size)) ||
+		    memblock_reserve(base, size)) {
+			pr_err("Failed to reserve memory\n");
+			return 0;
+		}
+
+		pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
+			"assisted dump (System RAM: %ldMB)\n",
+			(unsigned long)(size >> 20),
+			(unsigned long)(base >> 20),
+			(unsigned long)(memblock_phys_mem_size() >> 20));
 	}
+
 	fw_dump.reserve_dump_area_start = base;
 	fw_dump.reserve_dump_area_size = size;
 	return 1;
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 1607be7c0ef2..e22734278458 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -735,11 +735,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 	EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
 	EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
 	EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
-	EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE)
-
-	.globl mol_trampoline
-	.set mol_trampoline, i0x2f00
-	EXPORT_SYMBOL(mol_trampoline)
+	EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE)
 
 	. = 0x3000
 
@@ -1278,16 +1274,6 @@ EXPORT_SYMBOL(empty_zero_page)
 swapper_pg_dir:
 	.space	PGD_TABLE_SIZE
 
-	.globl	intercept_table
-intercept_table:
-	.long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700
-	.long i0x800, 0, 0, 0, 0, i0xd00, 0, 0
-	.long 0, 0, 0, i0x1300, 0, 0, 0, 0
-	.long 0, 0, 0, 0, 0, 0, 0, 0
-	.long 0, 0, 0, 0, 0, 0, 0, 0
-	.long 0, 0, 0, 0, 0, 0, 0, 0
-EXPORT_SYMBOL(intercept_table)
-
 /* Room for two PTE pointers, usually the kernel and current user pointers
  * to their respective root page table.
  */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1dc5eae2ced3..0ddc602b33a4 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -949,7 +949,8 @@ start_here_multiplatform:
 	LOAD_REG_ADDR(r3,init_thread_union)
 
 	/* set up a stack pointer */
-	addi	r1,r3,THREAD_SIZE
+	LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
+	add	r1,r3,r1
 	li	r0,0
 	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 6fd08219248d..07d4e0ad60db 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -20,6 +20,7 @@
 #include <asm/kvm_book3s_asm.h>
 #include <asm/opal.h>
 #include <asm/cpuidle.h>
+#include <asm/exception-64s.h>
 #include <asm/book3s/64/mmu-hash.h>
 #include <asm/mmu.h>
 
@@ -94,12 +95,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 core_idle_lock_held:
 	HMT_LOW
 3:	lwz	r15,0(r14)
-	andi.	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	andis.	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	bne	3b
 	HMT_MEDIUM
 	lwarx	r15,0,r14
-	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bne	core_idle_lock_held
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bne-	core_idle_lock_held
 	blr
 
 /*
@@ -113,7 +114,7 @@ core_idle_lock_held:
  *
  * Address to 'rfid' to in r5
  */
-_GLOBAL(pnv_powersave_common)
+pnv_powersave_common:
 	/* Use r3 to pass state nap/sleep/winkle */
 	/* NAP is a state loss, we create a regs frame on the
 	 * stack, fill it up with the state we care about and
@@ -188,8 +189,8 @@ pnv_enter_arch207_idle_mode:
 	/* The following store to HSTATE_HWTHREAD_STATE(r13)    */
 	/* MUST occur in real mode, i.e. with the MMU off,      */
 	/* and the MMU must stay off until we clear this flag   */
-	/* and test HSTATE_HWTHREAD_REQ(r13) in the system      */
-	/* reset interrupt vector in exceptions-64s.S.          */
+	/* and test HSTATE_HWTHREAD_REQ(r13) in                 */
+	/* pnv_powersave_wakeup in this file.                   */
 	/* The reason is that another thread can switch the     */
 	/* MMU to a guest context whenever this flag is set     */
 	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,      */
@@ -209,15 +210,20 @@ pnv_enter_arch207_idle_mode:
 	/* Sleep or winkle */
 	lbz	r7,PACA_THREAD_MASK(r13)
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+	li	r5,0
+	beq	cr3,3f
+	lis	r5,PNV_CORE_IDLE_WINKLE_COUNT@h
+3:
 lwarx_loop1:
 	lwarx	r15,0,r14
 
-	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bnel	core_idle_lock_held
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bnel-	core_idle_lock_held
 
+	add	r15,r15,r5			/* Add if winkle */
 	andc	r15,r15,r7			/* Clear thread bit */
 
-	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
 
 /*
  * If cr0 = 0, then current thread is the last thread of the core entering
@@ -240,7 +246,7 @@ common_enter: /* common code for all the threads entering sleep or winkle */
 	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
 
 fastsleep_workaround_at_entry:
-	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	stwcx.	r15,0,r14
 	bne-	lwarx_loop1
 	isync
@@ -250,10 +256,10 @@ fastsleep_workaround_at_entry:
 	li	r4,1
 	bl	opal_config_cpu_idle_state
 
-	/* Clear Lock bit */
-	li	r0,0
+	/* Unlock */
+	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	lwsync
-	stw	r0,0(r14)
+	stw	r15,0(r14)
 	b	common_enter
 
 enter_winkle:
@@ -301,8 +307,8 @@ power_enter_stop:
 
 lwarx_loop_stop:
 	lwarx	r15,0,r14
-	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bnel	core_idle_lock_held
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bnel-	core_idle_lock_held
 	andc	r15,r15,r7			/* Clear thread bit */
 
 	stwcx.	r15,0,r14
@@ -375,17 +381,113 @@ _GLOBAL(power9_idle_stop)
 	li	r4,1
 	b	pnv_powersave_common
 	/* No return */
+
 /*
- * Called from reset vector. Check whether we have woken up with
- * hypervisor state loss. If yes, restore hypervisor state and return
- * back to reset vector.
+ * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
+ * HSPRG0 will be set to the HSPRG0 value of one of the
+ * threads in this core. Thus the value we have in r13
+ * may not be this thread's paca pointer.
+ *
+ * Fortunately, the TIR remains invariant. Since this thread's
+ * paca pointer is recorded in all its sibling's paca, we can
+ * correctly recover this thread's paca pointer if we
+ * know the index of this thread in the core.
+ *
+ * This index can be obtained from the TIR.
  *
- * r13 - Contents of HSPRG0
+ * i.e, thread's position in the core = TIR.
+ * If this value is i, then this thread's paca is
+ * paca->thread_sibling_pacas[i].
+ */
+power9_dd1_recover_paca:
+	mfspr	r4, SPRN_TIR
+	/*
+	 * Since each entry in thread_sibling_pacas is 8 bytes
+	 * we need to left-shift by 3 bits. Thus r4 = i * 8
+	 */
+	sldi	r4, r4, 3
+	/* Get &paca->thread_sibling_pacas[0] in r5 */
+	ld	r5, PACA_SIBLING_PACA_PTRS(r13)
+	/* Load paca->thread_sibling_pacas[i] into r13 */
+	ldx	r13, r4, r5
+	SET_PACA(r13)
+	/*
+	 * Indicate that we have lost NVGPR state
+	 * which needs to be restored from the stack.
+	 */
+	li	r3, 1
+	stb	r0,PACA_NAPSTATELOST(r13)
+	blr
+
+/*
+ * Called from machine check handler for powersave wakeups.
+ * Low level machine check processing has already been done. Now just
+ * go through the wake up path to get everything in order.
+ *
+ * r3 - The original SRR1 value.
+ * Original SRR[01] have been clobbered.
+ * MSR_RI is clear.
+ */
+.global pnv_powersave_wakeup_mce
+pnv_powersave_wakeup_mce:
+	/* Set cr3 for pnv_powersave_wakeup */
+	rlwinm	r11,r3,47-31,30,31
+	cmpwi	cr3,r11,2
+
+	/*
+	 * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
+	 * reason into SRR1, which allows reuse of the system reset wakeup
+	 * code without being mistaken for another type of wakeup.
+	 */
+	oris	r3,r3,SRR1_WAKEMCE_RESVD@h
+	mtspr	SPRN_SRR1,r3
+
+	b	pnv_powersave_wakeup
+
+/*
+ * Called from reset vector for powersave wakeups.
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
-_GLOBAL(pnv_restore_hyp_resource)
+.global pnv_powersave_wakeup
+pnv_powersave_wakeup:
+	ld	r2, PACATOC(r13)
+
 BEGIN_FTR_SECTION
-	ld	r2,PACATOC(r13);
+BEGIN_FTR_SECTION_NESTED(70)
+	bl	power9_dd1_recover_paca
+END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
+	bl	pnv_restore_hyp_resource_arch300
+FTR_SECTION_ELSE
+	bl	pnv_restore_hyp_resource_arch207
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
+
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beq	1f
+	b	kvm_start_guest
+1:
+#endif
+
+	/* Return SRR1 from power7_nap() */
+	mfspr	r3,SPRN_SRR1
+	blt	cr3,pnv_wakeup_noloss
+	b	pnv_wakeup_loss
+
+/*
+ * Check whether we have woken up with hypervisor state loss.
+ * If yes, restore hypervisor state and return back to link.
+ *
+ * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ */
+pnv_restore_hyp_resource_arch300:
 	/*
 	 * POWER ISA 3. Use PSSCR to determine if we
 	 * are waking up from deep idle state
@@ -400,31 +502,19 @@ BEGIN_FTR_SECTION
 	 */
 	rldicl	r5,r5,4,60
 	cmpd	cr4,r5,r4
-	bge	cr4,pnv_wakeup_tb_loss
-	/*
-	 * Waking up without hypervisor state loss. Return to
-	 * reset vector
-	 */
-	blr
+	bge	cr4,pnv_wakeup_tb_loss /* returns to caller */
 
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	blr	/* Waking up without hypervisor state loss. */
 
+/* Same calling convention as arch300 */
+pnv_restore_hyp_resource_arch207:
 	/*
 	 * POWER ISA 2.07 or less.
-	 * Check if last bit of HSPGR0 is set. This indicates whether we are
-	 * waking up from winkle.
+	 * Check if we slept with sleep or winkle.
 	 */
-	clrldi	r5,r13,63
-	clrrdi	r13,r13,1
-
-	/* Now that we are sure r13 is corrected, load TOC */
-	ld	r2,PACATOC(r13);
-	cmpwi	cr4,r5,1
-	mtspr	SPRN_HSPRG0,r13
-
-	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	cr2,r0,PNV_THREAD_NAP
-	bgt	cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
+	lbz	r4,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr2,r4,PNV_THREAD_NAP
+	bgt	cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
 
 	/*
 	 * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
@@ -433,8 +523,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	 */
 	bgt	cr3,.
 
-	blr	/* Return back to System Reset vector from where
-		   pnv_restore_hyp_resource was invoked */
+	blr	/* Waking up without hypervisor state loss */
 
 /*
  * Called if waking up from idle state which can cause either partial or
@@ -444,9 +533,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  *
  * r13 - PACA
  * cr3 - gt if waking up with partial/complete hypervisor state loss
+ *
+ * If ISA300:
  * cr4 - gt or eq if waking up from complete hypervisor state loss.
+ *
+ * If ISA207:
+ * r4 - PACA_THREAD_IDLE_STATE
  */
-_GLOBAL(pnv_wakeup_tb_loss)
+pnv_wakeup_tb_loss:
 	ld	r1,PACAR1(r13)
 	/*
 	 * Before entering any idle state, the NVGPRs are saved in the stack.
@@ -473,18 +567,19 @@ _GLOBAL(pnv_wakeup_tb_loss)
 	 * is required to return back to reset vector after hypervisor state
 	 * restore is complete.
 	 */
+	mr	r18,r4
 	mflr	r17
 	mfspr	r16,SPRN_SRR1
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 
-	lbz	r7,PACA_THREAD_MASK(r13)
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
-lwarx_loop2:
-	lwarx	r15,0,r14
-	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	lbz	r7,PACA_THREAD_MASK(r13)
+
 	/*
+	 * Take the core lock to synchronize against other threads.
+	 *
 	 * Lock bit is set in one of the 2 cases-
 	 * a. In the sleep/winkle enter path, the last thread is executing
 	 * fastsleep workaround code.
@@ -492,23 +587,93 @@ lwarx_loop2:
 	 * workaround undo code or resyncing timebase or restoring context
 	 * In either case loop until the lock bit is cleared.
 	 */
-	bnel	core_idle_lock_held
+1:
+	lwarx	r15,0,r14
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bnel-	core_idle_lock_held
+	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	stwcx.	r15,0,r14
+	bne-	1b
+	isync
 
-	cmpwi	cr2,r15,0
+	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
+	cmpwi	cr2,r9,0
 
 	/*
 	 * At this stage
 	 * cr2 - eq if first thread to wakeup in core
 	 * cr3-  gt if waking up with partial/complete hypervisor state loss
+	 * ISA300:
 	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
 	 */
 
-	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
-	stwcx.	r15,0,r14
-	bne-	lwarx_loop2
-	isync
-
 BEGIN_FTR_SECTION
+	/*
+	 * Were we in winkle?
+	 * If yes, check if all threads were in winkle, decrement our
+	 * winkle count, set all thread winkle bits if all were in winkle.
+	 * Check if our thread has a winkle bit set, and set cr4 accordingly
+	 * (to match ISA300, above). Pseudo-code for core idle state
	 * transitions for ISA207 is as follows (everything happens atomically
+	 * due to store conditional and/or lock bit):
+	 *
+	 * nap_idle() { }
+	 * nap_wake() { }
+	 *
+	 * sleep_idle()
+	 * {
+	 *	core_idle_state &= ~thread_in_core
+	 * }
+	 *
+	 * sleep_wake()
+	 * {
+	 *	bool first_in_core, first_in_subcore;
+	 *
+	 *	first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
+	 *	first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
+	 *
+	 *	core_idle_state |= thread_in_core;
+	 * }
+	 *
+	 * winkle_idle()
+	 * {
+	 *	core_idle_state &= ~thread_in_core;
+	 *	core_idle_state += 1 << WINKLE_COUNT_SHIFT;
+	 * }
+	 *
+	 * winkle_wake()
+	 * {
+	 *	bool first_in_core, first_in_subcore, winkle_state_lost;
+	 *
+	 *	first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
+	 *	first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
+	 *
+	 *	core_idle_state |= thread_in_core;
+	 *
+	 *	if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT))
+	 *		core_idle_state |= THREAD_WINKLE_BITS;
+	 *	core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
+	 *
+	 *	winkle_state_lost = core_idle_state &
+	 *				(thread_in_core << WINKLE_THREAD_SHIFT);
+	 *	core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
+	 * }
+	 *
+	 */
+	cmpwi	r18,PNV_THREAD_WINKLE
+	bne	2f
+	andis.	r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
+	subis	r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
+	beq	2f
+	ori	r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
+2:
+	/* Shift thread bit to winkle mask, then test if this thread is set,
+	 * and remove it from the winkle bits */
+	slwi	r8,r7,8
+	and	r8,r8,r15
+	andc	r15,r15,r8
+	cmpwi	cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
+
 	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
 	and	r4,r4,r15
 	cmpwi	r4,0	/* Check if first in subcore */
@@ -593,7 +758,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mtspr	SPRN_WORC,r4
 
 clear_lock:
-	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	lwsync
 	stw	r15,0(r14)
 
@@ -651,8 +816,7 @@ hypervisor_state_restored:
 
 	mtspr	SPRN_SRR1,r16
 	mtlr	r17
-	blr	/* Return back to System Reset vector from where
-		   pnv_restore_hyp_resource was invoked */
+	blr		/* return to pnv_powersave_wakeup */
 
 fastsleep_workaround_at_exit:
 	li	r3,1
@@ -664,7 +828,8 @@ fastsleep_workaround_at_exit:
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
  */
-_GLOBAL(pnv_wakeup_loss)
+.global pnv_wakeup_loss
+pnv_wakeup_loss:
 	ld	r1,PACAR1(r13)
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
@@ -684,7 +849,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
  */
-_GLOBAL(pnv_wakeup_noloss)
+pnv_wakeup_noloss:
 	lbz	r0,PACA_NAPSTATELOST(r13)
 	cmpwi	r0,0
 	bne	pnv_wakeup_loss
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 5f202a566ec5..5a3231fedf08 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 	return tbl;
 }
 
-void iommu_free_table(struct iommu_table *tbl, const char *node_name)
+static void iommu_table_free(struct kref *kref)
 {
 	unsigned long bitmap_sz;
 	unsigned int order;
+	struct iommu_table *tbl;
 
-	if (!tbl)
-		return;
+	tbl = container_of(kref, struct iommu_table, it_kref);
+
+	if (tbl->it_ops->free)
+		tbl->it_ops->free(tbl);
 
 	if (!tbl->it_map) {
 		kfree(tbl);
@@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
 
 	/* verify that table contains no entries */
 	if (!bitmap_empty(tbl->it_map, tbl->it_size))
-		pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
+		pr_warn("%s: Unexpected TCEs\n", __func__);
 
 	/* calculate bitmap size in bytes */
 	bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
@@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
 	kfree(tbl);
 }
 
+struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
+{
+	if (kref_get_unless_zero(&tbl->it_kref))
+		return tbl;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_get);
+
+int iommu_tce_table_put(struct iommu_table *tbl)
+{
+	if (WARN_ON(!tbl))
+		return 0;
+
+	return kref_put(&tbl->it_kref, iommu_table_free);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_put);
+
 /* Creates TCEs for a user provided buffer.  The user buffer must be
  * contiguous real kernel storage (not vmalloc).  The address passed here
  * comprises a page address and offset into that page. The dma_addr_t
@@ -1004,6 +1025,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_xchg);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+		unsigned long *hpa, enum dma_data_direction *direction)
+{
+	long ret;
+
+	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+
+	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+			(*direction == DMA_BIDIRECTIONAL))) {
+		struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
+
+		if (likely(pg)) {
+			SetPageDirty(pg);
+		} else {
+			tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+			ret = -EFAULT;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
+#endif
+
 int iommu_take_ownership(struct iommu_table *tbl)
 {
 	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index a018f5cae899..5c291df30fe3 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -65,7 +65,6 @@
 #include <asm/machdep.h>
 #include <asm/udbg.h>
 #include <asm/smp.h>
-#include <asm/debug.h>
 #include <asm/livepatch.h>
 #include <asm/asm-prototypes.h>
 
@@ -442,46 +441,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	return sum;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-void migrate_irqs(void)
-{
-	struct irq_desc *desc;
-	unsigned int irq;
-	static int warned;
-	cpumask_var_t mask;
-	const struct cpumask *map = cpu_online_mask;
-
-	alloc_cpumask_var(&mask, GFP_KERNEL);
-
-	for_each_irq_desc(irq, desc) {
-		struct irq_data *data;
-		struct irq_chip *chip;
-
-		data = irq_desc_get_irq_data(desc);
-		if (irqd_is_per_cpu(data))
-			continue;
-
-		chip = irq_data_get_irq_chip(data);
-
-		cpumask_and(mask, irq_data_get_affinity_mask(data), map);
-		if (cpumask_any(mask) >= nr_cpu_ids) {
-			pr_warn("Breaking affinity for irq %i\n", irq);
-			cpumask_copy(mask, map);
-		}
-		if (chip->irq_set_affinity)
-			chip->irq_set_affinity(data, mask, true);
-		else if (desc->action && !(warned++))
-			pr_err("Cannot set affinity for irq %i\n", irq);
-	}
-
-	free_cpumask_var(mask);
-
-	local_irq_enable();
-	mdelay(1);
-	local_irq_disable();
-}
-#endif
-
 static inline void check_stack_overflow(void)
 {
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
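The iommu.c change above converts iommu_free_table()'s single-owner lifetime into kref-based reference counting: the table is freed via iommu_table_free() only when the last iommu_tce_table_put() drops the count to zero. A hedged usage sketch (the surrounding names are hypothetical):

    struct iommu_table *tbl = iommu_tce_table_get(group_tbl); /* NULL if already dying */
    if (tbl) {
            /* ... use the table ... */
            iommu_tce_table_put(tbl);   /* frees it if this was the last reference */
    }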
Rao <naveen.n.rao@linux.vnet.ibm.com> + * IBM Corporation + */ +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/hardirq.h> +#include <linux/preempt.h> +#include <linux/ftrace.h> + +static nokprobe_inline +int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, unsigned long orig_nip) +{ + /* + * Emulate singlestep (and also recover regs->nip) + * as if there is a nop + */ + regs->nip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); + if (orig_nip) + regs->nip = orig_nip; + return 1; +} + +int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + if (kprobe_ftrace(p)) + return __skip_singlestep(p, regs, kcb, 0); + else + return 0; +} +NOKPROBE_SYMBOL(skip_singlestep); + +/* Ftrace callback handler for kprobes */ +void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + hard_irq_disable(); + + p = get_kprobe((kprobe_opcode_t *)nip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + unsigned long orig_nip = regs->nip; + + /* + * On powerpc, NIP is *before* this instruction for the + * pre handler + */ + regs->nip -= MCOUNT_INSN_SIZE; + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + __skip_singlestep(p, regs, kcb, orig_nip); + /* + * If pre_handler returns !0, it sets regs->nip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} +NOKPROBE_SYMBOL(kprobe_ftrace_handler); + +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index fce05a38851c..160ae0fa7d0d 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -35,6 +35,7 @@ #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> +#include <asm/sections.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -42,7 +43,86 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; -int __kprobes arch_prepare_kprobe(struct kprobe *p) +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + return (addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)_stext && + addr < (unsigned long)__head_end); +} + +kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) +{ + kprobe_opcode_t *addr; + +#ifdef PPC64_ELF_ABI_v2 + /* PPC64 ABIv2 needs local entry point */ + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); + if (addr && !offset) { +#ifdef CONFIG_KPROBES_ON_FTRACE + unsigned long faddr; + /* + * Per livepatch.h, ftrace location is always within the first + * 16 bytes of a function on powerpc with -mprofile-kernel. 
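As a usage illustration (not part of the patch): with kprobe_lookup_name() resolving the local entry point or the ftrace site as above, a by-name kprobe needs no manual offset handling. "vfs_read" and my_pre() are placeholder choices:

	static int my_pre(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("hit %pS\n", (void *)regs->nip);
		return 0;	/* continue with emulation/single-step */
	}

	static struct kprobe kp = {
		.symbol_name	= "vfs_read",	/* resolved via kprobe_lookup_name() */
		.pre_handler	= my_pre,
	};

	/* register_kprobe(&kp) then probes the local entry point on ABIv2,
	 * or the ftrace location when KPROBES_ON_FTRACE applies. */
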
+ */ + faddr = ftrace_location_range((unsigned long)addr, + (unsigned long)addr + 16); + if (faddr) + addr = (kprobe_opcode_t *)faddr; + else +#endif + addr = (kprobe_opcode_t *)ppc_function_entry(addr); + } +#elif defined(PPC64_ELF_ABI_v1) + /* + * 64bit powerpc ABIv1 uses function descriptors: + * - Check for the dot variant of the symbol first. + * - If that fails, try looking up the symbol provided. + * + * This ensures we always get to the actual symbol and not + * the descriptor. + * + * Also handle <module:symbol> format. + */ + char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN]; + const char *modsym; + bool dot_appended = false; + if ((modsym = strchr(name, ':')) != NULL) { + modsym++; + if (*modsym != '\0' && *modsym != '.') { + /* Convert to <module:.symbol> */ + strncpy(dot_name, name, modsym - name); + dot_name[modsym - name] = '.'; + dot_name[modsym - name + 1] = '\0'; + strncat(dot_name, modsym, + sizeof(dot_name) - (modsym - name) - 2); + dot_appended = true; + } else { + dot_name[0] = '\0'; + strncat(dot_name, name, sizeof(dot_name) - 1); + } + } else if (name[0] != '.') { + dot_name[0] = '.'; + dot_name[1] = '\0'; + strncat(dot_name, name, KSYM_NAME_LEN - 2); + dot_appended = true; + } else { + dot_name[0] = '\0'; + strncat(dot_name, name, KSYM_NAME_LEN - 1); + } + addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name); + if (!addr && dot_appended) { + /* Let's try the original non-dot symbol lookup */ + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); + } +#else + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); +#endif + + return addr; +} + +int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; kprobe_opcode_t insn = *p->addr; @@ -74,30 +154,34 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) p->ainsn.boostable = 0; return ret; } +NOKPROBE_SYMBOL(arch_prepare_kprobe); -void __kprobes arch_arm_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } +NOKPROBE_SYMBOL(arch_arm_kprobe); -void __kprobes arch_disarm_kprobe(struct kprobe *p) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } +NOKPROBE_SYMBOL(arch_disarm_kprobe); -void __kprobes arch_remove_kprobe(struct kprobe *p) +void arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { free_insn_slot(p->ainsn.insn, 0); p->ainsn.insn = NULL; } } +NOKPROBE_SYMBOL(arch_remove_kprobe); -static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { enable_single_step(regs); @@ -110,37 +194,80 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->nip = (unsigned long)p->ainsn.insn; } -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr; } -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } -static void __kprobes 
set_current_kprobe(struct kprobe *p, struct pt_regs *regs, +static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_msr = regs->msr; } -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) +bool arch_function_offset_within_entry(unsigned long offset) +{ +#ifdef PPC64_ELF_ABI_v2 +#ifdef CONFIG_KPROBES_ON_FTRACE + return offset <= 16; +#else + return offset <= 8; +#endif +#else + return !offset; +#endif +} + +void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->link; /* Replace the return addr with trampoline addr */ regs->link = (unsigned long)kretprobe_trampoline; } +NOKPROBE_SYMBOL(arch_prepare_kretprobe); -int __kprobes kprobe_handler(struct pt_regs *regs) +int try_to_emulate(struct kprobe *p, struct pt_regs *regs) +{ + int ret; + unsigned int insn = *p->ainsn.insn; + + /* regs->nip is also adjusted if emulate_step returns 1 */ + ret = emulate_step(regs, insn); + if (ret > 0) { + /* + * Once this instruction has been boosted + * successfully, set the boostable flag + */ + if (unlikely(p->ainsn.boostable == 0)) + p->ainsn.boostable = 1; + } else if (ret < 0) { + /* + * We don't allow kprobes on mtmsr(d)/rfi(d), etc. + * So, we should never get here... but, its still + * good to catch them, just in case... + */ + printk("Can't step on instruction %x\n", insn); + BUG(); + } else if (ret == 0) + /* This instruction can't be boosted */ + p->ainsn.boostable = -1; + + return ret; +} +NOKPROBE_SYMBOL(try_to_emulate); + +int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; @@ -177,10 +304,17 @@ int __kprobes kprobe_handler(struct pt_regs *regs) */ save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); - kcb->kprobe_saved_msr = regs->msr; kprobes_inc_nmissed_count(p); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; + if (p->ainsn.boostable >= 0) { + ret = try_to_emulate(p, regs); + + if (ret > 0) { + restore_previous_kprobe(kcb); + return 1; + } + } return 1; } else { if (*addr != BREAKPOINT_INSTRUCTION) { @@ -197,7 +331,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs) } p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { - goto ss_probe; + if (!skip_singlestep(p, regs, kcb)) + goto ss_probe; + ret = 1; } } goto no_kprobe; @@ -235,18 +371,9 @@ int __kprobes kprobe_handler(struct pt_regs *regs) ss_probe: if (p->ainsn.boostable >= 0) { - unsigned int insn = *p->ainsn.insn; + ret = try_to_emulate(p, regs); - /* regs->nip is also adjusted if emulate_step returns 1 */ - ret = emulate_step(regs, insn); if (ret > 0) { - /* - * Once this instruction has been boosted - * successfully, set the boostable flag - */ - if (unlikely(p->ainsn.boostable == 0)) - p->ainsn.boostable = 1; - if (p->post_handler) p->post_handler(p, regs, 0); @@ -254,17 +381,7 @@ ss_probe: reset_current_kprobe(); preempt_enable_no_resched(); return 1; - } else if (ret < 0) { - /* - * We don't allow kprobes on mtmsr(d)/rfi(d), etc. - * So, we should never get here... but, its still - * good to catch them, just in case... 
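To make the factored-out contract explicit: try_to_emulate() returns >0 when emulate_step() emulated the instruction (regs->nip already advanced), 0 when the instruction cannot be boosted and must be single-stepped, and it never returns on <0 (BUG). A condensed restatement of how the handler consumes it, with kcb status bookkeeping elided:

	if (p->ainsn.boostable >= 0) {
		ret = try_to_emulate(p, regs);
		if (ret > 0) {
			/* emulated: no trap needed, probe is "boosted" */
			reset_current_kprobe();
			preempt_enable_no_resched();
			return 1;
		}
		/* ret == 0: now marked non-boostable, fall through */
	}
	prepare_singlestep(p, regs);	/* single-step the copied instruction */
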
- */ - printk("Can't step on instruction %x\n", insn); - BUG(); - } else if (ret == 0) - /* This instruction can't be boosted */ - p->ainsn.boostable = -1; + } } prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; @@ -274,6 +391,7 @@ no_kprobe: preempt_enable_no_resched(); return ret; } +NOKPROBE_SYMBOL(kprobe_handler); /* * Function return probe trampoline: @@ -291,8 +409,7 @@ asm(".global kretprobe_trampoline\n" /* * Called when the probe at kretprobe trampoline is hit */ -static int __kprobes trampoline_probe_handler(struct kprobe *p, - struct pt_regs *regs) +static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; @@ -361,6 +478,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, */ return 1; } +NOKPROBE_SYMBOL(trampoline_probe_handler); /* * Called after single-stepping. p->addr is the address of the @@ -370,7 +488,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. */ -int __kprobes kprobe_post_handler(struct pt_regs *regs) +int kprobe_post_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -410,8 +528,9 @@ out: return 1; } +NOKPROBE_SYMBOL(kprobe_post_handler); -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -474,13 +593,15 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) } return 0; } +NOKPROBE_SYMBOL(kprobe_fault_handler); unsigned long arch_deref_entry_point(void *entry) { return ppc_global_function_entry(entry); } +NOKPROBE_SYMBOL(arch_deref_entry_point); -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -497,17 +618,20 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } +NOKPROBE_SYMBOL(setjmp_pre_handler); -void __used __kprobes jprobe_return(void) +void __used jprobe_return(void) { asm volatile("trap" ::: "memory"); } +NOKPROBE_SYMBOL(jprobe_return); -static void __used __kprobes jprobe_return_end(void) +static void __used jprobe_return_end(void) { -}; +} +NOKPROBE_SYMBOL(jprobe_return_end); -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -520,6 +644,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) preempt_enable_no_resched(); return 1; } +NOKPROBE_SYMBOL(longjmp_break_handler); static struct kprobe trampoline_p = { .addr = (kprobe_opcode_t *) &kretprobe_trampoline, @@ -531,10 +656,11 @@ int __init arch_init_kprobes(void) return register_kprobe(&trampoline_p); } -int __kprobes arch_trampoline_kprobe(struct kprobe *p) +int arch_trampoline_kprobe(struct kprobe *p) { if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) return 1; return 0; } +NOKPROBE_SYMBOL(arch_trampoline_kprobe); diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a1475e6aef3a..5f9eada3519b 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -221,6 +221,8 @@ static 
void machine_check_process_queued_event(struct irq_work *work) { int index; + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + /* * For now just print it to console. * TODO: log this error event to FSP or nvram. @@ -228,12 +230,13 @@ static void machine_check_process_queued_event(struct irq_work *work) while (__this_cpu_read(mce_queue_count) > 0) { index = __this_cpu_read(mce_queue_count) - 1; machine_check_print_event_info( - this_cpu_ptr(&mce_event_queue[index])); + this_cpu_ptr(&mce_event_queue[index]), false); __this_cpu_dec(mce_queue_count); } } -void machine_check_print_event_info(struct machine_check_event *evt) +void machine_check_print_event_info(struct machine_check_event *evt, + bool user_mode) { const char *level, *sevstr, *subtype; static const char *mc_ue_types[] = { @@ -310,7 +313,16 @@ void machine_check_print_event_info(struct machine_check_event *evt) printk("%s%s Machine check interrupt [%s]\n", level, sevstr, evt->disposition == MCE_DISPOSITION_RECOVERED ? - "Recovered" : "[Not recovered"); + "Recovered" : "Not recovered"); + + if (user_mode) { + printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, + evt->srr0, current->pid, current->comm); + } else { + printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, + (void *)evt->srr0); + } + printk("%s Initiator: %s\n", level, evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); switch (evt->error_type) { diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 763d6f58caa8..f913139bb0c2 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -72,10 +72,14 @@ void __flush_tlb_power8(unsigned int action) void __flush_tlb_power9(unsigned int action) { + unsigned int num_sets; + if (radix_enabled()) - flush_tlb_206(POWER9_TLB_SETS_RADIX, action); + num_sets = POWER9_TLB_SETS_RADIX; + else + num_sets = POWER9_TLB_SETS_HASH; - flush_tlb_206(POWER9_TLB_SETS_HASH, action); + flush_tlb_206(num_sets, action); } @@ -147,159 +151,365 @@ static int mce_flush(int what) return 0; } -static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat) -{ - if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB)) - dsisr &= ~slb; - if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT)) - dsisr &= ~erat; - if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB)) - dsisr &= ~tlb; - /* Any other errors we don't understand? 
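For reference, the reworked report earlier in this hunk now prints one of two NIP lines depending on where the machine check hit. With entirely made-up example values, the console output looks roughly like:

	Severe Machine check interrupt [Recovered]
	  NIP: [0000000010012345] PID: 4242 Comm: myapp       (user mode)
	  NIP [c000000000abc123]: some_function+0x43/0x220    (kernel mode)
	  Initiator: CPU
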
*/ - if (dsisr) - return 0; - return 1; -} - -static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + +struct mce_ierror_table { + unsigned long srr1_mask; + unsigned long srr1_value; + bool nip_valid; /* nip is a valid indicator of faulting address */ + unsigned int error_type; + unsigned int error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct mce_ierror_table mce_p7_ierror_table[] = { +{ 0x00000000001c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000001c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, 0, 0, 0, 0, 0 } }; + +static const struct mce_ierror_table mce_p8_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008000000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008040000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, 0, 0, 0, 0, 0 } }; + +static const struct mce_ierror_table mce_p9_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, 
true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008000000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008040000, true, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000080c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008100000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000008140000, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x0000000008180000, false, + MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, 0, 0, 0, 0, 0 } }; + +struct mce_derror_table { + unsigned long dsisr_value; + bool dar_valid; /* dar is a valid indicator of faulting address */ + unsigned int error_type; + unsigned int error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct mce_derror_table mce_p7_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000040, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, false, 0, 0, 0, 0 } }; + +static const struct mce_derror_table mce_p8_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00002000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00001000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000200, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, 
MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, false, 0, 0, 0, 0 } }; + +static const struct mce_derror_table mce_p9_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00002000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00001000, true, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000200, false, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000040, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000020, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000010, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000008, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0, false, 0, 0, 0, 0 } }; + +static int mce_handle_ierror(struct pt_regs *regs, + const struct mce_ierror_table table[], + struct mce_error_info *mce_err, uint64_t *addr) { - long handled = 1; + uint64_t srr1 = regs->msr; + int handled = 0; + int i; + + *addr = 0; + + for (i = 0; table[i].srr1_mask; i++) { + if ((srr1 & table[i].srr1_mask) != table[i].srr1_value) + continue; + + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + handled = mce_flush(MCE_FLUSH_SLB); + break; + case MCE_ERROR_TYPE_ERAT: + handled = mce_flush(MCE_FLUSH_ERAT); + break; + case MCE_ERROR_TYPE_TLB: + handled = mce_flush(MCE_FLUSH_TLB); + break; + } - /* - * flush and reload SLBs for SLB errors and flush TLBs for TLB errors. - * reset the error bits whenever we handle them so that at the end - * we can check whether we handled all of them or not. 
- * */ -#ifdef CONFIG_PPC_STD_MMU_64 - if (dsisr & slb_error_bits) { - flush_and_reload_slb(); - /* reset error bits */ - dsisr &= ~(slb_error_bits); - } - if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); - /* reset error bits */ - dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; + /* now fill in mce_error_info */ + mce_err->error_type = table[i].error_type; + switch (table[i].error_type) { + case MCE_ERROR_TYPE_UE: + mce_err->u.ue_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_SLB: + mce_err->u.slb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_ERAT: + mce_err->u.erat_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_TLB: + mce_err->u.tlb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_USER: + mce_err->u.user_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_RA: + mce_err->u.ra_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_LINK: + mce_err->u.link_error_type = table[i].error_subtype; + break; + } + mce_err->severity = table[i].severity; + mce_err->initiator = table[i].initiator; + if (table[i].nip_valid) + *addr = regs->nip; + return handled; } -#endif - /* Any other errors we don't understand? */ - if (dsisr & 0xffffffffUL) - handled = 0; - return handled; -} + mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; -static long mce_handle_derror_p7(uint64_t dsisr) -{ - return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS); + return 0; } -static long mce_handle_common_ierror(uint64_t srr1) +static int mce_handle_derror(struct pt_regs *regs, + const struct mce_derror_table table[], + struct mce_error_info *mce_err, uint64_t *addr) { - long handled = 0; - - switch (P7_SRR1_MC_IFETCH(srr1)) { - case 0: - break; -#ifdef CONFIG_PPC_STD_MMU_64 - case P7_SRR1_MC_IFETCH_SLB_PARITY: - case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: - /* flush and reload SLBs for SLB errors. */ - flush_and_reload_slb(); - handled = 1; - break; - case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); - handled = 1; + uint64_t dsisr = regs->dsisr; + int handled = 0; + int found = 0; + int i; + + *addr = 0; + + for (i = 0; table[i].dsisr_value; i++) { + if (!(dsisr & table[i].dsisr_value)) + continue; + + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (mce_flush(MCE_FLUSH_SLB)) + handled = 1; + break; + case MCE_ERROR_TYPE_ERAT: + if (mce_flush(MCE_FLUSH_ERAT)) + handled = 1; + break; + case MCE_ERROR_TYPE_TLB: + if (mce_flush(MCE_FLUSH_TLB)) + handled = 1; + break; } - break; -#endif - default: - break; - } - return handled; -} - -static long mce_handle_ierror_p7(uint64_t srr1) -{ - long handled = 0; - - handled = mce_handle_common_ierror(srr1); + /* + * Attempt to handle multiple conditions, but only return + * one. Ensure uncorrectable errors are first in the table + * to match. 
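One point worth calling out about the two table walks (a restatement, not new behaviour): SRR1 encodes the ifetch cause in a multi-bit field, while DSISR is a set of independent status bits, several of which can be set at once. Hence the differing match styles:

	/* ierror: (mask, value) pair per row, first match wins outright */
	if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
		continue;

	/* derror: every matching row gets a chance to flush SLB/TLB/ERAT,
	 * but only the first match fills in mce_err, which is why the
	 * uncorrectable entries are listed first */
	if (!(dsisr & table[i].dsisr_value))
		continue;
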
+ */ + if (found) + continue; + + /* now fill in mce_error_info */ + mce_err->error_type = table[i].error_type; + switch (table[i].error_type) { + case MCE_ERROR_TYPE_UE: + mce_err->u.ue_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_SLB: + mce_err->u.slb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_ERAT: + mce_err->u.erat_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_TLB: + mce_err->u.tlb_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_USER: + mce_err->u.user_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_RA: + mce_err->u.ra_error_type = table[i].error_subtype; + break; + case MCE_ERROR_TYPE_LINK: + mce_err->u.link_error_type = table[i].error_subtype; + break; + } + mce_err->severity = table[i].severity; + mce_err->initiator = table[i].initiator; + if (table[i].dar_valid) + *addr = regs->dar; -#ifdef CONFIG_PPC_STD_MMU_64 - if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { - flush_and_reload_slb(); - handled = 1; + found = 1; } -#endif - return handled; -} -static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1) -{ - switch (P7_SRR1_MC_IFETCH(srr1)) { - case P7_SRR1_MC_IFETCH_SLB_PARITY: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - break; - case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - break; - case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - break; - case P7_SRR1_MC_IFETCH_UE: - case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; - break; - case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = - MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; - break; - } -} + if (found) + return handled; -static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1) -{ - mce_get_common_ierror(mce_err, srr1); - if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; - } -} + mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; -static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr) -{ - if (dsisr & P7_DSISR_MC_UE) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; - } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = - MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; - } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = 
MCE_SLB_ERROR_INDETERMINATE; - } + return 0; } static long mce_handle_ue_error(struct pt_regs *regs) @@ -320,292 +530,42 @@ static long mce_handle_ue_error(struct pt_regs *regs) return handled; } -long __machine_check_early_realmode_p7(struct pt_regs *regs) +static long mce_handle_error(struct pt_regs *regs, + const struct mce_derror_table dtable[], + const struct mce_ierror_table itable[]) { - uint64_t srr1, nip, addr; - long handled = 1; - struct mce_error_info mce_error_info = { 0 }; - - mce_error_info.severity = MCE_SEV_ERROR_SYNC; - mce_error_info.initiator = MCE_INITIATOR_CPU; - - srr1 = regs->msr; - nip = regs->nip; + struct mce_error_info mce_err = { 0 }; + uint64_t addr; + uint64_t srr1 = regs->msr; + long handled; - /* - * Handle memory errors depending whether this was a load/store or - * ifetch exception. Also, populate the mce error_type and - * type-specific error_type from either SRR1 or DSISR, depending - * whether this was a load/store or ifetch exception - */ - if (P7_SRR1_MC_LOADSTORE(srr1)) { - handled = mce_handle_derror_p7(regs->dsisr); - mce_get_derror_p7(&mce_error_info, regs->dsisr); - addr = regs->dar; - } else { - handled = mce_handle_ierror_p7(srr1); - mce_get_ierror_p7(&mce_error_info, srr1); - addr = regs->nip; - } + if (SRR1_MC_LOADSTORE(srr1)) + handled = mce_handle_derror(regs, dtable, &mce_err, &addr); + else + handled = mce_handle_ierror(regs, itable, &mce_err, &addr); - /* Handle UE error. */ - if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) handled = mce_handle_ue_error(regs); - save_mce_event(regs, handled, &mce_error_info, nip, addr); - return handled; -} - -static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1) -{ - mce_get_common_ierror(mce_err, srr1); - if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } -} - -static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr) -{ - mce_get_derror_p7(mce_err, dsisr); - if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } -} - -static long mce_handle_ierror_p8(uint64_t srr1) -{ - long handled = 0; + save_mce_event(regs, handled, &mce_err, regs->nip, addr); - handled = mce_handle_common_ierror(srr1); - -#ifdef CONFIG_PPC_STD_MMU_64 - if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { - flush_and_reload_slb(); - handled = 1; - } -#endif return handled; } -static long mce_handle_derror_p8(uint64_t dsisr) -{ - return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS); -} - -long __machine_check_early_realmode_p8(struct pt_regs *regs) -{ - uint64_t srr1, nip, addr; - long handled = 1; - struct mce_error_info mce_error_info = { 0 }; - - mce_error_info.severity = MCE_SEV_ERROR_SYNC; - mce_error_info.initiator = MCE_INITIATOR_CPU; - - srr1 = regs->msr; - nip = regs->nip; - - if (P7_SRR1_MC_LOADSTORE(srr1)) { - handled = mce_handle_derror_p8(regs->dsisr); - mce_get_derror_p8(&mce_error_info, regs->dsisr); - addr = regs->dar; - } else { - handled = mce_handle_ierror_p8(srr1); - mce_get_ierror_p8(&mce_error_info, srr1); - addr = regs->nip; - } - - /* Handle UE error. 
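On the SRR1_MC_LOADSTORE() test that the consolidated mce_handle_error() keys off: PPC_BIT() uses IBM bit numbering, where bit 0 is the most significant bit, so bit 42 of SRR1 selects between the data-side and instruction-side decode paths. A simplified 64-bit sketch:

	#define PPC_BIT(bit)		(1UL << (63 - (bit)))	/* IBM numbering */
	#define SRR1_MC_LOADSTORE(srr1)	((srr1) & PPC_BIT(42))

	if (SRR1_MC_LOADSTORE(regs->msr))
		handled = mce_handle_derror(regs, dtable, &mce_err, &addr);
	else
		handled = mce_handle_ierror(regs, itable, &mce_err, &addr);
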
*/ - if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) - handled = mce_handle_ue_error(regs); - - save_mce_event(regs, handled, &mce_error_info, nip, addr); - return handled; -} - -static int mce_handle_derror_p9(struct pt_regs *regs) -{ - uint64_t dsisr = regs->dsisr; - - return mce_handle_flush_derrors(dsisr, - P9_DSISR_MC_SLB_PARITY_MFSLB | - P9_DSISR_MC_SLB_MULTIHIT_MFSLB, - - P9_DSISR_MC_TLB_MULTIHIT_MFTLB, - - P9_DSISR_MC_ERAT_MULTIHIT); -} - -static int mce_handle_ierror_p9(struct pt_regs *regs) +long __machine_check_early_realmode_p7(struct pt_regs *regs) { - uint64_t srr1 = regs->msr; + /* P7 DD1 leaves top bits of DSISR undefined */ + regs->dsisr &= 0x0000ffff; - switch (P9_SRR1_MC_IFETCH(srr1)) { - case P9_SRR1_MC_IFETCH_SLB_PARITY: - case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: - return mce_flush(MCE_FLUSH_SLB); - case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: - return mce_flush(MCE_FLUSH_TLB); - case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: - return mce_flush(MCE_FLUSH_ERAT); - default: - return 0; - } + return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); } -static void mce_get_derror_p9(struct pt_regs *regs, - struct mce_error_info *mce_err, uint64_t *addr) -{ - uint64_t dsisr = regs->dsisr; - - mce_err->severity = MCE_SEV_ERROR_SYNC; - mce_err->initiator = MCE_INITIATOR_CPU; - - if (dsisr & P9_DSISR_MC_USER_TLBIE) - *addr = regs->nip; - else - *addr = regs->dar; - - if (dsisr & P9_DSISR_MC_UE) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; - } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) { - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; - } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) { - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT; - } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) { - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT; - } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) { - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) { - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - } else if (dsisr & P9_DSISR_MC_USER_TLBIE) { - mce_err->error_type = MCE_ERROR_TYPE_USER; - mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE; - } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) { - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - } else if (dsisr & P9_DSISR_MC_RA_LOAD) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD; - } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE; - } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; - } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) { - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN; - } -} - -static void mce_get_ierror_p9(struct pt_regs *regs, - struct mce_error_info *mce_err, uint64_t *addr) +long 
__machine_check_early_realmode_p8(struct pt_regs *regs) { - uint64_t srr1 = regs->msr; - - switch (P9_SRR1_MC_IFETCH(srr1)) { - case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: - case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: - mce_err->severity = MCE_SEV_FATAL; - break; - default: - mce_err->severity = MCE_SEV_ERROR_SYNC; - break; - } - - mce_err->initiator = MCE_INITIATOR_CPU; - - *addr = regs->nip; - - switch (P9_SRR1_MC_IFETCH(srr1)) { - case P9_SRR1_MC_IFETCH_UE: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; - break; - case P9_SRR1_MC_IFETCH_SLB_PARITY: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; - break; - case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_SLB; - mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; - break; - case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_ERAT; - mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; - break; - case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: - mce_err->error_type = MCE_ERROR_TYPE_TLB; - mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; - break; - case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD: - mce_err->error_type = MCE_ERROR_TYPE_UE; - mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; - break; - case P9_SRR1_MC_IFETCH_LINK_TIMEOUT: - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT; - break; - case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT: - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT; - break; - case P9_SRR1_MC_IFETCH_RA: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH; - break; - case P9_SRR1_MC_IFETCH_RA_TABLEWALK: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH; - break; - case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_STORE; - break; - case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: - mce_err->error_type = MCE_ERROR_TYPE_LINK; - mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT; - break; - case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN: - mce_err->error_type = MCE_ERROR_TYPE_RA; - mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN; - break; - default: - break; - } + return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); } long __machine_check_early_realmode_p9(struct pt_regs *regs) { - uint64_t nip, addr; - long handled; - struct mce_error_info mce_error_info = { 0 }; - - nip = regs->nip; - - if (P9_SRR1_MC_LOADSTORE(regs->msr)) { - handled = mce_handle_derror_p9(regs); - mce_get_derror_p9(regs, &mce_error_info, &addr); - } else { - handled = mce_handle_ierror_p9(regs); - mce_get_ierror_p9(regs, &mce_error_info, &addr); - } - - /* Handle UE error. */ - if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) - handled = mce_handle_ue_error(regs); - - save_mce_event(regs, handled, &mce_error_info, nip, addr); - return handled; + return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 2282bf4e63cd..ec60ed0d4aad 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -243,10 +243,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 2. 
branch to optimized_callback() and emulate_step() */ - kprobe_lookup_name("optimized_callback", op_callback_addr); - kprobe_lookup_name("emulate_step", emulate_step_addr); + op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback"); + emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step"); if (!op_callback_addr || !emulate_step_addr) { - WARN(1, "kprobe_lookup_name() failed\n"); + WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n"); goto error; } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index dfc479df9634..8d63627e067f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -245,3 +245,24 @@ void __init free_unused_pacas(void) free_lppacas(); } + +void copy_mm_to_paca(struct mm_struct *mm) +{ +#ifdef CONFIG_PPC_BOOK3S + mm_context_t *context = &mm->context; + + get_paca()->mm_ctx_id = context->id; +#ifdef CONFIG_PPC_MM_SLICES + VM_BUG_ON(!mm->context.addr_limit); + get_paca()->addr_limit = mm->context.addr_limit; + get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; + memcpy(&get_paca()->mm_ctx_high_slices_psize, + &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); +#else /* CONFIG_PPC_MM_SLICES */ + get_paca()->mm_ctx_user_psize = context->user_psize; + get_paca()->mm_ctx_sllp = context->sllp; +#endif +#else /* CONFIG_PPC_BOOK3S */ + return; +#endif +} diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f5d399e46193..d2f0afeae5a0 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -55,7 +55,6 @@ #include <asm/kexec.h> #include <asm/opal.h> #include <asm/fadump.h> -#include <asm/debug.h> #include <asm/epapr_hcalls.h> #include <asm/firmware.h> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 1c1b44ec7642..dd8a04f3053a 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -815,7 +815,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .virt_base = cpu_to_be32(0xffffffff), .virt_size = cpu_to_be32(0xffffffff), .load_base = cpu_to_be32(0xffffffff), - .min_rma = cpu_to_be32(256), /* 256MB min RMA */ + .min_rma = cpu_to_be32(512), /* 512MB min RMA */ .min_load = cpu_to_be32(0xffffffff), /* full client load */ .min_rma_percent = 0, /* min RMA percentage of total RAM */ .max_pft_size = 48, /* max log_2(hash table size) */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4697da895133..5c10b5925ac2 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -31,11 +31,11 @@ #include <linux/unistd.h> #include <linux/serial.h> #include <linux/serial_8250.h> -#include <linux/debugfs.h> #include <linux/percpu.h> #include <linux/memblock.h> #include <linux/of_platform.h> #include <linux/hugetlb.h> +#include <asm/debugfs.h> #include <asm/io.h> #include <asm/paca.h> #include <asm/prom.h> @@ -920,6 +920,15 @@ void __init setup_arch(char **cmdline_p) init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; + +#ifdef CONFIG_PPC_MM_SLICES +#ifdef CONFIG_PPC64 + init_mm.context.addr_limit = TASK_SIZE_128TB; +#else +#error "context.addr_limit not initialized." 
+#endif +#endif + #ifdef CONFIG_PPC_64K_PAGES init_mm.context.pte_frag = NULL; #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f997154dfc41..0d4dcaeaafcb 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -230,8 +230,8 @@ static void cpu_ready_for_interrupts(void) * If we are not in hypervisor mode the job is done once for * the whole partition in configure_exceptions(). */ - if (early_cpu_has_feature(CPU_FTR_HVMODE) && - early_cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) { unsigned long lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); } @@ -637,6 +637,11 @@ void __init emergency_stack_init(void) paca[i].emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 + /* emergency stack for NMI exception handling. */ + ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + klp_init_thread_info(ti); + paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; + /* emergency stack for machine check exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); klp_init_thread_info(ti); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index d68ed1f004a3..df2a41647d8e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -39,6 +39,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/kvm_ppc.h> +#include <asm/dbell.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/prom.h> @@ -86,8 +87,6 @@ volatile unsigned int cpu_callin_map[NR_CPUS]; int smt_enabled_at_boot = 1; -static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; - /* * Returns 1 if the specified cpu should be brought up during boot. * Used to inhibit booting threads if they've been disabled or @@ -158,32 +157,33 @@ static irqreturn_t tick_broadcast_ipi_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t debug_ipi_action(int irq, void *data) +#ifdef CONFIG_NMI_IPI +static irqreturn_t nmi_ipi_action(int irq, void *data) { - if (crash_ipi_function_ptr) { - crash_ipi_function_ptr(get_irq_regs()); - return IRQ_HANDLED; - } - -#ifdef CONFIG_DEBUGGER - debugger_ipi(get_irq_regs()); -#endif /* CONFIG_DEBUGGER */ - + smp_handle_nmi_ipi(get_irq_regs()); return IRQ_HANDLED; } +#endif static irq_handler_t smp_ipi_action[] = { [PPC_MSG_CALL_FUNCTION] = call_function_action, [PPC_MSG_RESCHEDULE] = reschedule_action, [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action, - [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action, +#ifdef CONFIG_NMI_IPI + [PPC_MSG_NMI_IPI] = nmi_ipi_action, +#endif }; +/* + * The NMI IPI is a fallback and not truly non-maskable. It is simpler + * than going through the call function infrastructure, and strongly + * serialized, so it is more appropriate for debugging. 
+ */ const char *smp_ipi_name[] = { [PPC_MSG_CALL_FUNCTION] = "ipi call function", [PPC_MSG_RESCHEDULE] = "ipi reschedule", [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", - [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", + [PPC_MSG_NMI_IPI] = "nmi ipi", }; /* optional function to request ipi, for controllers with >= 4 ipis */ @@ -191,14 +191,13 @@ int smp_request_message_ipi(int virq, int msg) { int err; - if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { + if (msg < 0 || msg > PPC_MSG_NMI_IPI) return -EINVAL; - } -#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE) - if (msg == PPC_MSG_DEBUGGER_BREAK) { +#ifndef CONFIG_NMI_IPI + if (msg == PPC_MSG_NMI_IPI) return 1; - } #endif + err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND, smp_ipi_name[msg], NULL); @@ -211,17 +210,9 @@ int smp_request_message_ipi(int virq, int msg) #ifdef CONFIG_PPC_SMP_MUXED_IPI struct cpu_messages { long messages; /* current messages */ - unsigned long data; /* data for cause ipi */ }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); -void smp_muxed_ipi_set_data(int cpu, unsigned long data) -{ - struct cpu_messages *info = &per_cpu(ipi_message, cpu); - - info->data = data; -} - void smp_muxed_ipi_set_message(int cpu, int msg) { struct cpu_messages *info = &per_cpu(ipi_message, cpu); @@ -236,14 +227,13 @@ void smp_muxed_ipi_set_message(int cpu, int msg) void smp_muxed_ipi_message_pass(int cpu, int msg) { - struct cpu_messages *info = &per_cpu(ipi_message, cpu); - smp_muxed_ipi_set_message(cpu, msg); + /* * cause_ipi functions are required to include a full barrier * before doing whatever causes the IPI. */ - smp_ops->cause_ipi(cpu, info->data); + smp_ops->cause_ipi(cpu); } #ifdef __BIG_ENDIAN__ @@ -254,11 +244,18 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = this_cpu_ptr(&ipi_message); - unsigned long all; - mb(); /* order any irq clear */ + return smp_ipi_demux_relaxed(); +} + +/* sync-free variant. Callers should ensure synchronization */ +irqreturn_t smp_ipi_demux_relaxed(void) +{ + struct cpu_messages *info; + unsigned long all; + + info = this_cpu_ptr(&ipi_message); do { all = xchg(&info->messages, 0); #if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) @@ -278,8 +275,10 @@ irqreturn_t smp_ipi_demux(void) scheduler_ipi(); if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) tick_broadcast_ipi_handler(); - if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK)) - debug_ipi_action(0, NULL); +#ifdef CONFIG_NMI_IPI + if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) + nmi_ipi_action(0, NULL); +#endif } while (info->messages); return IRQ_HANDLED; @@ -316,6 +315,187 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); } +#ifdef CONFIG_NMI_IPI + +/* + * "NMI IPI" system. + * + * NMI IPIs may not be recoverable, so should not be used as ongoing part of + * a running system. They can be used for crash, debug, halt/reboot, etc. + * + * NMI IPIs are globally single threaded. No more than one in progress at + * any time. + * + * The IPI call waits with interrupts disabled until all targets enter the + * NMI handler, then the call returns. + * + * No new NMI can be initiated until targets exit the handler. + * + * The IPI call may time out without all targets entering the NMI handler. 
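Given those semantics, a caller inside this file looks like the following sketch; the 1000000us timeout matches what the debugger and crash paths further down actually pass, while dump_cb() is a placeholder:

	static void dump_cb(struct pt_regs *regs)
	{
		show_regs(regs);	/* runs on each target, in NMI context */
	}

	/* returns 1 if all targets entered the handler, 0 on timeout */
	if (!smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, dump_cb, 1000000))
		pr_warn("some CPUs did not respond to the NMI IPI\n");
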
+ * In that case, there is some logic to recover (and ignore subsequent + * NMI interrupts that may eventually be raised), but the platform interrupt + * handler may not be able to distinguish this from other exception causes, + * which may cause a crash. + */ + +static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0); +static struct cpumask nmi_ipi_pending_mask; +static int nmi_ipi_busy_count = 0; +static void (*nmi_ipi_function)(struct pt_regs *) = NULL; + +static void nmi_ipi_lock_start(unsigned long *flags) +{ + raw_local_irq_save(*flags); + hard_irq_disable(); + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { + raw_local_irq_restore(*flags); + cpu_relax(); + raw_local_irq_save(*flags); + hard_irq_disable(); + } +} + +static void nmi_ipi_lock(void) +{ + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) + cpu_relax(); +} + +static void nmi_ipi_unlock(void) +{ + smp_mb(); + WARN_ON(atomic_read(&__nmi_ipi_lock) != 1); + atomic_set(&__nmi_ipi_lock, 0); +} + +static void nmi_ipi_unlock_end(unsigned long *flags) +{ + nmi_ipi_unlock(); + raw_local_irq_restore(*flags); +} + +/* + * Platform NMI handler calls this to ack + */ +int smp_handle_nmi_ipi(struct pt_regs *regs) +{ + void (*fn)(struct pt_regs *); + unsigned long flags; + int me = raw_smp_processor_id(); + int ret = 0; + + /* + * Unexpected NMIs are possible here because the interrupt may not + * be able to distinguish NMI IPIs from other types of NMIs, or + * because the caller may have timed out. + */ + nmi_ipi_lock_start(&flags); + if (!nmi_ipi_busy_count) + goto out; + if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask)) + goto out; + + fn = nmi_ipi_function; + if (!fn) + goto out; + + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); + nmi_ipi_busy_count++; + nmi_ipi_unlock(); + + ret = 1; + + fn(regs); + + nmi_ipi_lock(); + nmi_ipi_busy_count--; +out: + nmi_ipi_unlock_end(&flags); + + return ret; +} + +static void do_smp_send_nmi_ipi(int cpu) +{ + if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu)) + return; + + if (cpu >= 0) { + do_message_pass(cpu, PPC_MSG_NMI_IPI); + } else { + int c; + + for_each_online_cpu(c) { + if (c == raw_smp_processor_id()) + continue; + do_message_pass(c, PPC_MSG_NMI_IPI); + } + } +} + +/* + * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS. + * - fn is the target callback function. + * - delay_us > 0 is the delay before giving up waiting for targets to + * enter the handler, == 0 specifies indefinite delay. 
+ */ +static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +{ + unsigned long flags; + int me = raw_smp_processor_id(); + int ret = 1; + + BUG_ON(cpu == me); + BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS); + + if (unlikely(!smp_ops)) + return 0; + + /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */ + nmi_ipi_lock_start(&flags); + while (nmi_ipi_busy_count) { + nmi_ipi_unlock_end(&flags); + cpu_relax(); + nmi_ipi_lock_start(&flags); + } + + nmi_ipi_function = fn; + + if (cpu < 0) { + /* ALL_OTHERS */ + cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask); + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); + } else { + /* cpumask starts clear */ + cpumask_set_cpu(cpu, &nmi_ipi_pending_mask); + } + nmi_ipi_busy_count++; + nmi_ipi_unlock(); + + do_smp_send_nmi_ipi(cpu); + + while (!cpumask_empty(&nmi_ipi_pending_mask)) { + udelay(1); + if (delay_us) { + delay_us--; + if (!delay_us) + break; + } + } + + nmi_ipi_lock(); + if (!cpumask_empty(&nmi_ipi_pending_mask)) { + /* Could not gather all CPUs */ + ret = 0; + cpumask_clear(&nmi_ipi_pending_mask); + } + nmi_ipi_busy_count--; + nmi_ipi_unlock_end(&flags); + + return ret; +} +#endif /* CONFIG_NMI_IPI */ + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void tick_broadcast(const struct cpumask *mask) { @@ -326,29 +506,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) -void smp_send_debugger_break(void) +#ifdef CONFIG_DEBUGGER +void debugger_ipi_callback(struct pt_regs *regs) { - int cpu; - int me = raw_smp_processor_id(); - - if (unlikely(!smp_ops)) - return; + debugger_ipi(regs); +} - for_each_online_cpu(cpu) - if (cpu != me) - do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); +void smp_send_debugger_break(void) +{ + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000); } #endif #ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { - crash_ipi_function_ptr = crash_ipi_callback; - if (crash_ipi_callback) { - mb(); - smp_send_debugger_break(); - } + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000); } #endif @@ -439,7 +612,21 @@ int generic_cpu_disable(void) #ifdef CONFIG_PPC64 vdso_data->processorCount--; #endif - migrate_irqs(); + /* Update affinity of all IRQs previously aimed at this CPU */ + irq_migrate_all_off_this_cpu(); + + /* + * Depending on the details of the interrupt controller, it's possible + * that one of the interrupts we just migrated away from this CPU is + * actually already pending on this CPU. If we leave it in that state + * the interrupt will never be EOI'ed, and will never fire again. So + * temporarily enable interrupts here, to allow any pending interrupt to + * be received (and EOI'ed), before we take this CPU offline. 
@@ -439,7 +612,21 @@ int generic_cpu_disable(void)
 #ifdef CONFIG_PPC64
 	vdso_data->processorCount--;
 #endif
-	migrate_irqs();
+	/* Update affinity of all IRQs previously aimed at this CPU */
+	irq_migrate_all_off_this_cpu();
+
+	/*
+	 * Depending on the details of the interrupt controller, it's possible
+	 * that one of the interrupts we just migrated away from this CPU is
+	 * actually already pending on this CPU. If we leave it in that state
+	 * the interrupt will never be EOI'ed, and will never fire again. So
+	 * temporarily enable interrupts here, to allow any pending interrupt
+	 * to be received (and EOI'ed) before we take this CPU offline.
+	 */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+
 	return 0;
 }
 
@@ -521,6 +708,16 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 
 	cpu_idle_thread_init(cpu, tidle);
 
+	/*
+	 * The platform might need to allocate resources prior to bringing
+	 * up the CPU.
+	 */
+	if (smp_ops->prepare_cpu) {
+		rc = smp_ops->prepare_cpu(cpu);
+		if (rc)
+			return rc;
+	}
+
 	/* Make sure callin map entry is 0 (can be left over from a CPU
 	 * hotplug) */
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 66711958493c..d534ed901538 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -59,7 +59,14 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-	save_context_stack(trace, tsk->thread.ksp, tsk, 0);
+	unsigned long sp;
+
+	if (tsk == current)
+		sp = current_stack_pointer();
+	else
+		sp = tsk->thread.ksp;
+
+	save_context_stack(trace, sp, tsk, 0);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index 6ae9bd5086a4..0050b2d2ff7a 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/suspend.h>
 #include <asm/current.h>
 #include <asm/mmu_context.h>
 #include <asm/switch_to.h>
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index de04c9fbb5cd..a877bf8269fe 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -42,11 +42,11 @@
 #include <asm/unistd.h>
 #include <asm/asm-prototypes.h>
 
-static inline unsigned long do_mmap2(unsigned long addr, size_t len,
+static inline long do_mmap2(unsigned long addr, size_t len,
 			unsigned long prot, unsigned long flags,
 			unsigned long fd, unsigned long off, int shift)
 {
-	unsigned long ret = -EINVAL;
+	long ret = -EINVAL;
 
 	if (!arch_validate_prot(prot))
 		goto out;
@@ -62,16 +62,16 @@ out:
 	return ret;
 }
 
-unsigned long sys_mmap2(unsigned long addr, size_t len,
-			unsigned long prot, unsigned long flags,
-			unsigned long fd, unsigned long pgoff)
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, pgoff)
 {
 	return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12);
 }
 
-unsigned long sys_mmap(unsigned long addr, size_t len,
-		       unsigned long prot, unsigned long flags,
-		       unsigned long fd, off_t offset)
+SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, off_t, offset)
 {
 	return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
 }
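The do_mmap2() return type change from unsigned long to long matters because mmap results and negative errnos share one register and are distinguished only by value range. A small stand-alone illustration of that convention, with IS_ERR_VALUE() re-derived here rather than taken from kernel headers (assumes a 64-bit host):

#include <stdio.h>

#define MAX_ERRNO 4095
/* Mirrors the kernel's IS_ERR_VALUE(): the top 4095 unsigned values
 * are reserved for negative errnos. */
#define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

int main(void)
{
        long addr = 0x7fff00000000L;    /* plausible mapping address */
        long err = -22;                 /* what -EINVAL looks like */

        printf("%#lx -> %s\n", (unsigned long)addr,
               IS_ERR_VALUE(addr) ? "error" : "address");
        printf("%ld -> %s\n", err,
               IS_ERR_VALUE(err) ? "error" : "address");
        return 0;
}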
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index c1fb255a60d6..4437c70c7c2b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -710,6 +710,10 @@ static int register_cpu_online(unsigned int cpu)
 	struct device_attribute *attrs, *pmc_attrs;
 	int i, nattrs;
 
+	/* For CPUs present at boot, a reference was already grabbed in register_cpu() */
+	if (!s->of_node)
+		s->of_node = of_get_cpu_node(cpu, NULL);
+
 #ifdef CONFIG_PPC64
 	if (cpu_has_feature(CPU_FTR_SMT))
 		device_create_file(s, &dev_attr_smt_snooze_delay);
@@ -785,9 +789,9 @@ static int register_cpu_online(unsigned int cpu)
 	return 0;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 static int unregister_cpu_online(unsigned int cpu)
 {
-#ifdef CONFIG_HOTPLUG_CPU
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
 	struct device *s = &c->dev;
 	struct device_attribute *attrs, *pmc_attrs;
@@ -864,9 +868,13 @@ static int unregister_cpu_online(unsigned int cpu)
 	}
 #endif
 	cacheinfo_cpu_offline(cpu);
-#endif /* CONFIG_HOTPLUG_CPU */
+	of_node_put(s->of_node);
+	s->of_node = NULL;
 	return 0;
 }
+#else /* !CONFIG_HOTPLUG_CPU */
+#define unregister_cpu_online NULL
+#endif
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
 ssize_t arch_cpu_probe(const char *buf, size_t count)
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
new file mode 100644
index 000000000000..729dffc5f7bc
--- /dev/null
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -0,0 +1,29 @@
+#
+# Makefile for the powerpc trace subsystem
+#
+
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+ifdef CONFIG_FUNCTION_TRACER
+# do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+endif
+
+obj32-$(CONFIG_FUNCTION_TRACER)		+= ftrace_32.o
+obj64-$(CONFIG_FUNCTION_TRACER)		+= ftrace_64.o
+ifdef CONFIG_MPROFILE_KERNEL
+obj64-$(CONFIG_FUNCTION_TRACER)		+= ftrace_64_mprofile.o
+else
+obj64-$(CONFIG_FUNCTION_TRACER)		+= ftrace_64_pg.o
+endif
+obj-$(CONFIG_DYNAMIC_FTRACE)		+= ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
+obj-$(CONFIG_FTRACE_SYSCALLS)		+= ftrace.o
+obj-$(CONFIG_TRACING)			+= trace_clock.o
+
+obj-$(CONFIG_PPC64)			+= $(obj64-y)
+obj-$(CONFIG_PPC32)			+= $(obj32-y)
+
+# Disable GCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_ftrace.o := n
+UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 5c9f50c1aa99..32509de6ce4c 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 
+#include <asm/asm-prototypes.h>
 #include <asm/cacheflush.h>
 #include <asm/code-patching.h>
 #include <asm/ftrace.h>
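The CFLAGS_REMOVE_ftrace.o line in the new Makefile exists because code that implements tracing must not itself be compiled with profiling calls it would then re-enter; the per-function counterpart is the kernel's notrace annotation, which wraps a GCC attribute. A self-contained sketch of the recursion hazard, shown with -finstrument-functions since its hooks make it easy to demonstrate outside the kernel:

/* Build with: gcc -finstrument-functions demo.c */
#include <stdio.h>

#define notrace __attribute__((no_instrument_function))

/* The hooks themselves must be excluded from instrumentation, or every
 * entry into a hook would itself be instrumented and recurse forever. */
notrace void __cyg_profile_func_enter(void *fn, void *call_site)
{
        fprintf(stderr, "enter %p (from %p)\n", fn, call_site);
}

notrace void __cyg_profile_func_exit(void *fn, void *call_site)
{
        (void)call_site;
        fprintf(stderr, "exit  %p\n", fn);
}

static int traced_work(int x)   /* instrumented: both hooks fire */
{
        return x * 2;
}

int main(void)
{
        return traced_work(21) == 42 ? 0 : 1;
}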
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
new file mode 100644
index 000000000000..afef2c076282
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -0,0 +1,118 @@
+/*
+ * Split from entry_32.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/export.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+	/*
+	 * _mcount on PPC32 is required to preserve the link register,
+	 * but we have r0 to play with. We use r0 to move the return
+	 * address of the caller of mcount into the ctr register,
+	 * restore the link register, and then jump back via ctr.
+	 */
+	mflr	r0
+	mtctr	r0
+	lwz	r0, 4(r1)
+	mtlr	r0
+	bctr
+
+_GLOBAL(ftrace_caller)
+	MCOUNT_SAVE_FRAME
+	/* r3 ends up with link register */
+	subi	r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+	b	ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+	MCOUNT_RESTORE_FRAME
+	/* old link register ends up in ctr reg */
+	bctr
+#else
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+
+	MCOUNT_SAVE_FRAME
+
+	subi	r3, r3, MCOUNT_INSN_SIZE
+	LOAD_REG_ADDR(r5, ftrace_trace_function)
+	lwz	r5, 0(r5)
+
+	mtctr	r5
+	bctrl
+	nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	b	ftrace_graph_caller
+#endif
+	MCOUNT_RESTORE_FRAME
+	bctr
+#endif
+EXPORT_SYMBOL(_mcount)
+
+_GLOBAL(ftrace_stub)
+	blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+	/* load r4 with local address */
+	lwz	r4, 44(r1)
+	subi	r4, r4, MCOUNT_INSN_SIZE
+
+	/* Grab the LR out of the caller stack frame */
+	lwz	r3, 52(r1)
+
+	bl	prepare_ftrace_return
+	nop
+
+	/*
+	 * prepare_ftrace_return gives us the address we divert to.
+	 * Change the LR in the caller's stack frame to this.
+	 */
+	stw	r3, 52(r1)
+
+	MCOUNT_RESTORE_FRAME
+	/* old link register ends up in ctr reg */
+	bctr
+
+_GLOBAL(return_to_handler)
+	/* need to save return values */
+	stwu	r1, -32(r1)
+	stw	r3, 20(r1)
+	stw	r4, 16(r1)
+	stw	r31, 12(r1)
+	mr	r31, r1
+
+	bl	ftrace_return_to_handler
+	nop
+
+	/* return value has real return address */
+	mtlr	r3
+
+	lwz	r3, 20(r1)
+	lwz	r4, 16(r1)
+	lwz	r31, 12(r1)
+	lwz	r1, 0(r1)
+
+	/* Jump back to real return address */
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
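The graph-caller variants all play the same trick: call prepare_ftrace_return() and write whatever it returns over the saved LR, so the traced function "returns" into a trampoline first. A loose user-space model of that contract, with simplified signatures and a fixed-depth shadow stack (the real kernel versions take more context and keep per-task state):

#include <stdio.h>

/* Stand-in for the asm trampoline; in the kernel, return_to_handler
 * saves the function's return values, calls ftrace_return_to_handler()
 * to recover the real return address, and jumps to it. */
static void return_to_handler(void)
{
}

/* Toy shadow stack: remembers the real return address of each
 * hijacked frame. */
static unsigned long shadow_ret[64];
static int shadow_top;

/* Entry side: called with the saved LR (parent) and the traced
 * function's address (self); the value returned here is written back
 * over the saved LR by the asm glue. */
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long self)
{
        if (shadow_top >= 64)
                return parent;  /* too deep: leave the return path alone */

        printf("enter %#lx (will return to %#lx)\n", self, parent);
        shadow_ret[shadow_top++] = parent;
        return (unsigned long)return_to_handler;
}

/* Exit side: hand the genuine return address back to the trampoline. */
unsigned long ftrace_return_to_handler(void)
{
        return shadow_ret[--shadow_top];
}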
diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S
new file mode 100644
index 000000000000..e5ccea19821e
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64.S
@@ -0,0 +1,85 @@
+/*
+ * Split from entry_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/export.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+	mflr	r12
+	mtctr	r12
+	mtlr	r0
+	bctr
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+_GLOBAL_TOC(_mcount)
+EXPORT_SYMBOL(_mcount)
+	/* Taken from output of objdump from lib64/glibc */
+	mflr	r3
+	ld	r11, 0(r1)
+	stdu	r1, -112(r1)
+	std	r3, 128(r1)
+	ld	r4, 16(r11)
+
+	subi	r3, r3, MCOUNT_INSN_SIZE
+	LOAD_REG_ADDR(r5, ftrace_trace_function)
+	ld	r5, 0(r5)
+	ld	r5, 0(r5)
+	mtctr	r5
+	bctrl
+	nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	b	ftrace_graph_caller
+#endif
+	ld	r0, 128(r1)
+	mtlr	r0
+	addi	r1, r1, 112
+_GLOBAL(ftrace_stub)
+	blr
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+	/* need to save return values */
+	std	r4, -32(r1)
+	std	r3, -24(r1)
+	/* save TOC */
+	std	r2, -16(r1)
+	std	r31, -8(r1)
+	mr	r31, r1
+	stdu	r1, -112(r1)
+
+	/*
+	 * We might be called from a module.
+	 * Switch to our TOC to run inside the core kernel.
+	 */
+	ld	r2, PACATOC(r13)
+
+	bl	ftrace_return_to_handler
+	nop
+
+	/* return value has real return address */
+	mtlr	r3
+
+	ld	r1, 0(r1)
+	ld	r4, -32(r1)
+	ld	r3, -24(r1)
+	ld	r2, -16(r1)
+	ld	r31, -8(r1)
+
+	/* Jump back to real return address */
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
new file mode 100644
index 000000000000..7c933a99f5d5
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -0,0 +1,272 @@
+/*
+ * Split from ftrace_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/export.h>
+#include <asm/thread_info.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * ftrace_caller() is the function that replaces _mcount() when ftrace is
+ * active.
+ *
+ * We arrive here after a function A calls function B, and we are the trace
+ * function for B. When we enter, r1 points to A's stack frame; B has not
+ * yet had a chance to allocate one.
+ *
+ * Additionally, r2 may point to the TOC for either A or B, depending on
+ * whether B did a TOC setup sequence before calling us.
+ *
+ * On entry the LR points back to the _mcount() call site, and r0 holds the
+ * saved LR as it was on entry to B, i.e. the original return address at the
+ * call site in A.
+ *
+ * Our job is to save the register state into a struct pt_regs (on the stack)
+ * and then arrange for the ftrace function to be called.
+ */
+_GLOBAL(ftrace_caller)
+	/* Save the original return address in A's stack frame */
+	std	r0, LRSAVE(r1)
+
+	/* Create our stack frame + pt_regs */
+	stdu	r1, -SWITCH_FRAME_SIZE(r1)
+
+	/* Save all gprs to pt_regs */
+	SAVE_8GPRS(0, r1)
+	SAVE_8GPRS(8, r1)
+	SAVE_8GPRS(16, r1)
+	SAVE_8GPRS(24, r1)
+
+	/* Load special regs for save below */
+	mfmsr	r8
+	mfctr	r9
+	mfxer	r10
+	mfcr	r11
+
+	/* Get the _mcount() call site out of LR */
+	mflr	r7
+	/* Save it as pt_regs->nip */
+	std	r7, _NIP(r1)
+	/* Save the real LR in pt_regs->link */
+	std	r0, _LINK(r1)
+
+	/* Save callee's TOC in the ABI compliant location */
+	std	r2, 24(r1)
+	ld	r2, PACATOC(r13)	/* get kernel TOC in r2 */
+
+	addis	r3, r2, function_trace_op@toc@ha
+	addi	r3, r3, function_trace_op@toc@l
+	ld	r5, 0(r3)
+
+#ifdef CONFIG_LIVEPATCH
+	mr	r14, r7		/* remember old NIP */
+#endif
+	/* Calculate ip from nip-4 into r3 for call below */
+	subi	r3, r7, MCOUNT_INSN_SIZE
+
+	/* Put the original return address in r4 as parent_ip */
+	mr	r4, r0
+
+	/* Save special regs */
+	std	r8, _MSR(r1)
+	std	r9, _CTR(r1)
+	std	r10, _XER(r1)
+	std	r11, _CCR(r1)
+
+	/* Load &pt_regs in r6 for call below */
+	addi	r6, r1, STACK_FRAME_OVERHEAD
+
+	/* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+
+	/* Load ctr with the possibly modified NIP */
+	ld	r3, _NIP(r1)
+	mtctr	r3
+#ifdef CONFIG_LIVEPATCH
+	cmpd	r14, r3		/* has NIP been altered? */
+#endif
+
+	/* Restore gprs */
+	REST_8GPRS(0, r1)
+	REST_8GPRS(8, r1)
+	REST_8GPRS(16, r1)
+	REST_8GPRS(24, r1)
+
+	/* Restore possibly modified LR */
+	ld	r0, _LINK(r1)
+	mtlr	r0
+
+	/* Restore callee's TOC */
+	ld	r2, 24(r1)
+
+	/* Pop our stack frame */
+	addi	r1, r1, SWITCH_FRAME_SIZE
+
+#ifdef CONFIG_LIVEPATCH
+	/* Based on the cmpd above, if the NIP was altered handle livepatch */
+	bne-	livepatch_handler
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+	b	ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+
+	bctr			/* jump after _mcount site */
+
+_GLOBAL(ftrace_stub)
+	blr
+
+#ifdef CONFIG_LIVEPATCH
+	/*
+	 * This function runs in the mcount context, between two functions. As
+	 * such it can only clobber registers which are volatile and used in
+	 * function linkage.
+	 *
+	 * We get here when a function A calls another function B, but B has
+	 * been live patched with a new function C.
+	 *
+	 * On entry:
+	 *  - we have no stack frame and cannot allocate one
+	 *  - LR points back to the original caller (in A)
+	 *  - CTR holds the new NIP in C
+	 *  - r0 & r12 are free
+	 *
+	 * r0 can't be used as the base register for a DS-form load or store,
+	 * so we temporarily shuffle r1 (stack pointer) into r0 and then put
+	 * it back.
+	 */
+livepatch_handler:
+	CURRENT_THREAD_INFO(r12, r1)
+
+	/* Save stack pointer into r0 */
+	mr	r0, r1
+
+	/* Allocate 3 x 8 bytes */
+	ld	r1, TI_livepatch_sp(r12)
+	addi	r1, r1, 24
+	std	r1, TI_livepatch_sp(r12)
+
+	/* Save toc & real LR on livepatch stack */
+	std	r2, -24(r1)
+	mflr	r12
+	std	r12, -16(r1)
+
+	/* Store stack end marker */
+	lis	r12, STACK_END_MAGIC@h
+	ori	r12, r12, STACK_END_MAGIC@l
+	std	r12, -8(r1)
+
+	/* Restore real stack pointer */
+	mr	r1, r0
+
+	/* Put ctr in r12 for global entry and branch there */
+	mfctr	r12
+	bctrl
+
+	/*
+	 * Now we are returning from the patched function to the original
+	 * caller A. We are free to use r0 and r12, and we can use r2 until we
+	 * restore it.
+	 */
+
+	CURRENT_THREAD_INFO(r12, r1)
+
+	/* Save stack pointer into r0 */
+	mr	r0, r1
+
+	ld	r1, TI_livepatch_sp(r12)
+
+	/* Check stack marker hasn't been trashed */
+	lis	r2, STACK_END_MAGIC@h
+	ori	r2, r2, STACK_END_MAGIC@l
+	ld	r12, -8(r1)
+1:	tdne	r12, r2
+	EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
+
+	/* Restore LR & toc from livepatch stack */
+	ld	r12, -16(r1)
+	mtlr	r12
+	ld	r2, -24(r1)
+
+	/* Pop livepatch stack frame */
+	CURRENT_THREAD_INFO(r12, r0)
+	subi	r1, r1, 24
+	std	r1, TI_livepatch_sp(r12)
+
+	/* Restore real stack pointer */
+	mr	r1, r0
+
+	/* Return to original caller of live patched function */
+	blr
+#endif /* CONFIG_LIVEPATCH */
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
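livepatch_handler has no real stack frame to work with, so it parks a (TOC, LR, magic) triple on a small per-thread scratch stack and checks the magic on the way back. The same push/check/pop discipline, modeled in C with a hypothetical fixed-size array in place of thread_info's livepatch_sp pointer:

#include <assert.h>

#define STACK_END_MAGIC 0x57AC6E9DUL	/* same canary value the kernel uses */

/* Hypothetical per-thread scratch stack; the real code keeps a pointer
 * in thread_info and grows it by 24 bytes (three doublewords) per call. */
struct lp_frame {
        unsigned long toc;
        unsigned long lr;
        unsigned long magic;
};

static _Thread_local struct lp_frame lp_stack[16];
static _Thread_local int lp_sp;

/* Push before branching to the replacement function. */
void lp_push(unsigned long toc, unsigned long lr)
{
        lp_stack[lp_sp++] = (struct lp_frame){ toc, lr, STACK_END_MAGIC };
}

/* Pop on the way back; a trashed canary means the scratch stack
 * overflowed or was corrupted (the asm traps via tdne instead). */
unsigned long lp_pop(unsigned long *toc)
{
        struct lp_frame f = lp_stack[--lp_sp];

        assert(f.magic == STACK_END_MAGIC);
        *toc = f.toc;
        return f.lr;
}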
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+	stdu	r1, -112(r1)
+	/* with -mprofile-kernel, parameter regs are still alive at _mcount */
+	std	r10, 104(r1)
+	std	r9, 96(r1)
+	std	r8, 88(r1)
+	std	r7, 80(r1)
+	std	r6, 72(r1)
+	std	r5, 64(r1)
+	std	r4, 56(r1)
+	std	r3, 48(r1)
+
+	/* Save callee's TOC in the ABI compliant location */
+	std	r2, 24(r1)
+	ld	r2, PACATOC(r13)	/* get kernel TOC in r2 */
+
+	mfctr	r4		/* ftrace_caller has moved local addr here */
+	std	r4, 40(r1)
+	mflr	r3		/* ftrace_caller has restored LR from stack */
+	subi	r4, r4, MCOUNT_INSN_SIZE
+
+	bl	prepare_ftrace_return
+	nop
+
+	/*
+	 * prepare_ftrace_return gives us the address we divert to.
+	 * Change the LR to this.
+	 */
+	mtlr	r3
+
+	ld	r0, 40(r1)
+	mtctr	r0
+	ld	r10, 104(r1)
+	ld	r9, 96(r1)
+	ld	r8, 88(r1)
+	ld	r7, 80(r1)
+	ld	r6, 72(r1)
+	ld	r5, 64(r1)
+	ld	r4, 56(r1)
+	ld	r3, 48(r1)
+
+	/* Restore callee's TOC */
+	ld	r2, 24(r1)
+
+	addi	r1, r1, 112
+	mflr	r0
+	std	r0, LRSAVE(r1)
+	bctr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
new file mode 100644
index 000000000000..f095358da96e
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -0,0 +1,68 @@
+/*
+ * Split from ftrace_64.S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/export.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL_TOC(ftrace_caller)
+	/* Taken from output of objdump from lib64/glibc */
+	mflr	r3
+	ld	r11, 0(r1)
+	stdu	r1, -112(r1)
+	std	r3, 128(r1)
+	ld	r4, 16(r11)
+	subi	r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+	b	ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+	ld	r0, 128(r1)
+	mtlr	r0
+	addi	r1, r1, 112
+
+_GLOBAL(ftrace_stub)
+	blr
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+	/* load r4 with local address */
+	ld	r4, 128(r1)
+	subi	r4, r4, MCOUNT_INSN_SIZE
+
+	/* Grab the LR out of the caller stack frame */
+	ld	r11, 112(r1)
+	ld	r3, 16(r11)
+
+	bl	prepare_ftrace_return
+	nop
+
+	/*
+	 * prepare_ftrace_return gives us the address we divert to.
+	 * Change the LR in the caller's stack frame to this.
+	 */
+	ld	r11, 112(r1)
+	std	r3, 16(r11)
+
+	ld	r0, 128(r1)
+	mtlr	r0
+	addi	r1, r1, 112
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace_clock.c b/arch/powerpc/kernel/trace/trace_clock.c
index 49170690946d..49170690946d 100644
--- a/arch/powerpc/kernel/trace_clock.c
+++ b/arch/powerpc/kernel/trace/trace_clock.c
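The relocated trace_clock.c supplies the timebase-backed clock that tracing timestamps can use. As a user-space analogue of what a trace clock callback must provide (cheap, monotonic, safe from any context), assuming POSIX clock_gettime():

#include <stdint.h>
#include <time.h>

/* The powerpc implementation reads the CPU timebase; here we settle
 * for CLOCK_MONOTONIC_RAW converted to nanoseconds. */
uint64_t trace_clock_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}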
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index ff365f9de27a..d4e545d27ef9 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -35,13 +35,13 @@
 #include <linux/backlight.h>
 #include <linux/bug.h>
 #include <linux/kdebug.h>
-#include <linux/debugfs.h>
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
 
 #include <asm/emulated_ops.h>
 #include <asm/pgtable.h>
 #include <linux/uaccess.h>
+#include <asm/debugfs.h>
 #include <asm/io.h>
 #include <asm/machdep.h>
 #include <asm/rtas.h>
@@ -279,18 +279,35 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 
 void system_reset_exception(struct pt_regs *regs)
 {
+	/*
+	 * Avoid crashes in case of nested NMI exceptions. Recoverability
+	 * is determined by RI and in_nmi.
+	 */
+	bool nested = in_nmi();
+	if (!nested)
+		nmi_enter();
+
 	/* See if any machine dependent calls */
 	if (ppc_md.system_reset_exception) {
 		if (ppc_md.system_reset_exception(regs))
-			return;
+			goto out;
 	}
 
 	die("System Reset", regs, SIGABRT);
 
+out:
+#ifdef CONFIG_PPC_BOOK3S_64
+	BUG_ON(get_paca()->in_nmi == 0);
+	if (get_paca()->in_nmi > 1)
+		panic("Unrecoverable nested System Reset");
+#endif
 	/* Must die if the interrupt is not recoverable */
 	if (!(regs->msr & MSR_RI))
 		panic("Unrecoverable System Reset");
 
+	if (!nested)
+		nmi_exit();
+
 	/* What should we do here? We could issue a shutdown or hard reset. */
 }
 
@@ -306,8 +323,6 @@ long machine_check_early(struct pt_regs *regs)
 
 	__this_cpu_inc(irq_stat.mce_exceptions);
 
-	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
-
 	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 		handled = cur_cpu_spec->machine_check_early(regs);
 	return handled;
@@ -741,6 +756,8 @@ void machine_check_exception(struct pt_regs *regs)
 
 	__this_cpu_inc(irq_stat.mce_exceptions);
 
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
 	/* See if any machine dependent calls. In theory, we would want
 	 * to call the CPU first, and call the ppc_md. one if the CPU
 	 * one returns a positive number. However there is existing code
@@ -1440,6 +1457,8 @@ void facility_unavailable_exception(struct pt_regs *regs)
 		[FSCR_TM_LG] = "TM",
 		[FSCR_EBB_LG] = "EBB",
 		[FSCR_TAR_LG] = "TAR",
+		[FSCR_MSGP_LG] = "MSGP",
+		[FSCR_SCV_LG] = "SCV",
 	};
 	char *facility = "unknown";
 	u64 value;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 1c24c894c908..2f793be3d2b1 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -77,6 +77,8 @@ SECTIONS
 #endif
 	} :kernel
 
+	__head_end = .;
+
 	/*
 	 * If the build dies here, it's likely code in head_64.S is referencing
 	 * labels it can't reach, and the linker inserting stubs without the
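The nesting guard in system_reset_exception(), where only the outermost entry performs nmi_enter()/nmi_exit(), is a reusable pattern. A compressed sketch with a thread-local counter standing in for the paca's in_nmi bookkeeping (illustrative only; the kernel's in_nmi() reads the preempt count):

#include <stdbool.h>

/* Thread-local counter standing in for the per-CPU paca->in_nmi. */
static _Thread_local int in_nmi_count;

static void handle_reset(void)
{
        /* platform hook, die(), recoverability checks ... */
}

void system_reset_like_handler(void)
{
        bool nested = in_nmi_count > 0; /* did we interrupt ourselves? */

        if (!nested)
                in_nmi_count++;         /* nmi_enter() */

        handle_reset();

        if (!nested)
                in_nmi_count--;         /* nmi_exit() */
}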