diff options
Diffstat (limited to 'arch/powerpc/kernel')
40 files changed, 828 insertions, 521 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 2358f97d62ec..2b4c40b255e4 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o -obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ea5eb91b836e..6bee65f3cfd3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -221,12 +221,17 @@ int main(void) OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXSLB, paca_struct, exslb); OFFSET(PACA_EXNMI, paca_struct, exnmi); +#ifdef CONFIG_PPC_PSERIES OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); +#endif OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); +#endif OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); @@ -568,6 +573,7 @@ int main(void) OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar); OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar); OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr); + OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr); OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm); OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr); OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr); @@ -650,6 +656,7 @@ int main(void) HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_TID, tid); + HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]); HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]); HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]); @@ -759,6 +766,7 @@ int main(void) OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); + OFFSET(PACA_DONT_STOP, paca_struct, dont_stop); #define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) STOP_SPR(STOP_PID, pid); STOP_SPR(STOP_LDBAR, ldbar); diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index c5e5a94d9892..a9f3970693e1 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -226,7 +226,7 @@ BEGIN_FTR_SECTION beq 1f END_FTR_SECTION_IFSET(CPU_FTR_L3CR) lwz r6,CPU_SPEC_FEATURES(r4) - andi. r0,r6,CPU_FTR_L3_DISABLE_NAP + andis. r0,r6,CPU_FTR_L3_DISABLE_NAP@h beq 1f li r7,CPU_FTR_CAN_NAP andc r6,r6,r7 diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 462aed9bcf51..8d142e5d84cd 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -162,7 +162,7 @@ _GLOBAL(__setup_cpu_e5500) * the feature on the primary core, avoid doing it on the * secondary core. */ - andis. r6, r3, CPU_FTR_EMB_HV@h + andi. r6, r3, CPU_FTR_EMB_HV beq 2f rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV stw r3, CPU_SPEC_FEATURES(r4) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c40a9fc1e5d1..c8fc9691f8c7 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -133,36 +133,6 @@ extern void __restore_cpu_e6500(void); static struct cpu_spec __initdata cpu_specs[] = { #ifdef CONFIG_PPC_BOOK3S_64 - { /* Power4 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00350000, - .cpu_name = "POWER4 (gp)", - .cpu_features = CPU_FTRS_POWER4, - .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power4", - .oprofile_type = PPC_OPROFILE_POWER4, - .platform = "power4", - }, - { /* Power4+ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00380000, - .cpu_name = "POWER4+ (gq)", - .cpu_features = CPU_FTRS_POWER4, - .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power4", - .oprofile_type = PPC_OPROFILE_POWER4, - .platform = "power4", - }, { /* PPC970 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00390000, @@ -553,11 +523,30 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, - { /* Power9 DD 2.1 or later (see DD2.0 above) */ + { /* Power9 DD 2.1 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0201, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_1, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power9", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power9 DD2.2 or later */ .pvr_mask = 0xffff0000, .pvr_value = 0x004e0000, .cpu_name = "POWER9 (raw)", - .cpu_features = CPU_FTRS_POWER9_DD2_1, + .cpu_features = CPU_FTRS_POWER9_DD2_2, .cpu_user_features = COMMON_USER_POWER9, .cpu_user_features2 = COMMON_USER2_POWER9, .mmu_features = MMU_FTRS_POWER9, @@ -609,15 +598,15 @@ static struct cpu_spec __initdata cpu_specs[] = { { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, - .cpu_name = "POWER4 (compatible)", + .cpu_name = "POWER5 (compatible)", .cpu_features = CPU_FTRS_COMPATIBLE, .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2, + .mmu_features = MMU_FTRS_POWER, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .platform = "power4", + .platform = "power5", } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 00b215125d3e..17c8b99680f2 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -238,7 +238,7 @@ static void __maybe_unused crash_kexec_wait_realmode(int cpu) if (i == cpu) continue; - while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) { barrier(); if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) break; diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8ca5d5b74618..e88fbb1fdb8f 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -54,8 +54,7 @@ struct dt_cpu_feature { }; #define CPU_FTRS_BASE \ - (CPU_FTR_USE_TB | \ - CPU_FTR_LWSYNC | \ + (CPU_FTR_LWSYNC | \ CPU_FTR_FPU_UNAVAILABLE |\ CPU_FTR_NODSISRALIGN |\ CPU_FTR_NOEXECUTE |\ @@ -84,6 +83,7 @@ static int hv_mode; static struct { u64 lpcr; + u64 lpcr_clear; u64 hfscr; u64 fscr; } system_registers; @@ -92,6 +92,8 @@ static void (*init_pmu_registers)(void); static void __restore_cpu_cpufeatures(void) { + u64 lpcr; + /* * LPCR is restored by the power on engine already. It can be changed * after early init e.g., by radix enable, and we have no unified API @@ -104,8 +106,10 @@ static void __restore_cpu_cpufeatures(void) * The best we can do to accommodate secondary boot and idle restore * for now is "or" LPCR with existing. */ - - mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR)); + lpcr = mfspr(SPRN_LPCR); + lpcr |= system_registers.lpcr; + lpcr &= ~system_registers.lpcr_clear; + mtspr(SPRN_LPCR, lpcr); if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); @@ -325,8 +329,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) { u64 lpcr; + system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR); lpcr = mfspr(SPRN_LPCR); - lpcr &= ~LPCR_ISL; + lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); mtspr(SPRN_LPCR, lpcr); cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; @@ -590,6 +595,8 @@ static struct dt_cpu_feature_match __initdata {"virtual-page-class-key-protection", feat_enable, 0}, {"transactional-memory", feat_enable_tm, CPU_FTR_TM}, {"transactional-memory-v3", feat_enable_tm, 0}, + {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST}, + {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG}, {"idle-nap", feat_enable_idle_nap, 0}, {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0}, {"idle-stop", feat_enable_idle_stop, 0}, @@ -707,11 +714,28 @@ static __init void cpufeatures_cpu_quirks(void) */ if ((version & 0xffffff00) == 0x004e0100) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; + else if ((version & 0xffffefff) == 0x004e0200) + ; /* DD2.0 has no feature flag */ else if ((version & 0xffffefff) == 0x004e0201) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; + else if ((version & 0xffffefff) == 0x004e0202) { + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST; + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG; + cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; + } else /* DD2.1 and up have DD2_1 */ + cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; - if ((version & 0xffff0000) == 0x004e0000) + if ((version & 0xffff0000) == 0x004e0000) { + cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR); cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + } + + /* + * PKEY was not in the initial base or feature node + * specification, but it should become optional in the next + * cpu feature version sequence. + */ + cur_cpu_spec->cpu_features |= CPU_FTR_PKEY; } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 2b9df0040d6b..bc640e4c5ca5 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -394,9 +394,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* Check PHB state */ ret = eeh_ops->get_state(phb_pe, NULL); if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { ret = 0; goto out; } @@ -433,7 +431,6 @@ out: int eeh_dev_check_failure(struct eeh_dev *edev) { int ret; - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); unsigned long flags; struct device_node *dn; struct pci_dev *dev; @@ -525,8 +522,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * state, PE is in good state. */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || - ((ret & active_flags) == active_flags)) { + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; @@ -546,8 +542,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) /* Frozen parent PE ? */ ret = eeh_ops->get_state(parent_pe, NULL); - if (ret > 0 && - (ret & active_flags) != active_flags) + if (ret > 0 && !eeh_state_active(ret)) pe = parent_pe; /* Next parent level */ @@ -888,7 +883,6 @@ static void *eeh_set_dev_freset(void *data, void *flag) */ int eeh_pe_reset_full(struct eeh_pe *pe) { - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); int type = EEH_RESET_HOT; unsigned int freset = 0; @@ -919,7 +913,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe) /* Wait until the PE is in a functioning state */ state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((state & active_flags) == active_flags) + if (eeh_state_active(state)) break; if (state < 0) { @@ -1352,16 +1346,15 @@ static int eeh_pe_change_owner(struct eeh_pe *pe) struct eeh_dev *edev, *tmp; struct pci_dev *pdev; struct pci_device_id *id; - int flags, ret; + int ret; /* Check PE state */ - flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); ret = eeh_ops->get_state(pe, NULL); if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) return 0; /* Unfrozen PE, nothing to do */ - if ((ret & flags) == flags) + if (eeh_state_active(ret)) return 0; /* Frozen PE, check if it needs PE level reset */ diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index d4cc26618809..201943d54a6e 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -84,8 +84,7 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr) * @addr: mmio (PIO) phys address or i/o port number * * Given an mmio phys address, or a port number, find a pci device - * that implements this address. Be sure to pci_dev_put the device - * when finished. I/O port numbers are assumed to be offset + * that implements this address. I/O port numbers are assumed to be offset * from zero (that is, they do *not* have pci_io_addr added in). * It is safe to call this function within an interrupt. */ diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 0c0b66fc5bfb..b8a329f04814 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -207,18 +207,18 @@ static void *eeh_report_error(void *data, void *userdata) if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; + + device_lock(&dev->dev); dev->error_state = pci_channel_io_frozen; driver = eeh_pcid_get(dev); - if (!driver) return NULL; + if (!driver) goto out_no_dev; eeh_disable_irq(dev); if (!driver->err_handler || - !driver->err_handler->error_detected) { - eeh_pcid_put(dev); - return NULL; - } + !driver->err_handler->error_detected) + goto out; rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); @@ -227,8 +227,12 @@ static void *eeh_report_error(void *data, void *userdata) if (*res == PCI_ERS_RESULT_NONE) *res = rc; edev->in_error = true; - eeh_pcid_put(dev); pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); + +out: + eeh_pcid_put(dev); +out_no_dev: + device_unlock(&dev->dev); return NULL; } @@ -251,15 +255,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; + device_lock(&dev->dev); driver = eeh_pcid_get(dev); - if (!driver) return NULL; + if (!driver) goto out_no_dev; if (!driver->err_handler || !driver->err_handler->mmio_enabled || - (edev->mode & EEH_DEV_NO_HANDLER)) { - eeh_pcid_put(dev); - return NULL; - } + (edev->mode & EEH_DEV_NO_HANDLER)) + goto out; rc = driver->err_handler->mmio_enabled(dev); @@ -267,7 +270,10 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; +out: eeh_pcid_put(dev); +out_no_dev: + device_unlock(&dev->dev); return NULL; } @@ -290,20 +296,20 @@ static void *eeh_report_reset(void *data, void *userdata) if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; + + device_lock(&dev->dev); dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); - if (!driver) return NULL; + if (!driver) goto out_no_dev; eeh_enable_irq(dev); if (!driver->err_handler || !driver->err_handler->slot_reset || (edev->mode & EEH_DEV_NO_HANDLER) || - (!edev->in_error)) { - eeh_pcid_put(dev); - return NULL; - } + (!edev->in_error)) + goto out; rc = driver->err_handler->slot_reset(dev); if ((*res == PCI_ERS_RESULT_NONE) || @@ -311,7 +317,10 @@ static void *eeh_report_reset(void *data, void *userdata) if (*res == PCI_ERS_RESULT_DISCONNECT && rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; +out: eeh_pcid_put(dev); +out_no_dev: + device_unlock(&dev->dev); return NULL; } @@ -362,10 +371,12 @@ static void *eeh_report_resume(void *data, void *userdata) if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; + + device_lock(&dev->dev); dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); - if (!driver) return NULL; + if (!driver) goto out_no_dev; was_in_error = edev->in_error; edev->in_error = false; @@ -375,18 +386,20 @@ static void *eeh_report_resume(void *data, void *userdata) !driver->err_handler->resume || (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { edev->mode &= ~EEH_DEV_NO_HANDLER; - eeh_pcid_put(dev); - return NULL; + goto out; } driver->err_handler->resume(dev); - eeh_pcid_put(dev); pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); +out: + eeh_pcid_put(dev); #ifdef CONFIG_PCI_IOV if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); #endif +out_no_dev: + device_unlock(&dev->dev); return NULL; } @@ -406,23 +419,26 @@ static void *eeh_report_failure(void *data, void *userdata) if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; + + device_lock(&dev->dev); dev->error_state = pci_channel_io_perm_failure; driver = eeh_pcid_get(dev); - if (!driver) return NULL; + if (!driver) goto out_no_dev; eeh_disable_irq(dev); if (!driver->err_handler || - !driver->err_handler->error_detected) { - eeh_pcid_put(dev); - return NULL; - } + !driver->err_handler->error_detected) + goto out; driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); - eeh_pcid_put(dev); pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); +out: + eeh_pcid_put(dev); +out_no_dev: + device_unlock(&dev->dev); return NULL; } @@ -619,17 +635,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) /** * eeh_reset_device - Perform actual reset of a pci slot + * @driver_eeh_aware: Does the device's driver provide EEH support? * @pe: EEH PE * @bus: PCI bus corresponding to the isolcated slot + * @rmv_data: Optional, list to record removed devices * * This routine must be called to do reset on the indicated PE. * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, - struct eeh_rmv_data *rmv_data) + struct eeh_rmv_data *rmv_data, + bool driver_eeh_aware) { - struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); time64_t tstamp; int cnt, rc; struct eeh_dev *edev; @@ -645,16 +663,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * into pci_hp_add_devices(). */ eeh_pe_state_mark(pe, EEH_PE_KEEP); - if (bus) { - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - } else { - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); - } - } else if (frozen_bus) { + if (driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); + } else { + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); } /* @@ -689,8 +703,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * the device up before the scripts have taken it down, * potentially weird things happen. */ - if (bus) { - pr_info("EEH: Sleep 5s ahead of complete hotplug\n"); + if (!driver_eeh_aware || rmv_data->removed) { + pr_info("EEH: Sleep 5s ahead of %s hotplug\n", + (driver_eeh_aware ? "partial" : "complete")); ssleep(5); /* @@ -703,19 +718,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + if (!driver_eeh_aware) + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); } - } else if (frozen_bus && rmv_data->removed) { - pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); - ssleep(5); - - edev = list_first_entry(&pe->edevs, struct eeh_dev, list); - eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) - eeh_add_virt_device(edev, NULL); - else - pci_hp_add_devices(frozen_bus); } eeh_pe_state_clear(pe, EEH_PE_KEEP); @@ -733,28 +739,42 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /** * eeh_handle_normal_event - Handle EEH events on a specific PE - * @pe: EEH PE + * @pe: EEH PE - which should not be used after we return, as it may + * have been invalidated. * * Attempts to recover the given PE. If recovery fails or the PE has failed * too many times, remove the PE. * - * Returns true if @pe should no longer be used, else false. + * While PHB detects address or data parity errors on particular PCI + * slot, the associated PE will be frozen. Besides, DMA's occurring + * to wild addresses (which usually happen due to bugs in device + * drivers or in PCI adapter firmware) can cause EEH error. #SERR, + * #PERR or other misc PCI-related errors also can trigger EEH errors. + * + * Recovery process consists of unplugging the device driver (which + * generated hotplug events to userspace), then issuing a PCI #RST to + * the device, then reconfiguring the PCI config space for all bridges + * & devices under this slot, and then finally restarting the device + * drivers (which cause a second set of hotplug events to go out to + * userspace). */ -static bool eeh_handle_normal_event(struct eeh_pe *pe) +void eeh_handle_normal_event(struct eeh_pe *pe) { - struct pci_bus *frozen_bus; + struct pci_bus *bus; struct eeh_dev *edev, *tmp; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; - frozen_bus = eeh_pe_bus_get(pe); - if (!frozen_bus) { + bus = eeh_pe_bus_get(pe); + if (!bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); - return false; + return; } + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + eeh_pe_update_time_stamp(pe); pe->freeze_count++; if (pe->freeze_count > eeh_max_freezes) { @@ -806,7 +826,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe) */ if (result == PCI_ERS_RESULT_NONE) { pr_info("EEH: Reset with hotplug activity\n"); - rc = eeh_reset_device(pe, frozen_bus, NULL); + rc = eeh_reset_device(pe, bus, NULL, false); if (rc) { pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); @@ -858,7 +878,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { pr_info("EEH: Reset without hotplug activity\n"); - rc = eeh_reset_device(pe, NULL, &rmv_data); + rc = eeh_reset_device(pe, bus, &rmv_data, true); if (rc) { pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); @@ -891,7 +911,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - return false; + goto final; hard_fail: /* @@ -916,23 +936,21 @@ hard_fail: * all removed devices correctly to avoid access * the their PCI config any more. */ - if (frozen_bus) { - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - - pci_lock_rescan_remove(); - pci_hp_remove_devices(frozen_bus); - pci_unlock_rescan_remove(); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - /* The passed PE should no longer be used */ - return true; - } + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); + /* The passed PE should no longer be used */ + return; } - return false; +final: + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } /** @@ -942,7 +960,7 @@ hard_fail: * specific PE. Iterates through possible failures and handles them as * necessary. */ -static void eeh_handle_special_event(void) +void eeh_handle_special_event(void) { struct eeh_pe *pe, *phb_pe; struct pci_bus *bus; @@ -1005,15 +1023,7 @@ static void eeh_handle_special_event(void) */ if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { - /* - * eeh_handle_normal_event() can make the PE stale if it - * determines that the PE cannot possibly be recovered. - * Don't modify the PE state if that's the case. - */ - if (eeh_handle_normal_event(pe)) - continue; - - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_handle_normal_event(pe); } else { pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { @@ -1049,28 +1059,3 @@ static void eeh_handle_special_event(void) break; } while (rc != EEH_NEXT_ERR_NONE); } - -/** - * eeh_handle_event - Reset a PCI device after hard lockup. - * @pe: EEH PE - * - * While PHB detects address or data parity errors on particular PCI - * slot, the associated PE will be frozen. Besides, DMA's occurring - * to wild addresses (which usually happen due to bugs in device - * drivers or in PCI adapter firmware) can cause EEH error. #SERR, - * #PERR or other misc PCI-related errors also can trigger EEH errors. - * - * Recovery process consists of unplugging the device driver (which - * generated hotplug events to userspace), then issuing a PCI #RST to - * the device, then reconfiguring the PCI config space for all bridges - * & devices under this slot, and then finally restarting the device - * drivers (which cause a second set of hotplug events to go out to - * userspace). - */ -void eeh_handle_event(struct eeh_pe *pe) -{ - if (pe) - eeh_handle_normal_event(pe); - else - eeh_handle_special_event(); -} diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index accbf8b5fd46..61c9356bf9c9 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -73,7 +73,6 @@ static int eeh_event_handler(void * dummy) /* We might have event without binding PE */ pe = event->pe; if (pe) { - eeh_pe_state_mark(pe, EEH_PE_RECOVERING); if (pe->type & EEH_PE_PHB) pr_info("EEH: Detected error on PHB#%x\n", pe->phb->global_number); @@ -81,10 +80,9 @@ static int eeh_event_handler(void * dummy) pr_info("EEH: Detected PCI bus error on " "PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); - eeh_handle_event(pe); - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_handle_normal_event(pe); } else { - eeh_handle_event(NULL); + eeh_handle_special_event(); } kfree(event); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2cb5109a7ea3..51695608c68b 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -545,7 +545,7 @@ _GLOBAL(_switch) /* Cancel all explict user streams as they will have no use after context * switch and will stop the HW from creating streams itself */ - DCBT_STOP_ALL_STREAM_IDS(r6) + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6) #endif addi r6,r4,-THREAD /* Convert THREAD to 'current' */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1ecfd8ffb098..ae6a849db60b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -139,6 +139,21 @@ EXC_COMMON_BEGIN(system_reset_idle_common) b pnv_powersave_wakeup #endif +/* + * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does + * the right thing. We do not want to reconcile because that goes + * through irq tracing which we don't want in NMI. + * + * Save PACAIRQHAPPENED because some code will do a hard disable + * (e.g., xmon). So we want to restore this back to where it was + * when we return. DAR is unused in the stack, so save it there. + */ +#define ADD_RECONCILE_NMI \ + li r10,IRQS_ALL_DISABLED; \ + stb r10,PACAIRQSOFTMASK(r13); \ + lbz r10,PACAIRQHAPPENED(r13); \ + std r10,_DAR(r1) + EXC_COMMON_BEGIN(system_reset_common) /* * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able @@ -157,16 +172,56 @@ EXC_COMMON_BEGIN(system_reset_common) subi r1,r1,INT_FRAME_SIZE EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100, system_reset, system_reset_exception, - ADD_NVGPRS;ADD_RECONCILE) + ADD_NVGPRS;ADD_RECONCILE_NMI) + + /* This (and MCE) can be simplified with mtmsrd L=1 */ + /* Clear MSR_RI before setting SRR0 and SRR1. */ + li r0,MSR_RI + mfmsr r9 + andc r9,r9,r0 + mtmsrd r9,1 /* - * The stack is no longer in use, decrement in_nmi. + * MSR_RI is clear, now we can decrement paca->in_nmi. */ lhz r10,PACA_IN_NMI(r13) subi r10,r10,1 sth r10,PACA_IN_NMI(r13) - b ret_from_except + /* + * Restore soft mask settings. + */ + ld r10,_DAR(r1) + stb r10,PACAIRQHAPPENED(r13) + ld r10,SOFTE(r1) + stb r10,PACAIRQSOFTMASK(r13) + + /* + * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP. + * Should share common bits... + */ + + /* Move original SRR0 and SRR1 into the respective regs */ + ld r9,_MSR(r1) + mtspr SPRN_SRR1,r9 + ld r3,_NIP(r1) + mtspr SPRN_SRR0,r3 + ld r9,_CTR(r1) + mtctr r9 + ld r9,_XER(r1) + mtxer r9 + ld r9,_LINK(r1) + mtlr r9 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + REST_GPR(10, r1) + ld r11,_CCR(r1) + mtcr r11 + REST_GPR(11, r1) + REST_2GPRS(12, r1) + /* restore original r1. */ + ld r1,GPR1(r1) + RFI_TO_USER_OR_KERNEL #ifdef CONFIG_PPC_PSERIES /* @@ -621,7 +676,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ mtlr r10 - beq- 8f /* if bad address, make full stack frame */ + /* + * Large address, check whether we have to allocate new contexts. + */ + beq- 8f bne- cr5,2f /* if unrecoverable exception, oops */ @@ -629,14 +687,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) bne cr4,1f /* returning to kernel */ -.machine push -.machine "power4" mtcrf 0x80,r9 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ mtcrf 0x02,r9 /* I/D indication is in cr6 */ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop RESTORE_CTR(r9, PACA_EXSLB) RESTORE_PPR_PACA(PACA_EXSLB, r9) @@ -649,14 +704,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) RFI_TO_USER b . /* prevent speculative execution */ 1: -.machine push -.machine "power4" mtcrf 0x80,r9 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ mtcrf 0x02,r9 /* I/D indication is in cr6 */ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop RESTORE_CTR(r9, PACA_EXSLB) RESTORE_PPR_PACA(PACA_EXSLB, r9) @@ -685,7 +737,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mr r3,r12 mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 - LOAD_HANDLER(r10,bad_addr_slb) + LOAD_HANDLER(r10, large_addr_slb) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 @@ -700,7 +752,7 @@ EXC_COMMON_BEGIN(unrecov_slb) bl unrecoverable_exception b 1b -EXC_COMMON_BEGIN(bad_addr_slb) +EXC_COMMON_BEGIN(large_addr_slb) EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) RECONCILE_IRQ_STATE(r10, r11) ld r3, PACA_EXSLB+EX_DAR(r13) @@ -710,7 +762,7 @@ EXC_COMMON_BEGIN(bad_addr_slb) std r10, _TRAP(r1) 2: bl save_nvgprs addi r3, r1, STACK_FRAME_OVERHEAD - bl slb_miss_bad_addr + bl slb_miss_large_addr b ret_from_except EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) @@ -1273,7 +1325,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) bne+ denorm_assist #endif - KVMTEST_PR(0x1500) + KVMTEST_HV(0x1500) EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) @@ -1285,7 +1337,7 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100) EXC_VIRT_NONE(0x5500, 0x100) #endif -TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) +TRAMP_KVM_HV(PACA_EXGEN, 0x1500) #ifdef CONFIG_PPC_DENORMALISATION TRAMP_REAL_BEGIN(denorm_assist) @@ -1466,7 +1518,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 - DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync @@ -1506,7 +1558,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 - DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index a61151a6ea5e..6eca15f25c73 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -392,19 +392,20 @@ generic_secondary_common_init: * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. */ - LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ - ld r13,0(r13) /* Get base vaddr of paca array */ #ifndef CONFIG_SMP - addi r13,r13,PACA_SIZE /* know r13 if used accidentally */ b kexec_wait /* wait for next kernel if !SMP */ #else + LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointe */ + ld r8,0(r8) /* Get base vaddr of array */ LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ lwz r7,0(r7) /* also the max paca allocated */ li r5,0 /* logical cpu id */ -1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ +1: + sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */ + ldx r13,r9,r8 /* r13 = paca_ptrs[cpu id] */ + lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ beq 2f - addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ addi r5,r5,1 cmpw r5,r7 /* Check if more pacas exist */ blt 1b @@ -756,10 +757,10 @@ _GLOBAL(pmac_secondary_start) mtmsrd r3 /* RI on */ /* Set up a paca value for this processor. */ - LOAD_REG_ADDR(r4,paca) /* Load paca pointer */ - ld r4,0(r4) /* Get base vaddr of paca array */ - mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ - add r13,r13,r4 /* for this processor. */ + LOAD_REG_ADDR(r4,paca_ptrs) /* Load paca pointer */ + ld r4,0(r4) /* Get base vaddr of paca_ptrs array */ + sldi r5,r24,3 /* get paca_ptrs[] index from cpu id */ + ldx r13,r5,r4 /* r13 = paca_ptrs[cpu id] */ SET_PACA(r13) /* Save vaddr of paca in an SPRG*/ /* Mark interrupts soft and hard disabled (they might be enabled diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 53b9c1dfd7d9..4c1012b80d3b 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -33,6 +33,7 @@ #include <asm/hw_breakpoint.h> #include <asm/processor.h> #include <asm/sstep.h> +#include <asm/debug.h> #include <linux/uaccess.h> /* @@ -171,6 +172,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the * 'symbolsize' should satisfy the check below. */ + if (!ppc_breakpoint_available()) + return -ENODEV; length_max = 8; /* DABR */ if (cpu_has_feature(CPU_FTR_DAWR)) { length_max = 512 ; /* 64 doublewords */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 01e1c1997893..79d005445c6c 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -325,12 +325,6 @@ enter_winkle: * r3 - PSSCR value corresponding to the requested stop state. */ power_enter_stop: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_IDLE - /* DO THIS IN REAL MODE! See comment above. */ - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif /* * Check if we are executing the lite variant with ESL=EC=0 */ @@ -339,6 +333,7 @@ power_enter_stop: bne .Lhandle_esl_ec_set PPC_STOP li r3,0 /* Since we didn't lose state, return 0 */ + std r3, PACA_REQ_PSSCR(r13) /* * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so @@ -427,13 +422,49 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ /* * Entered with MSR[EE]=0 and no soft-masked interrupts pending. * r3 contains desired PSSCR register value. + * + * Offline (CPU unplug) case also must notify KVM that the CPU is + * idle. */ +_GLOBAL(power9_offline_stop) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * Tell KVM we're entering idle. + * This does not have to be done in real mode because the P9 MMU + * is independent per-thread. Some steppings share radix/hash mode + * between threads, but in that case KVM has a barrier sync in real + * mode before and after switching between radix and hash. + */ + li r4,KVM_HWTHREAD_IN_IDLE + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + /* fall through */ + _GLOBAL(power9_idle_stop) std r3, PACA_REQ_PSSCR(r13) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +BEGIN_FTR_SECTION + sync + lwz r5, PACA_DONT_STOP(r13) + cmpwi r5, 0 + bne 1f +END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) +#endif mtspr SPRN_PSSCR,r3 LOAD_REG_ADDR(r4,power_enter_stop) b pnv_powersave_common /* No return */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +1: + /* + * We get here when TM / thread reconfiguration bug workaround + * code wants to get the CPU into SMT4 mode, and therefore + * we are being asked not to stop. + */ + li r3, 0 + std r3, PACA_REQ_PSSCR(r13) + blr /* return 0 for wakeup cause / SRR1 value */ +#endif /* * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1, @@ -520,6 +551,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) mr r3,r12 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + lbz r0,HSTATE_HWTHREAD_STATE(r13) + cmpwi r0,KVM_HWTHREAD_IN_KERNEL + beq 1f li r0,KVM_HWTHREAD_IN_KERNEL stb r0,HSTATE_HWTHREAD_STATE(r13) /* Order setting hwthread_state vs. testing hwthread_req */ @@ -584,6 +618,8 @@ FTR_SECTION_ELSE_NESTED(71) mfspr r5, SPRN_PSSCR rldicl r5,r5,4,60 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71) + li r0, 0 /* clear requested_psscr to say we're awake */ + std r0, PACA_REQ_PSSCR(r13) cmpd cr4,r5,r4 bge cr4,pnv_wakeup_tb_loss /* returns to caller */ @@ -834,6 +870,8 @@ BEGIN_FTR_SECTION mtspr SPRN_PTCR,r4 ld r4,_RPR(r1) mtspr SPRN_RPR,r4 + ld r4,_AMOR(r1) + mtspr SPRN_AMOR,r4 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r4,_TSCR(r1) diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index aab456ed2a00..5ac84efc6ede 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -45,12 +45,32 @@ u64 ioread64(void __iomem *addr) { return readq(addr); } +u64 ioread64_lo_hi(void __iomem *addr) +{ + return readq(addr); +} +u64 ioread64_hi_lo(void __iomem *addr) +{ + return readq(addr); +} u64 ioread64be(void __iomem *addr) { return readq_be(addr); } +u64 ioread64be_lo_hi(void __iomem *addr) +{ + return readq_be(addr); +} +u64 ioread64be_hi_lo(void __iomem *addr) +{ + return readq_be(addr); +} EXPORT_SYMBOL(ioread64); +EXPORT_SYMBOL(ioread64_lo_hi); +EXPORT_SYMBOL(ioread64_hi_lo); EXPORT_SYMBOL(ioread64be); +EXPORT_SYMBOL(ioread64be_lo_hi); +EXPORT_SYMBOL(ioread64be_hi_lo); #endif /* __powerpc64__ */ void iowrite8(u8 val, void __iomem *addr) @@ -83,12 +103,32 @@ void iowrite64(u64 val, void __iomem *addr) { writeq(val, addr); } +void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} void iowrite64be(u64 val, void __iomem *addr) { writeq_be(val, addr); } +void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} EXPORT_SYMBOL(iowrite64); +EXPORT_SYMBOL(iowrite64_lo_hi); +EXPORT_SYMBOL(iowrite64_hi_lo); EXPORT_SYMBOL(iowrite64be); +EXPORT_SYMBOL(iowrite64be_lo_hi); +EXPORT_SYMBOL(iowrite64be_hi_lo); #endif /* __powerpc64__ */ /* diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index ca5d5a081e75..e4c5bf33970b 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -455,29 +455,33 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) } kretprobe_assert(ri, orig_ret_address, trampoline_address); - regs->nip = orig_ret_address; + /* - * Make LR point to the orig_ret_address. - * When the 'nop' inside the kretprobe_trampoline - * is optimized, we can do a 'blr' after executing the - * detour buffer code. + * We get here through one of two paths: + * 1. by taking a trap -> kprobe_handler() -> here + * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here + * + * When going back through (1), we need regs->nip to be setup properly + * as it is used to determine the return address from the trap. + * For (2), since nip is not honoured with optprobes, we instead setup + * the link register properly so that the subsequent 'blr' in + * kretprobe_trampoline jumps back to the right instruction. + * + * For nip, we should set the address to the previous instruction since + * we end up emulating it in kprobe_handler(), which increments the nip + * again. */ + regs->nip = orig_ret_address - 4; regs->link = orig_ret_address; - reset_current_kprobe(); kretprobe_hash_unlock(current, &flags); - preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; + + return 0; } NOKPROBE_SYMBOL(trampoline_probe_handler); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 49d34d7271e7..1044bf15d5ed 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -168,24 +168,25 @@ static void kexec_prepare_cpus_wait(int wait_state) * are correctly onlined. If somehow we start a CPU on boot with RTAS * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in * time, the boot CPU will timeout. If it does eventually execute - * stuff, the secondary will start up (paca[].cpu_start was written) and - * get into a peculiar state. If the platform supports - * smp_ops->take_timebase(), the secondary CPU will probably be spinning - * in there. If not (i.e. pseries), the secondary will continue on and - * try to online itself/idle/etc. If it survives that, we need to find - * these possible-but-not-online-but-should-be CPUs and chaperone them - * into kexec_smp_wait(). + * stuff, the secondary will start up (paca_ptrs[]->cpu_start was + * written) and get into a peculiar state. + * If the platform supports smp_ops->take_timebase(), the secondary CPU + * will probably be spinning in there. If not (i.e. pseries), the + * secondary will continue on and try to online itself/idle/etc. If it + * survives that, we need to find these + * possible-but-not-online-but-should-be CPUs and chaperone them into + * kexec_smp_wait(). */ for_each_online_cpu(i) { if (i == my_cpu) continue; - while (paca[i].kexec_state < wait_state) { + while (paca_ptrs[i]->kexec_state < wait_state) { barrier(); if (i != notified) { printk(KERN_INFO "kexec: waiting for cpu %d " "(physical %d) to enter %i state\n", - i, paca[i].hw_cpu_id, wait_state); + i, paca_ptrs[i]->hw_cpu_id, wait_state); notified = i; } } @@ -322,18 +323,24 @@ void default_machine_kexec(struct kimage *image) kexec_stack.thread_info.cpu = current_thread_info()->cpu; /* We need a static PACA, too; copy this CPU's PACA over and switch to - * it. Also poison per_cpu_offset to catch anyone using non-static - * data. + * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using + * non-static data. */ memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; - paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) - - kexec_paca.paca_index; +#ifdef CONFIG_PPC_PSERIES + kexec_paca.lppaca_ptr = NULL; +#endif + paca_ptrs[kexec_paca.paca_index] = &kexec_paca; + setup_paca(&kexec_paca); - /* XXX: If anyone does 'dynamic lppacas' this will also need to be - * switched to a static version! + /* + * The lppaca should be unregistered at this point so the HV won't + * touch it. In the case of a crash, none of the lppacas are + * unregistered so there is not much we can do about it here. */ + /* * On Book3S, the copy must happen with the MMU off if we are either * using Radix page tables or we are not in an LPAR since we can diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index e4395f937d63..45e0b7d5f200 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -43,7 +43,7 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, /* We don't support crash kernels yet. */ if (image->type == KEXEC_TYPE_CRASH) - return -ENOTSUPP; + return -EOPNOTSUPP; for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { fops = kexec_file_loaders[i]; diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 3280953a82cf..fa267e94090a 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -144,44 +144,6 @@ _GLOBAL_TOC(flush_dcache_range) blr EXPORT_SYMBOL(flush_dcache_range) -/* - * Like above, but works on non-mapped physical addresses. - * Use only for non-LPAR setups ! It also assumes real mode - * is cacheable. Used for flushing out the DART before using - * it as uncacheable memory - * - * flush_dcache_phys_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start to stop-1 inclusive - */ -_GLOBAL(flush_dcache_phys_range) - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mfmsr r5 /* Disable MMU Data Relocation */ - ori r0,r5,MSR_DR - xori r0,r0,MSR_DR - sync - mtmsr r0 - sync - isync - mtctr r8 -0: dcbst 0,r6 - add r6,r6,r7 - bdnz 0b - sync - isync - mtmsr r5 /* Re-enable MMU Data Relocation */ - sync - isync - blr - _GLOBAL(flush_inval_dcache_range) ld r10,PPC64_CACHES@toc(r2) lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 496d6393bd41..ba681dac7b46 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -207,8 +207,7 @@ int nvram_write_os_partition(struct nvram_os_partition *part, tmp_index = part->index; - rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), - &tmp_index); + rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index); if (rc <= 0) { pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); return rc; @@ -244,9 +243,7 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff, tmp_index = part->index; if (part->os_partition) { - rc = ppc_md.nvram_read((char *)&info, - sizeof(struct err_log_info), - &tmp_index); + rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index); if (rc <= 0) { pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); return rc; @@ -1173,7 +1170,7 @@ int __init nvram_scan_partitions(void) "detected: 0-length partition\n"); goto out; } - tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); + tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL); err = -ENOMEM; if (!tmp_part) { printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n"); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 95ffedf14885..0ee3e6d50f28 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -20,116 +20,105 @@ #include "setup.h" -#ifdef CONFIG_PPC_BOOK3S +#ifndef CONFIG_SMP +#define boot_cpuid 0 +#endif + +static void *__init alloc_paca_data(unsigned long size, unsigned long align, + unsigned long limit, int cpu) +{ + unsigned long pa; + int nid; + + /* + * boot_cpuid paca is allocated very early before cpu_to_node is up. + * Set bottom-up mode, because the boot CPU should be on node-0, + * which will put its paca in the right place. + */ + if (cpu == boot_cpuid) { + nid = -1; + memblock_set_bottom_up(true); + } else { + nid = early_cpu_to_node(cpu); + } + + pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE); + if (!pa) { + pa = memblock_alloc_base(size, align, limit); + if (!pa) + panic("cannot allocate paca data"); + } + + if (cpu == boot_cpuid) + memblock_set_bottom_up(false); + + return __va(pa); +} + +#ifdef CONFIG_PPC_PSERIES /* - * The structure which the hypervisor knows about - this structure - * should not cross a page boundary. The vpa_init/register_vpa call - * is now known to fail if the lppaca structure crosses a page - * boundary. The lppaca is also used on POWER5 pSeries boxes. - * The lppaca is 640 bytes long, and cannot readily - * change since the hypervisor knows its layout, so a 1kB alignment - * will suffice to ensure that it doesn't cross a page boundary. + * See asm/lppaca.h for more detail. + * + * lppaca structures must must be 1kB in size, L1 cache line aligned, + * and not cross 4kB boundary. A 1kB size and 1kB alignment will satisfy + * these requirements. */ -struct lppaca lppaca[] = { - [0 ... (NR_LPPACAS-1)] = { +static inline void init_lppaca(struct lppaca *lppaca) +{ + BUILD_BUG_ON(sizeof(struct lppaca) != 640); + + *lppaca = (struct lppaca) { .desc = cpu_to_be32(0xd397d781), /* "LpPa" */ - .size = cpu_to_be16(sizeof(struct lppaca)), + .size = cpu_to_be16(0x400), .fpregs_in_use = 1, .slb_count = cpu_to_be16(64), .vmxregs_in_use = 0, - .page_ins = 0, - }, + .page_ins = 0, }; }; -static struct lppaca *extra_lppacas; -static long __initdata lppaca_size; - -static void __init allocate_lppacas(int nr_cpus, unsigned long limit) -{ - if (nr_cpus <= NR_LPPACAS) - return; - - lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) * - (nr_cpus - NR_LPPACAS)); - extra_lppacas = __va(memblock_alloc_base(lppaca_size, - PAGE_SIZE, limit)); -} - -static struct lppaca * __init new_lppaca(int cpu) +static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) { struct lppaca *lp; + size_t size = 0x400; - if (cpu < NR_LPPACAS) - return &lppaca[cpu]; + BUILD_BUG_ON(size < sizeof(struct lppaca)); + + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + return NULL; - lp = extra_lppacas + (cpu - NR_LPPACAS); - *lp = lppaca[0]; + lp = alloc_paca_data(size, 0x400, limit, cpu); + init_lppaca(lp); return lp; } - -static void __init free_lppacas(void) -{ - long new_size = 0, nr; - - if (!lppaca_size) - return; - nr = num_possible_cpus() - NR_LPPACAS; - if (nr > 0) - new_size = PAGE_ALIGN(nr * sizeof(struct lppaca)); - if (new_size >= lppaca_size) - return; - - memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size); - lppaca_size = new_size; -} - -#else - -static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { } -static inline void free_lppacas(void) { } - #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_BOOK3S_64 /* - * 3 persistent SLBs are registered here. The buffer will be zero + * 3 persistent SLBs are allocated here. The buffer will be zero * initially, hence will all be invaild until we actually write them. * * If you make the number of persistent SLB entries dynamic, please also * update PR KVM to flush and restore them accordingly. */ -static struct slb_shadow * __initdata slb_shadow; - -static void __init allocate_slb_shadows(int nr_cpus, int limit) -{ - int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus); - - if (early_radix_enabled()) - return; - - slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit)); - memset(slb_shadow, 0, size); -} - -static struct slb_shadow * __init init_slb_shadow(int cpu) +static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) { struct slb_shadow *s; - if (early_radix_enabled()) - return NULL; - - s = &slb_shadow[cpu]; + if (cpu != boot_cpuid) { + /* + * Boot CPU comes here before early_radix_enabled + * is parsed (e.g., for disable_radix). So allocate + * always and this will be fixed up in free_unused_pacas. + */ + if (early_radix_enabled()) + return NULL; + } - /* - * When we come through here to initialise boot_paca, the slb_shadow - * buffers are not allocated yet. That's OK, we'll get one later in - * boot, but make sure we don't corrupt memory at 0. - */ - if (!slb_shadow) - return NULL; + s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu); + memset(s, 0, sizeof(*s)); s->persistent = cpu_to_be32(SLB_NUM_BOLTED); s->buffer_length = cpu_to_be32(sizeof(*s)); @@ -137,10 +126,6 @@ static struct slb_shadow * __init init_slb_shadow(int cpu) return s; } -#else /* !CONFIG_PPC_BOOK3S_64 */ - -static void __init allocate_slb_shadows(int nr_cpus, int limit) { } - #endif /* CONFIG_PPC_BOOK3S_64 */ /* The Paca is an array with one entry per processor. Each contains an @@ -152,14 +137,15 @@ static void __init allocate_slb_shadows(int nr_cpus, int limit) { } * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -struct paca_struct *paca; -EXPORT_SYMBOL(paca); +struct paca_struct **paca_ptrs __read_mostly; +EXPORT_SYMBOL(paca_ptrs); void __init initialise_paca(struct paca_struct *new_paca, int cpu) { -#ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = new_lppaca(cpu); -#else +#ifdef CONFIG_PPC_PSERIES + new_paca->lppaca_ptr = NULL; +#endif +#ifdef CONFIG_PPC_BOOK3E new_paca->kernel_pgd = swapper_pg_dir; #endif new_paca->lock_token = 0x8000; @@ -173,7 +159,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->__current = &init_task; new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; #ifdef CONFIG_PPC_BOOK3S_64 - new_paca->slb_shadow_ptr = init_slb_shadow(cpu); + new_paca->slb_shadow_ptr = NULL; #endif #ifdef CONFIG_PPC_BOOK3E @@ -203,12 +189,25 @@ void setup_paca(struct paca_struct *new_paca) } -static int __initdata paca_size; +static int __initdata paca_nr_cpu_ids; +static int __initdata paca_ptrs_size; +static int __initdata paca_struct_size; + +void __init allocate_paca_ptrs(void) +{ + paca_nr_cpu_ids = nr_cpu_ids; + + paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; + paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0)); + memset(paca_ptrs, 0x88, paca_ptrs_size); +} -void __init allocate_pacas(void) +void __init allocate_paca(int cpu) { u64 limit; - int cpu; + struct paca_struct *paca; + + BUG_ON(cpu >= paca_nr_cpu_ids); #ifdef CONFIG_PPC_BOOK3S_64 /* @@ -220,40 +219,44 @@ void __init allocate_pacas(void) limit = ppc64_rma_size; #endif - paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); - - paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); - memset(paca, 0, paca_size); - - printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n", - paca_size, nr_cpu_ids, paca); - - allocate_lppacas(nr_cpu_ids, limit); - - allocate_slb_shadows(nr_cpu_ids, limit); + paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES, + limit, cpu); + paca_ptrs[cpu] = paca; + memset(paca, 0, sizeof(struct paca_struct)); - /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - initialise_paca(&paca[cpu], cpu); + initialise_paca(paca, cpu); +#ifdef CONFIG_PPC_PSERIES + paca->lppaca_ptr = new_lppaca(cpu, limit); +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); +#endif + paca_struct_size += sizeof(struct paca_struct); } void __init free_unused_pacas(void) { - int new_size; - - new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); + int new_ptrs_size; - if (new_size >= paca_size) - return; + new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; + if (new_ptrs_size < paca_ptrs_size) + memblock_free(__pa(paca_ptrs) + new_ptrs_size, + paca_ptrs_size - new_ptrs_size); - memblock_free(__pa(paca) + new_size, paca_size - new_size); + paca_nr_cpu_ids = nr_cpu_ids; + paca_ptrs_size = new_ptrs_size; - printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", - paca_size - new_size); - - paca_size = new_size; +#ifdef CONFIG_PPC_BOOK3S_64 + if (early_radix_enabled()) { + /* Ugly fixup, see new_slb_shadow() */ + memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), + sizeof(struct slb_shadow)); + paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL; + } +#endif - free_lppacas(); + printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n", + paca_ptrs_size + paca_struct_size, nr_cpu_ids); } void copy_mm_to_paca(struct mm_struct *mm) @@ -265,7 +268,8 @@ void copy_mm_to_paca(struct mm_struct *mm) #ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm->context.slb_addr_limit); get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; - get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; + memcpy(&get_paca()->mm_ctx_low_slices_psize, + &context->low_slices_psize, sizeof(context->low_slices_psize)); memcpy(&get_paca()->mm_ctx_high_slices_psize, &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); #else /* CONFIG_PPC_MM_SLICES */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1738c4127b32..1237f13fed51 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -173,7 +173,7 @@ void __msr_check_and_clear(unsigned long bits) EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU -void __giveup_fpu(struct task_struct *tsk) +static void __giveup_fpu(struct task_struct *tsk) { unsigned long msr; @@ -556,7 +556,7 @@ void restore_math(struct pt_regs *regs) regs->msr = msr; } -void save_all(struct task_struct *tsk) +static void save_all(struct task_struct *tsk) { unsigned long usermsr; @@ -718,7 +718,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread) { thread->hw_brk.address = 0; thread->hw_brk.type = 0; - set_breakpoint(&thread->hw_brk); + if (ppc_breakpoint_available()) + set_breakpoint(&thread->hw_brk); } #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -815,9 +816,14 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); if (cpu_has_feature(CPU_FTR_DAWR)) + // Power8 or later set_dawr(brk); - else + else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + // Power7 or earlier set_dabr(brk); + else + // Shouldn't happen due to higher level checks + WARN_ON_ONCE(1); } void set_breakpoint(struct arch_hw_breakpoint *brk) @@ -827,6 +833,18 @@ void set_breakpoint(struct arch_hw_breakpoint *brk) preempt_enable(); } +/* Check if we have DAWR or DABR hardware */ +bool ppc_breakpoint_available(void) +{ + if (cpu_has_feature(CPU_FTR_DAWR)) + return true; /* POWER8 DAWR */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return false; /* POWER9 with DAWR disabled */ + /* DABR: Everything but POWER8 and POWER9 */ + return true; +} +EXPORT_SYMBOL_GPL(ppc_breakpoint_available); + #ifdef CONFIG_PPC64 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4dffef947b8a..9dbed488aba1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -291,11 +291,11 @@ static inline void identical_pvr_fixup(unsigned long node) static void __init check_cpu_feature_properties(unsigned long node) { - unsigned long i; + int i; struct feature_property *fp = feature_properties; const __be32 *prop; - for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) { + for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) { prop = of_get_flat_dt_prop(node, fp->name, NULL); if (prop && be32_to_cpup(prop) >= fp->min_value) { cur_cpu_spec->cpu_features |= fp->cpu_feature; @@ -365,7 +365,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, DBG("boot cpu: logical %d physical %d\n", found, be32_to_cpu(intserv[found_thread])); boot_cpuid = found; - set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); /* * PAPR defines "logical" PVR values for cpus that @@ -403,7 +402,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; else if (!dt_cpu_ftrs_in_use()) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; + allocate_paca(boot_cpuid); #endif + set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); return 0; } @@ -744,7 +745,7 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); - allocate_pacas(); + allocate_paca_ptrs(); DBG("Scanning CPUs ...\n"); @@ -874,5 +875,15 @@ EXPORT_SYMBOL(cpu_to_chip_id); bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { +#ifdef CONFIG_SMP + /* + * Early firmware scanning must use this rather than + * get_hard_smp_processor_id because we don't have pacas allocated + * until memory topology is discovered. + */ + if (cpu_to_phys_id != NULL) + return (int)phys_id == cpu_to_phys_id[cpu]; +#endif + return (int)phys_id == get_hard_smp_processor_id(cpu); } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index acf4b2e0530c..f9d6befb55a6 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -171,7 +171,7 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif -static bool __initdata prom_radix_disable; +static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); struct platform_support { bool hash_mmu; @@ -641,9 +641,19 @@ static void __init early_cmdline_parse(void) opt = strstr(prom_cmd_line, "disable_radix"); if (opt) { - prom_debug("Radix disabled from cmdline\n"); - prom_radix_disable = true; + opt += 13; + if (*opt && *opt == '=') { + bool val; + + if (kstrtobool(++opt, &val)) + prom_radix_disable = false; + else + prom_radix_disable = val; + } else + prom_radix_disable = true; } + if (prom_radix_disable) + prom_debug("Radix disabled from cmdline\n"); } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -1110,7 +1120,8 @@ static void __init prom_check_platform_support(void) } } - if (supported.radix_mmu && supported.radix_gtse) { + if (supported.radix_mmu && supported.radix_gtse && + IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { /* Radix preferred - but we require GTSE for now */ prom_debug("Asking for radix with GTSE\n"); ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); @@ -1809,16 +1820,8 @@ static void __init prom_initialize_tce_table(void) * size to 4 MB. This is enough to map 2GB of PCI DMA space. * By doing this, we avoid the pitfalls of trying to DMA to * MMIO space and the DMA alias hole. - * - * On POWER4, firmware sets the TCE region by assuming - * each TCE table is 8MB. Using this memory for anything - * else will impact performance, so we always allocate 8MB. - * Anton */ - if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p)) - minsize = 8UL << 20; - else - minsize = 4UL << 20; + minsize = 4UL << 20; /* Align to the greater of the align or size */ align = max(minalign, minsize); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 12640f7e726b..acb6b9226352 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -19,7 +19,7 @@ WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush _end enter_prom memcpy memset reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start -strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 +strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index ca72d7391d40..d23cf632edf0 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -41,6 +41,7 @@ #include <asm/switch_to.h> #include <asm/tm.h> #include <asm/asm-prototypes.h> +#include <asm/debug.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -2378,6 +2379,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, struct perf_event_attr attr; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #ifndef CONFIG_PPC_ADV_DEBUG_REGS + bool set_bp = true; struct arch_hw_breakpoint hw_brk; #endif @@ -2411,9 +2413,10 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, hw_brk.address = data & (~HW_BRK_TYPE_DABR); hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; hw_brk.len = 8; + set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); #ifdef CONFIG_HAVE_HW_BREAKPOINT bp = thread->ptrace_bps[0]; - if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) { + if (!set_bp) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; @@ -2450,6 +2453,9 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, return PTR_ERR(bp); } +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + if (set_bp && (!ppc_breakpoint_available())) + return -ENODEV; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ task->thread.hw_brk = hw_brk; #else /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -2904,6 +2910,9 @@ static long ppc_set_hwdebug(struct task_struct *child, if (child->thread.hw_brk.address) return -ENOSPC; + if (!ppc_breakpoint_available()) + return -ENODEV; + child->thread.hw_brk = brk; return 1; @@ -3052,7 +3061,10 @@ long arch_ptrace(struct task_struct *child, long request, #endif #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ dbginfo.num_instruction_bps = 0; - dbginfo.num_data_bps = 1; + if (ppc_breakpoint_available()) + dbginfo.num_data_bps = 1; + else + dbginfo.num_data_bps = 0; dbginfo.num_condition_regs = 0; #ifdef CONFIG_PPC64 dbginfo.data_bp_alignment = 8; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c new file mode 100644 index 000000000000..bab5a27ea805 --- /dev/null +++ b/arch/powerpc/kernel/security.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Security related flags and so on. +// +// Copyright 2018, Michael Ellerman, IBM Corporation. + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/seq_buf.h> + +#include <asm/security_features.h> + + +unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + bool thread_priv; + + thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); + + if (rfi_flush || thread_priv) { + struct seq_buf s; + seq_buf_init(&s, buf, PAGE_SIZE - 1); + + seq_buf_printf(&s, "Mitigation: "); + + if (rfi_flush) + seq_buf_printf(&s, "RFI Flush"); + + if (rfi_flush && thread_priv) + seq_buf_printf(&s, ", "); + + if (thread_priv) + seq_buf_printf(&s, "L1D private per thread"); + + seq_buf_printf(&s, "\n"); + + return s.len; + } + + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) +{ + bool bcs, ccd, ori; + struct seq_buf s; + + seq_buf_init(&s, buf, PAGE_SIZE - 1); + + bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); + ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); + ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31); + + if (bcs || ccd) { + seq_buf_printf(&s, "Mitigation: "); + + if (bcs) + seq_buf_printf(&s, "Indirect branch serialisation (kernel only)"); + + if (bcs && ccd) + seq_buf_printf(&s, ", "); + + if (ccd) + seq_buf_printf(&s, "Indirect branch cache disabled"); + } else + seq_buf_printf(&s, "Vulnerable"); + + if (ori) + seq_buf_printf(&s, ", ori31 speculation barrier enabled"); + + seq_buf_printf(&s, "\n"); + + return s.len; +} diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index d73ec518ef80..0af5c11b9e78 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -437,6 +437,8 @@ static void __init cpu_init_thread_core_maps(int tpc) } +u32 *cpu_to_phys_id = NULL; + /** * setup_cpu_maps - initialize the following cpu maps: * cpu_possible_mask @@ -463,6 +465,10 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); + cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32), + __alignof__(u32))); + memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32)); + for_each_node_by_type(dn, "cpu") { const __be32 *intserv; __be32 cpu_be; @@ -480,6 +486,7 @@ void __init smp_setup_cpu_maps(void) intserv = of_get_property(dn, "reg", &len); if (!intserv) { cpu_be = cpu_to_be32(cpu); + /* XXX: what is this? uninitialized?? */ intserv = &cpu_be; /* assume logical == phys */ len = 4; } @@ -499,8 +506,8 @@ void __init smp_setup_cpu_maps(void) "enable-method", "spin-table"); set_cpu_present(cpu, avail); - set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); + cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]); cpu++; } @@ -835,6 +842,23 @@ static __init void print_system_info(void) pr_info("-----------------------------------------------------\n"); } +#ifdef CONFIG_SMP +static void smp_setup_pacas(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + allocate_paca(cpu); + set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]); + } + + memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32)); + cpu_to_phys_id = NULL; +} +#endif + /* * Called into from start_kernel this initializes memblock, which is used * to manage page allocation until mem_init is called. @@ -888,8 +912,8 @@ void __init setup_arch(char **cmdline_p) /* Check the SMT related command line arguments (ppc64). */ check_smt_enabled(); - /* On BookE, setup per-core TLB data structures. */ - setup_tlb_core_data(); + /* Parse memory topology */ + mem_topology_setup(); /* * Release secondary cpus out of their spinloops at 0x60 now that @@ -899,6 +923,11 @@ void __init setup_arch(char **cmdline_p) * so smp_release_cpus() does nothing for them. */ #ifdef CONFIG_SMP + smp_setup_pacas(); + + /* On BookE, setup per-core TLB data structures. */ + setup_tlb_core_data(); + smp_release_cpus(); #endif @@ -919,6 +948,8 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_PPC64 if (!radix_enabled()) init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; +#elif defined(CONFIG_PPC_8xx) + init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW; #else #error "context.addr_limit not initialized." #endif diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 3fc11e30308f..d144df54ad40 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -46,13 +46,10 @@ static inline void emergency_stack_init(void) { }; #endif #ifdef CONFIG_PPC64 -void record_spr_defaults(void); -#else -static inline void record_spr_defaults(void) { }; -#endif - -#ifdef CONFIG_PPC64 u64 ppc64_bolted_size(void); + +/* Default SPR values from firmware/kexec */ +extern unsigned long spr_default_dscr; #endif /* diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 51ebc01fff52..74457485574b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -39,6 +39,7 @@ #include <asm/udbg.h> #include <asm/code-patching.h> #include <asm/cpu_has_feature.h> +#include <asm/asm-prototypes.h> #define DBG(fmt...) @@ -121,7 +122,7 @@ notrace void __init machine_init(u64 dt_ptr) } /* Checks "l2cr=xxxx" command-line option */ -int __init ppc_setup_l2cr(char *str) +static int __init ppc_setup_l2cr(char *str) { if (cpu_has_feature(CPU_FTR_L2CR)) { unsigned long val = simple_strtoul(str, NULL, 0); @@ -134,7 +135,7 @@ int __init ppc_setup_l2cr(char *str) __setup("l2cr=", ppc_setup_l2cr); /* Checks "l3cr=xxxx" command-line option */ -int __init ppc_setup_l3cr(char *str) +static int __init ppc_setup_l3cr(char *str) { if (cpu_has_feature(CPU_FTR_L3CR)) { unsigned long val = simple_strtoul(str, NULL, 0); @@ -180,7 +181,7 @@ EXPORT_SYMBOL(nvram_sync); #endif /* CONFIG_NVRAM */ -int __init ppc_init(void) +static int __init ppc_init(void) { /* clear the progress line */ if (ppc_md.progress) @@ -192,7 +193,6 @@ int __init ppc_init(void) } return 0; } - arch_initcall(ppc_init); void __init irqstack_early_init(void) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c388cc3357fa..66f2b6299c40 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -110,7 +110,7 @@ void __init setup_tlb_core_data(void) if (cpu_first_thread_sibling(boot_cpuid) == first) first = boot_cpuid; - paca[cpu].tcd_ptr = &paca[first].tcd; + paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd; /* * If we have threads, we need either tlbsrx. @@ -254,6 +254,14 @@ static void cpu_ready_for_interrupts(void) get_paca()->kernel_msr = MSR_KERNEL; } +unsigned long spr_default_dscr = 0; + +void __init record_spr_defaults(void) +{ + if (early_cpu_has_feature(CPU_FTR_DSCR)) + spr_default_dscr = mfspr(SPRN_DSCR); +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -304,7 +312,11 @@ void __init early_setup(unsigned long dt_ptr) early_init_devtree(__va(dt_ptr)); /* Now we know the logical id of our boot cpu, setup the paca. */ - setup_paca(&paca[boot_cpuid]); + if (boot_cpuid != 0) { + /* Poison paca_ptrs[0] again if it's not the boot cpu */ + memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0])); + } + setup_paca(paca_ptrs[boot_cpuid]); fixup_boot_paca(); /* @@ -599,6 +611,21 @@ __init u64 ppc64_bolted_size(void) #endif } +static void *__init alloc_stack(unsigned long limit, int cpu) +{ + unsigned long pa; + + pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit, + early_cpu_to_node(cpu), MEMBLOCK_NONE); + if (!pa) { + pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + if (!pa) + panic("cannot allocate stacks"); + } + + return __va(pa); +} + void __init irqstack_early_init(void) { u64 limit = ppc64_bolted_size(); @@ -610,12 +637,8 @@ void __init irqstack_early_init(void) * accessed in realmode. */ for_each_possible_cpu(i) { - softirq_ctx[i] = (struct thread_info *) - __va(memblock_alloc_base(THREAD_SIZE, - THREAD_SIZE, limit)); - hardirq_ctx[i] = (struct thread_info *) - __va(memblock_alloc_base(THREAD_SIZE, - THREAD_SIZE, limit)); + softirq_ctx[i] = alloc_stack(limit, i); + hardirq_ctx[i] = alloc_stack(limit, i); } } @@ -623,20 +646,21 @@ void __init irqstack_early_init(void) void __init exc_lvl_early_init(void) { unsigned int i; - unsigned long sp; for_each_possible_cpu(i) { - sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); - critirq_ctx[i] = (struct thread_info *)__va(sp); - paca[i].crit_kstack = __va(sp + THREAD_SIZE); + void *sp; + + sp = alloc_stack(ULONG_MAX, i); + critirq_ctx[i] = sp; + paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE; - sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); - dbgirq_ctx[i] = (struct thread_info *)__va(sp); - paca[i].dbg_kstack = __va(sp + THREAD_SIZE); + sp = alloc_stack(ULONG_MAX, i); + dbgirq_ctx[i] = sp; + paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE; - sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); - mcheckirq_ctx[i] = (struct thread_info *)__va(sp); - paca[i].mc_kstack = __va(sp + THREAD_SIZE); + sp = alloc_stack(ULONG_MAX, i); + mcheckirq_ctx[i] = sp; + paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE; } if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) @@ -690,23 +714,24 @@ void __init emergency_stack_init(void) for_each_possible_cpu(i) { struct thread_info *ti; - ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + + ti = alloc_stack(limit, i); memset(ti, 0, THREAD_SIZE); emerg_stack_init_thread_info(ti, i); - paca[i].emergency_sp = (void *)ti + THREAD_SIZE; + paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for NMI exception handling. */ - ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + ti = alloc_stack(limit, i); memset(ti, 0, THREAD_SIZE); emerg_stack_init_thread_info(ti, i); - paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; + paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE; /* emergency stack for machine check exception handling. */ - ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + ti = alloc_stack(limit, i); memset(ti, 0, THREAD_SIZE); emerg_stack_init_thread_info(ti, i); - paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; + paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE; #endif } } @@ -762,7 +787,7 @@ void __init setup_per_cpu_areas(void) delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; - paca[cpu].data_offset = __per_cpu_offset[cpu]; + paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu]; } } #endif @@ -846,9 +871,6 @@ static void do_nothing(void *unused) void rfi_flush_enable(bool enable) { - if (rfi_flush == enable) - return; - if (enable) { do_rfi_flush_fixups(enabled_flush_types); on_each_cpu(do_nothing, NULL, 1); @@ -863,6 +885,10 @@ static void init_fallback_flush(void) u64 l1d_size, limit; int cpu; + /* Only allocate the fallback flush area once (at boot time). */ + if (l1d_flush_fallback_area) + return; + l1d_size = ppc64_caches.l1d.size; limit = min(ppc64_bolted_size(), ppc64_rma_size); @@ -875,23 +901,24 @@ static void init_fallback_flush(void) memset(l1d_flush_fallback_area, 0, l1d_size * 2); for_each_possible_cpu(cpu) { - paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; - paca[cpu].l1d_flush_size = l1d_size; + struct paca_struct *paca = paca_ptrs[cpu]; + paca->rfi_flush_fallback_area = l1d_flush_fallback_area; + paca->l1d_flush_size = l1d_size; } } -void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +void setup_rfi_flush(enum l1d_flush_type types, bool enable) { if (types & L1D_FLUSH_FALLBACK) { - pr_info("rfi-flush: Using fallback displacement flush\n"); + pr_info("rfi-flush: fallback displacement flush available\n"); init_fallback_flush(); } if (types & L1D_FLUSH_ORI) - pr_info("rfi-flush: Using ori type flush\n"); + pr_info("rfi-flush: ori type flush available\n"); if (types & L1D_FLUSH_MTTRIG) - pr_info("rfi-flush: Using mttrig type flush\n"); + pr_info("rfi-flush: mttrig type flush available\n"); enabled_flush_types = types; @@ -902,13 +929,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { + bool enable; + if (val == 1) - rfi_flush_enable(true); + enable = true; else if (val == 0) - rfi_flush_enable(false); + enable = false; else return -EINVAL; + /* Only do anything if we're changing state */ + if (enable != rfi_flush) + rfi_flush_enable(enable); + return 0; } @@ -927,12 +960,4 @@ static __init int rfi_flush_debugfs_init(void) } device_initcall(rfi_flush_debugfs_init); #endif - -ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) -{ - if (rfi_flush) - return sprintf(buf, "Mitigation: RFI Flush\n"); - - return sprintf(buf, "Vulnerable\n"); -} #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 7c59d88b9d86..a6467f843acf 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -49,6 +49,11 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #else /* CONFIG_PPC64 */ +extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct pt_regs *regs); +extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, + struct pt_regs *regs); + static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct task_struct *tsk) { diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index a46de0035214..492f03451877 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1045,7 +1045,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) { - unsigned char tmp; + unsigned char tmp __maybe_unused; int ctx_has_vsx_region = 0; #ifdef CONFIG_PPC64 @@ -1231,7 +1231,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, { struct sig_dbg_op op; int i; - unsigned char tmp; + unsigned char tmp __maybe_unused; unsigned long new_msr = regs->msr; #ifdef CONFIG_PPC_ADV_DEBUG_REGS unsigned long new_dbcr0 = current->thread.debug.dbcr0; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index bbe7634b3a43..e16ec7b3b427 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -123,8 +123,8 @@ int smp_generic_kick_cpu(int nr) * cpu_start field to become non-zero After we set cpu_start, * the processor will continue on to secondary_start */ - if (!paca[nr].cpu_start) { - paca[nr].cpu_start = 1; + if (!paca_ptrs[nr]->cpu_start) { + paca_ptrs[nr]->cpu_start = 1; smp_mb(); return 0; } @@ -565,19 +565,28 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif +#ifdef CONFIG_NMI_IPI +static void stop_this_cpu(struct pt_regs *regs) +#else static void stop_this_cpu(void *dummy) +#endif { /* Remove this CPU */ set_cpu_online(smp_processor_id(), false); - local_irq_disable(); + hard_irq_disable(); + spin_begin(); while (1) - ; + spin_cpu_relax(); } void smp_send_stop(void) { +#ifdef CONFIG_NMI_IPI + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000); +#else smp_call_function(stop_this_cpu, NULL, 0); +#endif } struct thread_info *current_set[NR_CPUS]; @@ -657,7 +666,7 @@ void smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); #ifdef CONFIG_PPC64 - paca[boot_cpuid].__current = current; + paca_ptrs[boot_cpuid]->__current = current; #endif set_numa_node(numa_cpu_lookup_table[boot_cpuid]); current_set[boot_cpuid] = task_thread_info(current); @@ -748,8 +757,8 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) struct thread_info *ti = task_thread_info(idle); #ifdef CONFIG_PPC64 - paca[cpu].__current = idle; - paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; + paca_ptrs[cpu]->__current = idle; + paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; #endif ti->cpu = cpu; secondary_ti = current_set[cpu] = ti; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 04d0bbd7a1dd..755dc98a57ae 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -20,6 +20,7 @@ #include <asm/firmware.h> #include "cacheinfo.h" +#include "setup.h" #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -588,21 +589,18 @@ static DEVICE_ATTR(dscr_default, 0600, static void sysfs_create_dscr_default(void) { - int err = 0; - if (cpu_has_feature(CPU_FTR_DSCR)) - err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); -} + if (cpu_has_feature(CPU_FTR_DSCR)) { + int err = 0; + int cpu; -void __init record_spr_defaults(void) -{ - int cpu; + dscr_default = spr_default_dscr; + for_each_possible_cpu(cpu) + paca_ptrs[cpu]->dscr_default = dscr_default; - if (cpu_has_feature(CPU_FTR_DSCR)) { - dscr_default = mfspr(SPRN_DSCR); - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - paca[cpu].dscr_default = dscr_default; + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); } } + #endif /* CONFIG_PPC64 */ #ifdef HAS_PPC_PMC_PA6T diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a32823dcd9a4..360e71d455cc 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -266,6 +266,9 @@ void accumulate_stolen_time(void) static inline u64 calculate_stolen_time(u64 stop_tb) { + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) return scan_dispatch_log(stop_tb); @@ -1234,7 +1237,7 @@ void calibrate_delay(void) static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) { ppc_md.get_rtc_time(tm); - return rtc_valid_tm(tm); + return 0; } static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1e48d157196a..a2ef0c0e6c31 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -208,6 +208,12 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, } raw_local_irq_restore(flags); + /* + * system_reset_excption handles debugger, crash dump, panic, for 0x100 + */ + if (TRAP(regs) == 0x100) + return; + crash_fadump(regs, "die oops"); if (kexec_should_crash(current)) @@ -272,8 +278,13 @@ void die(const char *str, struct pt_regs *regs, long err) { unsigned long flags; - if (debugger(regs)) - return; + /* + * system_reset_excption handles debugger, crash dump, panic, for 0x100 + */ + if (TRAP(regs) != 0x100) { + if (debugger(regs)) + return; + } flags = oops_begin(regs); if (__die(str, regs, err)) @@ -460,7 +471,7 @@ static inline int check_io_access(struct pt_regs *regs) /* single-step stuff */ #define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC) #define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC) - +#define clear_br_trace(regs) do {} while(0) #else /* On non-4xx, the reason for the machine check or program exception is in the MSR. */ @@ -473,6 +484,7 @@ static inline int check_io_access(struct pt_regs *regs) #define single_stepping(regs) ((regs)->msr & MSR_SE) #define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) +#define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE) #endif #if defined(CONFIG_E500) @@ -988,6 +1000,7 @@ void single_step_exception(struct pt_regs *regs) enum ctx_state prev_state = exception_enter(); clear_single_step(regs); + clear_br_trace(regs); if (kprobe_post_handler(regs)) return; @@ -1495,18 +1508,6 @@ bail: exception_exit(prev_state); } -void slb_miss_bad_addr(struct pt_regs *regs) -{ - enum ctx_state prev_state = exception_enter(); - - if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); - else - bad_page_fault(regs, regs->dar, SIGSEGV); - - exception_exit(prev_state); -} - void StackOverflow(struct pt_regs *regs) { printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 22b01a3962f0..b44ec104a5a1 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -99,26 +99,28 @@ static struct vdso_patch_def vdso_patches[] = { CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, "__kernel_sync_dicache", "__kernel_sync_dicache_p5" }, +#ifdef CONFIG_PPC32 { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_gettimeofday", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_clock_gettime", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_clock_getres", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_get_tbfreq", NULL }, { - CPU_FTR_USE_TB, 0, + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, "__kernel_time", NULL }, +#endif }; /* |