Diffstat (limited to 'arch')
23 files changed, 559 insertions, 92 deletions
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index a1ebcbc3931f..cf00ff0d121d 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -209,8 +209,9 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTR_POWER9_DD2_1		LONG_ASM_CONST(0x0000080000000000)
 #define CPU_FTR_P9_TM_HV_ASSIST		LONG_ASM_CONST(0x0000100000000000)
 #define CPU_FTR_P9_TM_XER_SO_BUG	LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_P9_TLBIE_BUG		LONG_ASM_CONST(0x0000400000000000)
+#define CPU_FTR_P9_TLBIE_STQ_BUG	LONG_ASM_CONST(0x0000400000000000)
 #define CPU_FTR_P9_TIDR			LONG_ASM_CONST(0x0000800000000000)
+#define CPU_FTR_P9_TLBIE_ERAT_BUG	LONG_ASM_CONST(0x0001000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -457,7 +458,7 @@ static inline void cpu_feature_keys_init(void) { }
	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
-	    CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR)
+	    CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
 #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 8e8514efb124..ee62776e5433 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -452,9 +452,100 @@ static inline u32 kvmppc_get_xics_latch(void)
	return xirr;
 }
 
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+/*
+ * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
+ * a CPU thread that's running/napping inside of a guest is by default regarded
+ * as a request to wake the CPU (if needed) and continue execution within the
+ * guest, potentially to process new state like externally-generated
+ * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
+ *
+ * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
+ * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
+ * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
+ * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
+ * the receiving side prior to processing the IPI work.
+ *
+ * NOTE:
+ *
+ * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
+ * This is to guard against sequences such as the following:
+ *
+ *      CPU
+ *        X: smp_muxed_ipi_set_message():
+ *        X:   smp_mb()
+ *        X:   message[RESCHEDULE] = 1
+ *        X: doorbell_global_ipi(42):
+ *        X:   kvmppc_set_host_ipi(42)
+ *        X:   ppc_msgsnd_sync()/smp_mb()
+ *        X:   ppc_msgsnd() -> 42
+ *       42: doorbell_exception(): // from CPU X
+ *       42:   ppc_msgsync()
+ *      105: smp_muxed_ipi_set_message():
+ *      105:   smp_mb()
+ *           // STORE DEFERRED DUE TO RE-ORDERING
+ *    --105:   message[CALL_FUNCTION] = 1
+ *    | 105: doorbell_global_ipi(42):
+ *    | 105:   kvmppc_set_host_ipi(42)
+ *    |  42:   kvmppc_clear_host_ipi(42)
+ *    |  42: smp_ipi_demux_relaxed()
+ *    |  42: // returns to executing guest
+ *    |      // RE-ORDERED STORE COMPLETES
+ *    ->105:   message[CALL_FUNCTION] = 1
+ *      105:   ppc_msgsnd_sync()/smp_mb()
+ *      105:   ppc_msgsnd() -> 42
+ *       42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ *      105: // hangs waiting on 42 to process messages/call_single_queue
+ *
+ * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
+ * to guard against sequences such as the following (as well as to create
+ * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
+ *
+ *      CPU
+ *        X: smp_muxed_ipi_set_message():
+ *        X:   smp_mb()
+ *        X:   message[RESCHEDULE] = 1
+ *        X: doorbell_global_ipi(42):
+ *        X:   kvmppc_set_host_ipi(42)
+ *        X:   ppc_msgsnd_sync()/smp_mb()
+ *        X:   ppc_msgsnd() -> 42
+ *       42: doorbell_exception(): // from CPU X
+ *       42:   ppc_msgsync()
+ *           // STORE DEFERRED DUE TO RE-ORDERING
+ *    -- 42:   kvmppc_clear_host_ipi(42)
+ *    |  42: smp_ipi_demux_relaxed()
+ *    | 105: smp_muxed_ipi_set_message():
+ *    | 105:   smp_mb()
+ *    | 105:   message[CALL_FUNCTION] = 1
+ *    | 105: doorbell_global_ipi(42):
+ *    | 105:   kvmppc_set_host_ipi(42)
+ *    |      // RE-ORDERED STORE COMPLETES
+ *    -> 42:   kvmppc_clear_host_ipi(42)
+ *       42: // returns to executing guest
+ *      105:   ppc_msgsnd_sync()/smp_mb()
+ *      105:   ppc_msgsnd() -> 42
+ *       42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ *      105: // hangs waiting on 42 to process messages/call_single_queue
+ */
+static inline void kvmppc_set_host_ipi(int cpu)
 {
-	paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
+	/*
+	 * order stores of IPI messages vs. setting of host_ipi flag
+	 *
+	 * pairs with the barrier in kvmppc_clear_host_ipi()
+	 */
+	smp_mb();
+	paca_ptrs[cpu]->kvm_hstate.host_ipi = 1;
+}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
+{
+	paca_ptrs[cpu]->kvm_hstate.host_ipi = 0;
+	/*
+	 * order clearing of host_ipi flag vs. processing of IPI messages
+	 *
+	 * pairs with the barrier in kvmppc_set_host_ipi()
+	 */
+	smp_mb();
 }
 
 static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -486,7 +577,10 @@ static inline u32 kvmppc_get_xics_latch(void)
	return 0;
 }
 
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+static inline void kvmppc_set_host_ipi(int cpu)
+{}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
 {}
 
 static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
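[Editor's note: a minimal sketch, not part of the patch, of how the two new helpers are meant to pair up across the sending and receiving CPUs. The helper names are from the hunk above; the example_* wrappers are hypothetical, while smp_muxed_ipi_set_message() and smp_ipi_demux_relaxed() are the existing callers touched later in this series.]

/* Sender (CPU 105 in the diagrams): publish the message, then flag and kick. */
static void example_send_ipi(int cpu, int msg)
{
	smp_muxed_ipi_set_message(cpu, msg);	/* smp_mb(); message[msg] = 1 */
	kvmppc_set_host_ipi(cpu);		/* smp_mb(); host_ipi = 1     */
	/* ... ppc_msgsnd_sync()/ppc_msgsnd() or another doorbell kick ... */
}

/* Receiver (CPU 42): clear the flag first, then consume the messages. */
static void example_handle_ipi(void)
{
	kvmppc_clear_host_ipi(smp_processor_id());	/* host_ipi = 0; smp_mb() */
	smp_ipi_demux_relaxed();			/* reads message[] */
}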
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index ec3714cf0989..b3cbb1136bce 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -475,9 +475,10 @@
 #define   HMER_DEBUG_TRIG	(1ul << (63 - 17)) /* Debug trigger */
 #define	SPRN_HMEER	0x151	/* Hyp maintenance exception enable reg */
 #define SPRN_PCR	0x152	/* Processor compatibility register */
-#define   PCR_VEC_DIS	(1ul << (63-0))	/* Vec. disable (bit NA since POWER8) */
-#define   PCR_VSX_DIS	(1ul << (63-1))	/* VSX disable (bit NA since POWER8) */
-#define   PCR_TM_DIS	(1ul << (63-2))	/* Trans. memory disable (POWER8) */
+#define   PCR_VEC_DIS	(__MASK(63-0))	/* Vec. disable (bit NA since POWER8) */
+#define   PCR_VSX_DIS	(__MASK(63-1))	/* VSX disable (bit NA since POWER8) */
+#define   PCR_TM_DIS	(__MASK(63-2))	/* Trans. memory disable (POWER8) */
+#define   PCR_HIGH_BITS	(PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
 /*
  * These bits are used in the function kvmppc_set_arch_compat() to specify and
  * determine both the compatibility level which we want to emulate and the
@@ -486,6 +487,8 @@
 #define   PCR_ARCH_207	0x8		/* Architecture 2.07 */
 #define   PCR_ARCH_206	0x4		/* Architecture 2.06 */
 #define   PCR_ARCH_205	0x2		/* Architecture 2.05 */
+#define   PCR_LOW_BITS	(PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205)
+#define   PCR_MASK	~(PCR_HIGH_BITS | PCR_LOW_BITS)	/* PCR Reserved Bits */
 #define	SPRN_HEIR	0x153	/* Hypervisor Emulated Instruction Register */
 #define SPRN_TLBINDEXR	0x154	/* P7 TLB control register */
 #define SPRN_TLBVPNR	0x155	/* P7 TLB control register */
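[Editor's note: the new constants, worked out by hand. This assumes __MASK(n) expands to 1UL << n, matching its other uses in reg.h.]

/* Illustration only, not patch content:
 *
 *   PCR_HIGH_BITS = 0xe000000000000000   (VEC_DIS | VSX_DIS | TM_DIS)
 *   PCR_LOW_BITS  = 0x000000000000000e   (ARCH_207 | ARCH_206 | ARCH_205)
 *   PCR_MASK      = 0x1ffffffffffffff1   (every bit that is neither a
 *                                         disable bit nor a compat bit)
 *
 * Writing PCR_MASK to SPRN_PCR therefore writes ones to every reserved
 * bit while leaving the architected bits clear, which is why the setup
 * code below now loads PCR_MASK instead of zero.
 */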
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 3239a9fe6c1c..a460298c7ddb 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -23,6 +23,7 @@ _GLOBAL(__setup_cpu_power7)
	beqlr
	li	r0,0
	mtspr	SPRN_LPID,r0
+	LOAD_REG_IMMEDIATE(r0, PCR_MASK)
	mtspr	SPRN_PCR,r0
	mfspr	r3,SPRN_LPCR
	li	r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -37,6 +38,7 @@ _GLOBAL(__restore_cpu_power7)
	beqlr
	li	r0,0
	mtspr	SPRN_LPID,r0
+	LOAD_REG_IMMEDIATE(r0, PCR_MASK)
	mtspr	SPRN_PCR,r0
	mfspr	r3,SPRN_LPCR
	li	r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -54,6 +56,7 @@ _GLOBAL(__setup_cpu_power8)
	beqlr
	li	r0,0
	mtspr	SPRN_LPID,r0
+	LOAD_REG_IMMEDIATE(r0, PCR_MASK)
	mtspr	SPRN_PCR,r0
	mfspr	r3,SPRN_LPCR
	ori	r3, r3, LPCR_PECEDH
@@ -76,6 +79,7 @@ _GLOBAL(__restore_cpu_power8)
	beqlr
	li	r0,0
	mtspr	SPRN_LPID,r0
+	LOAD_REG_IMMEDIATE(r0, PCR_MASK)
	mtspr	SPRN_PCR,r0
	mfspr	r3,SPRN_LPCR
	ori	r3, r3, LPCR_PECEDH
@@ -98,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
	mtspr	SPRN_PSSCR,r0
	mtspr	SPRN_LPID,r0
	mtspr	SPRN_PID,r0
+	LOAD_REG_IMMEDIATE(r0, PCR_MASK)
	mtspr	SPRN_PCR,r0
	mfspr	r3,SPRN_LPCR
	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
	mtspr	SPRN_PSSCR,r0
	mtspr	SPRN_LPID,r0
	mtspr	SPRN_PID,r0
+	LOAD_REG_IMMEDIATE(r0, PCR_MASK)
	mtspr	SPRN_PCR,r0
	mfspr	r3,SPRN_LPCR
	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 804b1a6196fa..f17ff1200eaa 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -33,7 +33,7 @@ void doorbell_global_ipi(int cpu)
 {
	u32 tag = get_hard_smp_processor_id(cpu);
 
-	kvmppc_set_host_ipi(cpu, 1);
+	kvmppc_set_host_ipi(cpu);
	/* Order previous accesses vs. msgsnd, which is treated as a store */
	ppc_msgsnd_sync();
	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -48,7 +48,7 @@ void doorbell_core_ipi(int cpu)
 {
	u32 tag = cpu_thread_in_core(cpu);
 
-	kvmppc_set_host_ipi(cpu, 1);
+	kvmppc_set_host_ipi(cpu);
	/* Order previous accesses vs. msgsnd, which is treated as a store */
	ppc_msgsnd_sync();
	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -84,7 +84,7 @@ void doorbell_exception(struct pt_regs *regs)
 
	may_hard_irq_enable();
 
-	kvmppc_set_host_ipi(smp_processor_id(), 0);
+	kvmppc_clear_host_ipi(smp_processor_id());
	__this_cpu_inc(irq_stat.doorbell_irqs);
 
	smp_ipi_demux_relaxed(); /* already performed the barrier */
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index bd95318d2202..180b3a5d1001 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -101,7 +101,7 @@ static void __restore_cpu_cpufeatures(void)
	if (hv_mode) {
		mtspr(SPRN_LPID, 0);
		mtspr(SPRN_HFSCR, system_registers.hfscr);
-		mtspr(SPRN_PCR, 0);
+		mtspr(SPRN_PCR, PCR_MASK);
	}
	mtspr(SPRN_FSCR, system_registers.fscr);
 
@@ -144,6 +144,7 @@ static void __init cpufeatures_setup_cpu(void)
		mtspr(SPRN_HFSCR, 0);
	}
	mtspr(SPRN_FSCR, 0);
+	mtspr(SPRN_PCR, PCR_MASK);
 
	/*
	 * LPCR does not get cleared, to match behaviour with secondaries
@@ -691,9 +692,37 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
	return true;
 }
 
+/*
+ * Handle POWER9 broadcast tlbie invalidation issue using
+ * cpu feature flag.
+ */
+static __init void update_tlbie_feature_flag(unsigned long pvr)
+{
+	if (PVR_VER(pvr) == PVR_POWER9) {
+		/*
+		 * Set the tlbie feature flag for anything below
+		 * Nimbus DD 2.3 and Cumulus DD 1.3
+		 */
+		if ((pvr & 0xe000) == 0) {
+			/* Nimbus */
+			if ((pvr & 0xfff) < 0x203)
+				cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+		} else if ((pvr & 0xc000) == 0) {
+			/* Cumulus */
+			if ((pvr & 0xfff) < 0x103)
+				cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+		} else {
+			WARN_ONCE(1, "Unknown PVR");
+			cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+		}
+
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+	}
+}
+
 static __init void cpufeatures_cpu_quirks(void)
 {
-	int version = mfspr(SPRN_PVR);
+	unsigned long version = mfspr(SPRN_PVR);
 
	/*
	 * Not all quirks can be derived from the cpufeatures device tree.
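[Editor's note: a worked example of the PVR decoding in update_tlbie_feature_flag() above. The sample PVR values are illustrative, chosen to exercise each branch; the low halfword layout (bits 15:13 selecting the chip variant, low 12 bits holding the DD level) is as the function assumes.]

/* Illustrative POWER9 PVRs through update_tlbie_feature_flag():
 *
 *   0x004e1202: (pvr & 0xe000) == 0 -> Nimbus; DD = 0x202 < 0x203
 *               -> CPU_FTR_P9_TLBIE_STQ_BUG + CPU_FTR_P9_TLBIE_ERAT_BUG
 *   0x004e1203: Nimbus; DD = 0x203, not below 2.3
 *               -> only CPU_FTR_P9_TLBIE_ERAT_BUG
 *   0x004e2102: (pvr & 0xe000) != 0, (pvr & 0xc000) == 0 -> Cumulus;
 *               DD = 0x102 < 0x103 -> both flags set
 *
 * Every POWER9 gets the ERAT flag; only pre-fix DD levels (or an
 * unrecognized variant, conservatively) get the STQ flag as well.
 */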
@@ -712,10 +741,10 @@ static __init void cpufeatures_cpu_quirks(void)
 
	if ((version & 0xffff0000) == 0x004e0000) {
		cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
-		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
	}
 
+	update_tlbie_feature_flag(version);
	/*
	 * PKEY was not in the initial base or feature node
	 * specification, but it should become optional in the next
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 0a91dee51245..bc8a551013be 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1960,7 +1960,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
	pci_err(pdev, "Going to break: %pR\n", bar);
 
	if (pdev->is_virtfn) {
-#ifndef CONFIG_IOV
+#ifndef CONFIG_PCI_IOV
		return -ENXIO;
 #else
		/*
@@ -1980,7 +1980,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
		pos  = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
		pos += PCI_SRIOV_CTRL;
		bit  = PCI_SRIOV_CTRL_MSE;
-#endif /* !CONFIG_IOV */
+#endif /* !CONFIG_PCI_IOV */
	} else {
		bit = PCI_COMMAND_MEMORY;
		pos = PCI_COMMAND;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index efd8f93bc9dc..709cf1fd4cf4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -401,8 +401,11 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 
	spin_lock(&vc->lock);
	vc->arch_compat = arch_compat;
-	/* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
-	vc->pcr = host_pcr_bit - guest_pcr_bit;
+	/*
+	 * Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit
+	 * Also set all reserved PCR bits
+	 */
+	vc->pcr = (host_pcr_bit - guest_pcr_bit) | PCR_MASK;
	spin_unlock(&vc->lock);
 
	return 0;
@@ -3410,7 +3413,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
	}
 
	if (vc->pcr)
-		mtspr(SPRN_PCR, vc->pcr);
+		mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
	mtspr(SPRN_DPDES, vc->dpdes);
	mtspr(SPRN_VTB, vc->vtb);
 
@@ -3490,7 +3493,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
	vc->vtb = mfspr(SPRN_VTB);
	mtspr(SPRN_DPDES, 0);
	if (vc->pcr)
-		mtspr(SPRN_PCR, 0);
+		mtspr(SPRN_PCR, PCR_MASK);
 
	if (vc->tb_offset_applied) {
		u64 new_tb = mftb() - vc->tb_offset_applied;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index fff90f2c3de2..cdf30c6eaf54 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -29,7 +29,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 {
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-	hr->pcr = vc->pcr;
+	hr->pcr = vc->pcr | PCR_MASK;
	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->tb_offset = vc->tb_offset;
@@ -65,7 +65,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
	hr->lpid = swab32(hr->lpid);
	hr->vcpu_token = swab32(hr->vcpu_token);
	hr->lpcr = swab64(hr->lpcr);
-	hr->pcr = swab64(hr->pcr);
+	hr->pcr = swab64(hr->pcr) | PCR_MASK;
	hr->amor = swab64(hr->amor);
	hr->dpdes = swab64(hr->dpdes);
	hr->hfscr = swab64(hr->hfscr);
@@ -148,7 +148,7 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 {
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-	vc->pcr = hr->pcr;
+	vc->pcr = hr->pcr | PCR_MASK;
	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.dawr = hr->dawr0;
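[Editor's note: a worked example of the new vc->pcr encoding in kvmppc_set_arch_compat(). The host_pcr_bit value of 0x10 (the ISA v3.0 compat bit, one above PCR_ARCH_207) is assumed here since it does not appear in the hunks above; the PCR_ARCH_* and PCR_MASK values are from reg.h.]

/* Guest requests ISA v2.07 compatibility on an assumed v3.0 host:
 *
 *   host_pcr_bit  = 0x10 (assumed)     guest_pcr_bit = PCR_ARCH_207 (0x8)
 *
 *   vc->pcr = (0x10 - 0x8) | PCR_MASK
 *           = 0x8 | 0x1ffffffffffffff1
 *
 * i.e. exactly the 2.07 compat bit, plus every reserved bit, so that
 * mtspr(SPRN_PCR, vc->pcr | PCR_MASK) writes ones to all reserved bits
 * as newer ISA levels require.
 */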
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7186c65c61c9..220305454c23 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -433,6 +433,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
		(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
 }
 
+static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		/* Radix flush for a hash guest */
+
+		unsigned long rb, rs, prs, r, ric;
+
+		rb = PPC_BIT(52); /* IS = 2 */
+		rs = 0;		  /* lpid = 0 */
+		prs = 0;	  /* partition scoped */
+		r = 1;		  /* radix format */
+		ric = 0;	  /* RIC_FLUSH_TLB */
+
+		/*
+		 * Need the extra ptesync to make sure we don't
+		 * re-order the tlbie
+		 */
+		asm volatile("ptesync": : :"memory");
+		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+			     : : "r"(rb), "i"(r), "i"(prs),
+			       "i"(ric), "r"(rs) : "memory");
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
+			     "r" (rb_value), "r" (lpid));
+	}
+}
+
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
		      long npages, int global, bool need_sync)
 {
@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
		}
 
-		if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
-			/*
-			 * Need the extra ptesync to make sure we don't
-			 * re-order the tlbie
-			 */
-			asm volatile("ptesync": : :"memory");
-			asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
-				     "r" (rbvalues[0]), "r" (kvm->arch.lpid));
-		}
-
+		fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
	} else {
		if (need_sync)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 4d2ec77d806c..287d5911df0f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -58,7 +58,7 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
	hcpu = hcore << threads_shift;
	kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
	smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
-	kvmppc_set_host_ipi(hcpu, 1);
+	kvmppc_set_host_ipi(hcpu);
	smp_mb();
	kvmhv_rm_send_ipi(hcpu);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9a05b0d932ef..74a9cfe84aee 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -644,8 +644,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
	/* Load guest PCR value to select appropriate compat mode */
 37:	ld	r7, VCORE_PCR(r5)
-	cmpdi	r7, 0
+	LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+	cmpld	r7, r6
	beq	38f
+	or	r7, r7, r6
	mtspr	SPRN_PCR, r7
 38:
 
@@ -1913,10 +1915,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
	/* Reset PCR */
	ld	r0, VCORE_PCR(r5)
-	cmpdi	r0, 0
+	LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+	cmpld	r0, r6
	beq	18f
-	li	r0, 0
-	mtspr	SPRN_PCR, r0
+	mtspr	SPRN_PCR, r6
 18:
	/* Signal secondary CPUs to continue */
	stb	r0,VCORE_IN_GUEST(r5)
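[Editor's note: a hedged aside on the magic RB value used by the ERAT-bug fixups above; the field layout is taken from ISA 3.0 as encoded elsewhere in the kernel, not introduced by this patch.]

/* rb = PPC_BIT(52) = 1UL << (63 - 52) = 0x800
 *
 * RB[52:53] (IBM bit numbering) is the IS field of tlbie; setting only
 * bit 52 encodes IS = 0b10, i.e. invalidate all entries matching the
 * LPID supplied in RS -- here lpid 0 -- which together with R = 1 makes
 * this a full radix-format flush whose side effect scrubs the ERAT.
 */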
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 90ab4f31e2b3..523e42eb11da 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -197,9 +197,32 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
	return va;
 }
 
-static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
+				   int apsize, int ssize)
 {
-	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		/* Radix flush for a hash guest */
+
+		unsigned long rb, rs, prs, r, ric;
+
+		rb = PPC_BIT(52); /* IS = 2 */
+		rs = 0;		  /* lpid = 0 */
+		prs = 0;	  /* partition scoped */
+		r = 1;		  /* radix format */
+		ric = 0;	  /* RIC_FLUSH_TLB */
+
+		/*
+		 * Need the extra ptesync to make sure we don't
+		 * re-order the tlbie
+		 */
+		asm volatile("ptesync": : :"memory");
+		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+			     : : "r"(rb), "i"(r), "i"(prs),
+			       "i"(ric), "r"(rs) : "memory");
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		/* Need the extra ptesync to ensure we don't reorder tlbie*/
		asm volatile("ptesync": : :"memory");
		___tlbie(vpn, psize, apsize, ssize);
@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(vpn, psize, apsize, ssize);
-		fixup_tlbie(vpn, psize, apsize, ssize);
+		fixup_tlbie_vpn(vpn, psize, apsize, ssize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
@@ -856,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local)
		/*
		 * Just do one more with the last used values.
		 */
-		fixup_tlbie(vpn, psize, psize, ssize);
+		fixup_tlbie_vpn(vpn, psize, psize, ssize);
		asm volatile("eieio; tlbsync; ptesync":::"memory");
 
		if (lock_tlbie)
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 2d0cb5ba9a47..0ba30b8b935b 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -256,8 +256,21 @@ void destroy_context(struct mm_struct *mm)
 #ifdef CONFIG_SPAPR_TCE_IOMMU
	WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
 #endif
+	/*
+	 * For tasks which were successfully initialized we end up calling
+	 * arch_exit_mmap() which clears the process table entry. And
+	 * arch_exit_mmap() is called before the required fullmm TLB flush
+	 * which does a RIC=2 flush. Hence for an initialized task, we do
+	 * clear any cached process table entries.
+	 *
+	 * The condition below handles the error case during task init. We have
+	 * set the process table entry early and if we fail a task
+	 * initialization, we need to ensure the process table entry is zeroed.
+	 * We need not worry about process table entry caches because the task
+	 * never ran with the PID value.
+	 */
	if (radix_enabled())
-		WARN_ON(process_tb[mm->context.id].prtb0 != 0);
+		process_tb[mm->context.id].prtb0 = 0;
	else
		subpage_prot_free(mm);
	destroy_contexts(&mm->context);
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 631be42abd33..67af871190c6 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -196,22 +196,83 @@ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
 }
 
-static inline void fixup_tlbie(void)
+
+static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
+				  unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+	}
+}
+
+static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
+					unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_pid(0, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+	}
+}
+
+static inline void fixup_tlbie_pid(unsigned long pid)
 {
-	unsigned long pid = 0;
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
	unsigned long va = ((1UL << 52) - 1);
 
-	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_pid(0, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
 }
 
+
+static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
+				       unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
+	}
+}
+
 static inline void fixup_tlbie_lpid(unsigned long lpid)
 {
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
	unsigned long va = ((1UL << 52) - 1);
 
-	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid(0, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
@@ -258,6 +319,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
+		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
@@ -265,8 +327,8 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
+		fixup_tlbie_pid(pid);
	}
-	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -315,6 +377,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
+		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
@@ -322,8 +385,8 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
+		fixup_tlbie_lpid(lpid);
	}
-	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -390,6 +453,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
 
	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+	fixup_tlbie_va_range(addr - page_size, pid, ap);
 }
 
 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -399,7 +464,7 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
 
	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
-	fixup_tlbie();
+	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -457,7 +522,7 @@ static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
 
	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
-	fixup_tlbie_lpid(lpid);
+	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -469,7 +534,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
-	fixup_tlbie();
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
@@ -856,7 +920,7 @@ is_local:
			if (gflush)
				__tlbie_va_range(gstart, gend, pid,
						PUD_SIZE, MMU_PAGE_1G);
-			fixup_tlbie();
+
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
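[Editor's note: one detail of the radix changes worth calling out is that the fixup now lives inside the switch, because a page-walk-cache-only flush (RIC_FLUSH_PWC) broadcasts no TLB entry and so needs no fixup. A hedged sketch of the resulting invariant, using only names from the diff above; the _sketch function itself is illustrative.]

static inline void _tlbie_pid_sketch(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync" : : : "memory");	/* order PTE stores vs. tlbie */

	switch (ric) {
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);	/* PWC only: no fixup needed */
		break;
	default:
		__tlbie_pid(pid, ric);
		fixup_tlbie_pid(pid);	/* errata tlbie(s) must be the last
					 * tlbie before the final sync */
	}
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}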
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 802387b231ad..0e6ed4413eea 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -12,6 +12,14 @@
 #include <asm/code-patching.h>
 #include <mm/mmu_decl.h>
 
+static pgprot_t kasan_prot_ro(void)
+{
+	if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return PAGE_READONLY;
+
+	return PAGE_KERNEL_RO;
+}
+
 static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
 {
	unsigned long va = (unsigned long)kasan_early_shadow_page;
@@ -26,6 +34,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
 {
	pmd_t *pmd;
	unsigned long k_cur, k_next;
+	pgprot_t prot = slab_is_available() ? kasan_prot_ro() : PAGE_KERNEL;
 
	pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
 
@@ -43,10 +52,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
		if (!new)
			return -ENOMEM;
-		if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
-			kasan_populate_pte(new, PAGE_READONLY);
-		else
-			kasan_populate_pte(new, PAGE_KERNEL_RO);
+		kasan_populate_pte(new, prot);
 
		smp_wmb(); /* See comment in __pte_alloc */
 
@@ -103,11 +109,23 @@ static int __ref kasan_init_region(void *start, size_t size)
 
 static void __init kasan_remap_early_shadow_ro(void)
 {
-	if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
-		kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY);
-	else
-		kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+	pgprot_t prot = kasan_prot_ro();
+	unsigned long k_start = KASAN_SHADOW_START;
+	unsigned long k_end = KASAN_SHADOW_END;
+	unsigned long k_cur;
+	phys_addr_t pa = __pa(kasan_early_shadow_page);
+
+	kasan_populate_pte(kasan_early_shadow_pte, prot);
+
+	for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
+		pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
+		pte_t *ptep = pte_offset_kernel(pmd, k_cur);
+
+		if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
+			continue;
+
+		__set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
+	}
 
	flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
 }
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 94cd96b9b7bb..fbd6e6b7bbf2 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -193,7 +193,7 @@ static void pnv_smp_cpu_kill_self(void)
	 * for coming online, which are handled via
	 * generic_check_cpu_restart() calls.
	 */
-	kvmppc_set_host_ipi(cpu, 0);
+	kvmppc_clear_host_ipi(cpu);
 
	srr1 = pnv_cpu_offline(cpu);
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 36b846f6e74e..b53359258d99 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -56,6 +56,22 @@ EXPORT_SYMBOL(plpar_hcall);
 EXPORT_SYMBOL(plpar_hcall9);
 EXPORT_SYMBOL(plpar_hcall_norets);
 
+/*
+ * H_BLOCK_REMOVE supported block size for this page size in a segment whose
+ * base page size is that page size.
+ *
+ * The first index is the segment base page size, the second one is the actual
+ * page size.
+ */
+static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+
+/*
+ * Due to the involved complexity, and because the current hypervisor only
+ * returns this value or 0, we are limiting H_BLOCK_REMOVE support to a
+ * block size of 8.
+ */
+#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static u8 dtl_mask = DTL_LOG_PREEMPT;
 #else
@@ -984,6 +1000,17 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 #define HBLKR_CTRL_ERRNOTFOUND	0x8800000000000000UL
 #define HBLKR_CTRL_ERRBUSY	0xa000000000000000UL
 
+/*
+ * Return true if we are supporting this block size for the specified segment
+ * base page size and actual page size.
+ *
+ * Currently, only a block size of 8 is supported.
+ */
+static inline bool is_supported_hlbkrm(int bpsize, int psize)
+{
+	return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
+}
+
 /**
  * H_BLOCK_REMOVE caller.
  * @idx should point to the latest @param entry set with a PTEX.
@@ -1143,7 +1170,8 @@ static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
	if (lock_tlbie)
		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
 
-	if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+	/* Assuming THP size is 16M */
+	if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
		hugepage_block_invalidate(slot, vpn, count, psize, ssize);
	else
		hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
@@ -1312,6 +1340,137 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
 }
 
 /*
+ * TLB Block Invalidate Characteristics
+ *
+ * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
+ * is able to process for each couple segment base page size, actual page size.
+ *
+ * The ibm,get-system-parameter property returns a buffer with the
+ * following layout:
+ *
+ * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
+ * -----------------
+ * TLB Block Invalidate Specifiers:
+ * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
+ * [ 1 byte Number of page sizes (N) that are supported for the specified
+ *          TLB invalidate block size ]
+ * [ 1 byte Encoded segment base page size and actual page size
+ *          MSB=0 means 4k segment base page size and actual page size
+ *          MSB=1 the penc value in mmu_psize_def ]
+ *          ...
+ * -----------------
+ * Next TLB Block Invalidate Specifiers...
+ * -----------------
+ * [ 0 ]
+ */
+static inline void set_hblkrm_bloc_size(int bpsize, int psize,
+					unsigned int block_size)
+{
+	if (block_size > hblkrm_size[bpsize][psize])
+		hblkrm_size[bpsize][psize] = block_size;
+}
+
+/*
+ * Decode the Encoded segment base page size and actual page size.
+ * PAPR specifies:
+ *   - bit 7 is the L bit
+ *   - bits 0-5 are the penc value
+ * If the L bit is 0, this means 4K segment base page size and actual page size
+ * otherwise the penc value should be read.
+ */
+#define HBLKRM_L_MASK		0x80
+#define HBLKRM_PENC_MASK	0x3f
+static inline void __init check_lp_set_hblkrm(unsigned int lp,
+					      unsigned int block_size)
+{
+	unsigned int bpsize, psize;
+
+	/* First, check the L bit, if not set, this means 4K */
+	if ((lp & HBLKRM_L_MASK) == 0) {
+		set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
+		return;
+	}
+
+	lp &= HBLKRM_PENC_MASK;
+	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
+		struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
+
+		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+			if (def->penc[psize] == lp) {
+				set_hblkrm_bloc_size(bpsize, psize, block_size);
+				return;
+			}
+		}
+	}
+}
+
+#define SPLPAR_TLB_BIC_TOKEN		50
+
+/*
+ * The size of the TLB Block Invalidate Characteristics is variable. But at the
+ * maximum it will be the number of possible page sizes *2 + 10 bytes.
+ * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
+ * (128 bytes) for the buffer to get plenty of space.
+ */
+#define SPLPAR_TLB_BIC_MAXLENGTH	128
+
+void __init pseries_lpar_read_hblkrm_characteristics(void)
+{
+	unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
+	int call_status, len, idx, bpsize;
+
+	spin_lock(&rtas_data_buf_lock);
+	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
+	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				NULL,
+				SPLPAR_TLB_BIC_TOKEN,
+				__pa(rtas_data_buf),
+				RTAS_DATA_BUF_SIZE);
+	memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
+	local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
+	spin_unlock(&rtas_data_buf_lock);
+
+	if (call_status != 0) {
+		pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
+			__FILE__, __func__, call_status);
+		return;
+	}
+
+	/*
+	 * The first two (2) bytes of the data in the buffer are the length of
+	 * the returned data, not counting these first two (2) bytes.
+	 */
+	len = be16_to_cpu(*((u16 *)local_buffer)) + 2;
+	if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
+		pr_warn("%s too large returned buffer %d", __func__, len);
+		return;
+	}
+
+	idx = 2;
+	while (idx < len) {
+		u8 block_shift = local_buffer[idx++];
+		u32 block_size;
+		unsigned int npsize;
+
+		if (!block_shift)
+			break;
+
+		block_size = 1 << block_shift;
+
+		for (npsize = local_buffer[idx++];
+		     npsize > 0 && idx < len; npsize--)
+			check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
+					    block_size);
+	}
+
+	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
+		for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
+			if (hblkrm_size[bpsize][idx])
+				pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
+					bpsize, idx, hblkrm_size[bpsize][idx]);
+}
+
+/*
  * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
  * lock.
  */
@@ -1330,7 +1489,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
	if (lock_tlbie)
		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
 
-	if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
+	if (is_supported_hlbkrm(batch->psize, batch->psize)) {
		do_block_remove(number, batch, param);
		goto out;
	}
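[Editor's note: to make the buffer layout concrete, here is a worked example of a response pseries_lpar_read_hblkrm_characteristics() would accept. The byte values are invented for illustration; only the layout follows the comment block above.]

/* Hypothetical returned buffer (hex):
 *
 *   [0x00 0x05]   big-endian length: 5 bytes follow -> len = 5 + 2 = 7
 *   [0x03]        block_shift = 3 -> block_size = 1 << 3 = 8
 *   [0x02]        npsize = 2 page-size encodings follow
 *   [0x00]        L bit clear -> 4K base / 4K actual page size
 *   [0x90]        L bit set -> penc = 0x90 & 0x3f = 0x10, looked up
 *                 in mmu_psize_defs[] to find the (bpsize, psize) pair
 *   [0x00]        terminating zero block_shift
 *
 * This would record hblkrm_size[MMU_PAGE_4K][MMU_PAGE_4K] = 8, plus one
 * entry for whichever pair has penc == 0x10, and since 8 matches
 * HBLKRM_SUPPORTED_BLOCK_SIZE, is_supported_hlbkrm() then returns true
 * for those combinations.
 */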
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..61883291defc 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -65,29 +65,21 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
		cond_resched();
	} while (rc == H_BUSY);
 
-	if (rc) {
-		/* H_OVERLAP needs a separate error path */
-		if (rc == H_OVERLAP)
-			return -EBUSY;
-
-		dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
-		return -ENXIO;
-	}
+	if (rc)
+		return rc;
 
	p->bound_addr = saved;
-
-	dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
-
-	return 0;
+	dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
+	return rc;
 }
 
-static int drc_pmem_unbind(struct papr_scm_priv *p)
+static void drc_pmem_unbind(struct papr_scm_priv *p)
 {
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	uint64_t token = 0;
	int64_t rc;
 
-	dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
+	dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);
 
	/* NB: unbind has the same retry requirements as drc_pmem_bind() */
	do {
@@ -110,12 +102,48 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
	if (rc)
		dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
	else
-		dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
+		dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
			p->drc_index);
 
-	return rc == H_SUCCESS ? 0 : -ENXIO;
+	return;
 }
 
+static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
+{
+	unsigned long start_addr;
+	unsigned long end_addr;
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	int64_t rc;
+
+	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+			 p->drc_index, 0);
+	if (rc)
+		goto err_out;
+	start_addr = ret[0];
+
+	/* Make sure the full region is bound. */
+	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+			 p->drc_index, p->blocks - 1);
+	if (rc)
+		goto err_out;
+	end_addr = ret[0];
+
+	if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
+		goto err_out;
+
+	p->bound_addr = start_addr;
+	dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
+	return rc;
+
+err_out:
+	dev_info(&p->pdev->dev,
+		 "Failed to query, trying an unbind followed by bind");
+	drc_pmem_unbind(p);
+	return drc_pmem_bind(p);
+}
+
 static int papr_scm_meta_get(struct papr_scm_priv *p,
			     struct nd_cmd_get_config_data_hdr *hdr)
 {
@@ -436,14 +464,14 @@ static int papr_scm_probe(struct platform_device *pdev)
	rc = drc_pmem_bind(p);
 
	/* If phyp says drc memory still bound then force unbound and retry */
-	if (rc == -EBUSY) {
-		dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
-		drc_pmem_unbind(p);
-		rc = drc_pmem_bind(p);
-	}
+	if (rc == H_OVERLAP)
+		rc = drc_pmem_query_n_bind(p);
 
-	if (rc)
+	if (rc != H_SUCCESS) {
+		dev_err(&p->pdev->dev, "bind err: %d\n", rc);
+		rc = -ENXIO;
		goto err;
+	}
 
	/* setup the resource for the newly bound range */
	p->res.start = p->bound_addr;
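[Editor's note: a worked example, with invented numbers, of the consistency check drc_pmem_query_n_bind() performs before reusing an existing binding.]

/* For p->blocks = 4 and p->block_size = 256MB, the two
 * H_SCM_QUERY_BLOCK_MEM_BINDING calls return the addresses of block 0
 * and block 3, and the binding is reused only if
 *
 *     end_addr - start_addr == (4 - 1) * 256MB == 768MB
 *
 * i.e. the first and last blocks sit at the expected contiguous
 * offsets. Anything else takes the err_out path: a full unbind followed
 * by a fresh bind. Note that bind/unbind now deal purely in raw hcall
 * status codes; only papr_scm_probe() maps a failure to -ENXIO.
 */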
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index a6624d4bd9d0..13fa370a87e4 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -112,5 +112,6 @@ static inline unsigned long cmo_get_page_size(void)
 int dlpar_workqueue_init(void);
 
 void pseries_setup_rfi_flush(void);
+void pseries_lpar_read_hblkrm_characteristics(void);
 
 #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f8adcd0e4589..0a40201f315f 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -744,6 +744,7 @@ static void __init pSeries_setup_arch(void)
 
	pseries_setup_rfi_flush();
	setup_stf_barrier();
+	pseries_lpar_read_hblkrm_characteristics();
 
	/* By default, only probe PCI (can be overridden by rtas_pci) */
	pci_add_flags(PCI_PROBE_ONLY);
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 485569ff7ef1..7d13d2ef5a90 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -140,7 +140,7 @@ static unsigned int icp_native_get_irq(void)
 
 static void icp_native_cause_ipi(int cpu)
 {
-	kvmppc_set_host_ipi(cpu, 1);
+	kvmppc_set_host_ipi(cpu);
	icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
@@ -179,7 +179,7 @@ void icp_native_flush_interrupt(void)
	if (vec == XICS_IPI) {
		/* Clear pending IPI */
		int cpu = smp_processor_id();
-		kvmppc_set_host_ipi(cpu, 0);
+		kvmppc_clear_host_ipi(cpu);
		icp_native_set_qirr(cpu, 0xff);
	} else {
		pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
@@ -200,7 +200,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
 {
	int cpu = smp_processor_id();
 
-	kvmppc_set_host_ipi(cpu, 0);
+	kvmppc_clear_host_ipi(cpu);
	icp_native_set_qirr(cpu, 0xff);
 
	return smp_ipi_demux();
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 8bb8dd7dd6ad..68fd2540b093 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -126,7 +126,7 @@ static void icp_opal_cause_ipi(int cpu)
 {
	int hw_cpu = get_hard_smp_processor_id(cpu);
 
-	kvmppc_set_host_ipi(cpu, 1);
+	kvmppc_set_host_ipi(cpu);
	opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
 }
 
@@ -134,7 +134,7 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
 {
	int cpu = smp_processor_id();
 
-	kvmppc_set_host_ipi(cpu, 0);
+	kvmppc_clear_host_ipi(cpu);
	opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
 
	return smp_ipi_demux();
@@ -157,7 +157,7 @@ void icp_opal_flush_interrupt(void)
		if (vec == XICS_IPI) {
			/* Clear pending IPI */
			int cpu = smp_processor_id();
-			kvmppc_set_host_ipi(cpu, 0);
+			kvmppc_clear_host_ipi(cpu);
			opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
		} else {
			pr_err("XICS: hw interrupt 0x%x to offline cpu, "