summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm/book3s_hv.c
diff options
context:
space:
mode:
authorNicholas Piggin <npiggin@gmail.com>2021-05-28 19:07:33 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2021-06-10 22:12:13 +1000
commit9dc2babc185e0a24fbb48098daafd552cac157fa (patch)
tree1cdff9ed9a4eb60423c7a0e8b570ea04ea7af65a /arch/powerpc/kvm/book3s_hv.c
parent48013cbc504e064d2318f24482cfbe3c53e0a812 (diff)
downloadlinux-9dc2babc185e0a24fbb48098daafd552cac157fa.tar.bz2
KVM: PPC: Book3S HV P9: Stop handling hcalls in real-mode in the P9 path
In the interest of minimising the amount of code that is run in "real-mode", don't handle hcalls in real mode in the P9 path. This requires some new handlers for H_CEDE and xics-on-xive to be added before xive is pulled or cede logic is checked. This introduces a change in radix guest behaviour where radix guests that execute 'sc 1' in userspace now get a privilege fault whereas previously the 'sc 1' would be reflected as a syscall interrupt to the guest kernel. That reflection is only required for hash guests that run PR KVM. Background: In POWER8 and earlier processors, it is very expensive to exit from the HV real mode context of a guest hypervisor interrupt, and switch to host virtual mode. On those processors, guest->HV interrupts reach the hypervisor with the MMU off because the MMU is loaded with guest context (LPCR, SDR1, SLB), and the other threads in the sub-core need to be pulled out of the guest too. Then the primary must save off guest state, invalidate SLB and ERAT, and load up host state before the MMU can be enabled to run in host virtual mode (~= regular Linux mode). Hash guests also require a lot of hcalls to run due to the nature of the MMU architecture and paravirtualisation design. The XICS interrupt controller requires hcalls to run. So KVM traditionally tries hard to avoid the full exit, by handling hcalls and other interrupts in real mode as much as possible. By contrast, POWER9 has independent MMU context per-thread, and in radix mode the hypervisor is in host virtual memory mode when the HV interrupt is taken. Radix guests do not require significant hcalls to manage their translations, and xive guests don't need hcalls to handle interrupts. So it's much less important for performance to handle hcalls in real mode on POWER9. One caveat is that the TCE hcalls are performance critical, real-mode variants introduced for POWER8 in order to achieve 10GbE performance. Real mode TCE hcalls were found to be less important on POWER9, which was able to drive 40GBe networking without them (using the virt mode hcalls) but performance is still important. These hcalls will benefit from subsequent guest entry/exit optimisation including possibly a faster "partial exit" that does not entirely switch to host context to handle the hcall. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210528090752.3542186-14-npiggin@gmail.com
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r--arch/powerpc/kvm/book3s_hv.c79
1 files changed, 68 insertions, 11 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3ec1dc1bad16..8df02be9be72 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -899,6 +899,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
* H_SUCCESS if the source vcore wasn't idle (e.g. if it may
* have useful work to do and should not confer) so we don't
* recheck that here.
+ *
+ * In the case of the P9 single vcpu per vcore case, the real
+ * mode handler is not called but no other threads are in the
+ * source vcore.
*/
spin_lock(&vcore->lock);
@@ -1142,12 +1146,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
}
/*
- * Handle H_CEDE in the nested virtualization case where we haven't
- * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
+ * handlers in book3s_hv_rmhandlers.S.
+ *
* This has to be done early, not in kvmppc_pseries_do_hcall(), so
* that the cede logic in kvmppc_run_single_vcpu() works properly.
*/
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
{
vcpu->arch.shregs.msr |= MSR_EE;
vcpu->arch.ceded = 1;
@@ -1400,13 +1405,29 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_SYSCALL:
{
- /* hcall - punt to userspace */
int i;
- /* hypercall with MSR_PR has already been handled in rmode,
- * and never reaches here.
- */
+ if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * Guest userspace executed sc 1. This can only be
+ * reached by the P9 path because the old path
+ * handles this case in realmode hcall handlers.
+ *
+ * Radix guests can not run PR KVM or nested HV hash
+ * guests which might run PR KVM, so this is always
+ * a privilege fault. Send a program check to guest
+ * kernel.
+ */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ r = RESUME_GUEST;
+ break;
+ }
+ /*
+ * hcall - gather args and set exit_reason. This will next be
+ * handled by kvmppc_pseries_do_hcall which may be able to deal
+ * with it and resume guest, or may punt to userspace.
+ */
run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
for (i = 0; i < 9; ++i)
run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -3664,6 +3685,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
return trap;
}
+static inline bool hcall_is_xics(unsigned long req)
+{
+ return req == H_EOI || req == H_CPPR || req == H_IPI ||
+ req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
+}
+
/*
* Virtual-mode guest entry for POWER9 and later when the host and
* guest are both using the radix MMU. The LPIDR has already been set.
@@ -3787,15 +3814,36 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
- kvmppc_nested_cede(vcpu);
+ kvmppc_cede(vcpu);
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
kvmppc_xive_push_vcpu(vcpu);
trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /* H_CEDE has to be handled now, not later */
+ if (req == H_CEDE) {
+ kvmppc_cede(vcpu);
+ kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
+
+ /* XICS hcalls must be handled before xive is pulled */
+ } else if (hcall_is_xics(req)) {
+ int ret;
+
+ ret = kvmppc_xive_xics_hcall(vcpu, req);
+ if (ret != H_TOO_HARD) {
+ kvmppc_set_gpr(vcpu, 3, ret);
+ trap = 0;
+ }
+ }
+ }
kvmppc_xive_pull_vcpu(vcpu);
-
}
vcpu->arch.slb_max = 0;
@@ -4461,8 +4509,17 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
else
r = kvmppc_run_vcpu(vcpu);
- if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
- !(vcpu->arch.shregs.msr & MSR_PR)) {
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * These should have been caught reflected
+ * into the guest by now. Final sanity check:
+ * don't allow userspace to execute hcalls in
+ * the hypervisor.
+ */
+ r = RESUME_GUEST;
+ continue;
+ }
trace_kvm_hcall_enter(vcpu);
r = kvmppc_pseries_do_hcall(vcpu);
trace_kvm_hcall_exit(vcpu, r);