From 6f6a657c99980485c265363447b269083dd1dc3a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 22 Aug 2019 22:30:21 +0800 Subject: KVM/Hyper-V/VMX: Add direct tlb flush support Hyper-V provides a direct TLB flush function which helps the L1 hypervisor handle Hyper-V TLB flush requests from L2 guests. Add support for this function to VMX. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Tianyu Lan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4a99be1fae4e..8bab3e53d1e1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -486,6 +486,35 @@ static int hv_remote_flush_tlb(struct kvm *kvm) return hv_remote_flush_tlb_with_range(kvm, NULL); } +static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +{ + struct hv_enlightened_vmcs *evmcs; + struct hv_partition_assist_pg **p_hv_pa_pg = + &vcpu->kvm->arch.hyperv.hv_pa_pg; + /* + * Synthetic VM-Exit is not enabled in the current code, so all + * eVMCS in a single VM share the same assist page. + */ + if (!*p_hv_pa_pg) { + *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!*p_hv_pa_pg) + return -ENOMEM; + pr_debug("KVM: Hyper-V: allocated PA_PG for %llx\n", + (u64)vcpu->kvm); + } + + evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; + + evmcs->partition_assist_page = + __pa(*p_hv_pa_pg); + evmcs->hv_vm_id = (u64)vcpu->kvm; + evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + + pr_debug("KVM: Hyper-V: enabled DIRECT flush for %llx\n", + (u64)vcpu->kvm); + return 0; +} + #endif /* IS_ENABLED(CONFIG_HYPERV) */ /* @@ -6511,6 +6540,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (vmx->host_debugctlmsr) update_debugctlmsr(vmx->host_debugctlmsr); @@ -6578,6 +6610,7 @@ static struct kvm *vmx_vm_alloc(void) static void vmx_vm_free(struct kvm *kvm) { + kfree(kvm->arch.hyperv.hv_pa_pg); vfree(to_kvm_vmx(kvm)); } @@ -7837,6 +7870,7 @@ static void vmx_exit(void) if (!vp_ap) continue; + vp_ap->nested_control.features.directhypercall = 0; vp_ap->current_nested_vmcs = 0; vp_ap->enlighten_vmentry = 0; } @@ -7876,6 +7910,11 @@ static int __init vmx_init(void) pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); static_branch_enable(&enable_evmcs); } + + if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) + vmx_x86_ops.enable_direct_tlbflush + = hv_enable_direct_tlbflush; + } else { enlightened_vmcs = false; } -- cgit v1.2.3 From bc8a0aafcbb82e5c7dde486c03e6c51e3896d797 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:27 -0700 Subject: KVM: x86: Relocate MMIO exit stats counting Move the stat.mmio_exits update into x86_emulate_instruction(). This is both a bug fix, e.g. the current update flows incorrectly increment mmio_exits on emulation failure, and a preparatory change to set the stage for eliminating EMULATE_DONE and company.
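In outline, the counting rule after this change looks like the following sketch (condensed from the x86.c hunk below; the helper name is invented for illustration and is not actual kernel code):

static int finish_mmio_emulation(struct kvm_vcpu *vcpu)
{
	if (vcpu->mmio_needed) {
		/* Sole update point: bumped only on a genuine MMIO userspace
		 * exit, never on emulation failure. */
		++vcpu->stat.mmio_exits;
		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
		return EMULATE_USER_EXIT;
	}
	return EMULATE_DONE;
}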
Reviewed-by: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 -- arch/x86/kvm/vmx/vmx.c | 1 - arch/x86/kvm/x86.c | 2 ++ 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a10af9c87f8a..138dcdef8a06 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5456,8 +5456,6 @@ emulate: case EMULATE_DONE: return 1; case EMULATE_USER_EXIT: - ++vcpu->stat.mmio_exits; - /* fall through */ case EMULATE_FAIL: return 0; default: diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8bab3e53d1e1..8f61250fc1c0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5201,7 +5201,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) err = kvm_emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { - ++vcpu->stat.mmio_exits; ret = 0; goto out; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7e51924c4a1c..2b92c2ca1d79 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6703,6 +6703,8 @@ restart: } r = EMULATE_USER_EXIT; } else if (vcpu->mmio_needed) { + ++vcpu->stat.mmio_exits; + if (!vcpu->mmio_is_write) writeback = false; r = EMULATE_USER_EXIT; -- cgit v1.2.3 From a6c6ed1e81060e110258dbc7118fda07e9971e48 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:30 -0700 Subject: KVM: x86: Don't attempt VMWare emulation on #GP with non-zero error code The VMware backdoor hooks #GP faults on IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero error code for their #GP. Re-injecting #GP instead of attempting emulation on a non-zero error code will allow a future patch to move #GP injection (for emulation failure) into kvm_emulate_instruction() without having to plumb in the error code. Reviewed-and-tested-by: Vitaly Kuznetsov Reviewed-by: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 +++++++++- arch/x86/kvm/vmx/vmx.c | 12 +++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7e352e81df31..00ec27bc2946 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2772,12 +2772,20 @@ static int gp_interception(struct vcpu_svm *svm) WARN_ON_ONCE(!enable_vmware_backdoor); + /* + * VMware backdoor emulation on #GP interception only handles IN{S}, + * OUT{S}, and RDPMC, none of which generate a non-zero error code. + */ + if (error_code) { + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; + } er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; else if (er != EMULATE_DONE) - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); return 1; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8f61250fc1c0..d1bac3cb2440 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4539,12 +4539,22 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { WARN_ON_ONCE(!enable_vmware_backdoor); + + /* + * VMware backdoor emulation on #GP interception only handles + * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero + * error code on #GP. 
+ */ + if (error_code) { + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; + } er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; else if (er != EMULATE_DONE) - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); return 1; } -- cgit v1.2.3 From 42cbf06872cc44fb8d2d6665fa6494b5925eaf1c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:31 -0700 Subject: KVM: x86: Move #GP injection for VMware into x86_emulate_instruction() Immediately inject a #GP when VMware emulation fails and return EMULATE_DONE instead of propagating EMULATE_FAIL up the stack. This helps pave the way for removing EMULATE_FAIL altogether. Rename EMULTYPE_VMWARE to EMULTYPE_VMWARE_GP to document that the x86 emulator is called to handle VMware #GP interception, e.g. why a #GP is injected on emulation failure for EMULTYPE_VMWARE_GP. Drop EMULTYPE_NO_UD_ON_FAIL as a standalone type. The "no #UD on fail" is used only in the VMWare case and is obsoleted by having the emulator itself reinject #GP. Signed-off-by: Sean Christopherson Reviewed-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- arch/x86/kvm/svm.c | 10 ++-------- arch/x86/kvm/vmx/vmx.c | 10 ++-------- arch/x86/kvm/x86.c | 14 +++++++++----- 4 files changed, 14 insertions(+), 23 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 13b35f94dec4..80071d02b87d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1325,8 +1325,7 @@ enum emulation_result { #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_ALLOW_RETRY (1 << 3) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) -#define EMULTYPE_VMWARE (1 << 5) +#define EMULTYPE_VMWARE_GP (1 << 5) int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 00ec27bc2946..c7aa55e192c0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2768,7 +2768,6 @@ static int gp_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 error_code = svm->vmcb->control.exit_info_1; - int er; WARN_ON_ONCE(!enable_vmware_backdoor); @@ -2780,13 +2779,8 @@ static int gp_interception(struct vcpu_svm *svm) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); return 1; } - er = kvm_emulate_instruction(vcpu, - EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); - if (er == EMULATE_USER_EXIT) - return 0; - else if (er != EMULATE_DONE) - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP) != + EMULATE_USER_EXIT; } static bool is_erratum_383(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d1bac3cb2440..d4371599abe3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4522,7 +4522,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) u32 intr_info, ex_no, error_code; unsigned long cr2, rip, dr6; u32 vect_info; - enum emulation_result er; vect_info = vmx->idt_vectoring_info; intr_info = vmx->exit_intr_info; @@ -4549,13 +4548,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); return 1; } - er = kvm_emulate_instruction(vcpu, - EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); - if (er == 
EMULATE_USER_EXIT) - return 0; - else if (er != EMULATE_DONE) - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP) != + EMULATE_USER_EXIT; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e6063f20259e..09753b699d81 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6307,8 +6307,10 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) - return EMULATE_FAIL; + if (emulation_type & EMULTYPE_VMWARE_GP) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return EMULATE_DONE; + } kvm_queue_exception(vcpu, UD_VECTOR); @@ -6648,9 +6650,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, } } - if ((emulation_type & EMULTYPE_VMWARE) && - !is_vmware_backdoor_opcode(ctxt)) - return EMULATE_FAIL; + if ((emulation_type & EMULTYPE_VMWARE_GP) && + !is_vmware_backdoor_opcode(ctxt)) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return EMULATE_DONE; + } if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); -- cgit v1.2.3 From 1051778f6e1e25c1203869586f4431785a35c13c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:35 -0700 Subject: KVM: x86: Handle emulation failure directly in kvm_task_switch() Consolidate the reporting of emulation failure into kvm_task_switch() so that it can return EMULATE_USER_EXIT. This helps pave the way for removing EMULATE_FAIL altogether. This also fixes a theoretical bug where task switch interception could suppress an EMULATE_USER_EXIT return. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 11 ++--------- arch/x86/kvm/vmx/vmx.c | 14 +++----------- arch/x86/kvm/x86.c | 9 ++++++--- 3 files changed, 11 insertions(+), 23 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fc40052fa334..69e53aa3969d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3892,17 +3892,10 @@ static int task_switch_interception(struct vcpu_svm *svm) if (int_type != SVM_EXITINTINFO_TYPE_SOFT) int_vec = -1; - if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, - has_error_code, error_code) == EMULATE_FAIL) - goto fail; - return 1; -fail: - svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - svm->vcpu.run->internal.ndata = 0; - return 0; + return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, + has_error_code, error_code) != EMULATE_USER_EXIT; } static int cpuid_interception(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d4371599abe3..498270cfc71d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5074,21 +5074,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) type != INTR_TYPE_NMI_INTR)) skip_emulated_instruction(vcpu); - if (kvm_task_switch(vcpu, tss_selector, - type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason, - has_error_code, error_code) == EMULATE_FAIL) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } - /* * TODO: What about debug traps on tss switch? * Are we supposed to inject them and update dr6? */ - - return 1; + return kvm_task_switch(vcpu, tss_selector, + type == INTR_TYPE_SOFT_INTR ? 
idt_index : -1, + reason, has_error_code, error_code) != EMULATE_USER_EXIT; } static int handle_ept_violation(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 802dfb926ca7..bdee7e39accb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8693,9 +8693,12 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, has_error_code, error_code); - - if (ret) - return EMULATE_FAIL; + if (ret) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return EMULATE_USER_EXIT; + } kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); -- cgit v1.2.3 From 9497e1f2ec93f0c8a7832c3c1eb3af8159800cd8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:36 -0700 Subject: KVM: x86: Move triple fault request into RM int injection Request triple fault in kvm_inject_realmode_interrupt() instead of returning EMULATE_FAIL and deferring to the caller. All existing callers request triple fault and it's highly unlikely Real Mode is going to acquire new features. While this consolidates a small amount of code, the real goal is to remove the last reference to EMULATE_FAIL. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 9 +++------ arch/x86/kvm/x86.c | 17 ++++++++--------- arch/x86/kvm/x86.h | 2 +- 3 files changed, 12 insertions(+), 16 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 498270cfc71d..67733eef184c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1556,8 +1556,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) int inc_eip = 0; if (kvm_exception_is_soft(nr)) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); return; } @@ -4306,8 +4305,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) int inc_eip = 0; if (vcpu->arch.interrupt.soft) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); return; } intr = irq | INTR_INFO_VALID_MASK; @@ -4343,8 +4341,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); return; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bdee7e39accb..92b6690d0512 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6273,7 +6273,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) +void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; int ret; @@ -6285,14 +6285,13 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) ctxt->_eip = ctxt->eip + inc_eip; ret = emulate_int_real(ctxt, irq); - if (ret != X86EMUL_CONTINUE) - 
return EMULATE_FAIL; - - ctxt->eip = ctxt->_eip; - kvm_rip_write(vcpu, ctxt->eip); - kvm_set_rflags(vcpu, ctxt->eflags); - - return EMULATE_DONE; + if (ret != X86EMUL_CONTINUE) { + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } else { + ctxt->eip = ctxt->_eip; + kvm_rip_write(vcpu, ctxt->eip); + kvm_set_rflags(vcpu, ctxt->eflags); + } } EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b5274e2a53cf..dbf7442a822b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -261,7 +261,7 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) } void kvm_set_pending_timer(struct kvm_vcpu *vcpu); -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); +void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); u64 get_kvmclock_ns(struct kvm *kvm); -- cgit v1.2.3 From 8fff2710eaf50aff1e6a7aa0607fe5288b619404 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:37 -0700 Subject: KVM: VMX: Remove EMULATE_FAIL handling in handle_invalid_guest_state() Now that EMULATE_FAIL is completely unused, remove the last remaining usage where KVM does something functional in response to EMULATE_FAIL. Leave the check in place as a WARN_ON_ONCE to provide a better paper trail when EMULATE_{DONE,FAIL,USER_EXIT} are completely removed. Opportunistically remove the gotos in handle_invalid_guest_state(). With the EMULATE_FAIL handling gone there is no need to have a common handler for emulation failure and the gotos only complicate things, e.g. the signal_pending() check always returns '1', but this is far from obvious when glancing through the code. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 67733eef184c..9c92b2993f5a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5169,8 +5169,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - enum emulation_result err = EMULATE_DONE; - int ret = 1; + enum emulation_result err; bool intr_window_requested; unsigned count = 130; @@ -5193,38 +5192,38 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) err = kvm_emulate_instruction(vcpu, 0); - if (err == EMULATE_USER_EXIT) { - ret = 0; - goto out; - } + if (err == EMULATE_USER_EXIT) + return 0; - if (err != EMULATE_DONE) - goto emulation_error; + if (WARN_ON_ONCE(err == EMULATE_FAIL)) + return 1; if (vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending) - goto emulation_error; + vcpu->arch.exception.pending) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - ret = kvm_vcpu_halt(vcpu); - goto out; + return kvm_vcpu_halt(vcpu); } + /* + * Note, return 1 and not 0, vcpu_run() is responsible for + * morphing the pending signal into the proper return code.
+ */ if (signal_pending(current)) - goto out; + return 1; + if (need_resched()) schedule(); } -out: - return ret; - -emulation_error: - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; + return 1; } static void grow_ple_window(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 60fc3d02d5b8829b91b7b443ef6c7e8f0bbae868 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:38 -0700 Subject: KVM: x86: Remove emulation_result enums, EMULATE_{DONE,FAIL,USER_EXIT} Deferring emulation failure handling (in some cases) to the caller of x86_emulate_instruction() has proven fragile, e.g. multiple instances of KVM not setting run->exit_reason on EMULATE_FAIL, largely due to it being difficult to discern what emulation types can return what result, and which combination of types and results are handled where. Now that x86_emulate_instruction() always handles emulation failure, i.e. EMULATE_FAIL is only referenced in callers, remove the emulation_result enums entirely. Per KVM's existing exit handling conventions, return '0' and '1' for "exit to userspace" and "resume guest" respectively. Doing so cleans up many callers, e.g. they can return kvm_emulate_instruction() directly instead of having to interpret its result. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ----- arch/x86/kvm/mmu.c | 14 ++-------- arch/x86/kvm/svm.c | 22 +++++++--------- arch/x86/kvm/vmx/vmx.c | 28 ++++++++------------ arch/x86/kvm/x86.c | 57 ++++++++++++++++++++--------------------- 5 files changed, 49 insertions(+), 78 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f3c623dbd5ab..26f85a24b5e7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1315,12 +1315,6 @@ extern u64 kvm_default_tsc_scaling_ratio; extern u64 kvm_mce_cap_supported; -enum emulation_result { - EMULATE_DONE, /* no further processing */ - EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ - EMULATE_FAIL, /* can't emulate this instruction */ -}; - #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 138dcdef8a06..84e1a9c661dd 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5383,7 +5383,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; - enum emulation_result er; bool direct = vcpu->arch.mmu->direct_map; /* With shadow page tables, fault_address contains a GVA or nGPA.
*/ @@ -5450,17 +5449,8 @@ emulate: return 1; } - er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); - - switch (er) { - case EMULATE_DONE: - return 1; - case EMULATE_USER_EXIT: - case EMULATE_FAIL: - return 0; - default: - BUG(); - } + return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, + insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 69e53aa3969d..cb7ad362e9b9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -787,7 +787,7 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, svm->next_rip); svm_set_interrupt_shadow(vcpu, 0); - return EMULATE_DONE; + return 1; } static void svm_queue_exception(struct kvm_vcpu *vcpu) @@ -2779,8 +2779,7 @@ static int gp_interception(struct vcpu_svm *svm) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); return 1; } - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP) != - EMULATE_USER_EXIT; + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); } static bool is_erratum_383(void) @@ -2878,7 +2877,7 @@ static int io_interception(struct vcpu_svm *svm) string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; if (string) - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -3885,17 +3884,15 @@ static int task_switch_interception(struct vcpu_svm *svm) int_type == SVM_EXITINTINFO_TYPE_SOFT || (int_type == SVM_EXITINTINFO_TYPE_EXEPT && (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) { - if (skip_emulated_instruction(&svm->vcpu) == EMULATE_USER_EXIT) + if (!skip_emulated_instruction(&svm->vcpu)) return 0; } if (int_type != SVM_EXITINTINFO_TYPE_SOFT) int_vec = -1; - - return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, - has_error_code, error_code) != EMULATE_USER_EXIT; + has_error_code, error_code); } static int cpuid_interception(struct vcpu_svm *svm) @@ -3916,7 +3913,7 @@ static int iret_interception(struct vcpu_svm *svm) static int invlpg_interception(struct vcpu_svm *svm) { if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) - return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0); kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); return kvm_skip_emulated_instruction(&svm->vcpu); @@ -3924,13 +3921,12 @@ static int invlpg_interception(struct vcpu_svm *svm) static int emulate_on_interception(struct vcpu_svm *svm) { - return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0); } static int rsm_interception(struct vcpu_svm *svm) { - return kvm_emulate_instruction_from_buffer(&svm->vcpu, - rsm_ins_bytes, 2) == EMULATE_DONE; + return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2); } static int rdpmc_interception(struct vcpu_svm *svm) @@ -4719,7 +4715,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) ret = avic_unaccel_trap_write(svm); } else { /* Handling Fault */ - ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); + ret = kvm_emulate_instruction(&svm->vcpu, 0); } return ret; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9c92b2993f5a..e71dc36850cb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1516,7 +1516,7 @@ static int __skip_emulated_instruction(struct kvm_vcpu *vcpu) /* skipping an emulated instruction also counts */ 
vmx_set_interrupt_shadow(vcpu, 0); - return EMULATE_DONE; + return 1; } static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu) @@ -4468,7 +4468,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. */ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (kvm_emulate_instruction(vcpu, 0)) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; return kvm_vcpu_halt(vcpu); @@ -4545,8 +4545,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); return 1; } - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP) != - EMULATE_USER_EXIT; + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); } /* @@ -4643,7 +4642,7 @@ static int handle_io(struct kvm_vcpu *vcpu) ++vcpu->stat.io_exits; if (string) - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -4717,7 +4716,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) static int handle_desc(struct kvm_vcpu *vcpu) { WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_cr(struct kvm_vcpu *vcpu) @@ -4933,7 +4932,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu) static int handle_invd(struct kvm_vcpu *vcpu) { - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_invlpg(struct kvm_vcpu *vcpu) @@ -5000,7 +4999,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } } - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) @@ -5077,7 +5076,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) */ return kvm_task_switch(vcpu, tss_selector, type == INTR_TYPE_SOFT_INTR ? 
idt_index : -1, - reason, has_error_code, error_code) != EMULATE_USER_EXIT; + reason, has_error_code, error_code); } static int handle_ept_violation(struct kvm_vcpu *vcpu) @@ -5149,8 +5148,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return kvm_skip_emulated_instruction(vcpu); else - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == - EMULATE_DONE; + return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP); } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -5169,7 +5167,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - enum emulation_result err; bool intr_window_requested; unsigned count = 130; @@ -5190,14 +5187,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = kvm_emulate_instruction(vcpu, 0); - - if (err == EMULATE_USER_EXIT) + if (!kvm_emulate_instruction(vcpu, 0)) return 0; - if (WARN_ON_ONCE(err == EMULATE_FAIL)) - return 1; - if (vmx->emulation_required && !vmx->rmode.vm86_active && vcpu->arch.exception.pending) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 92b6690d0512..a83b269126a0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5451,7 +5451,7 @@ int handle_ud(struct kvm_vcpu *vcpu) emul_type = EMULTYPE_TRAP_UD_FORCED; } - return kvm_emulate_instruction(vcpu, emul_type) != EMULATE_USER_EXIT; + return kvm_emulate_instruction(vcpu, emul_type); } EXPORT_SYMBOL_GPL(handle_ud); @@ -6302,14 +6302,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) if (emulation_type & EMULTYPE_VMWARE_GP) { kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return EMULATE_DONE; + return 1; } if (emulation_type & EMULTYPE_SKIP) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - return EMULATE_USER_EXIT; + return 0; } kvm_queue_exception(vcpu, UD_VECTOR); @@ -6318,10 +6318,10 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - return EMULATE_USER_EXIT; + return 0; } - return EMULATE_DONE; + return 1; } static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, @@ -6485,10 +6485,10 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; - return EMULATE_USER_EXIT; + return 0; } kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); - return EMULATE_DONE; + return 1; } int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) @@ -6497,7 +6497,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) int r; r = kvm_x86_ops->skip_emulated_instruction(vcpu); - if (unlikely(r != EMULATE_DONE)) + if (unlikely(!r)) return 0; /* @@ -6510,7 +6510,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) */ if (unlikely(rflags & X86_EFLAGS_TF)) r = kvm_vcpu_do_singlestep(vcpu); - return r == EMULATE_DONE; + return r; } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -6529,7 +6529,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) kvm_run->debug.arch.pc = eip; kvm_run->debug.arch.exception = DB_VECTOR; 
kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; + *r = 0; return true; } } @@ -6545,7 +6545,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); - *r = EMULATE_DONE; + *r = 1; return true; } } @@ -6632,11 +6632,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if ((emulation_type & EMULTYPE_TRAP_UD) || (emulation_type & EMULTYPE_TRAP_UD_FORCED)) { kvm_queue_exception(vcpu, UD_VECTOR); - return EMULATE_DONE; + return 1; } if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) - return EMULATE_DONE; + return 1; if (ctxt->have_exception) { /* * #UD should result in just EMULATION_FAILED, and trap-like @@ -6645,7 +6645,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR || exception_type(ctxt->exception.vector) == EXCPT_TRAP); inject_emulated_exception(vcpu); - return EMULATE_DONE; + return 1; } return handle_emulation_failure(vcpu, emulation_type); } @@ -6654,7 +6654,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if ((emulation_type & EMULTYPE_VMWARE_GP) && !is_vmware_backdoor_opcode(ctxt)) { kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return EMULATE_DONE; + return 1; } if (emulation_type & EMULTYPE_SKIP) { @@ -6662,11 +6662,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (ctxt->eflags & X86_EFLAGS_RF) kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); kvm_x86_ops->set_interrupt_shadow(vcpu, 0); - return EMULATE_DONE; + return 1; } if (retry_instruction(ctxt, cr2, emulation_type)) - return EMULATE_DONE; + return 1; /* this is needed for vmware backdoor interface to work since it changes registers values during IO operation */ @@ -6682,18 +6682,18 @@ restart: r = x86_emulate_insn(ctxt); if (r == EMULATION_INTERCEPTED) - return EMULATE_DONE; + return 1; if (r == EMULATION_FAILED) { if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) - return EMULATE_DONE; + return 1; return handle_emulation_failure(vcpu, emulation_type); } if (ctxt->have_exception) { - r = EMULATE_DONE; + r = 1; if (inject_emulated_exception(vcpu)) return r; } else if (vcpu->arch.pio.count) { @@ -6704,18 +6704,18 @@ restart: writeback = false; vcpu->arch.complete_userspace_io = complete_emulated_pio; } - r = EMULATE_USER_EXIT; + r = 0; } else if (vcpu->mmio_needed) { ++vcpu->stat.mmio_exits; if (!vcpu->mmio_is_write) writeback = false; - r = EMULATE_USER_EXIT; + r = 0; vcpu->arch.complete_userspace_io = complete_emulated_mmio; } else if (r == EMULATION_RESTART) goto restart; else - r = EMULATE_DONE; + r = 1; if (writeback) { unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); @@ -6724,7 +6724,7 @@ restart: if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && ctxt->tf) + if (r && ctxt->tf) r = kvm_vcpu_do_singlestep(vcpu); __kvm_set_rflags(vcpu, ctxt->eflags); } @@ -8319,12 +8319,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu) static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { int r; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) - return 0; - return 1; + return r; } static int complete_emulated_pio(struct kvm_vcpu *vcpu) @@ -8696,13 +8695,13 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, 
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - return EMULATE_USER_EXIT; + return 0; } kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); - return EMULATE_DONE; + return 1; } EXPORT_SYMBOL_GPL(kvm_task_switch); -- cgit v1.2.3 From 1957aa63be5395307f8b80458f011871b34617ad Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:39 -0700 Subject: KVM: VMX: Handle single-step #DB for EMULTYPE_SKIP on EPT misconfig VMX's EPT misconfig flow for handling the fast-MMIO path falls back to decoding the instruction to determine the instruction length when running as a guest (Hyper-V doesn't fill VMCS.VM_EXIT_INSTRUCTION_LEN because it's technically not defined for EPT misconfigs). Rather than implement the slow skip in VMX's generic skip_emulated_instruction(), handle_ept_misconfig() directly calls kvm_emulate_instruction() with EMULTYPE_SKIP, which intentionally doesn't do single-step detection, and so handle_ept_misconfig() misses a single-step #DB. Rework the EPT misconfig fallback case to route it through kvm_skip_emulated_instruction() so that single-step #DBs and interrupt shadow updates are handled automatically. I.e. make VMX's slow skip logic match SVM's and have the SVM flow not intentionally avoid the shadow update. Alternatively, handle_ept_misconfig() could manually handle single-step detection, but that results in EMULTYPE_SKIP having split logic for the interrupt shadow vs. single-step #DBs, and split emulator logic is largely what led to this mess in the first place. Modifying SVM to mirror VMX flow isn't really an option as SVM's case isn't limited to a specific exit reason, i.e. handling the slow skip in skip_emulated_instruction() is mandatory for all intents and purposes. Drop VMX's skip_emulated_instruction() wrapper since it can now fail, and instead WARN if it fails unexpectedly, e.g. if exit_reason somehow becomes corrupted.
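For reference, the skip path this patch routes through looks roughly like the following (abbreviated from the post-series x86.c shown earlier; the rflags read is paraphrased, so treat this as a sketch rather than the verbatim function):

int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
	int r = kvm_x86_ops->skip_emulated_instruction(vcpu); /* may now fail */

	if (unlikely(!r))
		return 0;	/* slow skip hit an emulation failure */

	/* Single-step #DBs are handled here, for VMX and SVM alike. */
	if (unlikely(rflags & X86_EFLAGS_TF))
		r = kvm_vcpu_do_singlestep(vcpu);
	return r;
}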
Cc: Vitaly Kuznetsov Fixes: d391f12070672 ("x86/kvm/vmx: do not use vm-exit instruction length for fast MMIO when running nested") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 17 +++++++++-------- arch/x86/kvm/vmx/vmx.c | 52 +++++++++++++++++++++----------------------------- arch/x86/kvm/x86.c | 6 +++++- 3 files changed, 36 insertions(+), 39 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cb7ad362e9b9..ce75296b1b10 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -777,14 +777,15 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) svm->next_rip = svm->vmcb->control.next_rip; } - if (!svm->next_rip) - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP); - - if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) - printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", - __func__, kvm_rip_read(vcpu), svm->next_rip); - - kvm_rip_write(vcpu, svm->next_rip); + if (!svm->next_rip) { + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } else { + if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) + pr_err("%s: ip 0x%lx next 0x%llx\n", + __func__, kvm_rip_read(vcpu), svm->next_rip); + kvm_rip_write(vcpu, svm->next_rip); + } svm_set_interrupt_shadow(vcpu, 0); return 1; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e71dc36850cb..ef98311ad153 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1501,17 +1501,27 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -/* - * Returns an int to be compatible with SVM implementation (which can fail). - * Do not use directly, use skip_emulated_instruction() instead. - */ -static int __skip_emulated_instruction(struct kvm_vcpu *vcpu) +static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { unsigned long rip; - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_rip_write(vcpu, rip); + /* + * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on + * undefined behavior: Intel's SDM doesn't mandate the VMCS field be + * set when EPT misconfig occurs. In practice, real hardware updates + * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors + * (namely Hyper-V) don't set it due to it being undefined behavior, + * i.e. we end up advancing IP with some random value. 
+ */ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || + to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { + rip = kvm_rip_read(vcpu); + rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + kvm_rip_write(vcpu, rip); + } else { + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } /* skipping an emulated instruction also counts */ vmx_set_interrupt_shadow(vcpu, 0); @@ -1519,11 +1529,6 @@ static int __skip_emulated_instruction(struct kvm_vcpu *vcpu) return 1; } -static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu) -{ - (void)__skip_emulated_instruction(vcpu); -} - static void vmx_clear_hlt(struct kvm_vcpu *vcpu) { /* @@ -4587,7 +4592,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; if (is_icebp(intr_info)) - skip_emulated_instruction(vcpu); + WARN_ON(!skip_emulated_instruction(vcpu)); kvm_queue_exception(vcpu, DB_VECTOR); return 1; @@ -5068,7 +5073,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && type != INTR_TYPE_EXT_INTR && type != INTR_TYPE_NMI_INTR)) - skip_emulated_instruction(vcpu); + WARN_ON(!skip_emulated_instruction(vcpu)); /* * TODO: What about debug traps on tss switch? @@ -5135,20 +5140,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); - /* - * Doing kvm_skip_emulated_instruction() depends on undefined - * behavior: Intel's manual doesn't mandate - * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG - * occurs and while on real hardware it was observed to be set, - * other hypervisors (namely Hyper-V) don't set it, we end up - * advancing IP with some random value. Disable fast mmio when - * running nested and keep it for real hardware in hope that - * VM_EXIT_INSTRUCTION_LEN will always be set correctly. - */ - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) - return kvm_skip_emulated_instruction(vcpu); - else - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP); + return kvm_skip_emulated_instruction(vcpu); } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -7722,7 +7714,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .run = vmx_vcpu_run, .handle_exit = vmx_handle_exit, - .skip_emulated_instruction = __skip_emulated_instruction, + .skip_emulated_instruction = skip_emulated_instruction, .set_interrupt_shadow = vmx_set_interrupt_shadow, .get_interrupt_shadow = vmx_get_interrupt_shadow, .patch_hypercall = vmx_patch_hypercall, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a83b269126a0..c38d247dbffb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6657,11 +6657,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return 1; } + /* + * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks + * for kvm_skip_emulated_instruction(). The caller is responsible for + * updating interruptibility state and injecting single-step #DBs. 
+ */ if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); if (ctxt->eflags & X86_EFLAGS_RF) kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); - kvm_x86_ops->set_interrupt_shadow(vcpu, 0); return 1; } -- cgit v1.2.3 From e69e72faa3a0709dd23df6a4ca060a15e99168a1 Mon Sep 17 00:00:00 2001 From: Tao Xu Date: Tue, 16 Jul 2019 14:55:49 +0800 Subject: KVM: x86: Add support for user wait instructions UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions. This patch adds support for user wait instructions in KVM. Availability of the user wait instructions is indicated by the presence of the CPUID feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may be executed at any privilege level, and use the 32-bit IA32_UMWAIT_CONTROL MSR to set the maximum time. The behavior of user wait instructions in VMX non-root operation is determined first by the setting of the "enable user wait and pause" secondary processor-based VM-execution control bit 26. If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause an invalid-opcode exception (#UD). If the VM-execution control is 1, treatment is based on the setting of the "RDTSC exiting" VM-execution control. Because KVM never enables RDTSC exiting, if the instruction causes a delay, the amount of time delayed is called here the physical delay. The physical delay is first computed by determining the virtual delay. If IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in EDX:EAX minus the value that RDTSC would return; if IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH). Because UMWAIT and TPAUSE can put a (physical) CPU into a power-saving state, by default we don't expose them to the guest, and enable them only when guest CPUID has the feature. Detailed information about user wait instructions can be found in the latest Intel 64 and IA-32 Architectures Software Developer's Manual.
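As a worked sketch of the delay computation above (illustrative pseudo-C only; the function and parameter names are invented for this example and are not kernel identifiers):

static u64 umwait_virtual_delay(u64 deadline_edx_eax, u64 tsc_now, u32 umwait_control)
{
	/* EDX:EAX minus the value RDTSC would return */
	u64 delay = deadline_edx_eax - tsc_now;
	/* AND(IA32_UMWAIT_CONTROL, FFFFFFFCH), i.e. bits [31:2] */
	u32 limit = umwait_control & ~3u;

	if (limit)	/* IA32_UMWAIT_CONTROL[31:2] != 0 */
		delay = min_t(u64, delay, limit);
	return delay;
}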
Co-developed-by: Jingqi Liu Signed-off-by: Jingqi Liu Signed-off-by: Tao Xu Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx/capabilities.h | 6 ++++++ arch/x86/kvm/vmx/nested.c | 1 + arch/x86/kvm/vmx/vmx.c | 18 ++++++++++++++++++ 5 files changed, 27 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b15e6465870f..fdad81626829 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -69,6 +69,7 @@ #define SECONDARY_EXEC_PT_USE_GPA 0x01000000 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index dd5985eb61b4..4e5a835989d1 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -360,7 +360,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d6664ee3d127..7aa69716d516 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -247,6 +247,12 @@ static inline bool vmx_xsaves_supported(void) SECONDARY_EXEC_XSAVES; } +static inline bool vmx_waitpkg_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + static inline bool cpu_has_vmx_tsc_scaling(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 75ed0a63abbe..c5b7ba795f95 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2089,6 +2089,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ef98311ad153..bb55f54e29b1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2323,6 +2323,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | SECONDARY_EXEC_ENABLE_VMFUNC | @@ -4059,6 +4060,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) } } + if (vmx_waitpkg_supported()) { + bool waitpkg_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG); + + if (!waitpkg_enabled) + exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + + if (nested) { + if (waitpkg_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + } + } + vmx->secondary_exec_control = exec_control; } -- cgit v1.2.3 From 6e3ba4abcea5681eebbfc10f1b56c9fbe80b6685 Mon 
Sep 17 00:00:00 2001 From: Tao Xu Date: Tue, 16 Jul 2019 14:55:50 +0800 Subject: KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL The UMWAIT and TPAUSE instructions use the 32-bit IA32_UMWAIT_CONTROL MSR at index E1H to determine the maximum time in TSC quanta that the processor can reside in either C0.1 or C0.2. This patch emulates MSR IA32_UMWAIT_CONTROL in the guest and differentiates IA32_UMWAIT_CONTROL between host and guest. The variable umwait_control_cached in arch/x86/kernel/cpu/umwait.c caches the MSR value, so this patch uses it to avoid frequent RDMSRs of IA32_UMWAIT_CONTROL. Co-developed-by: Jingqi Liu Signed-off-by: Jingqi Liu Signed-off-by: Tao Xu Signed-off-by: Paolo Bonzini --- arch/x86/kernel/cpu/umwait.c | 6 ++++++ arch/x86/kvm/vmx/vmx.c | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/kvm/vmx/vmx.h | 9 +++++++++ arch/x86/kvm/x86.c | 2 ++ 4 files changed, 53 insertions(+) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 32b4dc9030aa..c222f283b456 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -17,6 +17,12 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); +u32 get_umwait_control_msr(void) +{ + return umwait_control_cached; +} +EXPORT_SYMBOL_GPL(get_umwait_control_msr); + /* * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index bb55f54e29b1..52dd2241b749 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1733,6 +1733,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + msr_info->data = vmx->msr_ia32_umwait_control; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -1906,6 +1912,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + /* The reserved bit 1 and non-32 bit [63:32] should be zero */ + if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) + return 1; + + vmx->msr_ia32_umwait_control = data; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) @@ -4211,6 +4227,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vmx->msr_ia32_umwait_control = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); vmx->hv_deadline_tsc = -1; @@ -6384,6 +6402,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) +{ + u32 host_umwait_control; + + if (!vmx_has_waitpkg(vmx)) + return; + + host_umwait_control = get_umwait_control_msr(); + + if (vmx->msr_ia32_umwait_control != host_umwait_control) + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, + vmx->msr_ia32_umwait_control, + host_umwait_control, false); + else + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); +} + static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6478,6 +6513,7 @@ static void
vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); + atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) vmx_update_hv_timer(vcpu); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 64d5a4890aa9..bee16687dc0b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -14,6 +14,8 @@ extern const u32 vmx_msr_index[]; extern u64 host_efer; +extern u32 get_umwait_control_msr(void); + #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 @@ -211,6 +213,7 @@ struct vcpu_vmx { #endif u64 spec_ctrl; + u32 msr_ia32_umwait_control; u32 secondary_exec_control; @@ -497,6 +500,12 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); } +static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) +{ + return vmx->secondary_exec_control & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + void dump_vmcs(void); #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c38d247dbffb..977b36348bed 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1145,6 +1145,8 @@ static u32 msrs_to_save[] = { MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, + MSR_IA32_UMWAIT_CONTROL, + MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3, MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, -- cgit v1.2.3 From bf653b78f9608d66db174eabcbda7454c8fde6d5 Mon Sep 17 00:00:00 2001 From: Tao Xu Date: Tue, 16 Jul 2019 14:55:51 +0800 Subject: KVM: vmx: Introduce handle_unexpected_vmexit and handle WAITPKG vmexit Per the latest Intel 64 and IA-32 Architectures Software Developer's Manual, the UMWAIT and TPAUSE instructions cause a VM exit only if the "RDTSC exiting" and "enable user wait and pause" VM-execution controls are both 1. Because KVM never enables RDTSC exiting, the VM-exits for UMWAIT and TPAUSE should never happen. EXIT_REASON_XSAVES and EXIT_REASON_XRSTORS are likewise unexpected VM-exits for KVM. Introduce a common exit helper handle_unexpected_vmexit() to handle these unexpected VM-exits.
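In boolean form, the SDM rule summarized above reduces to the following sketch (illustrative only, not KVM code; the names are invented for the example):

static bool umwait_tpause_cause_vmexit(bool rdtsc_exiting, bool user_wait_pause)
{
	/*
	 * Both controls must be 1 for UMWAIT/TPAUSE to VM-exit. KVM never
	 * sets "RDTSC exiting", so for KVM guests this is always false and
	 * such an exit only warrants a WARN_ONCE plus an instruction skip.
	 */
	return rdtsc_exiting && user_wait_pause;
}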
Suggested-by: Sean Christopherson Co-developed-by: Jingqi Liu Signed-off-by: Jingqi Liu Signed-off-by: Tao Xu Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/vmx.h | 6 +++++- arch/x86/kvm/vmx/nested.c | 4 ++++ arch/x86/kvm/vmx/vmx.c | 28 ++++++++++++---------------- 3 files changed, 21 insertions(+), 17 deletions(-) (limited to 'arch/x86/kvm/vmx/vmx.c') diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f01950aa7fae..3eb8411ab60e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -86,6 +86,8 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 +#define EXIT_REASON_UMWAIT 67 +#define EXIT_REASON_TPAUSE 68 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -144,7 +146,9 @@ { EXIT_REASON_RDSEED, "RDSEED" }, \ { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ - { EXIT_REASON_XRSTORS, "XRSTORS" } + { EXIT_REASON_XRSTORS, "XRSTORS" }, \ + { EXIT_REASON_UMWAIT, "UMWAIT" }, \ + { EXIT_REASON_TPAUSE, "TPAUSE" } #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index c5b7ba795f95..77548ef1ad3b 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5470,6 +5470,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_ENCLS: /* SGX is never exposed to L1 */ return false; + case EXIT_REASON_UMWAIT: + case EXIT_REASON_TPAUSE: + return nested_cpu_has2(vmcs12, + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); default: return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 52dd2241b749..a7c9922e3905 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5007,20 +5007,6 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) return 1; } -static int handle_xsaves(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - -static int handle_xrstors(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { @@ -5514,6 +5500,14 @@ static int handle_encls(struct kvm_vcpu *vcpu) return 1; } +static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu) +{ + kvm_skip_emulated_instruction(vcpu); + WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x", + vmcs_read32(VM_EXIT_REASON)); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. 
 * Otherwise they set the kvm_run parameter to indicate what needs
@@ -5565,13 +5559,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_INVVPID]                 = handle_vmx_instruction,
 	[EXIT_REASON_RDRAND]                  = handle_invalid_op,
 	[EXIT_REASON_RDSEED]                  = handle_invalid_op,
-	[EXIT_REASON_XSAVES]                  = handle_xsaves,
-	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
+	[EXIT_REASON_XSAVES]                  = handle_unexpected_vmexit,
+	[EXIT_REASON_XRSTORS]                 = handle_unexpected_vmexit,
 	[EXIT_REASON_PML_FULL]                = handle_pml_full,
 	[EXIT_REASON_INVPCID]                 = handle_invpcid,
 	[EXIT_REASON_VMFUNC]                  = handle_vmx_instruction,
 	[EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
 	[EXIT_REASON_ENCLS]                   = handle_encls,
+	[EXIT_REASON_UMWAIT]                  = handle_unexpected_vmexit,
+	[EXIT_REASON_TPAUSE]                  = handle_unexpected_vmexit,
 };
 
 static const int kvm_vmx_max_exit_handlers =
-- cgit v1.2.3

From 52a9fcbc73a30375cad8d9e7b519e1ad97b0880a Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 19 Jul 2019 13:41:07 -0700
Subject: KVM: VMX: Optimize VMX instruction error and fault handling

Rework the VMX instruction helpers using asm-goto to branch directly
to error/fault "handlers" in lieu of using __ex(), i.e. the generic
____kvm_handle_fault_on_reboot(). Branching directly to fault handling
code during fixup avoids the extra JMP that is inserted after every VMX
instruction when using the generic "fault on reboot" (see commit
3901336ed9887, "x86/kvm: Don't call kvm_spurious_fault() from .fixup").

Opportunistically clean up the helpers so that they all have consistent
error handling and messages.

Leave the usage of ____kvm_handle_fault_on_reboot() (via __ex()) in
kvm_cpu_vmxoff() and nested_vmx_check_vmentry_hw() as is. The VMXOFF
case is not a fast path, i.e. the cleanliness of __ex() is worth the
JMP, and the extra JMP in nested_vmx_check_vmentry_hw() is unavoidable.

Note, VMREAD cannot get the asm-goto treatment as output operands aren't
compatible with GCC's asm-goto due to internal compiler restrictions.

Acked-by: Paolo Bonzini
Cc: Josh Poimboeuf
Signed-off-by: Sean Christopherson
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/vmx/ops.h | 72 ++++++++++++++++++++++++++++----------------------
 arch/x86/kvm/vmx/vmx.c | 34 ++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 32 deletions(-)

(limited to 'arch/x86/kvm/vmx/vmx.c')

diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h
index 2200fb698dd0..79e25d49d4d9 100644
--- a/arch/x86/kvm/vmx/ops.h
+++ b/arch/x86/kvm/vmx/ops.h
@@ -14,6 +14,12 @@
 #define __ex_clear(x, reg) \
 	____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg)
 
+void vmwrite_error(unsigned long field, unsigned long value);
+void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
+void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
+void invvpid_error(unsigned long ext, u16 vpid, gva_t gva);
+void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
+
 static __always_inline void vmcs_check16(unsigned long field)
 {
 	BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
@@ -103,21 +109,39 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
 	return __vmcs_readl(field);
 }
 
-static noinline void vmwrite_error(unsigned long field, unsigned long value)
-{
-	printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
-	       field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
-	dump_stack();
-}
+#define vmx_asm1(insn, op1, error_args...)				\
+do {									\
+	asm_volatile_goto("1: " __stringify(insn) " %0\n\t"		\
+			  ".byte 0x2e\n\t" /* branch not taken hint */	\
+			  "jna %l[error]\n\t"				\
+			  _ASM_EXTABLE(1b, %l[fault])			\
+			  : : op1 : "cc" : error, fault);		\
+	return;								\
+error:									\
+	insn##_error(error_args);					\
+	return;								\
+fault:									\
+	kvm_spurious_fault();						\
+} while (0)
+
+#define vmx_asm2(insn, op1, op2, error_args...)				\
+do {									\
+	asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t"		\
+			  ".byte 0x2e\n\t" /* branch not taken hint */	\
+			  "jna %l[error]\n\t"				\
+			  _ASM_EXTABLE(1b, %l[fault])			\
+			  : : op1, op2 : "cc" : error, fault);		\
+	return;								\
+error:									\
+	insn##_error(error_args);					\
+	return;								\
+fault:									\
+	kvm_spurious_fault();						\
+} while (0)
 
 static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
 {
-	bool error;
-
-	asm volatile (__ex("vmwrite %2, %1") CC_SET(na)
-		      : CC_OUT(na) (error) : "r"(field), "rm"(value));
-	if (unlikely(error))
-		vmwrite_error(field, value);
+	vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value);
 }
 
 static __always_inline void vmcs_write16(unsigned long field, u16 value)
@@ -182,28 +206,18 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
 static inline void vmcs_clear(struct vmcs *vmcs)
 {
 	u64 phys_addr = __pa(vmcs);
-	bool error;
 
-	asm volatile (__ex("vmclear %1") CC_SET(na)
-		      : CC_OUT(na) (error) : "m"(phys_addr));
-	if (unlikely(error))
-		printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
-		       vmcs, phys_addr);
+	vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr);
 }
 
 static inline void vmcs_load(struct vmcs *vmcs)
 {
 	u64 phys_addr = __pa(vmcs);
-	bool error;
 
 	if (static_branch_unlikely(&enable_evmcs))
 		return evmcs_load(phys_addr);
 
-	asm volatile (__ex("vmptrld %1") CC_SET(na)
-		      : CC_OUT(na) (error) : "m"(phys_addr));
-	if (unlikely(error))
-		printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
-		       vmcs, phys_addr);
+	vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr);
 }
 
 static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
@@ -213,11 +227,8 @@ static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
 		u64 rsvd : 48;
 		u64 gva;
 	} operand = { vpid, 0, gva };
-	bool error;
 
-	asm volatile (__ex("invvpid %2, %1") CC_SET(na)
-		      : CC_OUT(na) (error) : "r"(ext), "m"(operand));
-	BUG_ON(error);
+	vmx_asm2(invvpid, "r"(ext), "m"(operand), ext, vpid, gva);
 }
 
 static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
@@ -225,11 +236,8 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
 	struct {
 		u64 eptp, gpa;
 	} operand = {eptp, gpa};
-	bool error;
 
-	asm volatile (__ex("invept %2, %1") CC_SET(na)
-		      : CC_OUT(na) (error) : "r"(ext), "m"(operand));
-	BUG_ON(error);
+	vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa);
 }
 
 static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a7c9922e3905..52c7614b5ecd 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -343,6 +343,40 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
 
 void vmx_vmexit(void);
 
+#define vmx_insn_failed(fmt...)						\
+do {									\
+	WARN_ONCE(1, fmt);						\
+	pr_warn_ratelimited(fmt);					\
+} while (0)
+
+noinline void vmwrite_error(unsigned long field, unsigned long value)
+{
+	vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
+			field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
+}
+
+noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
+{
+	vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
+}
+
+noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
+{
+	vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
+}
+
+noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
+{
+	vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
+			ext, vpid, gva);
+}
+
+noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
+{
+	vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
+			ext, eptp, gpa);
+}
+
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
-- cgit v1.2.3

From 6e2020977e3e692a8bb31258c1f8251521f4fdb7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 19 Jul 2019 13:41:08 -0700
Subject: KVM: VMX: Add error handling to VMREAD helper

Now that VMREAD flows require a taken branch, courtesy of commit
3901336ed9887 ("x86/kvm: Don't call kvm_spurious_fault() from .fixup"),
bite the bullet and add full error handling to VMREAD, i.e. replace the
JMP added by __ex()/____kvm_handle_fault_on_reboot() with a hinted Jcc.

To minimize the code footprint, add a helper function, vmread_error(),
to handle both faults and failures so that the inline flow has a single
CALL.

Acked-by: Paolo Bonzini
Cc: Josh Poimboeuf
Signed-off-by: Sean Christopherson
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/vmx/ops.h | 21 +++++++++++++++++----
 arch/x86/kvm/vmx/vmx.c |  8 ++++++++
 2 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kvm/vmx/vmx.c')

diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h
index 79e25d49d4d9..45eaedee2ac0 100644
--- a/arch/x86/kvm/vmx/ops.h
+++ b/arch/x86/kvm/vmx/ops.h
@@ -11,9 +11,8 @@
 #include "vmcs.h"
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
-#define __ex_clear(x, reg) \
-	____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg)
 
+asmlinkage void vmread_error(unsigned long field, bool fault);
 void vmwrite_error(unsigned long field, unsigned long value);
 void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
 void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
@@ -68,8 +67,22 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
 {
 	unsigned long value;
 
-	asm volatile (__ex_clear("vmread %1, %0", "%k0")
-		      : "=r"(value) : "r"(field));
+	asm volatile("1: vmread %2, %1\n\t"
+		     ".byte 0x3e\n\t" /* branch taken hint */
+		     "ja 3f\n\t"
+		     "mov %2, %%" _ASM_ARG1 "\n\t"
+		     "xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t"
+		     "2: call vmread_error\n\t"
+		     "xor %k1, %k1\n\t"
+		     "3:\n\t"
+
+		     ".pushsection .fixup, \"ax\"\n\t"
+		     "4: mov %2, %%" _ASM_ARG1 "\n\t"
+		     "mov $1, %%" _ASM_ARG2 "\n\t"
+		     "jmp 2b\n\t"
+		     ".popsection\n\t"
+		     _ASM_EXTABLE(1b, 4b)
+		     : ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc");
 	return value;
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 52c7614b5ecd..1852706e6acc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -349,6 +349,14 @@ do {								\
 	pr_warn_ratelimited(fmt);					\
 } while (0)
 
+asmlinkage void vmread_error(unsigned long field, bool fault)
+{
+	if (fault)
+		kvm_spurious_fault();
+	else
+		vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
+}
+
 noinline void vmwrite_error(unsigned long field, unsigned long value)
 {
 	vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
 			field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
 }
-- cgit v1.2.3

From cab01850277a827c57270bba48be1d8fe312643d Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Wed, 25 Sep 2019 15:30:35 +0200
Subject: KVM: vmx: fix build warnings in hv_enable_direct_tlbflush() on i386

The following was reported on i386:

  arch/x86/kvm/vmx/vmx.c: In function 'hv_enable_direct_tlbflush':
  arch/x86/kvm/vmx/vmx.c:503:10: warning: cast from pointer to integer
  of different size [-Wpointer-to-int-cast]

The pr_debug()s in this function are more or less useless; let's just
remove them. evmcs->hv_vm_id can use 'unsigned long' instead of 'u64'.

Also, simplify the code a little bit.

Reported-by: kbuild test robot
Signed-off-by: Vitaly Kuznetsov
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/vmx/vmx.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'arch/x86/kvm/vmx/vmx.c')

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1852706e6acc..d4575ffb3cec 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -537,23 +537,19 @@ static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
 	 * Synthetic VM-Exit is not enabled in current code and so All
 	 * evmcs in singe VM shares same assist page.
 	 */
-	if (!*p_hv_pa_pg) {
+	if (!*p_hv_pa_pg)
 		*p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
-		if (!*p_hv_pa_pg)
-			return -ENOMEM;
-		pr_debug("KVM: Hyper-V: allocated PA_PG for %llx\n",
-			 (u64)&vcpu->kvm);
-	}
+
+	if (!*p_hv_pa_pg)
+		return -ENOMEM;
 
 	evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
 
 	evmcs->partition_assist_page =
 		__pa(*p_hv_pa_pg);
-	evmcs->hv_vm_id = (u64)vcpu->kvm;
+	evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
 	evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
 
-	pr_debug("KVM: Hyper-V: enabled DIRECT flush for %llx\n",
-		 (u64)vcpu->kvm);
 	return 0;
 }
-- cgit v1.2.3
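
The fix above sidesteps a classic 32-bit portability pitfall: casting a
pointer straight to u64 widens from 32 to 64 bits on i386 and triggers
-Wpointer-to-int-cast, while unsigned long always matches the pointer
width on both i386 and x86_64. A minimal standalone sketch of the
pattern, assuming a plain C translation unit outside the kernel tree
(the function names here are illustrative only):

	#include <stdint.h>

	unsigned long pointer_as_integer(void *ptr)
	{
		/* OK on both 32-bit and 64-bit: unsigned long is pointer-sized. */
		return (unsigned long)ptr;
	}

	uint64_t pointer_as_u64(void *ptr)
	{
		/* When a 64-bit value is truly required, widen in two steps
		 * so the pointer-to-integer cast itself stays same-sized. */
		return (uint64_t)(unsigned long)ptr;
	}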