diff options
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 106 |
1 files changed, 78 insertions, 28 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 48c865a4e5dd..7eafc6907861 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -364,6 +364,10 @@ static int avic; module_param(avic, int, S_IRUGO); #endif +/* enable/disable Next RIP Save */ +static int nrips = true; +module_param(nrips, int, 0444); + /* enable/disable Virtual VMLOAD VMSAVE */ static int vls = true; module_param(vls, int, 0444); @@ -770,7 +774,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm->vmcb->control.next_rip != 0) { + if (nrips && svm->vmcb->control.next_rip != 0) { WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } @@ -807,7 +811,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) kvm_deliver_exception_payload(&svm->vcpu); - if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { + if (nr == BP_VECTOR && !nrips) { unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); /* @@ -1364,6 +1368,11 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); + if (nrips) { + if (!boot_cpu_has(X86_FEATURE_NRIPS)) + nrips = false; + } + if (avic) { if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC) || @@ -2134,12 +2143,20 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!svm->vcpu.arch.user_fpu) { + printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); + err = -ENOMEM; + goto free_partial_svm; + } + svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL_ACCOUNT); if (!svm->vcpu.arch.guest_fpu) { printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); err = -ENOMEM; - goto free_partial_svm; + goto free_user_fpu; } err = kvm_vcpu_init(&svm->vcpu, kvm, id); @@ -2202,6 +2219,8 @@ uninit: kvm_vcpu_uninit(&svm->vcpu); free_svm: kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); free_partial_svm: kmem_cache_free(kvm_vcpu_cache, svm); out: @@ -2232,6 +2251,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); + kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); kmem_cache_free(kvm_vcpu_cache, svm); } @@ -3290,7 +3310,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) vmcb->control.exit_int_info_err, KVM_ISA_SVM); - rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map); + rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map); if (rc) { if (rc == -EINVAL) kvm_inject_gp(&svm->vcpu, 0); @@ -3580,7 +3600,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) vmcb_gpa = svm->vmcb->save.rax; - rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map); + rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map); if (rc) { if (rc == -EINVAL) kvm_inject_gp(&svm->vcpu, 0); @@ -3935,7 +3955,7 @@ static int rdpmc_interception(struct vcpu_svm *svm) { int err; - if (!static_cpu_has(X86_FEATURE_NRIPS)) + if (!nrips) return emulate_on_interception(svm); err = kvm_rdpmc(&svm->vcpu); @@ -5160,10 +5180,13 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); - if (avic_vcpu_is_running(vcpu)) - wrmsrl(SVM_AVIC_DOORBELL, - kvm_cpu_get_apicid(vcpu->cpu)); - else + if (avic_vcpu_is_running(vcpu)) { + int cpuid = vcpu->cpu; + + if (cpuid != get_cpu()) + wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid)); + put_cpu(); + } else kvm_vcpu_wake_up(vcpu); } @@ -5640,6 +5663,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) clgi(); kvm_load_guest_xcr0(vcpu); + if (lapic_in_kernel(vcpu) && + vcpu->arch.apic->lapic_timer.timer_advance_ns) + kvm_wait_lapic_expire(vcpu); + /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if * it's non-zero. Since vmentry is serialising on affected CPUs, there @@ -5861,9 +5888,9 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[2] = 0xd9; } -static void svm_check_processor_compat(void *rtn) +static int __init svm_check_processor_compat(void) { - *(int *)rtn = 0; + return 0; } static bool svm_cpu_has_accelerated_tpr(void) @@ -5875,6 +5902,7 @@ static bool svm_has_emulated_msr(int index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: return false; default: break; @@ -6162,15 +6190,9 @@ out: return ret; } -static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) { - local_irq_enable(); - /* - * We must have an instruction with interrupts enabled, so - * the timer interrupt isn't delayed by the interrupt shadow. - */ - asm("nop"); - local_irq_disable(); + } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) @@ -7117,13 +7139,41 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu, static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) { - bool is_user, smap; - - is_user = svm_get_cpl(vcpu) == 3; - smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); + unsigned long cr4 = kvm_read_cr4(vcpu); + bool smep = cr4 & X86_CR4_SMEP; + bool smap = cr4 & X86_CR4_SMAP; + bool is_user = svm_get_cpl(vcpu) == 3; /* - * Detect and workaround Errata 1096 Fam_17h_00_0Fh + * Detect and workaround Errata 1096 Fam_17h_00_0Fh. + * + * Errata: + * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is + * possible that CPU microcode implementing DecodeAssist will fail + * to read bytes of instruction which caused #NPF. In this case, + * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly + * return 0 instead of the correct guest instruction bytes. + * + * This happens because CPU microcode reading instruction bytes + * uses a special opcode which attempts to read data using CPL=0 + * priviledges. The microcode reads CS:RIP and if it hits a SMAP + * fault, it gives up and returns no instruction bytes. + * + * Detection: + * We reach here in case CPU supports DecodeAssist, raised #NPF and + * returned 0 in GuestIntrBytes field of the VMCB. + * First, errata can only be triggered in case vCPU CR4.SMAP=1. + * Second, if vCPU CR4.SMEP=1, errata could only be triggered + * in case vCPU CPL==3 (Because otherwise guest would have triggered + * a SMEP fault instead of #NPF). + * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL. + * As most guests enable SMAP if they have also enabled SMEP, use above + * logic in order to attempt minimize false-positive of detecting errata + * while still preserving all cases semantic correctness. + * + * Workaround: + * To determine what instruction the guest was executing, the hypervisor + * will have to decode the instruction at the instruction pointer. * * In non SEV guest, hypervisor will be able to read the guest * memory to decode the instruction pointer when insn_len is zero @@ -7134,11 +7184,11 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) * instruction pointer so we will not able to workaround it. Lets * print the error and request to kill the guest. */ - if (is_user && smap) { + if (smap && (!smep || is_user)) { if (!sev_guest(vcpu->kvm)) return true; - pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n"); + pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n"); kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } @@ -7256,7 +7306,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .set_tdp_cr3 = set_tdp_cr3, .check_intercept = svm_check_intercept, - .handle_external_intr = svm_handle_external_intr, + .handle_exit_irqoff = svm_handle_exit_irqoff, .request_immediate_exit = __kvm_request_immediate_exit, |