diff options
author | Maxime Ripard <maxime@cerno.tech> | 2022-04-05 11:06:58 +0200 |
---|---|---|
committer | Maxime Ripard <maxime@cerno.tech> | 2022-04-05 11:06:58 +0200 |
commit | 9cbbd694a58bdf24def2462276514c90cab7cf80 (patch) | |
tree | 98a504890134d34631a6a0ecbce94d3f1ecc21fc /arch/x86/kvm/vmx | |
parent | 71d637823cac7748079a912e0373476c7cf6f985 (diff) | |
parent | 3123109284176b1532874591f7c81f3837bbdc17 (diff) | |
download | linux-9cbbd694a58bdf24def2462276514c90cab7cf80.tar.bz2 |
Merge drm/drm-next into drm-misc-next
Let's start the 5.19 development cycle.
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Diffstat (limited to 'arch/x86/kvm/vmx')
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 32 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.h | 3 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/pmu_intel.c | 23 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/posted_intr.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/posted_intr.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 192 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 5 |
7 files changed, 149 insertions, 116 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ba34e94049c7..f18744f7ff82 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -246,8 +246,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, src = &prev->host_state; dest = &vmx->loaded_vmcs->host_state; - vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel, - src->fs_base, src->gs_base); + vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base); dest->ldt_sel = src->ldt_sel; #ifdef CONFIG_X86_64 dest->ds_sel = src->ds_sel; @@ -321,7 +320,7 @@ static void free_nested(struct kvm_vcpu *vcpu) kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); vmx->nested.pi_desc = NULL; - kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); + kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); nested_release_evmcs(vcpu); @@ -1126,15 +1125,15 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, return -EINVAL; } - if (!nested_ept) - kvm_mmu_new_pgd(vcpu, cr3); - vcpu->arch.cr3 = cr3; kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3); /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */ kvm_init_mmu(vcpu); + if (!nested_ept) + kvm_mmu_new_pgd(vcpu, cr3); + return 0; } @@ -3056,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; bool vm_fail; if (!nested_early_check) @@ -3079,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) */ vmcs_writel(GUEST_RFLAGS, 0); + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); @@ -4797,7 +4802,8 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, return 0; } -void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu) +void nested_vmx_pmu_refresh(struct kvm_vcpu *vcpu, + bool vcpu_has_perf_global_ctrl) { struct vcpu_vmx *vmx; @@ -4805,7 +4811,7 @@ void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu) return; vmx = to_vmx(vcpu); - if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) { + if (vcpu_has_perf_global_ctrl) { vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; vmx->nested.msrs.exit_ctls_high |= @@ -5006,7 +5012,7 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) vmx->nested.current_vmptr >> PAGE_SHIFT, vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); - kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); + kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); vmx->nested.current_vmptr = INVALID_GPA; } @@ -5465,7 +5471,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); roots_to_free = 0; - if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd, + if (nested_ept_root_matches(mmu->root.hpa, mmu->root.pgd, operand.eptp)) roots_to_free |= KVM_MMU_ROOT_CURRENT; @@ -5485,7 +5491,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) } if (roots_to_free) - kvm_mmu_free_roots(vcpu, mmu, roots_to_free); + kvm_mmu_free_roots(vcpu->kvm, mmu, roots_to_free); return nested_vmx_succeed(vcpu); } @@ -5574,7 +5580,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) * TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR. */ if (!enable_ept) - kvm_mmu_free_guest_mode_roots(vcpu, &vcpu->arch.root_mmu); + kvm_mmu_free_guest_mode_roots(vcpu->kvm, &vcpu->arch.root_mmu); return nested_vmx_succeed(vcpu); } diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index b69a80f43b37..c92cea0b8ccc 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -32,7 +32,8 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, u32 vmx_instruction_info, bool wr, int len, gva_t *ret); -void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu); +void nested_vmx_pmu_refresh(struct kvm_vcpu *vcpu, + bool vcpu_has_perf_global_ctrl); void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu); bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, int size); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 466d18fc0c5d..bc3f8512bb64 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -389,6 +389,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct kvm_pmc *pmc; u32 msr = msr_info->index; u64 data = msr_info->data; + u64 reserved_bits; switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -443,7 +444,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) return 0; - if (!(data & pmu->reserved_bits)) { + reserved_bits = pmu->reserved_bits; + if ((pmc->idx == 2) && + (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED)) + reserved_bits ^= HSW_IN_TX_CHECKPOINTED; + if (!(data & reserved_bits)) { reprogram_gp_counter(pmc, data); return 0; } @@ -485,9 +490,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->version = 0; pmu->reserved_bits = 0xffffffff00200000ull; + pmu->raw_event_mask = X86_RAW_EVENT_MASK; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); - if (!entry || !enable_pmu) + if (!entry || !vcpu->kvm->arch.enable_pmu) return; eax.full = entry->eax; edx.full = entry->edx; @@ -533,15 +539,18 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) entry = kvm_find_cpuid_entry(vcpu, 7, 0); if (entry && (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && - (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) - pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; + (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) { + pmu->reserved_bits ^= HSW_IN_TX; + pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); + } bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); - nested_vmx_pmu_entry_exit_ctls_update(vcpu); + nested_vmx_pmu_refresh(vcpu, + intel_is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)); if (intel_pmu_lbr_is_compatible(vcpu)) x86_perf_get_lbr(&lbr_desc->records); @@ -565,7 +574,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) pmu->gp_counters[i].current_config = 0; } - for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { + for (i = 0; i < KVM_PMC_MAX_FIXED; i++) { pmu->fixed_counters[i].type = KVM_PMC_FIXED; pmu->fixed_counters[i].vcpu = vcpu; pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; @@ -591,7 +600,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) pmc->counter = pmc->eventsel = 0; } - for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { + for (i = 0; i < KVM_PMC_MAX_FIXED; i++) { pmc = &pmu->fixed_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index aa1fe9085d77..3834bb30ce54 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -244,7 +244,7 @@ void vmx_pi_start_assignment(struct kvm *kvm) } /* - * pi_update_irte - set IRTE for Posted-Interrupts + * vmx_pi_update_irte - set IRTE for Posted-Interrupts * * @kvm: kvm * @host_irq: host irq of the interrupt @@ -252,8 +252,8 @@ void vmx_pi_start_assignment(struct kvm *kvm) * @set: set or unset PI * returns 0 on success, < 0 on failure */ -int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, - bool set) +int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index eb14e76b84ef..9a45d5c9f116 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -97,8 +97,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu); void pi_wakeup_handler(void); void __init pi_init_cpu(int cpu); bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu); -int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, - bool set); +int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); void vmx_pi_start_assignment(struct kvm *kvm); #endif /* __KVM_X86_VMX_POSTED_INTR_H */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aca3ae2a02f3..04d170c4b61e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -541,11 +541,6 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) return flexpriority_enabled && lapic_in_kernel(vcpu); } -static inline bool report_flexpriority(void) -{ - return flexpriority_enabled; -} - static int possible_passthrough_msr_slot(u32 msr) { u32 i; @@ -645,10 +640,10 @@ static void __loaded_vmcs_clear(void *arg) /* * Ensure all writes to loaded_vmcs, including deleting it from its - * current percpu list, complete before setting loaded_vmcs->vcpu to - * -1, otherwise a different cpu can see vcpu == -1 first and add - * loaded_vmcs to its percpu list before it's deleted from this cpu's - * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs(). + * current percpu list, complete before setting loaded_vmcs->cpu to + * -1, otherwise a different cpu can see loaded_vmcs->cpu == -1 first + * and add loaded_vmcs to its percpu list before it's deleted from this + * cpu's list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs(). */ smp_wmb(); @@ -1080,14 +1075,9 @@ static void pt_guest_exit(struct vcpu_vmx *vmx) wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); } -void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3, - u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base) +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base) { - if (unlikely(cr3 != host->cr3)) { - vmcs_writel(HOST_CR3, cr3); - host->cr3 = cr3; - } if (unlikely(fs_sel != host->fs_sel)) { if (!(fs_sel & 7)) vmcs_write16(HOST_FS_SELECTOR, fs_sel); @@ -1182,9 +1172,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) gs_base = segment_base(gs_sel); #endif - vmx_set_vmcs_host_state(host_state, __get_current_cr3_fast(), - fs_sel, gs_sel, fs_base, gs_base); - + vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); vmx->guest_state_loaded = true; } @@ -2341,7 +2329,7 @@ fault: return -EFAULT; } -static int hardware_enable(void) +static int vmx_hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2382,7 +2370,7 @@ static void vmclear_local_loaded_vmcss(void) __loaded_vmcs_clear(v); } -static void hardware_disable(void) +static void vmx_hardware_disable(void) { vmclear_local_loaded_vmcss(); @@ -2878,21 +2866,17 @@ static void enter_rmode(struct kvm_vcpu *vcpu) int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmx_uret_msr *msr = vmx_find_uret_msr(vmx, MSR_EFER); /* Nothing to do if hardware doesn't support EFER. */ - if (!msr) + if (!vmx_find_uret_msr(vmx, MSR_EFER)) return 0; vcpu->arch.efer = efer; - if (efer & EFER_LMA) { - vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); - msr->data = efer; - } else { - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); + if (efer & EFER_LMA) + vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE); + else + vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE); - msr->data = efer & ~EFER_LME; - } vmx_setup_uret_msrs(vmx); return 0; } @@ -2918,7 +2902,6 @@ static void enter_lmode(struct kvm_vcpu *vcpu) static void exit_lmode(struct kvm_vcpu *vcpu) { - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); } @@ -2957,7 +2940,7 @@ static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; - u64 root_hpa = mmu->root_hpa; + u64 root_hpa = mmu->root.hpa; /* No flush required if the current context is invalid. */ if (!VALID_PAGE(root_hpa)) @@ -3937,31 +3920,33 @@ static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, #ifdef CONFIG_SMP if (vcpu->mode == IN_GUEST_MODE) { /* - * The vector of interrupt to be delivered to vcpu had - * been set in PIR before this function. + * The vector of the virtual has already been set in the PIR. + * Send a notification event to deliver the virtual interrupt + * unless the vCPU is the currently running vCPU, i.e. the + * event is being sent from a fastpath VM-Exit handler, in + * which case the PIR will be synced to the vIRR before + * re-entering the guest. * - * Following cases will be reached in this block, and - * we always send a notification event in all cases as - * explained below. + * When the target is not the running vCPU, the following + * possibilities emerge: * - * Case 1: vcpu keeps in non-root mode. Sending a - * notification event posts the interrupt to vcpu. + * Case 1: vCPU stays in non-root mode. Sending a notification + * event posts the interrupt to the vCPU. * - * Case 2: vcpu exits to root mode and is still - * runnable. PIR will be synced to vIRR before the - * next vcpu entry. Sending a notification event in - * this case has no effect, as vcpu is not in root - * mode. + * Case 2: vCPU exits to root mode and is still runnable. The + * PIR will be synced to the vIRR before re-entering the guest. + * Sending a notification event is ok as the host IRQ handler + * will ignore the spurious event. * - * Case 3: vcpu exits to root mode and is blocked. - * vcpu_block() has already synced PIR to vIRR and - * never blocks vcpu if vIRR is not cleared. Therefore, - * a blocked vcpu here does not wait for any requested - * interrupts in PIR, and sending a notification event - * which has no effect is safe here. + * Case 3: vCPU exits to root mode and is blocked. vcpu_block() + * has already synced PIR to vIRR and never blocks the vCPU if + * the vIRR is not empty. Therefore, a blocked vCPU here does + * not wait for any requested interrupts in PIR, and sending a + * notification event also results in a benign, spurious event. */ - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); + if (vcpu != kvm_get_running_vcpu()) + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); return; } #endif @@ -4041,6 +4026,21 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) return 0; } +static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector) +{ + struct kvm_vcpu *vcpu = apic->vcpu; + + if (vmx_deliver_posted_interrupt(vcpu, vector)) { + kvm_lapic_set_irr(vector, apic); + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } else { + trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, + trig_mode, vector); + } +} + /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. @@ -5169,7 +5169,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) if (!kvm_require_dr(vcpu, dr)) return 1; - if (kvm_x86_ops.get_cpl(vcpu) > 0) + if (vmx_get_cpl(vcpu) > 0) goto out; dr7 = vmcs_readl(GUEST_DR7); @@ -5302,9 +5302,16 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) static int handle_apic_write(struct kvm_vcpu *vcpu) { unsigned long exit_qualification = vmx_get_exit_qual(vcpu); - u32 offset = exit_qualification & 0xfff; - /* APIC-write VM exit is trap-like and thus no need to adjust IP */ + /* + * APIC-write VM-Exit is trap-like, KVM doesn't need to advance RIP and + * hardware has done any necessary aliasing, offset adjustments, etc... + * for the access. I.e. the correct value has already been written to + * the vAPIC page for the correct 16-byte chunk. KVM needs only to + * retrieve the register value and emulate the access. + */ + u32 offset = exit_qualification & 0xff0; + kvm_apic_write_nodecode(vcpu, offset); return 1; } @@ -6754,7 +6761,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - kvm_guest_enter_irqoff(); + guest_state_enter_irqoff(); /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) @@ -6770,13 +6777,13 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vcpu->arch.cr2 = native_read_cr2(); - kvm_guest_exit_irqoff(); + guest_state_exit_irqoff(); } static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && @@ -6819,6 +6826,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vcpu->arch.regs_dirty = 0; + /* + * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately + * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time + * it switches back to the current->mm, which can occur in KVM context + * when switching to a temporary mm to patch kernel code, e.g. if KVM + * toggles a static key while handling a VM-Exit. + */ + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); @@ -6954,7 +6974,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) return vmx_exit_handlers_fastpath(vcpu); } -static void vmx_free_vcpu(struct kvm_vcpu *vcpu) +static void vmx_vcpu_free(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6965,7 +6985,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) free_loaded_vmcs(vmx->loaded_vmcs); } -static int vmx_create_vcpu(struct kvm_vcpu *vcpu) +static int vmx_vcpu_create(struct kvm_vcpu *vcpu) { struct vmx_uret_msr *tsx_ctrl; struct vcpu_vmx *vmx; @@ -7327,11 +7347,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmx_secondary_exec_control(vmx)); if (nested_vmx_allowed(vcpu)) - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= + vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; else - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= + vmx->msr_ia32_feature_control_valid_bits &= ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX); @@ -7644,6 +7664,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (ret) return ret; + vmx->nested.nested_run_pending = 1; vmx->nested.smm.guest_mode = false; } return 0; @@ -7669,7 +7690,7 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu) } } -static void hardware_unsetup(void) +static void vmx_hardware_unsetup(void) { kvm_set_posted_intr_wakeup_handler(NULL); @@ -7679,34 +7700,33 @@ static void hardware_unsetup(void) free_kvm_area(); } -static bool vmx_check_apicv_inhibit_reasons(ulong bit) +static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | BIT(APICV_INHIBIT_REASON_BLOCKIRQ); - return supported & BIT(bit); + return supported & BIT(reason); } static struct kvm_x86_ops vmx_x86_ops __initdata = { .name = "kvm_intel", - .hardware_unsetup = hardware_unsetup, + .hardware_unsetup = vmx_hardware_unsetup, - .hardware_enable = hardware_enable, - .hardware_disable = hardware_disable, - .cpu_has_accelerated_tpr = report_flexpriority, + .hardware_enable = vmx_hardware_enable, + .hardware_disable = vmx_hardware_disable, .has_emulated_msr = vmx_has_emulated_msr, .vm_size = sizeof(struct kvm_vmx), .vm_init = vmx_vm_init, - .vcpu_create = vmx_create_vcpu, - .vcpu_free = vmx_free_vcpu, + .vcpu_create = vmx_vcpu_create, + .vcpu_free = vmx_vcpu_free, .vcpu_reset = vmx_vcpu_reset, - .prepare_guest_switch = vmx_prepare_switch_to_guest, + .prepare_switch_to_guest = vmx_prepare_switch_to_guest, .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, @@ -7734,21 +7754,21 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_rflags = vmx_set_rflags, .get_if_flag = vmx_get_if_flag, - .tlb_flush_all = vmx_flush_tlb_all, - .tlb_flush_current = vmx_flush_tlb_current, - .tlb_flush_gva = vmx_flush_tlb_gva, - .tlb_flush_guest = vmx_flush_tlb_guest, + .flush_tlb_all = vmx_flush_tlb_all, + .flush_tlb_current = vmx_flush_tlb_current, + .flush_tlb_gva = vmx_flush_tlb_gva, + .flush_tlb_guest = vmx_flush_tlb_guest, .vcpu_pre_run = vmx_vcpu_pre_run, - .run = vmx_vcpu_run, + .vcpu_run = vmx_vcpu_run, .handle_exit = vmx_handle_exit, .skip_emulated_instruction = vmx_skip_emulated_instruction, .update_emulated_instruction = vmx_update_emulated_instruction, .set_interrupt_shadow = vmx_set_interrupt_shadow, .get_interrupt_shadow = vmx_get_interrupt_shadow, .patch_hypercall = vmx_patch_hypercall, - .set_irq = vmx_inject_irq, - .set_nmi = vmx_inject_nmi, + .inject_irq = vmx_inject_irq, + .inject_nmi = vmx_inject_nmi, .queue_exception = vmx_queue_exception, .cancel_injection = vmx_cancel_injection, .interrupt_allowed = vmx_interrupt_allowed, @@ -7768,7 +7788,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .hwapic_isr_update = vmx_hwapic_isr_update, .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, .sync_pir_to_irr = vmx_sync_pir_to_irr, - .deliver_posted_interrupt = vmx_deliver_posted_interrupt, + .deliver_interrupt = vmx_deliver_interrupt, .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt, .set_tss_addr = vmx_set_tss_addr, @@ -7801,8 +7821,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .pmu_ops = &intel_pmu_ops, .nested_ops = &vmx_nested_ops, - .update_pi_irte = pi_update_irte, - .start_assignment = vmx_pi_start_assignment, + .pi_update_irte = vmx_pi_update_irte, + .pi_start_assignment = vmx_pi_start_assignment, #ifdef CONFIG_X86_64 .set_hv_timer = vmx_set_hv_timer, @@ -7955,12 +7975,11 @@ static __init int hardware_setup(void) if (!enable_apicv) vmx_x86_ops.sync_pir_to_irr = NULL; - if (cpu_has_vmx_tsc_scaling()) { + if (cpu_has_vmx_tsc_scaling()) kvm_has_tsc_control = true; - kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; - kvm_tsc_scaling_ratio_frac_bits = 48; - } + kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; + kvm_tsc_scaling_ratio_frac_bits = 48; kvm_has_bus_lock_exit = cpu_has_vmx_bus_lock_detection(); set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ @@ -8037,7 +8056,7 @@ static __init int hardware_setup(void) vmx_set_cpu_caps(); r = alloc_kvm_area(); - if (r) + if (r && nested) nested_vmx_hardware_unsetup(); kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler); @@ -8117,7 +8136,6 @@ static int __init vmx_init(void) ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= KVM_EVMCS_VERSION) { - int cpu; /* Check that we have assist pages on all online CPUs */ for_each_online_cpu(cpu) { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 7f2c82e7f38f..9c6bfcd84008 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -374,9 +374,8 @@ int allocate_vpid(void); void free_vpid(int vpid); void vmx_set_constant_host_state(struct vcpu_vmx *vmx); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); -void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3, - u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base); +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base); int vmx_get_cpl(struct kvm_vcpu *vcpu); bool vmx_emulation_required(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); |