summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/vmx
diff options
context:
space:
mode:
authorMaxime Ripard <maxime@cerno.tech>2022-04-05 11:06:58 +0200
committerMaxime Ripard <maxime@cerno.tech>2022-04-05 11:06:58 +0200
commit9cbbd694a58bdf24def2462276514c90cab7cf80 (patch)
tree98a504890134d34631a6a0ecbce94d3f1ecc21fc /arch/x86/kvm/vmx
parent71d637823cac7748079a912e0373476c7cf6f985 (diff)
parent3123109284176b1532874591f7c81f3837bbdc17 (diff)
downloadlinux-9cbbd694a58bdf24def2462276514c90cab7cf80.tar.bz2
Merge drm/drm-next into drm-misc-next
Let's start the 5.19 development cycle. Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Diffstat (limited to 'arch/x86/kvm/vmx')
-rw-r--r--arch/x86/kvm/vmx/nested.c32
-rw-r--r--arch/x86/kvm/vmx/nested.h3
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c23
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c6
-rw-r--r--arch/x86/kvm/vmx/posted_intr.h4
-rw-r--r--arch/x86/kvm/vmx/vmx.c192
-rw-r--r--arch/x86/kvm/vmx/vmx.h5
7 files changed, 149 insertions, 116 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ba34e94049c7..f18744f7ff82 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -246,8 +246,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
src = &prev->host_state;
dest = &vmx->loaded_vmcs->host_state;
- vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel,
- src->fs_base, src->gs_base);
+ vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
dest->ds_sel = src->ds_sel;
@@ -321,7 +320,7 @@ static void free_nested(struct kvm_vcpu *vcpu)
kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
vmx->nested.pi_desc = NULL;
- kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+ kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
nested_release_evmcs(vcpu);
@@ -1126,15 +1125,15 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
return -EINVAL;
}
- if (!nested_ept)
- kvm_mmu_new_pgd(vcpu, cr3);
-
vcpu->arch.cr3 = cr3;
kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
kvm_init_mmu(vcpu);
+ if (!nested_ept)
+ kvm_mmu_new_pgd(vcpu, cr3);
+
return 0;
}
@@ -3056,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr4;
+ unsigned long cr3, cr4;
bool vm_fail;
if (!nested_early_check)
@@ -3079,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
*/
vmcs_writel(GUEST_RFLAGS, 0);
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
+
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
@@ -4797,7 +4802,8 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
return 0;
}
-void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
+void nested_vmx_pmu_refresh(struct kvm_vcpu *vcpu,
+ bool vcpu_has_perf_global_ctrl)
{
struct vcpu_vmx *vmx;
@@ -4805,7 +4811,7 @@ void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
return;
vmx = to_vmx(vcpu);
- if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
+ if (vcpu_has_perf_global_ctrl) {
vmx->nested.msrs.entry_ctls_high |=
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
vmx->nested.msrs.exit_ctls_high |=
@@ -5006,7 +5012,7 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
vmx->nested.current_vmptr >> PAGE_SHIFT,
vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
- kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+ kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
vmx->nested.current_vmptr = INVALID_GPA;
}
@@ -5465,7 +5471,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
roots_to_free = 0;
- if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
+ if (nested_ept_root_matches(mmu->root.hpa, mmu->root.pgd,
operand.eptp))
roots_to_free |= KVM_MMU_ROOT_CURRENT;
@@ -5485,7 +5491,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
}
if (roots_to_free)
- kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
+ kvm_mmu_free_roots(vcpu->kvm, mmu, roots_to_free);
return nested_vmx_succeed(vcpu);
}
@@ -5574,7 +5580,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
* TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR.
*/
if (!enable_ept)
- kvm_mmu_free_guest_mode_roots(vcpu, &vcpu->arch.root_mmu);
+ kvm_mmu_free_guest_mode_roots(vcpu->kvm, &vcpu->arch.root_mmu);
return nested_vmx_succeed(vcpu);
}
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index b69a80f43b37..c92cea0b8ccc 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -32,7 +32,8 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
-void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
+void nested_vmx_pmu_refresh(struct kvm_vcpu *vcpu,
+ bool vcpu_has_perf_global_ctrl);
void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu);
bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
int size);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 466d18fc0c5d..bc3f8512bb64 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -389,6 +389,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
struct kvm_pmc *pmc;
u32 msr = msr_info->index;
u64 data = msr_info->data;
+ u64 reserved_bits;
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -443,7 +444,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
return 0;
- if (!(data & pmu->reserved_bits)) {
+ reserved_bits = pmu->reserved_bits;
+ if ((pmc->idx == 2) &&
+ (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
+ reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
+ if (!(data & reserved_bits)) {
reprogram_gp_counter(pmc, data);
return 0;
}
@@ -485,9 +490,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
+ pmu->raw_event_mask = X86_RAW_EVENT_MASK;
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
- if (!entry || !enable_pmu)
+ if (!entry || !vcpu->kvm->arch.enable_pmu)
return;
eax.full = entry->eax;
edx.full = entry->edx;
@@ -533,15 +539,18 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
entry = kvm_find_cpuid_entry(vcpu, 7, 0);
if (entry &&
(boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
- (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
- pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
+ (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
+ pmu->reserved_bits ^= HSW_IN_TX;
+ pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+ }
bitmap_set(pmu->all_valid_pmc_idx,
0, pmu->nr_arch_gp_counters);
bitmap_set(pmu->all_valid_pmc_idx,
INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
- nested_vmx_pmu_entry_exit_ctls_update(vcpu);
+ nested_vmx_pmu_refresh(vcpu,
+ intel_is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL));
if (intel_pmu_lbr_is_compatible(vcpu))
x86_perf_get_lbr(&lbr_desc->records);
@@ -565,7 +574,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
pmu->gp_counters[i].current_config = 0;
}
- for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+ for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
pmu->fixed_counters[i].type = KVM_PMC_FIXED;
pmu->fixed_counters[i].vcpu = vcpu;
pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
@@ -591,7 +600,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
pmc->counter = pmc->eventsel = 0;
}
- for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+ for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
pmc = &pmu->fixed_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index aa1fe9085d77..3834bb30ce54 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -244,7 +244,7 @@ void vmx_pi_start_assignment(struct kvm *kvm)
}
/*
- * pi_update_irte - set IRTE for Posted-Interrupts
+ * vmx_pi_update_irte - set IRTE for Posted-Interrupts
*
* @kvm: kvm
* @host_irq: host irq of the interrupt
@@ -252,8 +252,8 @@ void vmx_pi_start_assignment(struct kvm *kvm)
* @set: set or unset PI
* returns 0 on success, < 0 on failure
*/
-int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
- bool set)
+int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set)
{
struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_routing_table *irq_rt;
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index eb14e76b84ef..9a45d5c9f116 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -97,8 +97,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
void pi_wakeup_handler(void);
void __init pi_init_cpu(int cpu);
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
-int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
- bool set);
+int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
void vmx_pi_start_assignment(struct kvm *kvm);
#endif /* __KVM_X86_VMX_POSTED_INTR_H */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index aca3ae2a02f3..04d170c4b61e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -541,11 +541,6 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
return flexpriority_enabled && lapic_in_kernel(vcpu);
}
-static inline bool report_flexpriority(void)
-{
- return flexpriority_enabled;
-}
-
static int possible_passthrough_msr_slot(u32 msr)
{
u32 i;
@@ -645,10 +640,10 @@ static void __loaded_vmcs_clear(void *arg)
/*
* Ensure all writes to loaded_vmcs, including deleting it from its
- * current percpu list, complete before setting loaded_vmcs->vcpu to
- * -1, otherwise a different cpu can see vcpu == -1 first and add
- * loaded_vmcs to its percpu list before it's deleted from this cpu's
- * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
+ * current percpu list, complete before setting loaded_vmcs->cpu to
+ * -1, otherwise a different cpu can see loaded_vmcs->cpu == -1 first
+ * and add loaded_vmcs to its percpu list before it's deleted from this
+ * cpu's list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
*/
smp_wmb();
@@ -1080,14 +1075,9 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}
-void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
- u16 fs_sel, u16 gs_sel,
- unsigned long fs_base, unsigned long gs_base)
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base)
{
- if (unlikely(cr3 != host->cr3)) {
- vmcs_writel(HOST_CR3, cr3);
- host->cr3 = cr3;
- }
if (unlikely(fs_sel != host->fs_sel)) {
if (!(fs_sel & 7))
vmcs_write16(HOST_FS_SELECTOR, fs_sel);
@@ -1182,9 +1172,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
gs_base = segment_base(gs_sel);
#endif
- vmx_set_vmcs_host_state(host_state, __get_current_cr3_fast(),
- fs_sel, gs_sel, fs_base, gs_base);
-
+ vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
vmx->guest_state_loaded = true;
}
@@ -2341,7 +2329,7 @@ fault:
return -EFAULT;
}
-static int hardware_enable(void)
+static int vmx_hardware_enable(void)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2382,7 +2370,7 @@ static void vmclear_local_loaded_vmcss(void)
__loaded_vmcs_clear(v);
}
-static void hardware_disable(void)
+static void vmx_hardware_disable(void)
{
vmclear_local_loaded_vmcss();
@@ -2878,21 +2866,17 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct vmx_uret_msr *msr = vmx_find_uret_msr(vmx, MSR_EFER);
/* Nothing to do if hardware doesn't support EFER. */
- if (!msr)
+ if (!vmx_find_uret_msr(vmx, MSR_EFER))
return 0;
vcpu->arch.efer = efer;
- if (efer & EFER_LMA) {
- vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
- msr->data = efer;
- } else {
- vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
+ if (efer & EFER_LMA)
+ vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE);
+ else
+ vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE);
- msr->data = efer & ~EFER_LME;
- }
vmx_setup_uret_msrs(vmx);
return 0;
}
@@ -2918,7 +2902,6 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
static void exit_lmode(struct kvm_vcpu *vcpu)
{
- vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
}
@@ -2957,7 +2940,7 @@ static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
- u64 root_hpa = mmu->root_hpa;
+ u64 root_hpa = mmu->root.hpa;
/* No flush required if the current context is invalid. */
if (!VALID_PAGE(root_hpa))
@@ -3937,31 +3920,33 @@ static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
#ifdef CONFIG_SMP
if (vcpu->mode == IN_GUEST_MODE) {
/*
- * The vector of interrupt to be delivered to vcpu had
- * been set in PIR before this function.
+ * The vector of the virtual has already been set in the PIR.
+ * Send a notification event to deliver the virtual interrupt
+ * unless the vCPU is the currently running vCPU, i.e. the
+ * event is being sent from a fastpath VM-Exit handler, in
+ * which case the PIR will be synced to the vIRR before
+ * re-entering the guest.
*
- * Following cases will be reached in this block, and
- * we always send a notification event in all cases as
- * explained below.
+ * When the target is not the running vCPU, the following
+ * possibilities emerge:
*
- * Case 1: vcpu keeps in non-root mode. Sending a
- * notification event posts the interrupt to vcpu.
+ * Case 1: vCPU stays in non-root mode. Sending a notification
+ * event posts the interrupt to the vCPU.
*
- * Case 2: vcpu exits to root mode and is still
- * runnable. PIR will be synced to vIRR before the
- * next vcpu entry. Sending a notification event in
- * this case has no effect, as vcpu is not in root
- * mode.
+ * Case 2: vCPU exits to root mode and is still runnable. The
+ * PIR will be synced to the vIRR before re-entering the guest.
+ * Sending a notification event is ok as the host IRQ handler
+ * will ignore the spurious event.
*
- * Case 3: vcpu exits to root mode and is blocked.
- * vcpu_block() has already synced PIR to vIRR and
- * never blocks vcpu if vIRR is not cleared. Therefore,
- * a blocked vcpu here does not wait for any requested
- * interrupts in PIR, and sending a notification event
- * which has no effect is safe here.
+ * Case 3: vCPU exits to root mode and is blocked. vcpu_block()
+ * has already synced PIR to vIRR and never blocks the vCPU if
+ * the vIRR is not empty. Therefore, a blocked vCPU here does
+ * not wait for any requested interrupts in PIR, and sending a
+ * notification event also results in a benign, spurious event.
*/
- apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
+ if (vcpu != kvm_get_running_vcpu())
+ apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
return;
}
#endif
@@ -4041,6 +4026,21 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
return 0;
}
+static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector)
+{
+ struct kvm_vcpu *vcpu = apic->vcpu;
+
+ if (vmx_deliver_posted_interrupt(vcpu, vector)) {
+ kvm_lapic_set_irr(vector, apic);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ } else {
+ trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
+ trig_mode, vector);
+ }
+}
+
/*
* Set up the vmcs's constant host-state fields, i.e., host-state fields that
* will not change in the lifetime of the guest.
@@ -5169,7 +5169,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
if (!kvm_require_dr(vcpu, dr))
return 1;
- if (kvm_x86_ops.get_cpl(vcpu) > 0)
+ if (vmx_get_cpl(vcpu) > 0)
goto out;
dr7 = vmcs_readl(GUEST_DR7);
@@ -5302,9 +5302,16 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
static int handle_apic_write(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
- u32 offset = exit_qualification & 0xfff;
- /* APIC-write VM exit is trap-like and thus no need to adjust IP */
+ /*
+ * APIC-write VM-Exit is trap-like, KVM doesn't need to advance RIP and
+ * hardware has done any necessary aliasing, offset adjustments, etc...
+ * for the access. I.e. the correct value has already been written to
+ * the vAPIC page for the correct 16-byte chunk. KVM needs only to
+ * retrieve the register value and emulate the access.
+ */
+ u32 offset = exit_qualification & 0xff0;
+
kvm_apic_write_nodecode(vcpu, offset);
return 1;
}
@@ -6754,7 +6761,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx)
{
- kvm_guest_enter_irqoff();
+ guest_state_enter_irqoff();
/* L1D Flush includes CPU buffer clear to mitigate MDS */
if (static_branch_unlikely(&vmx_l1d_should_flush))
@@ -6770,13 +6777,13 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vcpu->arch.cr2 = native_read_cr2();
- kvm_guest_exit_irqoff();
+ guest_state_exit_irqoff();
}
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr4;
+ unsigned long cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -6819,6 +6826,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
vcpu->arch.regs_dirty = 0;
+ /*
+ * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
+ * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
+ * it switches back to the current->mm, which can occur in KVM context
+ * when switching to a temporary mm to patch kernel code, e.g. if KVM
+ * toggles a static key while handling a VM-Exit.
+ */
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
+
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
@@ -6954,7 +6974,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
return vmx_exit_handlers_fastpath(vcpu);
}
-static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6965,7 +6985,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
free_loaded_vmcs(vmx->loaded_vmcs);
}
-static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
+static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
{
struct vmx_uret_msr *tsx_ctrl;
struct vcpu_vmx *vmx;
@@ -7327,11 +7347,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmx_secondary_exec_control(vmx));
if (nested_vmx_allowed(vcpu))
- to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+ vmx->msr_ia32_feature_control_valid_bits |=
FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
else
- to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
+ vmx->msr_ia32_feature_control_valid_bits &=
~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
@@ -7644,6 +7664,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (ret)
return ret;
+ vmx->nested.nested_run_pending = 1;
vmx->nested.smm.guest_mode = false;
}
return 0;
@@ -7669,7 +7690,7 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
}
}
-static void hardware_unsetup(void)
+static void vmx_hardware_unsetup(void)
{
kvm_set_posted_intr_wakeup_handler(NULL);
@@ -7679,34 +7700,33 @@ static void hardware_unsetup(void)
free_kvm_area();
}
-static bool vmx_check_apicv_inhibit_reasons(ulong bit)
+static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
{
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
BIT(APICV_INHIBIT_REASON_ABSENT) |
BIT(APICV_INHIBIT_REASON_HYPERV) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
- return supported & BIT(bit);
+ return supported & BIT(reason);
}
static struct kvm_x86_ops vmx_x86_ops __initdata = {
.name = "kvm_intel",
- .hardware_unsetup = hardware_unsetup,
+ .hardware_unsetup = vmx_hardware_unsetup,
- .hardware_enable = hardware_enable,
- .hardware_disable = hardware_disable,
- .cpu_has_accelerated_tpr = report_flexpriority,
+ .hardware_enable = vmx_hardware_enable,
+ .hardware_disable = vmx_hardware_disable,
.has_emulated_msr = vmx_has_emulated_msr,
.vm_size = sizeof(struct kvm_vmx),
.vm_init = vmx_vm_init,
- .vcpu_create = vmx_create_vcpu,
- .vcpu_free = vmx_free_vcpu,
+ .vcpu_create = vmx_vcpu_create,
+ .vcpu_free = vmx_vcpu_free,
.vcpu_reset = vmx_vcpu_reset,
- .prepare_guest_switch = vmx_prepare_switch_to_guest,
+ .prepare_switch_to_guest = vmx_prepare_switch_to_guest,
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
@@ -7734,21 +7754,21 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_rflags = vmx_set_rflags,
.get_if_flag = vmx_get_if_flag,
- .tlb_flush_all = vmx_flush_tlb_all,
- .tlb_flush_current = vmx_flush_tlb_current,
- .tlb_flush_gva = vmx_flush_tlb_gva,
- .tlb_flush_guest = vmx_flush_tlb_guest,
+ .flush_tlb_all = vmx_flush_tlb_all,
+ .flush_tlb_current = vmx_flush_tlb_current,
+ .flush_tlb_gva = vmx_flush_tlb_gva,
+ .flush_tlb_guest = vmx_flush_tlb_guest,
.vcpu_pre_run = vmx_vcpu_pre_run,
- .run = vmx_vcpu_run,
+ .vcpu_run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
.skip_emulated_instruction = vmx_skip_emulated_instruction,
.update_emulated_instruction = vmx_update_emulated_instruction,
.set_interrupt_shadow = vmx_set_interrupt_shadow,
.get_interrupt_shadow = vmx_get_interrupt_shadow,
.patch_hypercall = vmx_patch_hypercall,
- .set_irq = vmx_inject_irq,
- .set_nmi = vmx_inject_nmi,
+ .inject_irq = vmx_inject_irq,
+ .inject_nmi = vmx_inject_nmi,
.queue_exception = vmx_queue_exception,
.cancel_injection = vmx_cancel_injection,
.interrupt_allowed = vmx_interrupt_allowed,
@@ -7768,7 +7788,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hwapic_isr_update = vmx_hwapic_isr_update,
.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
- .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+ .deliver_interrupt = vmx_deliver_interrupt,
.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
.set_tss_addr = vmx_set_tss_addr,
@@ -7801,8 +7821,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.pmu_ops = &intel_pmu_ops,
.nested_ops = &vmx_nested_ops,
- .update_pi_irte = pi_update_irte,
- .start_assignment = vmx_pi_start_assignment,
+ .pi_update_irte = vmx_pi_update_irte,
+ .pi_start_assignment = vmx_pi_start_assignment,
#ifdef CONFIG_X86_64
.set_hv_timer = vmx_set_hv_timer,
@@ -7955,12 +7975,11 @@ static __init int hardware_setup(void)
if (!enable_apicv)
vmx_x86_ops.sync_pir_to_irr = NULL;
- if (cpu_has_vmx_tsc_scaling()) {
+ if (cpu_has_vmx_tsc_scaling())
kvm_has_tsc_control = true;
- kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
- kvm_tsc_scaling_ratio_frac_bits = 48;
- }
+ kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
+ kvm_tsc_scaling_ratio_frac_bits = 48;
kvm_has_bus_lock_exit = cpu_has_vmx_bus_lock_detection();
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
@@ -8037,7 +8056,7 @@ static __init int hardware_setup(void)
vmx_set_cpu_caps();
r = alloc_kvm_area();
- if (r)
+ if (r && nested)
nested_vmx_hardware_unsetup();
kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
@@ -8117,7 +8136,6 @@ static int __init vmx_init(void)
ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
(ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
KVM_EVMCS_VERSION) {
- int cpu;
/* Check that we have assist pages on all online CPUs */
for_each_online_cpu(cpu) {
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 7f2c82e7f38f..9c6bfcd84008 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -374,9 +374,8 @@ int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
-void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
- u16 fs_sel, u16 gs_sel,
- unsigned long fs_base, unsigned long gs_base);
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+ unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);