summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/vmx
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx')
-rw-r--r--arch/x86/kvm/vmx/nested.c37
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c4
-rw-r--r--arch/x86/kvm/vmx/vmx.c112
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
4 files changed, 64 insertions, 91 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b2f0b5e9cd63..bcca0b80e0d0 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2167,15 +2167,13 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
/*
- * The PML address never changes, so it is constant in vmcs02.
- * Conceptually we want to copy the PML index from vmcs01 here,
- * and then back to vmcs01 on nested vmexit. But since we flush
- * the log and reset GUEST_PML_INDEX on each vmexit, the PML
- * index is also effectively constant in vmcs02.
+ * PML is emulated for L2, but never enabled in hardware as the MMU
+ * handles A/D emulation. Disabling PML for L2 also avoids having to
+ * deal with filtering out L2 GPAs from the buffer.
*/
if (enable_pml) {
- vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ vmcs_write64(PML_ADDRESS, 0);
+ vmcs_write16(GUEST_PML_INDEX, -1);
}
if (cpu_has_vmx_encls_vmexit())
@@ -2210,7 +2208,7 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
- u32 exec_control, vmcs12_exec_ctrl;
+ u32 exec_control;
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
@@ -2284,11 +2282,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_ENABLE_VMFUNC);
if (nested_cpu_has(vmcs12,
- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
- vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
- ~SECONDARY_EXEC_ENABLE_PML;
- exec_control |= vmcs12_exec_ctrl;
- }
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+ exec_control |= vmcs12->secondary_vm_exec_control;
+
+ /* PML is emulated and never enabled in hardware for L2. */
+ exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
/* VMCS shadowing for L2 is emulated for now */
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
@@ -4200,9 +4198,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored))
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
- if (!enable_ept)
- vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-
nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
@@ -4495,6 +4490,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vmx_set_virtual_apic_mode(vcpu);
}
+ if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
+ vmx->nested.update_vmcs01_cpu_dirty_logging = false;
+ vmx_update_cpu_dirty_logging(vcpu);
+ }
+
/* Unpin physical memory we referred to in vmcs02 */
if (vmx->nested.apic_access_page) {
kvm_release_page_clean(vmx->nested.apic_access_page);
@@ -5793,7 +5793,10 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
case EXIT_REASON_PREEMPTION_TIMER:
return true;
case EXIT_REASON_PML_FULL:
- /* We emulate PML support to L1. */
+ /*
+ * PML is emulated for an L1 VMM and should never be enabled in
+ * vmcs02, always "handle" PML_FULL by exiting to userspace.
+ */
return true;
case EXIT_REASON_VMFUNC:
/* VM functions are emulated through L2->L0 vmexits. */
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index d1df618cb7de..9efc1a6b8693 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -298,7 +298,7 @@ int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
if (IS_ERR(event)) {
pr_debug_ratelimited("%s: failed %ld\n",
__func__, PTR_ERR(event));
- return -ENOENT;
+ return PTR_ERR(event);
}
lbr_desc->event = event;
pmu->event_count++;
@@ -320,7 +320,7 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
return false;
- if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu))
+ if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
goto dummy;
/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e0a3a9be654b..50810d471462 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4277,7 +4277,12 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
*/
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
- if (!enable_pml)
+ /*
+ * PML is enabled/disabled when dirty logging of memsmlots changes, but
+ * it needs to be set here when dirty logging is already active, e.g.
+ * if this vCPU was created after dirty logging was enabled.
+ */
+ if (!vcpu->kvm->arch.cpu_dirty_logging_count)
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
if (cpu_has_vmx_xsaves()) {
@@ -4295,18 +4300,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
}
vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
-
- /*
- * Expose INVPCID if and only if PCID is also exposed to the guest.
- * INVPCID takes a #UD when it's disabled in the VMCS, but a #GP or #PF
- * if CR4.PCIDE=0. Enumerating CPUID.INVPCID=1 would lead to incorrect
- * behavior from the guest perspective (it would expect #GP or #PF).
- */
- if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
- guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
-
vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdseed, RDSEED);
@@ -5776,24 +5771,6 @@ static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
-/*
- * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
- * Called before reporting dirty_bitmap to userspace.
- */
-static void kvm_flush_pml_buffers(struct kvm *kvm)
-{
- int i;
- struct kvm_vcpu *vcpu;
- /*
- * We only need to kick vcpu out of guest mode here, as PML buffer
- * is flushed at beginning of all VMEXITs, and it's obvious that only
- * vcpus running in guest are possible to have unflushed GPAs in PML
- * buffer.
- */
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vcpu_kick(vcpu);
-}
-
static void vmx_dump_sel(char *name, uint32_t sel)
{
pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
@@ -5976,9 +5953,10 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
* updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
* querying dirty_bitmap, we only need to kick all vcpus out of guest
* mode as if vcpus is in root mode, the PML buffer must has been
- * flushed already.
+ * flushed already. Note, PML is never enabled in hardware while
+ * running L2.
*/
- if (enable_pml)
+ if (enable_pml && !is_guest_mode(vcpu))
vmx_flush_pml_buffer(vcpu);
/*
@@ -5995,6 +5973,13 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (is_guest_mode(vcpu)) {
/*
+ * PML is never enabled when running L2, bail immediately if a
+ * PML full exit occurs as something is horribly wrong.
+ */
+ if (exit_reason.basic == EXIT_REASON_PML_FULL)
+ goto unexpected_vmexit;
+
+ /*
* The host physical addresses of some pages of guest memory
* are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
* Page). The CPU may write to these pages via their host
@@ -6851,13 +6836,15 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
kvm_machine_check();
+ if (likely(!vmx->exit_reason.failed_vmentry))
+ vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+
trace_kvm_exit(vmx->exit_reason.full, vcpu, KVM_ISA_VMX);
if (unlikely(vmx->exit_reason.failed_vmentry))
return EXIT_FASTPATH_NONE;
vmx->loaded_vmcs->launched = 1;
- vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
@@ -7330,8 +7317,8 @@ static __init void vmx_set_cpu_caps(void)
/* CPUID 0x7 */
if (kvm_mpx_supported())
kvm_cpu_cap_check_and_set(X86_FEATURE_MPX);
- if (cpu_has_vmx_invpcid())
- kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);
+ if (!cpu_has_vmx_invpcid())
+ kvm_cpu_cap_clear(X86_FEATURE_INVPCID);
if (vmx_pt_mode_is_host_guest())
kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
@@ -7509,30 +7496,24 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
shrink_ple_window(vcpu);
}
-static void vmx_slot_enable_log_dirty(struct kvm *kvm,
- struct kvm_memory_slot *slot)
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{
- if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
- kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
- kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
-}
-
-static void vmx_slot_disable_log_dirty(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
- kvm_mmu_slot_set_dirty(kvm, slot);
-}
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
-static void vmx_flush_log_dirty(struct kvm *kvm)
-{
- kvm_flush_pml_buffers(kvm);
-}
+ if (is_guest_mode(vcpu)) {
+ vmx->nested.update_vmcs01_cpu_dirty_logging = true;
+ return;
+ }
-static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- gfn_t offset, unsigned long mask)
-{
- kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
+ /*
+ * Note, cpu_dirty_logging_count can be changed concurrent with this
+ * code, but in that case another update request will be made and so
+ * the guest will never run with a stale PML value.
+ */
+ if (vcpu->kvm->arch.cpu_dirty_logging_count)
+ secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
+ else
+ secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
}
static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -7642,11 +7623,6 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit)
return supported & BIT(bit);
}
-static int vmx_cpu_dirty_log_size(void)
-{
- return enable_pml ? PML_ENTITY_NUM : 0;
-}
-
static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
@@ -7746,10 +7722,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.sched_in = vmx_sched_in,
- .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
- .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
- .flush_log_dirty = vmx_flush_log_dirty,
- .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+ .cpu_dirty_log_size = PML_ENTITY_NUM,
+ .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
.pre_block = vmx_pre_block,
.post_block = vmx_post_block,
@@ -7777,7 +7751,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.msr_filter_changed = vmx_msr_filter_changed,
.complete_emulated_msr = kvm_complete_insn_gp,
- .cpu_dirty_log_size = vmx_cpu_dirty_log_size,
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
};
@@ -7894,13 +7867,8 @@ static __init int hardware_setup(void)
if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
enable_pml = 0;
- if (!enable_pml) {
- vmx_x86_ops.slot_enable_log_dirty = NULL;
- vmx_x86_ops.slot_disable_log_dirty = NULL;
- vmx_x86_ops.flush_log_dirty = NULL;
- vmx_x86_ops.enable_log_dirty_pt_masked = NULL;
- vmx_x86_ops.cpu_dirty_log_size = NULL;
- }
+ if (!enable_pml)
+ vmx_x86_ops.cpu_dirty_log_size = 0;
if (!cpu_has_vmx_preemption_timer())
enable_preemption_timer = false;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 12c53d05a902..89da5e1251f1 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -165,6 +165,7 @@ struct nested_vmx {
bool change_vmcs01_virtual_apic_mode;
bool reload_vmcs01_apic_access_page;
+ bool update_vmcs01_cpu_dirty_logging;
/*
* Enlightened VMCS has been enabled. It does not mean that L1 has to
@@ -393,6 +394,7 @@ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
u32 msr, int type, bool value);
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
static inline u8 vmx_get_rvi(void)
{