summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/svm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r--arch/x86/kvm/svm/avic.c87
-rw-r--r--arch/x86/kvm/svm/nested.c308
-rw-r--r--arch/x86/kvm/svm/pmu.c31
-rw-r--r--arch/x86/kvm/svm/sev.c162
-rw-r--r--arch/x86/kvm/svm/svm.c224
-rw-r--r--arch/x86/kvm/svm/svm.h61
6 files changed, 628 insertions, 245 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index a1cf9c31273b..54fe03714f8a 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -165,9 +165,8 @@ free_avic:
return err;
}
-void avic_init_vmcb(struct vcpu_svm *svm)
+void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
{
- struct vmcb *vmcb = svm->vmcb;
struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
@@ -285,11 +284,77 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
put_cpu();
}
-static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
- u32 icrl, u32 icrh)
+/*
+ * A fast-path version of avic_kick_target_vcpus(), which attempts to match
+ * destination APIC ID to vCPU without looping through all vCPUs.
+ */
+static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
+ u32 icrl, u32 icrh, u32 index)
{
+ u32 dest, apic_id;
struct kvm_vcpu *vcpu;
+ int dest_mode = icrl & APIC_DEST_MASK;
+ int shorthand = icrl & APIC_SHORT_MASK;
+ struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+ u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
+
+ if (shorthand != APIC_DEST_NOSHORT)
+ return -EINVAL;
+
+ /*
+ * The AVIC incomplete IPI #vmexit info provides index into
+ * the physical APIC ID table, which can be used to derive
+ * guest physical APIC ID.
+ */
+ if (dest_mode == APIC_DEST_PHYSICAL) {
+ apic_id = index;
+ } else {
+ if (!apic_x2apic_mode(source)) {
+ /* For xAPIC logical mode, the index is for logical APIC table. */
+ apic_id = avic_logical_id_table[index] & 0x1ff;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * Assuming vcpu ID is the same as physical apic ID,
+ * and use it to retrieve the target vCPU.
+ */
+ vcpu = kvm_get_vcpu_by_id(kvm, apic_id);
+ if (!vcpu)
+ return -EINVAL;
+
+ if (apic_x2apic_mode(vcpu->arch.apic))
+ dest = icrh;
+ else
+ dest = GET_APIC_DEST_FIELD(icrh);
+
+ /*
+ * Try matching the destination APIC ID with the vCPU.
+ */
+ if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) {
+ vcpu->arch.apic->irr_pending = true;
+ svm_complete_interrupt_delivery(vcpu,
+ icrl & APIC_MODE_MASK,
+ icrl & APIC_INT_LEVELTRIG,
+ icrl & APIC_VECTOR_MASK);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
+ u32 icrl, u32 icrh, u32 index)
+{
unsigned long i;
+ struct kvm_vcpu *vcpu;
+
+ if (!avic_kick_target_vcpus_fast(kvm, source, icrl, icrh, index))
+ return;
+
+ trace_kvm_avic_kick_vcpu_slowpath(icrh, icrl, index);
/*
* Wake any target vCPUs that are blocking, i.e. waiting for a wake
@@ -316,7 +381,7 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
u32 icrl = svm->vmcb->control.exit_info_1;
u32 id = svm->vmcb->control.exit_info_2 >> 32;
- u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
+ u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
struct kvm_lapic *apic = vcpu->arch.apic;
trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
@@ -343,7 +408,7 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
* set the appropriate IRR bits on the valid target
* vcpus. So, we just need to kick the appropriate vcpu.
*/
- avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
+ avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
break;
case AVIC_IPI_FAILURE_INVALID_TARGET:
break;
@@ -357,6 +422,13 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
return 1;
}
+unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
+{
+ if (is_guest_mode(vcpu))
+ return APICV_INHIBIT_REASON_NESTED;
+ return 0;
+}
+
static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
@@ -837,7 +909,8 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
BIT(APICV_INHIBIT_REASON_IRQWIN) |
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
BIT(APICV_INHIBIT_REASON_X2APIC) |
- BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
+ BIT(APICV_INHIBIT_REASON_SEV);
return supported & BIT(reason);
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 96bab464967f..bed5e1692cef 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -36,40 +36,45 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *vmcb = svm->vmcb;
- if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
+ if (vmcb->control.exit_code != SVM_EXIT_NPF) {
/*
* TODO: track the cause of the nested page fault, and
* correctly fill in the high bits of exit_info_1.
*/
- svm->vmcb->control.exit_code = SVM_EXIT_NPF;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = (1ULL << 32);
- svm->vmcb->control.exit_info_2 = fault->address;
+ vmcb->control.exit_code = SVM_EXIT_NPF;
+ vmcb->control.exit_code_hi = 0;
+ vmcb->control.exit_info_1 = (1ULL << 32);
+ vmcb->control.exit_info_2 = fault->address;
}
- svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
- svm->vmcb->control.exit_info_1 |= fault->error_code;
+ vmcb->control.exit_info_1 &= ~0xffffffffULL;
+ vmcb->control.exit_info_1 |= fault->error_code;
nested_svm_vmexit(svm);
}
-static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
{
- struct vcpu_svm *svm = to_svm(vcpu);
- WARN_ON(!is_guest_mode(vcpu));
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *vmcb = svm->vmcb;
+
+ WARN_ON(!is_guest_mode(vcpu));
if (vmcb12_is_intercept(&svm->nested.ctl,
INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
- !svm->nested.nested_run_pending) {
- svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = fault->error_code;
- svm->vmcb->control.exit_info_2 = fault->address;
- nested_svm_vmexit(svm);
- } else {
- kvm_inject_page_fault(vcpu, fault);
- }
+ !WARN_ON_ONCE(svm->nested.nested_run_pending)) {
+ vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
+ vmcb->control.exit_code_hi = 0;
+ vmcb->control.exit_info_1 = fault->error_code;
+ vmcb->control.exit_info_2 = fault->address;
+ nested_svm_vmexit(svm);
+ return true;
+ }
+
+ return false;
}
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
@@ -121,6 +126,20 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}
+static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
+{
+ if (!svm->v_vmload_vmsave_enabled)
+ return true;
+
+ if (!nested_npt_enabled(svm))
+ return true;
+
+ if (!(svm->nested.ctl.virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK))
+ return true;
+
+ return false;
+}
+
void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h;
@@ -162,8 +181,17 @@ void recalc_intercepts(struct vcpu_svm *svm)
if (!intercept_smi)
vmcb_clr_intercept(c, INTERCEPT_SMI);
- vmcb_set_intercept(c, INTERCEPT_VMLOAD);
- vmcb_set_intercept(c, INTERCEPT_VMSAVE);
+ if (nested_vmcb_needs_vls_intercept(svm)) {
+ /*
+ * If the virtual VMLOAD/VMSAVE is not enabled for the L2,
+ * we must intercept these instructions to correctly
+ * emulate them in case L1 doesn't intercept them.
+ */
+ vmcb_set_intercept(c, INTERCEPT_VMLOAD);
+ vmcb_set_intercept(c, INTERCEPT_VMSAVE);
+ } else {
+ WARN_ON(!(c->virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK));
+ }
}
/*
@@ -413,6 +441,10 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
*/
mask &= ~V_IRQ_MASK;
}
+
+ if (nested_vgif_enabled(svm))
+ mask |= V_GIF_MASK;
+
svm->nested.ctl.int_ctl &= ~mask;
svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
}
@@ -454,11 +486,6 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
vmcb12->control.exit_int_info = exit_int_info;
}
-static inline bool nested_npt_enabled(struct vcpu_svm *svm)
-{
- return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
-}
-
static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
{
/*
@@ -515,6 +542,8 @@ void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
{
bool new_vmcb12 = false;
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
+ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
nested_vmcb02_compute_g_pat(svm);
@@ -526,18 +555,18 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
}
if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
- svm->vmcb->save.es = vmcb12->save.es;
- svm->vmcb->save.cs = vmcb12->save.cs;
- svm->vmcb->save.ss = vmcb12->save.ss;
- svm->vmcb->save.ds = vmcb12->save.ds;
- svm->vmcb->save.cpl = vmcb12->save.cpl;
- vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
+ vmcb02->save.es = vmcb12->save.es;
+ vmcb02->save.cs = vmcb12->save.cs;
+ vmcb02->save.ss = vmcb12->save.ss;
+ vmcb02->save.ds = vmcb12->save.ds;
+ vmcb02->save.cpl = vmcb12->save.cpl;
+ vmcb_mark_dirty(vmcb02, VMCB_SEG);
}
if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
- svm->vmcb->save.gdtr = vmcb12->save.gdtr;
- svm->vmcb->save.idtr = vmcb12->save.idtr;
- vmcb_mark_dirty(svm->vmcb, VMCB_DT);
+ vmcb02->save.gdtr = vmcb12->save.gdtr;
+ vmcb02->save.idtr = vmcb12->save.idtr;
+ vmcb_mark_dirty(vmcb02, VMCB_DT);
}
kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
@@ -554,47 +583,59 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
/* In case we don't even reach vcpu_run, the fields are not updated */
- svm->vmcb->save.rax = vmcb12->save.rax;
- svm->vmcb->save.rsp = vmcb12->save.rsp;
- svm->vmcb->save.rip = vmcb12->save.rip;
+ vmcb02->save.rax = vmcb12->save.rax;
+ vmcb02->save.rsp = vmcb12->save.rsp;
+ vmcb02->save.rip = vmcb12->save.rip;
/* These bits will be set properly on the first execution when new_vmc12 is true */
if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
- svm->vmcb->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
+ vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
- vmcb_mark_dirty(svm->vmcb, VMCB_DR);
+ vmcb_mark_dirty(vmcb02, VMCB_DR);
+ }
+
+ if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+ /*
+ * Reserved bits of DEBUGCTL are ignored. Be consistent with
+ * svm_set_msr's definition of reserved bits.
+ */
+ svm_copy_lbrs(vmcb02, vmcb12);
+ vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
+ svm_update_lbrv(&svm->vcpu);
+
+ } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
+ svm_copy_lbrs(vmcb02, vmcb01);
}
}
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
- const u32 int_ctl_vmcb01_bits =
- V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK;
-
- const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
+ u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
+ u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
+ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
/*
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
* exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
*/
- /*
- * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
- * avic_physical_id.
- */
- WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+ if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
+ int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
+ else
+ int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
/* Copied from vmcb01. msrpm_base can be overwritten later. */
- svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
- svm->vmcb->control.iopm_base_pa = svm->vmcb01.ptr->control.iopm_base_pa;
- svm->vmcb->control.msrpm_base_pa = svm->vmcb01.ptr->control.msrpm_base_pa;
+ vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
+ vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
+ vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
/* Done at vmrun: asid. */
/* Also overwritten later if necessary. */
- svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+ vmcb02->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
/* nested_cr3. */
if (nested_npt_enabled(svm))
@@ -605,21 +646,53 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
svm->nested.ctl.tsc_offset,
svm->tsc_ratio_msr);
- svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
+ vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
WARN_ON(!svm->tsc_scaling_enabled);
nested_svm_update_tsc_ratio_msr(vcpu);
}
- svm->vmcb->control.int_ctl =
+ vmcb02->control.int_ctl =
(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
- (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
-
- svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
- svm->vmcb->control.int_state = svm->nested.ctl.int_state;
- svm->vmcb->control.event_inj = svm->nested.ctl.event_inj;
- svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
+ (vmcb01->control.int_ctl & int_ctl_vmcb01_bits);
+
+ vmcb02->control.int_vector = svm->nested.ctl.int_vector;
+ vmcb02->control.int_state = svm->nested.ctl.int_state;
+ vmcb02->control.event_inj = svm->nested.ctl.event_inj;
+ vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err;
+
+ vmcb02->control.virt_ext = vmcb01->control.virt_ext &
+ LBR_CTL_ENABLE_MASK;
+ if (svm->lbrv_enabled)
+ vmcb02->control.virt_ext |=
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
+
+ if (!nested_vmcb_needs_vls_intercept(svm))
+ vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+
+ if (kvm_pause_in_guest(svm->vcpu.kvm)) {
+ /* use guest values since host doesn't use them */
+ vmcb02->control.pause_filter_count =
+ svm->pause_filter_enabled ?
+ svm->nested.ctl.pause_filter_count : 0;
+
+ vmcb02->control.pause_filter_thresh =
+ svm->pause_threshold_enabled ?
+ svm->nested.ctl.pause_filter_thresh : 0;
+
+ } else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
+ /* use host values when guest doesn't use them */
+ vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
+ vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
+ } else {
+ /*
+ * Intercept every PAUSE otherwise and
+ * ignore both host and guest values
+ */
+ vmcb02->control.pause_filter_count = 0;
+ vmcb02->control.pause_filter_thresh = 0;
+ }
nested_svm_transition_tlb_flush(vcpu);
@@ -680,14 +753,14 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
if (ret)
return ret;
- if (!npt_enabled)
- vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
-
if (!from_vmrun)
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
svm_set_gif(svm, true);
+ if (kvm_vcpu_apicv_active(vcpu))
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+
return 0;
}
@@ -698,6 +771,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
struct vmcb *vmcb12;
struct kvm_host_map map;
u64 vmcb12_gpa;
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
if (!svm->nested.hsave_msr) {
kvm_inject_gp(vcpu, 0);
@@ -741,14 +815,14 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
* Since vmcb01 is not in use, we can use it to store some of the L1
* state.
*/
- svm->vmcb01.ptr->save.efer = vcpu->arch.efer;
- svm->vmcb01.ptr->save.cr0 = kvm_read_cr0(vcpu);
- svm->vmcb01.ptr->save.cr4 = vcpu->arch.cr4;
- svm->vmcb01.ptr->save.rflags = kvm_get_rflags(vcpu);
- svm->vmcb01.ptr->save.rip = kvm_rip_read(vcpu);
+ vmcb01->save.efer = vcpu->arch.efer;
+ vmcb01->save.cr0 = kvm_read_cr0(vcpu);
+ vmcb01->save.cr4 = vcpu->arch.cr4;
+ vmcb01->save.rflags = kvm_get_rflags(vcpu);
+ vmcb01->save.rip = kvm_rip_read(vcpu);
if (!npt_enabled)
- svm->vmcb01.ptr->save.cr3 = kvm_read_cr3(vcpu);
+ vmcb01->save.cr3 = kvm_read_cr3(vcpu);
svm->nested.nested_run_pending = 1;
@@ -814,14 +888,12 @@ void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
int nested_svm_vmexit(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
+ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
struct vmcb *vmcb12;
- struct vmcb *vmcb = svm->vmcb;
struct kvm_host_map map;
int rc;
- /* Triple faults in L2 should never escape. */
- WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
-
rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
if (rc) {
if (rc == -EINVAL)
@@ -843,57 +915,68 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
/* Give the current vmcb to the guest */
- vmcb12->save.es = vmcb->save.es;
- vmcb12->save.cs = vmcb->save.cs;
- vmcb12->save.ss = vmcb->save.ss;
- vmcb12->save.ds = vmcb->save.ds;
- vmcb12->save.gdtr = vmcb->save.gdtr;
- vmcb12->save.idtr = vmcb->save.idtr;
+ vmcb12->save.es = vmcb02->save.es;
+ vmcb12->save.cs = vmcb02->save.cs;
+ vmcb12->save.ss = vmcb02->save.ss;
+ vmcb12->save.ds = vmcb02->save.ds;
+ vmcb12->save.gdtr = vmcb02->save.gdtr;
+ vmcb12->save.idtr = vmcb02->save.idtr;
vmcb12->save.efer = svm->vcpu.arch.efer;
vmcb12->save.cr0 = kvm_read_cr0(vcpu);
vmcb12->save.cr3 = kvm_read_cr3(vcpu);
- vmcb12->save.cr2 = vmcb->save.cr2;
+ vmcb12->save.cr2 = vmcb02->save.cr2;
vmcb12->save.cr4 = svm->vcpu.arch.cr4;
vmcb12->save.rflags = kvm_get_rflags(vcpu);
vmcb12->save.rip = kvm_rip_read(vcpu);
vmcb12->save.rsp = kvm_rsp_read(vcpu);
vmcb12->save.rax = kvm_rax_read(vcpu);
- vmcb12->save.dr7 = vmcb->save.dr7;
+ vmcb12->save.dr7 = vmcb02->save.dr7;
vmcb12->save.dr6 = svm->vcpu.arch.dr6;
- vmcb12->save.cpl = vmcb->save.cpl;
+ vmcb12->save.cpl = vmcb02->save.cpl;
- vmcb12->control.int_state = vmcb->control.int_state;
- vmcb12->control.exit_code = vmcb->control.exit_code;
- vmcb12->control.exit_code_hi = vmcb->control.exit_code_hi;
- vmcb12->control.exit_info_1 = vmcb->control.exit_info_1;
- vmcb12->control.exit_info_2 = vmcb->control.exit_info_2;
+ vmcb12->control.int_state = vmcb02->control.int_state;
+ vmcb12->control.exit_code = vmcb02->control.exit_code;
+ vmcb12->control.exit_code_hi = vmcb02->control.exit_code_hi;
+ vmcb12->control.exit_info_1 = vmcb02->control.exit_info_1;
+ vmcb12->control.exit_info_2 = vmcb02->control.exit_info_2;
if (vmcb12->control.exit_code != SVM_EXIT_ERR)
nested_save_pending_event_to_vmcb12(svm, vmcb12);
if (svm->nrips_enabled)
- vmcb12->control.next_rip = vmcb->control.next_rip;
+ vmcb12->control.next_rip = vmcb02->control.next_rip;
vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
vmcb12->control.tlb_ctl = svm->nested.ctl.tlb_ctl;
vmcb12->control.event_inj = svm->nested.ctl.event_inj;
vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
+ if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count)
+ vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
+
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
svm_switch_vmcb(svm, &svm->vmcb01);
+ if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+ svm_copy_lbrs(vmcb12, vmcb02);
+ svm_update_lbrv(vcpu);
+ } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
+ svm_copy_lbrs(vmcb01, vmcb02);
+ svm_update_lbrv(vcpu);
+ }
+
/*
* On vmexit the GIF is set to false and
* no event can be injected in L1.
*/
svm_set_gif(svm, false);
- svm->vmcb->control.exit_int_info = 0;
+ vmcb01->control.exit_int_info = 0;
svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
- if (svm->vmcb->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
- svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
- vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+ if (vmcb01->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
+ vmcb01->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+ vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
}
if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
@@ -907,13 +990,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
/*
* Restore processor state that had been saved in vmcb01
*/
- kvm_set_rflags(vcpu, svm->vmcb->save.rflags);
- svm_set_efer(vcpu, svm->vmcb->save.efer);
- svm_set_cr0(vcpu, svm->vmcb->save.cr0 | X86_CR0_PE);
- svm_set_cr4(vcpu, svm->vmcb->save.cr4);
- kvm_rax_write(vcpu, svm->vmcb->save.rax);
- kvm_rsp_write(vcpu, svm->vmcb->save.rsp);
- kvm_rip_write(vcpu, svm->vmcb->save.rip);
+ kvm_set_rflags(vcpu, vmcb01->save.rflags);
+ svm_set_efer(vcpu, vmcb01->save.efer);
+ svm_set_cr0(vcpu, vmcb01->save.cr0 | X86_CR0_PE);
+ svm_set_cr4(vcpu, vmcb01->save.cr4);
+ kvm_rax_write(vcpu, vmcb01->save.rax);
+ kvm_rsp_write(vcpu, vmcb01->save.rsp);
+ kvm_rip_write(vcpu, vmcb01->save.rip);
svm->vcpu.arch.dr7 = DR7_FIXED_1;
kvm_update_dr7(&svm->vcpu);
@@ -931,7 +1014,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_svm_uninit_mmu_context(vcpu);
- rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false, true);
+ rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true);
if (rc)
return 1;
@@ -949,9 +1032,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
* right now so that it an be accounted for before we execute
* L1's next instruction.
*/
- if (unlikely(svm->vmcb->save.rflags & X86_EFLAGS_TF))
+ if (unlikely(vmcb01->save.rflags & X86_EFLAGS_TF))
kvm_queue_exception(&(svm->vcpu), DB_VECTOR);
+ /*
+ * Un-inhibit the AVIC right away, so that other vCPUs can start
+ * to benefit from it right away.
+ */
+ if (kvm_apicv_activated(vcpu->kvm))
+ kvm_vcpu_update_apicv(vcpu);
+
return 0;
}
@@ -1162,12 +1252,13 @@ static bool nested_exit_on_exception(struct vcpu_svm *svm)
static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
{
unsigned int nr = svm->vcpu.arch.exception.nr;
+ struct vmcb *vmcb = svm->vmcb;
- svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
- svm->vmcb->control.exit_code_hi = 0;
+ vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
+ vmcb->control.exit_code_hi = 0;
if (svm->vcpu.arch.exception.has_error_code)
- svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;
+ vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;
/*
* EXITINFO2 is undefined for all exception intercepts other
@@ -1175,11 +1266,11 @@ static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
*/
if (nr == PF_VECTOR) {
if (svm->vcpu.arch.exception.nested_apf)
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+ vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
else if (svm->vcpu.arch.exception.has_payload)
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
+ vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
else
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+ vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
} else if (nr == DB_VECTOR) {
/* See inject_pending_event. */
kvm_deliver_exception_payload(&svm->vcpu);
@@ -1567,6 +1658,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
struct kvm_x86_nested_ops svm_nested_ops = {
.leave_nested = svm_leave_nested,
.check_events = svm_check_nested_events,
+ .handle_page_fault_workaround = nested_svm_handle_page_fault_workaround,
.triple_fault = nested_svm_triple_fault,
.get_nested_state_pages = svm_get_nested_state_pages,
.get_state = svm_get_nested_state,
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 24eb935b6f85..136039fc6d01 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -45,6 +45,22 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
};
+/* duplicated from amd_f17h_perfmon_event_map. */
+static struct kvm_event_hw_type_mapping amd_f17h_event_mapping[] = {
+ [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
+ [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
+ [2] = { 0x60, 0xff, PERF_COUNT_HW_CACHE_REFERENCES },
+ [3] = { 0x64, 0x09, PERF_COUNT_HW_CACHE_MISSES },
+ [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+ [6] = { 0x87, 0x02, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+ [7] = { 0x87, 0x01, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+};
+
+/* amd_pmc_perf_hw_id depends on these being the same size */
+static_assert(ARRAY_SIZE(amd_event_mapping) ==
+ ARRAY_SIZE(amd_f17h_event_mapping));
+
static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
{
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
@@ -140,6 +156,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
{
+ struct kvm_event_hw_type_mapping *event_mapping;
u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
int i;
@@ -148,15 +165,20 @@ static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
if (WARN_ON(pmc_is_fixed(pmc)))
return PERF_COUNT_HW_MAX;
+ if (guest_cpuid_family(pmc->vcpu) >= 0x17)
+ event_mapping = amd_f17h_event_mapping;
+ else
+ event_mapping = amd_event_mapping;
+
for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
- if (amd_event_mapping[i].eventsel == event_select
- && amd_event_mapping[i].unit_mask == unit_mask)
+ if (event_mapping[i].eventsel == event_select
+ && event_mapping[i].unit_mask == unit_mask)
break;
if (i == ARRAY_SIZE(amd_event_mapping))
return PERF_COUNT_HW_MAX;
- return amd_event_mapping[i].event_type;
+ return event_mapping[i].event_type;
}
/* check if a PMC is enabled by comparing it against global_ctrl bits. Because
@@ -257,6 +279,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
pmc->counter += data - pmc_read_counter(pmc);
+ pmc_update_sample_period(pmc);
return 0;
}
/* MSR_EVNTSELn */
@@ -319,7 +342,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
}
}
-struct kvm_pmu_ops amd_pmu_ops = {
+struct kvm_pmu_ops amd_pmu_ops __initdata = {
.pmc_perf_hw_id = amd_pmc_perf_hw_id,
.pmc_is_enabled = amd_pmc_is_enabled,
.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 75fa6dd268f0..51fd985cf21d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -260,6 +260,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
INIT_LIST_HEAD(&sev->regions_list);
INIT_LIST_HEAD(&sev->mirror_vms);
+ kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV);
+
return 0;
e_free:
@@ -465,6 +467,7 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
page_virtual = kmap_atomic(pages[i]);
clflush_cache_range(page_virtual, PAGE_SIZE);
kunmap_atomic(page_virtual);
+ cond_resched();
}
}
@@ -559,12 +562,20 @@ e_unpin:
static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
- struct vmcb_save_area *save = &svm->vmcb->save;
+ struct sev_es_save_area *save = svm->sev_es.vmsa;
/* Check some debug related fields before encrypting the VMSA */
- if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
+ if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
return -EINVAL;
+ /*
+ * SEV-ES will use a VMSA that is pointed to by the VMCB, not
+ * the traditional VMSA that is part of the VMCB. Copy the
+ * traditional VMSA as it has been built so far (in prep
+ * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
+ */
+ memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save));
+
/* Sync registgers */
save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
@@ -592,14 +603,6 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->xss = svm->vcpu.arch.ia32_xss;
save->dr6 = svm->vcpu.arch.dr6;
- /*
- * SEV-ES will use a VMSA that is pointed to by the VMCB, not
- * the traditional VMSA that is part of the VMCB. Copy the
- * traditional VMSA as it has been built so far (in prep
- * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
- */
- memcpy(svm->sev_es.vmsa, save, sizeof(*save));
-
return 0;
}
@@ -685,7 +688,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (params.len > SEV_FW_BLOB_MAX_SIZE)
return -EINVAL;
- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
+ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
if (!blob)
return -ENOMEM;
@@ -805,7 +808,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
if (!IS_ALIGNED(dst_paddr, 16) ||
!IS_ALIGNED(paddr, 16) ||
!IS_ALIGNED(size, 16)) {
- tpage = (void *)alloc_page(GFP_KERNEL);
+ tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!tpage)
return -ENOMEM;
@@ -1091,7 +1094,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (params.len > SEV_FW_BLOB_MAX_SIZE)
return -EINVAL;
- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
+ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
if (!blob)
return -ENOMEM;
@@ -1173,7 +1176,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
return -EINVAL;
/* allocate the memory to hold the session data blob */
- session_data = kmalloc(params.session_len, GFP_KERNEL_ACCOUNT);
+ session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT);
if (!session_data)
return -ENOMEM;
@@ -1297,11 +1300,11 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* allocate memory for header and transport buffer */
ret = -ENOMEM;
- hdr = kmalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
+ hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
if (!hdr)
goto e_unpin;
- trans_data = kmalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
+ trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
if (!trans_data)
goto e_free_hdr;
@@ -1591,24 +1594,51 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
atomic_set_release(&src_sev->migration_in_progress, 0);
}
+/* vCPU mutex subclasses. */
+enum sev_migration_role {
+ SEV_MIGRATION_SOURCE = 0,
+ SEV_MIGRATION_TARGET,
+ SEV_NR_MIGRATION_ROLES,
+};
-static int sev_lock_vcpus_for_migration(struct kvm *kvm)
+static int sev_lock_vcpus_for_migration(struct kvm *kvm,
+ enum sev_migration_role role)
{
struct kvm_vcpu *vcpu;
unsigned long i, j;
+ bool first = true;
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (mutex_lock_killable(&vcpu->mutex))
+ if (mutex_lock_killable_nested(&vcpu->mutex, role))
goto out_unlock;
+
+ if (first) {
+ /*
+ * Reset the role to one that avoids colliding with
+ * the role used for the first vcpu mutex.
+ */
+ role = SEV_NR_MIGRATION_ROLES;
+ first = false;
+ } else {
+ mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
+ }
}
return 0;
out_unlock:
+
+ first = true;
kvm_for_each_vcpu(j, vcpu, kvm) {
if (i == j)
break;
+ if (first)
+ first = false;
+ else
+ mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
+
+
mutex_unlock(&vcpu->mutex);
}
return -EINTR;
@@ -1618,8 +1648,15 @@ static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
unsigned long i;
+ bool first = true;
kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (first)
+ first = false;
+ else
+ mutex_acquire(&vcpu->mutex.dep_map,
+ SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
+
mutex_unlock(&vcpu->mutex);
}
}
@@ -1745,10 +1782,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
charged = true;
}
- ret = sev_lock_vcpus_for_migration(kvm);
+ ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
if (ret)
goto out_dst_cgroup;
- ret = sev_lock_vcpus_for_migration(source_kvm);
+ ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
if (ret)
goto out_dst_vcpu;
@@ -2223,51 +2260,47 @@ int sev_cpu_init(struct svm_cpu_data *sd)
* Pages used by hardware to hold guest encrypted state must be flushed before
* returning them to the system.
*/
-static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
- unsigned long len)
+static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
{
+ int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+
/*
- * If hardware enforced cache coherency for encrypted mappings of the
- * same physical page is supported, nothing to do.
+ * Note! The address must be a kernel address, as regular page walk
+ * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user
+ * address is non-deterministic and unsafe. This function deliberately
+ * takes a pointer to deter passing in a user address.
*/
- if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
- return;
+ unsigned long addr = (unsigned long)va;
/*
- * If the VM Page Flush MSR is supported, use it to flush the page
- * (using the page virtual address and the guest ASID).
+ * If CPU enforced cache coherency for encrypted mappings of the
+ * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache
+ * flush is still needed in order to work properly with DMA devices.
*/
- if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
- struct kvm_sev_info *sev;
- unsigned long va_start;
- u64 start, stop;
-
- /* Align start and stop to page boundaries. */
- va_start = (unsigned long)va;
- start = (u64)va_start & PAGE_MASK;
- stop = PAGE_ALIGN((u64)va_start + len);
-
- if (start < stop) {
- sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+ if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
+ clflush_cache_range(va, PAGE_SIZE);
+ return;
+ }
- while (start < stop) {
- wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
- start | sev->asid);
+ /*
+ * VM Page Flush takes a host virtual address and a guest ASID. Fall
+ * back to WBINVD if this faults so as not to make any problems worse
+ * by leaving stale encrypted data in the cache.
+ */
+ if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
+ goto do_wbinvd;
- start += PAGE_SIZE;
- }
+ return;
- return;
- }
+do_wbinvd:
+ wbinvd_on_all_cpus();
+}
- WARN(1, "Address overflow, using WBINVD\n");
- }
+void sev_guest_memory_reclaimed(struct kvm *kvm)
+{
+ if (!sev_guest(kvm))
+ return;
- /*
- * Hardware should always have one of the above features,
- * but if not, use WBINVD and issue a warning.
- */
- WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
wbinvd_on_all_cpus();
}
@@ -2281,7 +2314,8 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu);
if (vcpu->arch.guest_state_protected)
- sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
+ sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
+
__free_page(virt_to_page(svm->sev_es.vmsa));
if (svm->sev_es.ghcb_sa_free)
@@ -2735,8 +2769,12 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
reason_set, reason_code);
- ret = -EINVAL;
- break;
+ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM;
+ vcpu->run->system_event.ndata = 1;
+ vcpu->run->system_event.data[0] = control->ghcb_gpa;
+
+ return 0;
}
default:
/* Error, keep GHCB MSR value as-is */
@@ -2919,6 +2957,14 @@ void sev_es_init_vmcb(struct vcpu_svm *svm)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+
+ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
+ (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
+ guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) {
+ set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
+ if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
+ svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+ }
}
void sev_es_vcpu_reset(struct vcpu_svm *svm)
@@ -2932,7 +2978,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
sev_enc_bit));
}
-void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa)
+void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
{
/*
* As an SEV-ES guest, hardware will restore the host state on VMEXIT,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index bd4c64b362d2..200045f71df0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -62,8 +62,6 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3
-#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-
static bool erratum_383_found __read_mostly;
u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
@@ -101,6 +99,7 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_EFER, .always = false },
{ .index = MSR_IA32_CR_PAT, .always = false },
{ .index = MSR_AMD64_SEV_ES_GHCB, .always = true },
+ { .index = MSR_TSC_AUX, .always = false },
{ .index = MSR_INVALID, .always = false },
};
@@ -172,7 +171,7 @@ static int vls = true;
module_param(vls, int, 0444);
/* enable/disable Virtual GIF */
-static int vgif = true;
+int vgif = true;
module_param(vgif, int, 0444);
/* enable/disable LBR virtualization */
@@ -189,6 +188,9 @@ module_param(tsc_scaling, int, 0444);
static bool avic;
module_param(avic, bool, 0444);
+static bool force_avic;
+module_param_unsafe(force_avic, bool, 0444);
+
bool __read_mostly dump_invalid_vmcb;
module_param(dump_invalid_vmcb, bool, 0644);
@@ -790,6 +792,17 @@ static void init_msrpm_offsets(void)
}
}
+void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
+{
+ to_vmcb->save.dbgctl = from_vmcb->save.dbgctl;
+ to_vmcb->save.br_from = from_vmcb->save.br_from;
+ to_vmcb->save.br_to = from_vmcb->save.br_to;
+ to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from;
+ to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to;
+
+ vmcb_mark_dirty(to_vmcb, VMCB_LBR);
+}
+
static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -799,6 +812,10 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+
+ /* Move the LBR msrs to the vmcb02 so that the guest can see them. */
+ if (is_guest_mode(vcpu))
+ svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
}
static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
@@ -810,6 +827,67 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
+
+ /*
+ * Move the LBR msrs back to the vmcb01 to avoid copying them
+ * on nested guest entries.
+ */
+ if (is_guest_mode(vcpu))
+ svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
+}
+
+static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
+{
+ /*
+ * If the LBR virtualization is disabled, the LBR msrs are always
+ * kept in the vmcb01 to avoid copying them on nested guest entries.
+ *
+ * If nested, and the LBR virtualization is enabled/disabled, the msrs
+ * are moved between the vmcb01 and vmcb02 as needed.
+ */
+ struct vmcb *vmcb =
+ (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
+ svm->vmcb : svm->vmcb01.ptr;
+
+ switch (index) {
+ case MSR_IA32_DEBUGCTLMSR:
+ return vmcb->save.dbgctl;
+ case MSR_IA32_LASTBRANCHFROMIP:
+ return vmcb->save.br_from;
+ case MSR_IA32_LASTBRANCHTOIP:
+ return vmcb->save.br_to;
+ case MSR_IA32_LASTINTFROMIP:
+ return vmcb->save.last_excp_from;
+ case MSR_IA32_LASTINTTOIP:
+ return vmcb->save.last_excp_to;
+ default:
+ KVM_BUG(false, svm->vcpu.kvm,
+ "%s: Unknown MSR 0x%x", __func__, index);
+ return 0;
+ }
+}
+
+void svm_update_lbrv(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
+ DEBUGCTLMSR_LBR;
+
+ bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
+ LBR_CTL_ENABLE_MASK);
+
+ if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
+ if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
+ enable_lbrv = true;
+
+ if (enable_lbrv == current_enable_lbrv)
+ return;
+
+ if (enable_lbrv)
+ svm_enable_lbrv(vcpu);
+ else
+ svm_disable_lbrv(vcpu);
}
void disable_nmi_singlestep(struct vcpu_svm *svm)
@@ -831,6 +909,9 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
+ if (kvm_pause_in_guest(vcpu->kvm) || !old)
+ return;
+
control->pause_filter_count = __grow_ple_window(old,
pause_filter_count,
pause_filter_count_grow,
@@ -849,6 +930,9 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
+ if (kvm_pause_in_guest(vcpu->kvm) || !old)
+ return;
+
control->pause_filter_count =
__shrink_ple_window(old,
pause_filter_count,
@@ -960,6 +1044,8 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
+
+ svm->v_vmload_vmsave_enabled = false;
} else {
/*
* If hardware supports Virtual VMLOAD VMSAVE then enable it
@@ -979,8 +1065,9 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
static void init_vmcb(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb_control_area *control = &svm->vmcb->control;
- struct vmcb_save_area *save = &svm->vmcb->save;
+ struct vmcb *vmcb = svm->vmcb01.ptr;
+ struct vmcb_control_area *control = &vmcb->control;
+ struct vmcb_save_area *save = &vmcb->save;
svm_set_intercept(svm, INTERCEPT_CR0_READ);
svm_set_intercept(svm, INTERCEPT_CR3_READ);
@@ -1104,7 +1191,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
if (kvm_vcpu_apicv_active(vcpu))
- avic_init_vmcb(svm);
+ avic_init_vmcb(svm, vmcb);
if (vgif) {
svm_clr_intercept(svm, INTERCEPT_STGI);
@@ -1122,10 +1209,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
}
}
- svm_hv_init_vmcb(svm->vmcb);
+ svm_hv_init_vmcb(vmcb);
init_vmcb_after_set_cpuid(vcpu);
- vmcb_mark_all_dirty(svm->vmcb);
+ vmcb_mark_all_dirty(vmcb);
enable_gif(svm);
}
@@ -1270,8 +1357,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
*/
vmsave(__sme_page_pa(sd->save_area));
if (sev_es_guest(vcpu->kvm)) {
- struct vmcb_save_area *hostsa;
- hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+ struct sev_es_save_area *hostsa;
+ hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
sev_es_prepare_switch_to_guest(hostsa);
}
@@ -1380,7 +1467,7 @@ static void svm_set_vintr(struct vcpu_svm *svm)
/*
* The following fields are ignored when AVIC is enabled
*/
- WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+ WARN_ON(kvm_vcpu_apicv_activated(&svm->vcpu));
svm_set_intercept(svm, INTERCEPT_VINTR);
@@ -2142,7 +2229,7 @@ void svm_set_gif(struct vcpu_svm *svm, bool value)
* Likewise, clear the VINTR intercept, we will set it
* again while processing KVM_REQ_EVENT if needed.
*/
- if (vgif_enabled(svm))
+ if (vgif)
svm_clr_intercept(svm, INTERCEPT_STGI);
if (svm_is_intercept(svm, INTERCEPT_VINTR))
svm_clear_vintr(svm);
@@ -2160,7 +2247,7 @@ void svm_set_gif(struct vcpu_svm *svm, bool value)
* in use, we still rely on the VINTR intercept (rather than
* STGI) to detect an open interrupt window.
*/
- if (!vgif_enabled(svm))
+ if (!vgif)
svm_clear_vintr(svm);
}
}
@@ -2575,25 +2662,12 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_TSC_AUX:
msr_info->data = svm->tsc_aux;
break;
- /*
- * Nobody will change the following 5 values in the VMCB so we can
- * safely return them on rdmsr. They will always be 0 until LBRV is
- * implemented.
- */
case MSR_IA32_DEBUGCTLMSR:
- msr_info->data = svm->vmcb->save.dbgctl;
- break;
case MSR_IA32_LASTBRANCHFROMIP:
- msr_info->data = svm->vmcb->save.br_from;
- break;
case MSR_IA32_LASTBRANCHTOIP:
- msr_info->data = svm->vmcb->save.br_to;
- break;
case MSR_IA32_LASTINTFROMIP:
- msr_info->data = svm->vmcb->save.last_excp_from;
- break;
case MSR_IA32_LASTINTTOIP:
- msr_info->data = svm->vmcb->save.last_excp_to;
+ msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
break;
case MSR_VM_HSAVE_PA:
msr_info->data = svm->nested.hsave_msr;
@@ -2839,12 +2913,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (data & DEBUGCTL_RESERVED_BITS)
return 1;
- svm->vmcb->save.dbgctl = data;
- vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
- if (data & (1ULL<<0))
- svm_enable_lbrv(vcpu);
+ if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
+ svm->vmcb->save.dbgctl = data;
else
- svm_disable_lbrv(vcpu);
+ svm->vmcb01.ptr->save.dbgctl = data;
+
+ svm_update_lbrv(vcpu);
+
break;
case MSR_VM_HSAVE_PA:
/*
@@ -2901,9 +2976,16 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu)
svm_clear_vintr(to_svm(vcpu));
/*
- * For AVIC, the only reason to end up here is ExtINTs.
+ * If not running nested, for AVIC, the only reason to end up here is ExtINTs.
* In this case AVIC was temporarily disabled for
* requesting the IRQ window and we have to re-enable it.
+ *
+ * If running nested, still remove the VM wide AVIC inhibit to
+ * support case in which the interrupt window was requested when the
+ * vCPU was not running nested.
+
+ * All vCPUs which run still run nested, will remain to have their
+ * AVIC still inhibited due to per-cpu AVIC inhibition.
*/
kvm_clear_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
@@ -2914,7 +2996,6 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu)
static int pause_interception(struct kvm_vcpu *vcpu)
{
bool in_kernel;
-
/*
* CPL is not made available for an SEV-ES guest, therefore
* vcpu->arch.preempted_in_kernel can never be true. Just
@@ -2922,8 +3003,7 @@ static int pause_interception(struct kvm_vcpu *vcpu)
*/
in_kernel = !sev_es_guest(vcpu->kvm) && svm_get_cpl(vcpu) == 0;
- if (!kvm_pause_in_guest(vcpu->kvm))
- grow_ple_window(vcpu);
+ grow_ple_window(vcpu);
kvm_vcpu_on_spin(vcpu, in_kernel);
return kvm_skip_emulated_instruction(vcpu);
@@ -3117,8 +3197,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
"tr:",
save01->tr.selector, save01->tr.attrib,
save01->tr.limit, save01->tr.base);
- pr_err("cpl: %d efer: %016llx\n",
- save->cpl, save->efer);
+ pr_err("vmpl: %d cpl: %d efer: %016llx\n",
+ save->vmpl, save->cpl, save->efer);
pr_err("%-15s %016llx %-13s %016llx\n",
"cr0:", save->cr0, "cr2:", save->cr2);
pr_err("%-15s %016llx %-13s %016llx\n",
@@ -3496,14 +3576,20 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
* enabled, the STGI interception will not occur. Enable the irq
* window under the assumption that the hardware will set the GIF.
*/
- if (vgif_enabled(svm) || gif_set(svm)) {
+ if (vgif || gif_set(svm)) {
/*
* IRQ window is not needed when AVIC is enabled,
* unless we have pending ExtINT since it cannot be injected
- * via AVIC. In such case, we need to temporarily disable AVIC,
+ * via AVIC. In such case, KVM needs to temporarily disable AVIC,
* and fallback to injecting IRQ via V_IRQ.
+ *
+ * If running nested, AVIC is already locally inhibited
+ * on this vCPU, therefore there is no need to request
+ * the VM wide AVIC inhibition.
*/
- kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
+ if (!is_guest_mode(vcpu))
+ kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
+
svm_set_vintr(svm);
}
}
@@ -3516,7 +3602,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
return; /* IRET will cause a vm exit */
if (!gif_set(svm)) {
- if (vgif_enabled(svm))
+ if (vgif)
svm_set_intercept(svm, INTERCEPT_STGI);
return; /* STGI will cause a vm exit */
}
@@ -3865,7 +3951,7 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
hv_track_root_tdp(vcpu, root_hpa);
cr3 = vcpu->arch.cr3;
- } else if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
+ } else if (vcpu->arch.mmu->root_role.level >= PT64_ROOT_4LEVEL) {
cr3 = __sme_set(root_hpa) | kvm_get_active_pcid(vcpu);
} else {
/* PCID in the guest should be impossible with a 32-bit MMU. */
@@ -3946,6 +4032,17 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
+ svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
+
+ svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
+
+ svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
+
+ svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
+
+ svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
svm_recalc_instruction_intercepts(vcpu, svm);
@@ -3963,13 +4060,6 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
*/
if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC))
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_X2APIC);
-
- /*
- * Currently, AVIC does not work with nested virtualization.
- * So, we disable AVIC when cpuid for SVM is set in the L1 guest.
- */
- if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM))
- kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_NESTED);
}
init_vmcb_after_set_cpuid(vcpu);
}
@@ -4224,7 +4314,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
- ret = nested_svm_vmexit(svm);
+ ret = nested_svm_simple_vmexit(svm, SVM_EXIT_SW);
if (ret)
return ret;
@@ -4321,7 +4411,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
if (!gif_set(svm)) {
- if (vgif_enabled(svm))
+ if (vgif)
svm_set_intercept(svm, INTERCEPT_STGI);
/* STGI will cause a vm exit */
} else {
@@ -4605,7 +4695,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.sched_in = svm_sched_in,
- .pmu_ops = &amd_pmu_ops,
.nested_ops = &svm_nested_ops,
.deliver_interrupt = svm_deliver_interrupt,
@@ -4620,6 +4709,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_ioctl = sev_mem_enc_ioctl,
.mem_enc_register_region = sev_mem_enc_register_region,
.mem_enc_unregister_region = sev_mem_enc_unregister_region,
+ .guest_memory_reclaimed = sev_guest_memory_reclaimed,
.vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
.vm_move_enc_context_from = sev_vm_move_enc_context_from,
@@ -4632,6 +4722,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.complete_emulated_msr = svm_complete_emulated_msr,
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
+ .vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
};
/*
@@ -4695,6 +4786,20 @@ static __init void svm_set_cpu_caps(void)
if (tsc_scaling)
kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
+ if (vls)
+ kvm_cpu_cap_set(X86_FEATURE_V_VMSAVE_VMLOAD);
+ if (lbrv)
+ kvm_cpu_cap_set(X86_FEATURE_LBRV);
+
+ if (boot_cpu_has(X86_FEATURE_PAUSEFILTER))
+ kvm_cpu_cap_set(X86_FEATURE_PAUSEFILTER);
+
+ if (boot_cpu_has(X86_FEATURE_PFTHRESHOLD))
+ kvm_cpu_cap_set(X86_FEATURE_PFTHRESHOLD);
+
+ if (vgif)
+ kvm_cpu_cap_set(X86_FEATURE_VGIF);
+
/* Nested VM can receive #VMEXIT instead of triggering #GP */
kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
}
@@ -4788,6 +4893,9 @@ static __init int svm_hardware_setup(void)
get_npt_level(), PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
+ /* Setup shadow_me_value and shadow_me_mask */
+ kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask);
+
/* Note, SEV setup consumes npt_enabled. */
sev_hardware_setup();
@@ -4806,15 +4914,20 @@ static __init int svm_hardware_setup(void)
nrips = false;
}
- enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
+ enable_apicv = avic = avic && npt_enabled && (boot_cpu_has(X86_FEATURE_AVIC) || force_avic);
if (enable_apicv) {
- pr_info("AVIC enabled\n");
+ if (!boot_cpu_has(X86_FEATURE_AVIC)) {
+ pr_warn("AVIC is not supported in CPUID but force enabled");
+ pr_warn("Your system might crash and burn");
+ } else
+ pr_info("AVIC enabled\n");
amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
} else {
svm_x86_ops.vcpu_blocking = NULL;
svm_x86_ops.vcpu_unblocking = NULL;
+ svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;
}
if (vls) {
@@ -4879,6 +4992,7 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
.check_processor_compatibility = svm_check_processor_compat,
.runtime_ops = &svm_x86_ops,
+ .pmu_ops = &amd_pmu_ops,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f77a7d2d39dd..21c5460e947a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -29,10 +29,11 @@
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
-#define MAX_DIRECT_ACCESS_MSRS 20
+#define MAX_DIRECT_ACCESS_MSRS 21
#define MSRPM_OFFSETS 16
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
+extern int vgif;
extern bool intercept_smi;
/*
@@ -181,7 +182,7 @@ struct svm_nested_state {
struct vcpu_sev_es_state {
/* SEV-ES support */
- struct vmcb_save_area *vmsa;
+ struct sev_es_save_area *vmsa;
struct ghcb *ghcb;
struct kvm_host_map ghcb_map;
bool received_first_sipi;
@@ -231,9 +232,14 @@ struct vcpu_svm {
unsigned int3_injected;
unsigned long int3_rip;
- /* cached guest cpuid flags for faster access */
+ /* optional nested SVM features that are enabled for this guest */
bool nrips_enabled : 1;
bool tsc_scaling_enabled : 1;
+ bool v_vmload_vmsave_enabled : 1;
+ bool lbrv_enabled : 1;
+ bool pause_filter_enabled : 1;
+ bool pause_threshold_enabled : 1;
+ bool vgif_enabled : 1;
u32 ldr_reg;
u32 dfr_reg;
@@ -452,44 +458,70 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
return vmcb_is_intercept(&svm->vmcb->control, bit);
}
-static inline bool vgif_enabled(struct vcpu_svm *svm)
+static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
{
- return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
+ return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
+}
+
+static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
+{
+ if (!vgif)
+ return NULL;
+
+ if (is_guest_mode(&svm->vcpu) && !nested_vgif_enabled(svm))
+ return svm->nested.vmcb02.ptr;
+ else
+ return svm->vmcb01.ptr;
}
static inline void enable_gif(struct vcpu_svm *svm)
{
- if (vgif_enabled(svm))
- svm->vmcb->control.int_ctl |= V_GIF_MASK;
+ struct vmcb *vmcb = get_vgif_vmcb(svm);
+
+ if (vmcb)
+ vmcb->control.int_ctl |= V_GIF_MASK;
else
svm->vcpu.arch.hflags |= HF_GIF_MASK;
}
static inline void disable_gif(struct vcpu_svm *svm)
{
- if (vgif_enabled(svm))
- svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
+ struct vmcb *vmcb = get_vgif_vmcb(svm);
+
+ if (vmcb)
+ vmcb->control.int_ctl &= ~V_GIF_MASK;
else
svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
}
static inline bool gif_set(struct vcpu_svm *svm)
{
- if (vgif_enabled(svm))
- return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
+ struct vmcb *vmcb = get_vgif_vmcb(svm);
+
+ if (vmcb)
+ return !!(vmcb->control.int_ctl & V_GIF_MASK);
else
return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}
+static inline bool nested_npt_enabled(struct vcpu_svm *svm)
+{
+ return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
+}
+
/* svm.c */
#define MSR_INVALID 0xffffffffU
+#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+
extern bool dump_invalid_vmcb;
u32 svm_msrpm_offset(u32 msr);
u32 *svm_vcpu_alloc_msrpm(void);
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
void svm_vcpu_free_msrpm(u32 *msrpm);
+void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+void svm_update_lbrv(struct kvm_vcpu *vcpu);
int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
@@ -574,7 +606,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
-void avic_init_vmcb(struct vcpu_svm *svm);
+void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
int avic_init_vcpu(struct vcpu_svm *svm);
@@ -592,6 +624,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
void avic_ring_doorbell(struct kvm_vcpu *vcpu);
+unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
/* sev.c */
@@ -609,6 +642,8 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
struct kvm_enc_region *range);
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
+void sev_guest_memory_reclaimed(struct kvm *kvm);
+
void pre_sev_run(struct vcpu_svm *svm, int cpu);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
@@ -620,7 +655,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
void sev_es_init_vmcb(struct vcpu_svm *svm);
void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
-void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa);
+void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */