From fa7a549d321a4189677b0cea86e58d9db7977f7b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Jul 2021 17:37:49 -0400 Subject: KVM: x86: accept userspace interrupt only if no event is injected Once an exception has been injected, any side effects related to the exception (such as setting CR2 or DR6) have been taked place. Therefore, once KVM sets the VM-entry interruption information field or the AMD EVENTINJ field, the next VM-entry must deliver that exception. Pending interrupts are processed after injected exceptions, so in theory it would not be a problem to use KVM_INTERRUPT when an injected exception is present. However, DOSEMU is using run->ready_for_interrupt_injection to detect interrupt windows and then using KVM_SET_SREGS/KVM_SET_REGS to inject the interrupt manually. For this to work, the interrupt window must be delayed after the completion of the previous event injection. Cc: stable@vger.kernel.org Reported-by: Stas Sergeev Tested-by: Stas Sergeev Fixes: 71cc849b7093 ("KVM: x86: Fix split-irqchip vs interrupt injection window request") Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4116567f3d44..e5d5c5ed7dd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { - return kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_accept_dm_intr(vcpu); + /* + * Do not cause an interrupt window exit if an exception + * is pending or an event needs reinjection; userspace + * might want to inject the interrupt manually using KVM_SET_REGS + * or KVM_SET_SREGS. For that to work, we must be at an + * instruction boundary and with no events half-injected. + */ + return (kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_accept_dm_intr(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + !vcpu->arch.exception.pending); } static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 2e2f1e8d0450c561c0c936b4b67e8b5a95975fb7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:22 +0200 Subject: KVM: x86: hyper-v: Check access to hypercall before reading XMM registers In case guest doesn't have access to the particular hypercall we can avoid reading XMM registers. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index b07592ca92f0..cb7e045905a5 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2173,9 +2173,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; hc.rep = !!(hc.rep_cnt || hc.rep_idx); - if (hc.fast && is_xmm_fast_hypercall(&hc)) - kvm_hv_hypercall_read_xmm(&hc); - trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); @@ -2184,6 +2181,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) goto hypercall_complete; } + if (hc.fast && is_xmm_fast_hypercall(&hc)) + kvm_hv_hypercall_read_xmm(&hc); + switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: if (unlikely(hc.rep)) { -- cgit v1.2.3 From f5714bbb5b3120b33dfbf3d81ffc0b98ae4cd4c1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:23 +0200 Subject: KVM: x86: Introduce trace_kvm_hv_hypercall_done() Hypercall failures are unusual with potentially far going consequences so it would be useful to see their results when tracing. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 1 + arch/x86/kvm/trace.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index cb7e045905a5..2945b93dbadd 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { + trace_kvm_hv_hypercall_done(result); kvm_hv_hypercall_set_result(vcpu, result); ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b484141ea15b..03ebe368333e 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall, __entry->outgpa) ); +TRACE_EVENT(kvm_hv_hypercall_done, + TP_PROTO(u64 result), + TP_ARGS(result), + + TP_STRUCT__entry( + __field(__u64, result) + ), + + TP_fast_assign( + __entry->result = result; + ), + + TP_printk("result 0x%llx", __entry->result) +); + /* * Tracepoint for Xen hypercall. */ -- cgit v1.2.3 From 4e62aa96d6e55c1b2a4e841f1f8601eae81e81ae Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:24 +0200 Subject: KVM: x86: hyper-v: Check if guest is allowed to use XMM registers for hypercall input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TLFS states that "Availability of the XMM fast hypercall interface is indicated via the “Hypervisor Feature Identification” CPUID Leaf (0x40000003, see section 2.4.4) ... Any attempt to use this interface when the hypervisor does not indicate availability will result in a #UD fault." Implement the check for 'strict' mode (KVM_CAP_HYPERV_ENFORCE_CPUID). Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2945b93dbadd..0b38f944c6b6 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2140,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code) int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_hv_hcall hc; u64 ret = HV_STATUS_SUCCESS; @@ -2177,13 +2178,21 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); - if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) { + if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) { ret = HV_STATUS_ACCESS_DENIED; goto hypercall_complete; } - if (hc.fast && is_xmm_fast_hypercall(&hc)) + if (hc.fast && is_xmm_fast_hypercall(&hc)) { + if (unlikely(hv_vcpu->enforce_cpuid && + !(hv_vcpu->cpuid_cache.features_edx & + HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + kvm_hv_hypercall_read_xmm(&hc); + } switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: -- cgit v1.2.3 From 179c6c27bf487273652efc99acd3ba512a23c137 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 3 Aug 2021 09:27:46 -0700 Subject: KVM: SVM: Fix off-by-one indexing when nullifying last used SEV VMCB Use the raw ASID, not ASID-1, when nullifying the last used VMCB when freeing an SEV ASID. The consumer, pre_sev_run(), indexes the array by the raw ASID, thus KVM could get a false negative when checking for a different VMCB if KVM manages to reallocate the same ASID+VMCB combo for a new VM. Note, this cannot cause a functional issue _in the current code_, as pre_sev_run() also checks which pCPU last did VMRUN for the vCPU, and last_vmentry_cpu is initialized to -1 during vCPU creation, i.e. is guaranteed to mismatch on the first VMRUN. However, prior to commit 8a14fe4f0c54 ("kvm: x86: Move last_cpu into kvm_vcpu_arch as last_vmentry_cpu"), SVM tracked pCPU on its own and zero-initialized the last_cpu variable. Thus it's theoretically possible that older versions of KVM could miss a TLB flush if the first VMRUN is on pCPU0 and the ASID and VMCB exactly match those of a prior VM. Fixes: 70cd94e60c73 ("KVM: SVM: VMRUN should use associated ASID when SEV is enabled") Cc: Tom Lendacky Cc: Brijesh Singh Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6710d9ee2e4b..4d0aba185412 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -189,7 +189,7 @@ static void sev_asid_free(struct kvm_sev_info *sev) for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); - sd->sev_vmcbs[pos] = NULL; + sd->sev_vmcbs[sev->asid] = NULL; } mutex_unlock(&sev_bitmap_lock); -- cgit v1.2.3 From bb2baeb214a71cda47d50dce80414016117ddda0 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 2 Aug 2021 11:09:03 -0700 Subject: KVM: SVM: improve the code readability for ASID management KVM SEV code uses bitmaps to manage ASID states. ASID 0 was always skipped because it is never used by VM. Thus, in existing code, ASID value and its bitmap postion always has an 'offset-by-1' relationship. Both SEV and SEV-ES shares the ASID space, thus KVM uses a dynamic range [min_asid, max_asid] to handle SEV and SEV-ES ASIDs separately. Existing code mixes the usage of ASID value and its bitmap position by using the same variable called 'min_asid'. Fix the min_asid usage: ensure that its usage is consistent with its name; allocate extra size for ASID 0 to ensure that each ASID has the same value with its bitmap position. Add comments on ASID bitmap allocation to clarify the size change. Signed-off-by: Mingwei Zhang Cc: Tom Lendacky Cc: Marc Orr Cc: David Rientjes Cc: Alper Gun Cc: Dionna Glaze Cc: Sean Christopherson Cc: Vipin Sharma Cc: Peter Gonda Cc: Joerg Roedel Message-Id: <20210802180903.159381-1-mizhang@google.com> [Fix up sev_asid_free to also index by ASID, as suggested by Sean Christopherson, and use nr_asids in sev_cpu_init. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 4d0aba185412..7fbce342eec4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock); unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long sev_me_mask; +static unsigned int nr_asids; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; @@ -78,11 +79,11 @@ struct enc_region { /* Called with the sev_bitmap_lock held, or on shutdown */ static int sev_flush_asids(int min_asid, int max_asid) { - int ret, pos, error = 0; + int ret, asid, error = 0; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid); - if (pos >= max_asid) + asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); + if (asid > max_asid) return -EBUSY; /* @@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid) /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, - max_sev_asid); - bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); + nr_asids); + bitmap_zero(sev_reclaim_asid_bitmap, nr_asids); return true; } static int sev_asid_new(struct kvm_sev_info *sev) { - int pos, min_asid, max_asid, ret; + int asid, min_asid, max_asid, ret; bool retry = true; enum misc_res_type type; @@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev) * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ - min_asid = sev->es_active ? 0 : min_sev_asid - 1; + min_asid = sev->es_active ? 1 : min_sev_asid; max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); - if (pos >= max_asid) { + asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); + if (asid > max_asid) { if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; @@ -157,11 +158,11 @@ again: goto e_uncharge; } - __set_bit(pos, sev_asid_bitmap); + __set_bit(asid, sev_asid_bitmap); mutex_unlock(&sev_bitmap_lock); - return pos + 1; + return asid; e_uncharge: misc_cg_uncharge(type, sev->misc_cg, 1); put_misc_cg(sev->misc_cg); @@ -179,13 +180,12 @@ static int sev_get_asid(struct kvm *kvm) static void sev_asid_free(struct kvm_sev_info *sev) { struct svm_cpu_data *sd; - int cpu, pos; + int cpu; enum misc_res_type type; mutex_lock(&sev_bitmap_lock); - pos = sev->asid - 1; - __set_bit(pos, sev_reclaim_asid_bitmap); + __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); @@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void) min_sev_asid = edx; sev_me_mask = 1UL << (ebx & 0x3f); - /* Initialize SEV ASID bitmaps */ - sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + /* + * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, + * even though it's never used, so that the bitmap is indexed by the + * actual ASID. + */ + nr_asids = max_sev_asid + 1; + sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_asid_bitmap) goto out; - sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) { bitmap_free(sev_asid_bitmap); sev_asid_bitmap = NULL; @@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void) return; /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ - sev_flush_asids(0, max_sev_asid); + sev_flush_asids(1, max_sev_asid); bitmap_free(sev_asid_bitmap); bitmap_free(sev_reclaim_asid_bitmap); @@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) if (!sev_enabled) return 0; - sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); + sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) return -ENOMEM; -- cgit v1.2.3 From d5aaad6f83420efb8357ac8e11c868708b22d0a9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 4 Aug 2021 14:46:09 -0700 Subject: KVM: x86/mmu: Fix per-cpu counter corruption on 32-bit builds Take a signed 'long' instead of an 'unsigned long' for the number of pages to add/subtract to the total number of pages used by the MMU. This fixes a zero-extension bug on 32-bit kernels that effectively corrupts the per-cpu counter used by the shrinker. Per-cpu counters take a signed 64-bit value on both 32-bit and 64-bit kernels, whereas kvm_mod_used_mmu_pages() takes an unsigned long and thus an unsigned 32-bit value on 32-bit kernels. As a result, the value used to adjust the per-cpu counter is zero-extended (unsigned -> signed), not sign-extended (signed -> signed), and so KVM's intended -1 gets morphed to 4294967295 and effectively corrupts the counter. This was found by a staggering amount of sheer dumb luck when running kvm-unit-tests on a 32-bit KVM build. The shrinker just happened to kick in while running tests and do_shrink_slab() logged an error about trying to free a negative number of objects. The truly lucky part is that the kernel just happened to be a slightly stale build, as the shrinker no longer yells about negative objects as of commit 18bb473e5031 ("mm: vmscan: shrink deferred objects proportional to priority"). vmscan: shrink_slab: mmu_shrink_scan+0x0/0x210 [kvm] negative objects to delete nr=-858993460 Fixes: bc8a3d8925a8 ("kvm: mmu: Fix overflow on kvm mmu page limit calculation") Cc: stable@vger.kernel.org Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210804214609.1096003-1-seanjc@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 66f7f5bc3482..c4f4fa23320e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); -- cgit v1.2.3