diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-04-22 17:58:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-04-22 17:58:36 -0700 |
commit | bb4ce2c65881a2b9bdcd384f54a260a12a89dd91 (patch) | |
tree | ec4d74f4a0097bc661059b4b35261024b3a3fde2 /arch/x86/kvm/svm/sev.c | |
parent | 06fb4ecfeac7e00d6704fa5ed19299f2fefb3cc9 (diff) | |
parent | e852be8b148e117e25be1c98cf72ee489b05919e (diff) | |
download | linux-bb4ce2c65881a2b9bdcd384f54a260a12a89dd91.tar.bz2 |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"The main and larger change here is a workaround for AMD's lack of
cache coherency for encrypted-memory guests.
I have another patch pending, but it's waiting for review from the
architecture maintainers.
RISC-V:
- Remove 's' & 'u' as valid ISA extension
- Do not allow disabling the base extensions 'i'/'m'/'a'/'c'
x86:
- Fix NMI watchdog in guests on AMD
- Fix for SEV cache incoherency issues
- Don't re-acquire SRCU lock in complete_emulated_io()
- Avoid NULL pointer deref if VM creation fails
- Fix race conditions between APICv disabling and vCPU creation
- Bugfixes for disabling of APICv
- Preserve BSP MSR_KVM_POLL_CONTROL across suspend/resume
selftests:
- Do not use bitfields larger than 32-bits, they differ between GCC
and clang"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
kvm: selftests: introduce and use more page size-related constants
kvm: selftests: do not use bitfields larger than 32-bits for PTEs
KVM: SEV: add cache flush to solve SEV cache incoherency issues
KVM: SVM: Flush when freeing encrypted pages even on SME_COHERENT CPUs
KVM: SVM: Simplify and harden helper to flush SEV guest page(s)
KVM: selftests: Silence compiler warning in the kvm_page_table_test
KVM: x86/pmu: Update AMD PMC sample period to fix guest NMI-watchdog
x86/kvm: Preserve BSP MSR_KVM_POLL_CONTROL across suspend/resume
KVM: SPDX style and spelling fixes
KVM: x86: Skip KVM_GUESTDBG_BLOCKIRQ APICv update if APICv is disabled
KVM: x86: Pend KVM_REQ_APICV_UPDATE during vCPU creation to fix a race
KVM: nVMX: Defer APICv updates while L2 is active until L1 is active
KVM: x86: Tag APICv DISABLE inhibit, not ABSENT, if APICv is disabled
KVM: Initialize debugfs_dentry when a VM is created to avoid NULL deref
KVM: Add helpers to wrap vcpu->srcu_idx and yell if it's abused
KVM: RISC-V: Use kvm_vcpu.srcu_idx, drop RISC-V's unnecessary copy
KVM: x86: Don't re-acquire SRCU lock in complete_emulated_io()
RISC-V: KVM: Restrict the extensions that can be disabled
RISC-V: KVM: Remove 's' & 'u' as valid ISA extension
Diffstat (limited to 'arch/x86/kvm/svm/sev.c')
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 67 |
1 files changed, 32 insertions, 35 deletions
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 537aaddc852f..0ad70c12c7c3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2226,51 +2226,47 @@ int sev_cpu_init(struct svm_cpu_data *sd) * Pages used by hardware to hold guest encrypted state must be flushed before * returning them to the system. */ -static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va, - unsigned long len) +static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) { + int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; + /* - * If hardware enforced cache coherency for encrypted mappings of the - * same physical page is supported, nothing to do. + * Note! The address must be a kernel address, as regular page walk + * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user + * address is non-deterministic and unsafe. This function deliberately + * takes a pointer to deter passing in a user address. */ - if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) - return; + unsigned long addr = (unsigned long)va; /* - * If the VM Page Flush MSR is supported, use it to flush the page - * (using the page virtual address and the guest ASID). + * If CPU enforced cache coherency for encrypted mappings of the + * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache + * flush is still needed in order to work properly with DMA devices. */ - if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) { - struct kvm_sev_info *sev; - unsigned long va_start; - u64 start, stop; - - /* Align start and stop to page boundaries. */ - va_start = (unsigned long)va; - start = (u64)va_start & PAGE_MASK; - stop = PAGE_ALIGN((u64)va_start + len); - - if (start < stop) { - sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) { + clflush_cache_range(va, PAGE_SIZE); + return; + } - while (start < stop) { - wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, - start | sev->asid); + /* + * VM Page Flush takes a host virtual address and a guest ASID. Fall + * back to WBINVD if this faults so as not to make any problems worse + * by leaving stale encrypted data in the cache. + */ + if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid))) + goto do_wbinvd; - start += PAGE_SIZE; - } + return; - return; - } +do_wbinvd: + wbinvd_on_all_cpus(); +} - WARN(1, "Address overflow, using WBINVD\n"); - } +void sev_guest_memory_reclaimed(struct kvm *kvm) +{ + if (!sev_guest(kvm)) + return; - /* - * Hardware should always have one of the above features, - * but if not, use WBINVD and issue a warning. - */ - WARN_ONCE(1, "Using WBINVD to flush guest memory\n"); wbinvd_on_all_cpus(); } @@ -2284,7 +2280,8 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) svm = to_svm(vcpu); if (vcpu->arch.guest_state_protected) - sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE); + sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa); + __free_page(virt_to_page(svm->sev_es.vmsa)); if (svm->sev_es.ghcb_sa_free) |