author     Paolo Bonzini <pbonzini@redhat.com>   2022-05-20 07:16:27 -0400
committer  Paolo Bonzini <pbonzini@redhat.com>   2022-05-25 05:09:23 -0400
commit     47e8eec83262083c7da220446551eaad614218ea
tree       1bcdf6cb6541441d1042fdf68c2f7982d80a9178  /arch/x86/kvm
parent     825be3b5abae1e67db45ff7d4b9a7764a2419bd9
parent     5c0ad551e9aa6188f2bda0977c1cb6768a2b74ef
Merge tag 'kvmarm-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 5.19
- Add support for the ARMv8.6 WFxT extension
- Guard pages for the EL2 stacks
- Trap and emulate AArch32 ID registers to hide unsupported features
- Ability to select and save/restore the set of hypercalls exposed
to the guest
- Support for PSCI-initiated suspend in collaboration with userspace
- GICv3 register-based LPI invalidation support
- Move host PMU event merging into the vcpu data structure
- GICv3 ITS save/restore fixes
- The usual set of small-scale cleanups and fixes
[Due to the conflict, KVM_SYSTEM_EVENT_SEV_TERM is relocated
from 4 to 6. - Paolo]
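
The KVM_SYSTEM_EVENT_SEV_TERM renumbering is visible to userspace: the PSCI suspend work adds new system event types ahead of it, so a VMM that matched the event by the raw value 4 would misinterpret SEV termination reports from a 5.19 kernel. Below is a minimal, hedged sketch of KVM_EXIT_SYSTEM_EVENT handling that stays correct across the renumber; it assumes a 5.19-era <linux/kvm.h>, and the two handle_*() helpers are hypothetical VMM callbacks, not KVM APIs.

#include <linux/kvm.h>
#include <stdio.h>

/* Hypothetical VMM-specific reactions; stand-ins for illustration only. */
static int handle_sev_termination(struct kvm_run *run) { (void)run; return 0; }
static int handle_guest_suspend(struct kvm_run *run)   { (void)run; return 0; }

/* Called when KVM_RUN returns with run->exit_reason == KVM_EXIT_SYSTEM_EVENT. */
static int handle_system_event(struct kvm_run *run)
{
        switch (run->system_event.type) {
        case KVM_SYSTEM_EVENT_SHUTDOWN:
        case KVM_SYSTEM_EVENT_RESET:
        case KVM_SYSTEM_EVENT_CRASH:
                return 0;                       /* tear down or restart the VM */
        case KVM_SYSTEM_EVENT_SEV_TERM:         /* 6 after this merge, 4 before */
                return handle_sev_termination(run);
        case KVM_SYSTEM_EVENT_SUSPEND:          /* added by the PSCI suspend support */
                return handle_guest_suspend(run);
        default:
                fprintf(stderr, "unhandled system event %u\n", run->system_event.type);
                return -1;
        }
}

Matching on the symbolic names rather than hard-coded numbers is what keeps such a handler working on both sides of the renumber.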
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/cpuid.c         | 19
-rw-r--r--  arch/x86/kvm/pmu.h           |  9
-rw-r--r--  arch/x86/kvm/svm/pmu.c       |  1
-rw-r--r--  arch/x86/kvm/svm/sev.c       | 67
-rw-r--r--  arch/x86/kvm/svm/svm.c       |  1
-rw-r--r--  arch/x86/kvm/svm/svm.h       |  2
-rw-r--r--  arch/x86/kvm/vmx/nested.c    |  5
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c |  8
-rw-r--r--  arch/x86/kvm/vmx/vmx.c       |  5
-rw-r--r--  arch/x86/kvm/vmx/vmx.h       |  1
-rw-r--r--  arch/x86/kvm/x86.c           | 60
11 files changed, 109 insertions, 69 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 732724ea5b10..0c1ba6aa0765 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1090,12 +1090,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 	case 0x80000000:
 		entry->eax = min(entry->eax, 0x80000021);
 		/*
-		 * Serializing LFENCE is reported in a multitude of ways,
-		 * and NullSegClearsBase is not reported in CPUID on Zen2;
-		 * help userspace by providing the CPUID leaf ourselves.
+		 * Serializing LFENCE is reported in a multitude of ways, and
+		 * NullSegClearsBase is not reported in CPUID on Zen2; help
+		 * userspace by providing the CPUID leaf ourselves.
+		 *
+		 * However, only do it if the host has CPUID leaf 0x8000001d.
+		 * QEMU thinks that it can query the host blindly for that
+		 * CPUID leaf if KVM reports that it supports 0x8000001d or
+		 * above. The processor merrily returns values from the
+		 * highest Intel leaf which QEMU tries to use as the guest's
+		 * 0x8000001d. Even worse, this can result in an infinite
+		 * loop if said highest leaf has no subleaves indexed by ECX.
 		 */
-		if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
-		    || !static_cpu_has_bug(X86_BUG_NULL_SEG))
+		if (entry->eax >= 0x8000001d &&
+		    (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
+		     || !static_cpu_has_bug(X86_BUG_NULL_SEG)))
 			entry->eax = max(entry->eax, 0x80000021);
 		break;
 	case 0x80000001:
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 2a53b6c9495c..e745f443b6a8 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -135,6 +135,15 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
 	return sample_period;
 }
 
+static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
+{
+	if (!pmc->perf_event || pmc->is_paused)
+		return;
+
+	perf_event_period(pmc->perf_event,
+			  get_sample_period(pmc, pmc->counter));
+}
+
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
 void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 47e8eaca1e90..136039fc6d01 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -279,6 +279,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
 	if (pmc) {
 		pmc->counter += data - pmc_read_counter(pmc);
+		pmc_update_sample_period(pmc);
 		return 0;
 	}
 	/* MSR_EVNTSELn */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index b67ce873d5d2..94d62c9958b9 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2226,51 +2226,47 @@ int sev_cpu_init(struct svm_cpu_data *sd)
  * Pages used by hardware to hold guest encrypted state must be flushed before
  * returning them to the system.
  */
-static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
-				   unsigned long len)
+static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
 {
+	int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+
 	/*
-	 * If hardware enforced cache coherency for encrypted mappings of the
-	 * same physical page is supported, nothing to do.
+	 * Note! The address must be a kernel address, as regular page walk
+	 * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user
+	 * address is non-deterministic and unsafe. This function deliberately
+	 * takes a pointer to deter passing in a user address.
 	 */
-	if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
-		return;
+	unsigned long addr = (unsigned long)va;
 
 	/*
-	 * If the VM Page Flush MSR is supported, use it to flush the page
-	 * (using the page virtual address and the guest ASID).
+	 * If CPU enforced cache coherency for encrypted mappings of the
+	 * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache
+	 * flush is still needed in order to work properly with DMA devices.
 	 */
-	if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
-		struct kvm_sev_info *sev;
-		unsigned long va_start;
-		u64 start, stop;
-
-		/* Align start and stop to page boundaries. */
-		va_start = (unsigned long)va;
-		start = (u64)va_start & PAGE_MASK;
-		stop = PAGE_ALIGN((u64)va_start + len);
-
-		if (start < stop) {
-			sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+	if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
+		clflush_cache_range(va, PAGE_SIZE);
+		return;
+	}
 
-			while (start < stop) {
-				wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
-				       start | sev->asid);
+	/*
+	 * VM Page Flush takes a host virtual address and a guest ASID. Fall
+	 * back to WBINVD if this faults so as not to make any problems worse
+	 * by leaving stale encrypted data in the cache.
+	 */
+	if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
+		goto do_wbinvd;
 
-				start += PAGE_SIZE;
-			}
+	return;
 
-			return;
-		}
+do_wbinvd:
+	wbinvd_on_all_cpus();
+}
 
-		WARN(1, "Address overflow, using WBINVD\n");
-	}
+void sev_guest_memory_reclaimed(struct kvm *kvm)
+{
+	if (!sev_guest(kvm))
+		return;
 
-	/*
-	 * Hardware should always have one of the above features,
-	 * but if not, use WBINVD and issue a warning.
-	 */
-	WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
 	wbinvd_on_all_cpus();
 }
 
@@ -2284,7 +2280,8 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
 	svm = to_svm(vcpu);
 
 	if (vcpu->arch.guest_state_protected)
-		sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
+		sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
+
 	__free_page(virt_to_page(svm->sev_es.vmsa));
 
 	if (svm->sev_es.ghcb_sa_free)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3b49337998ec..63880b33ce37 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4709,6 +4709,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.mem_enc_ioctl = sev_mem_enc_ioctl,
 	.mem_enc_register_region = sev_mem_enc_register_region,
 	.mem_enc_unregister_region = sev_mem_enc_unregister_region,
+	.guest_memory_reclaimed = sev_guest_memory_reclaimed,
 
 	.vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
 	.vm_move_enc_context_from = sev_vm_move_enc_context_from,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 32220a1b0ea2..45a87b2a8b3c 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -642,6 +642,8 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
 				  struct kvm_enc_region *range);
 int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
 int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
+void sev_guest_memory_reclaimed(struct kvm *kvm);
+
 void pre_sev_run(struct vcpu_svm *svm, int cpu);
 void __init sev_set_cpu_caps(void);
 void __init sev_hardware_setup(void);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a6688663da4d..f5cb18e00e78 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4640,6 +4640,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 		kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 	}
 
+	if (vmx->nested.update_vmcs01_apicv_status) {
+		vmx->nested.update_vmcs01_apicv_status = false;
+		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+	}
+
 	if ((vm_exit_reason != -1) &&
 	    (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
 		vmx->nested.need_vmcs12_to_shadow_sync = true;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 9db662399487..37e9eb32e3d9 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -431,15 +431,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			    !(msr & MSR_PMC_FULL_WIDTH_BIT))
 				data = (s64)(s32)data;
 			pmc->counter += data - pmc_read_counter(pmc);
-			if (pmc->perf_event && !pmc->is_paused)
-				perf_event_period(pmc->perf_event,
-						  get_sample_period(pmc, data));
+			pmc_update_sample_period(pmc);
 			return 0;
 		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
 			pmc->counter += data - pmc_read_counter(pmc);
-			if (pmc->perf_event && !pmc->is_paused)
-				perf_event_period(pmc->perf_event,
-						  get_sample_period(pmc, data));
+			pmc_update_sample_period(pmc);
 			return 0;
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
 			if (data == pmc->eventsel)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5fb37e75fd31..cbbcf97d9e66 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4174,6 +4174,11 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	if (is_guest_mode(vcpu)) {
+		vmx->nested.update_vmcs01_apicv_status = true;
+		return;
+	}
+
 	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
 	if (cpu_has_secondary_exec_ctrls()) {
 		if (kvm_vcpu_apicv_active(vcpu))
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 9c6bfcd84008..b98c7e96697a 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -183,6 +183,7 @@ struct nested_vmx {
 	bool change_vmcs01_virtual_apic_mode;
 	bool reload_vmcs01_apic_access_page;
 	bool update_vmcs01_cpu_dirty_logging;
+	bool update_vmcs01_apicv_status;
 
 	/*
 	 * Enlightened VMCS has been enabled. It does not mean that L1 has to
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b62c9b7795e..bc507d6414f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9164,7 +9164,7 @@ static void kvm_apicv_init(struct kvm *kvm)
 
 	if (!enable_apicv)
 		set_or_clear_apicv_inhibit(inhibits,
-					   APICV_INHIBIT_REASON_ABSENT, true);
+					   APICV_INHIBIT_REASON_DISABLE, true);
 }
 
 static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
@@ -9954,6 +9954,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 	kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
 }
 
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+	static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
+}
+
 static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
 	if (!lapic_in_kernel(vcpu))
@@ -10164,7 +10169,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	/* Store vcpu->apicv_active before vcpu->mode. */
 	smp_store_release(&vcpu->mode, IN_GUEST_MODE);
 
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	kvm_vcpu_srcu_read_unlock(vcpu);
 
 	/*
 	 * 1) We should set ->mode before checking ->requests. Please see
@@ -10195,7 +10200,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		smp_wmb();
 		local_irq_enable();
 		preempt_enable();
-		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+		kvm_vcpu_srcu_read_lock(vcpu);
 		r = 1;
 		goto cancel_injection;
 	}
@@ -10313,7 +10318,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	local_irq_enable();
 	preempt_enable();
 
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	kvm_vcpu_srcu_read_lock(vcpu);
 
 	/*
 	 * Profile KVM exit RIPs:
@@ -10343,7 +10348,7 @@ out:
 }
 
 /* Called within kvm->srcu read side. */
-static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+static inline int vcpu_block(struct kvm_vcpu *vcpu)
 {
 	bool hv_timer;
 
@@ -10359,12 +10364,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 		if (hv_timer)
 			kvm_lapic_switch_to_sw_timer(vcpu);
 
-		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+		kvm_vcpu_srcu_read_unlock(vcpu);
 		if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
 			kvm_vcpu_halt(vcpu);
 		else
 			kvm_vcpu_block(vcpu);
-		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+		kvm_vcpu_srcu_read_lock(vcpu);
 
 		if (hv_timer)
 			kvm_lapic_switch_to_hv_timer(vcpu);
@@ -10406,7 +10411,6 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
 static int vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int r;
-	struct kvm *kvm = vcpu->kvm;
 
 	vcpu->arch.l1tf_flush_l1d = true;
 
@@ -10414,7 +10418,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 		if (kvm_vcpu_running(vcpu)) {
 			r = vcpu_enter_guest(vcpu);
 		} else {
-			r = vcpu_block(kvm, vcpu);
+			r = vcpu_block(vcpu);
 		}
 
 		if (r <= 0)
@@ -10436,9 +10440,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 		}
 
 		if (__xfer_to_guest_mode_work_pending()) {
-			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+			kvm_vcpu_srcu_read_unlock(vcpu);
 			r = xfer_to_guest_mode_handle_work(vcpu);
-			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+			kvm_vcpu_srcu_read_lock(vcpu);
 			if (r)
 				return r;
 		}
@@ -10449,12 +10453,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 
 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
-	int r;
-
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	return r;
+	return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 }
 
 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
@@ -10546,7 +10545,6 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *kvm_run = vcpu->run;
-	struct kvm *kvm = vcpu->kvm;
 	int r;
 
 	vcpu_load(vcpu);
@@ -10554,7 +10552,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	kvm_run->flags = 0;
 	kvm_load_guest_fpu(vcpu);
 
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	kvm_vcpu_srcu_read_lock(vcpu);
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		if (kvm_run->immediate_exit) {
 			r = -EINTR;
@@ -10566,9 +10564,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 */
 		WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
 
-		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+		kvm_vcpu_srcu_read_unlock(vcpu);
 		kvm_vcpu_block(vcpu);
-		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+		kvm_vcpu_srcu_read_lock(vcpu);
 
 		if (kvm_apic_accept_events(vcpu) < 0) {
 			r = 0;
@@ -10629,7 +10627,7 @@ out:
 	if (kvm_run->kvm_valid_regs)
 		store_regs(vcpu);
 	post_kvm_run_save(vcpu);
-	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+	kvm_vcpu_srcu_read_unlock(vcpu);
 	kvm_sigset_deactivate(vcpu);
 
 	vcpu_put(vcpu);
@@ -11047,6 +11045,9 @@ static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	unsigned long i;
 
+	if (!enable_apicv)
+		return;
+
 	down_write(&kvm->arch.apicv_update_lock);
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -11258,8 +11259,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 		r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
 		if (r < 0)
 			goto fail_mmu_destroy;
-		if (kvm_apicv_activated(vcpu->kvm))
+
+		/*
+		 * Defer evaluating inhibits until the vCPU is first run, as
+		 * this vCPU will not get notified of any changes until this
+		 * vCPU is visible to other vCPUs (marked online and added to
+		 * the set of vCPUs). Opportunistically mark APICv active as
+		 * VMX in particularly is highly unlikely to have inhibits.
+		 * Ignore the current per-VM APICv state so that vCPU creation
+		 * is guaranteed to run with a deterministic value, the request
+		 * will ensure the vCPU gets the correct state before VM-Entry.
+		 */
+		if (enable_apicv) {
 			vcpu->arch.apicv_active = true;
+			kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+		}
 	} else
 		static_branch_inc(&kvm_has_noapic_vcpu);
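
Most of the arch/x86/kvm/x86.c churn above is mechanical: every open-coded srcu_read_lock()/srcu_read_unlock() pair on kvm->srcu, with the cookie stashed in vcpu->srcu_idx, becomes a call to kvm_vcpu_srcu_read_lock()/kvm_vcpu_srcu_read_unlock(). The helpers themselves live in common KVM code rather than in the x86 files covered by this diffstat, so their definitions do not appear here. The sketch below shows only their basic shape under that assumption; the in-tree versions also warn about unbalanced lock/unlock and keep the SRCU cookie in a field callers are not meant to touch directly, so the srcu_idx name merely mirrors the field visible in the removed lines.

static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
{
        /* Enter kvm->srcu and remember the cookie for the matching unlock. */
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
}

static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu)
{
        /* Leave the read-side critical section opened by the lock helper. */
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
}

Wrapping the pair behind helpers lets common code add sanity checks in one place instead of auditing every caller that juggles the index by hand.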