summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/svm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r-- arch/x86/kvm/svm/avic.c | 123
-rw-r--r-- arch/x86/kvm/svm/pmu.c | 2
-rw-r--r-- arch/x86/kvm/svm/svm.c | 490
-rw-r--r-- arch/x86/kvm/svm/svm.h | 17
4 files changed, 331 insertions, 301 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 0e5b49294086..90364d02f22a 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -295,13 +295,16 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
struct kvm_vcpu *vcpu;
unsigned long i;
+ /*
+ * Wake any target vCPUs that are blocking, i.e. waiting for a wake
+ * event. There's no need to signal doorbells, as hardware has handled
+ * vCPUs that were in guest at the time of the IPI, and vCPUs that have
+ * since entered the guest will have processed pending IRQs at VMRUN.
+ */
kvm_for_each_vcpu(i, vcpu, kvm) {
- bool m = kvm_apic_match_dest(vcpu, source,
- icrl & APIC_SHORT_MASK,
- GET_APIC_DEST_FIELD(icrh),
- icrl & APIC_DEST_MASK);
-
- if (m && !avic_vcpu_is_running(vcpu))
+ if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
+ GET_APIC_DEST_FIELD(icrh),
+ icrl & APIC_DEST_MASK))
kvm_vcpu_wake_up(vcpu);
}
}
@@ -672,9 +675,22 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
return -1;
kvm_lapic_set_irr(vec, vcpu->arch.apic);
+
+ /*
+ * Pairs with the smp_mb_*() after setting vcpu->mode in
+ * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
+ * the read of vcpu->mode, which guarantees that either VMRUN will see
+ * and process the new vIRR entry, or that the below code will signal
+ * the doorbell if the vCPU is already running in the guest.
+ */
smp_mb__after_atomic();
- if (avic_vcpu_is_running(vcpu)) {
+ /*
+ * Signal the doorbell to tell hardware to inject the IRQ if the vCPU
+ * is in the guest. If the vCPU is not in the guest, hardware will
+ * automatically process AVIC interrupts at VMRUN.
+ */
+ if (vcpu->mode == IN_GUEST_MODE) {
int cpu = READ_ONCE(vcpu->cpu);
/*
@@ -688,8 +704,13 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
if (cpu != get_cpu())
wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
put_cpu();
- } else
+ } else {
+ /*
+ * Wake the vCPU if it was blocking. KVM will then detect the
+ * pending IRQ when checking if the vCPU has a wake event.
+ */
kvm_vcpu_wake_up(vcpu);
+ }
return 0;
}
@@ -957,6 +978,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
+ lockdep_assert_preemption_disabled();
+
/*
* Since the host physical APIC id is 8 bits,
* we can support host APIC ID upto 255.
@@ -964,19 +987,25 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
+ /*
+ * No need to update anything if the vCPU is blocking, i.e. if the vCPU
+ * is being scheduled in after being preempted. The CPU entries in the
+ * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
+ * If the vCPU was migrated, its new CPU value will be stuffed when the
+ * vCPU unblocks.
+ */
+ if (kvm_vcpu_is_blocking(vcpu))
+ return;
+
entry = READ_ONCE(*(svm->avic_physical_id_cache));
WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
-
- entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
- if (svm->avic_is_running)
- entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+ entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
- avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
- svm->avic_is_running);
+ avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}
void avic_vcpu_put(struct kvm_vcpu *vcpu)
@@ -984,42 +1013,56 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
+ lockdep_assert_preemption_disabled();
+
entry = READ_ONCE(*(svm->avic_physical_id_cache));
- if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
- avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+
+ /* Nothing to do if IsRunning == '0' due to vCPU blocking. */
+ if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
+ return;
+
+ avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-/*
- * This function is called during VCPU halt/unhalt.
- */
-static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- struct vcpu_svm *svm = to_svm(vcpu);
- int cpu = get_cpu();
-
- WARN_ON(cpu != vcpu->cpu);
- svm->avic_is_running = is_run;
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
- if (kvm_vcpu_apicv_active(vcpu)) {
- if (is_run)
- avic_vcpu_load(vcpu, cpu);
- else
- avic_vcpu_put(vcpu);
- }
- put_cpu();
+ preempt_disable();
+
+ /*
+ * Unload the AVIC when the vCPU is about to block, _before_
+ * the vCPU actually blocks.
+ *
+ * Any IRQs that arrive before IsRunning=0 will not cause an
+ * incomplete IPI vmexit on the source, therefore vIRR will also
+ * be checked by kvm_vcpu_check_block() before blocking. The
+ * memory barrier implicit in set_current_state orders writing
+ * IsRunning=0 before reading the vIRR. The processor needs a
+ * matching memory barrier on interrupt delivery between writing
+ * IRR and reading IsRunning (the lack of this barrier might be
+ * the cause of erratum #1235).
+ */
+ avic_vcpu_put(vcpu);
+
+ preempt_enable();
}
-void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
+void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- avic_set_running(vcpu, false);
-}
+ int cpu;
-void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
-{
- if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
- kvm_vcpu_update_apicv(vcpu);
- avic_set_running(vcpu, true);
+ if (!kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ cpu = get_cpu();
+ WARN_ON(cpu != vcpu->cpu);
+
+ avic_vcpu_load(vcpu, cpu);
+
+ put_cpu();
}
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 12d8b301065a..5aa45f13b16d 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -101,7 +101,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
{
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
- if (!pmu)
+ if (!enable_pmu)
return NULL;
switch (msr) {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 46bcc706f257..2c99b18d76c0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -192,10 +192,6 @@ module_param(vgif, int, 0444);
static int lbrv = true;
module_param(lbrv, int, 0444);
-/* enable/disable PMU virtualization */
-bool pmu = true;
-module_param(pmu, bool, 0444);
-
static int tsc_scaling = true;
module_param(tsc_scaling, int, 0444);
@@ -873,47 +869,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
}
}
-/*
- * The default MMIO mask is a single bit (excluding the present bit),
- * which could conflict with the memory encryption bit. Check for
- * memory encryption support and override the default MMIO mask if
- * memory encryption is enabled.
- */
-static __init void svm_adjust_mmio_mask(void)
-{
- unsigned int enc_bit, mask_bit;
- u64 msr, mask;
-
- /* If there is no memory encryption support, use existing mask */
- if (cpuid_eax(0x80000000) < 0x8000001f)
- return;
-
- /* If memory encryption is not enabled, use existing mask */
- rdmsrl(MSR_AMD64_SYSCFG, msr);
- if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
- return;
-
- enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
- mask_bit = boot_cpu_data.x86_phys_bits;
-
- /* Increment the mask bit if it is the same as the encryption bit */
- if (enc_bit == mask_bit)
- mask_bit++;
-
- /*
- * If the mask bit location is below 52, then some bits above the
- * physical addressing limit will always be reserved, so use the
- * rsvd_bits() function to generate the mask. This mask, along with
- * the present bit, will be used to generate a page fault with
- * PFER.RSV = 1.
- *
- * If the mask bit location is 52 (or above), then clear the mask.
- */
- mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
-
- kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
-}
-
static void svm_hardware_teardown(void)
{
int cpu;
@@ -928,198 +883,6 @@ static void svm_hardware_teardown(void)
iopm_base = 0;
}
-static __init void svm_set_cpu_caps(void)
-{
- kvm_set_cpu_caps();
-
- supported_xss = 0;
-
- /* CPUID 0x80000001 and 0x8000000A (SVM features) */
- if (nested) {
- kvm_cpu_cap_set(X86_FEATURE_SVM);
-
- if (nrips)
- kvm_cpu_cap_set(X86_FEATURE_NRIPS);
-
- if (npt_enabled)
- kvm_cpu_cap_set(X86_FEATURE_NPT);
-
- if (tsc_scaling)
- kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
-
- /* Nested VM can receive #VMEXIT instead of triggering #GP */
- kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
- }
-
- /* CPUID 0x80000008 */
- if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
- boot_cpu_has(X86_FEATURE_AMD_SSBD))
- kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
-
- /* AMD PMU PERFCTR_CORE CPUID */
- if (pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
- kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
-
- /* CPUID 0x8000001F (SME/SEV features) */
- sev_set_cpu_caps();
-}
-
-static __init int svm_hardware_setup(void)
-{
- int cpu;
- struct page *iopm_pages;
- void *iopm_va;
- int r;
- unsigned int order = get_order(IOPM_SIZE);
-
- /*
- * NX is required for shadow paging and for NPT if the NX huge pages
- * mitigation is enabled.
- */
- if (!boot_cpu_has(X86_FEATURE_NX)) {
- pr_err_ratelimited("NX (Execute Disable) not supported\n");
- return -EOPNOTSUPP;
- }
- kvm_enable_efer_bits(EFER_NX);
-
- iopm_pages = alloc_pages(GFP_KERNEL, order);
-
- if (!iopm_pages)
- return -ENOMEM;
-
- iopm_va = page_address(iopm_pages);
- memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
- iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
-
- init_msrpm_offsets();
-
- supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
-
- if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
- kvm_enable_efer_bits(EFER_FFXSR);
-
- if (tsc_scaling) {
- if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
- tsc_scaling = false;
- } else {
- pr_info("TSC scaling supported\n");
- kvm_has_tsc_control = true;
- kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
- kvm_tsc_scaling_ratio_frac_bits = 32;
- }
- }
-
- tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
-
- /* Check for pause filtering support */
- if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
- pause_filter_count = 0;
- pause_filter_thresh = 0;
- } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
- pause_filter_thresh = 0;
- }
-
- if (nested) {
- printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
- kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
- }
-
- /*
- * KVM's MMU doesn't support using 2-level paging for itself, and thus
- * NPT isn't supported if the host is using 2-level paging since host
- * CR4 is unchanged on VMRUN.
- */
- if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
- npt_enabled = false;
-
- if (!boot_cpu_has(X86_FEATURE_NPT))
- npt_enabled = false;
-
- /* Force VM NPT level equal to the host's paging level */
- kvm_configure_mmu(npt_enabled, get_npt_level(),
- get_npt_level(), PG_LEVEL_1G);
- pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
-
- /* Note, SEV setup consumes npt_enabled. */
- sev_hardware_setup();
-
- svm_hv_hardware_setup();
-
- svm_adjust_mmio_mask();
-
- for_each_possible_cpu(cpu) {
- r = svm_cpu_init(cpu);
- if (r)
- goto err;
- }
-
- if (nrips) {
- if (!boot_cpu_has(X86_FEATURE_NRIPS))
- nrips = false;
- }
-
- enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
-
- if (enable_apicv) {
- pr_info("AVIC enabled\n");
-
- amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
- }
-
- if (vls) {
- if (!npt_enabled ||
- !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
- !IS_ENABLED(CONFIG_X86_64)) {
- vls = false;
- } else {
- pr_info("Virtual VMLOAD VMSAVE supported\n");
- }
- }
-
- if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
- svm_gp_erratum_intercept = false;
-
- if (vgif) {
- if (!boot_cpu_has(X86_FEATURE_VGIF))
- vgif = false;
- else
- pr_info("Virtual GIF supported\n");
- }
-
- if (lbrv) {
- if (!boot_cpu_has(X86_FEATURE_LBRV))
- lbrv = false;
- else
- pr_info("LBR virtualization supported\n");
- }
-
- if (!pmu)
- pr_info("PMU virtualization is disabled\n");
-
- svm_set_cpu_caps();
-
- /*
- * It seems that on AMD processors PTE's accessed bit is
- * being set by the CPU hardware before the NPF vmexit.
- * This is not expected behaviour and our tests fail because
- * of it.
- * A workaround here is to disable support for
- * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
- * In this case userspace can know if there is support using
- * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
- * it
- * If future AMD CPU models change the behaviour described above,
- * this variable can be changed accordingly
- */
- allow_smaller_maxphyaddr = !npt_enabled;
-
- return 0;
-
-err:
- svm_hardware_teardown();
- return r;
-}
-
static void init_seg(struct vmcb_seg *seg)
{
seg->selector = 0;
@@ -1444,12 +1207,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
if (err)
goto error_free_vmsa_page;
- /* We initialize this flag to true to make sure that the is_running
- * bit would be set the first time the vcpu is loaded.
- */
- if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
- svm->avic_is_running = true;
-
svm->msrpm = svm_vcpu_alloc_msrpm();
if (!svm->msrpm) {
err = -ENOMEM;
@@ -3833,6 +3590,11 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
svm_complete_interrupts(vcpu);
}
+static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
@@ -4629,8 +4391,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.prepare_guest_switch = svm_prepare_guest_switch,
.vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put,
- .vcpu_blocking = svm_vcpu_blocking,
- .vcpu_unblocking = svm_vcpu_unblocking,
+ .vcpu_blocking = avic_vcpu_blocking,
+ .vcpu_unblocking = avic_vcpu_unblocking,
.update_exception_bitmap = svm_update_exception_bitmap,
.get_msr_feature = svm_get_msr_feature,
@@ -4662,6 +4424,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.tlb_flush_gva = svm_flush_tlb_gva,
.tlb_flush_guest = svm_flush_tlb,
+ .vcpu_pre_run = svm_vcpu_pre_run,
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
@@ -4742,6 +4505,243 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
};
+/*
+ * The default MMIO mask is a single bit (excluding the present bit),
+ * which could conflict with the memory encryption bit. Check for
+ * memory encryption support and override the default MMIO mask if
+ * memory encryption is enabled.
+ */
+static __init void svm_adjust_mmio_mask(void)
+{
+ unsigned int enc_bit, mask_bit;
+ u64 msr, mask;
+
+ /* If there is no memory encryption support, use existing mask */
+ if (cpuid_eax(0x80000000) < 0x8000001f)
+ return;
+
+ /* If memory encryption is not enabled, use existing mask */
+ rdmsrl(MSR_AMD64_SYSCFG, msr);
+ if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
+ return;
+
+ enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
+ mask_bit = boot_cpu_data.x86_phys_bits;
+
+ /* Increment the mask bit if it is the same as the encryption bit */
+ if (enc_bit == mask_bit)
+ mask_bit++;
+
+ /*
+ * If the mask bit location is below 52, then some bits above the
+ * physical addressing limit will always be reserved, so use the
+ * rsvd_bits() function to generate the mask. This mask, along with
+ * the present bit, will be used to generate a page fault with
+ * PFER.RSV = 1.
+ *
+ * If the mask bit location is 52 (or above), then clear the mask.
+ */
+ mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
+
+ kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+}
+
+static __init void svm_set_cpu_caps(void)
+{
+ kvm_set_cpu_caps();
+
+ supported_xss = 0;
+
+ /* CPUID 0x80000001 and 0x8000000A (SVM features) */
+ if (nested) {
+ kvm_cpu_cap_set(X86_FEATURE_SVM);
+
+ if (nrips)
+ kvm_cpu_cap_set(X86_FEATURE_NRIPS);
+
+ if (npt_enabled)
+ kvm_cpu_cap_set(X86_FEATURE_NPT);
+
+ if (tsc_scaling)
+ kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
+
+ /* Nested VM can receive #VMEXIT instead of triggering #GP */
+ kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
+ }
+
+ /* CPUID 0x80000008 */
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
+ boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+
+ /* AMD PMU PERFCTR_CORE CPUID */
+ if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
+
+ /* CPUID 0x8000001F (SME/SEV features) */
+ sev_set_cpu_caps();
+}
+
+static __init int svm_hardware_setup(void)
+{
+ int cpu;
+ struct page *iopm_pages;
+ void *iopm_va;
+ int r;
+ unsigned int order = get_order(IOPM_SIZE);
+
+ /*
+ * NX is required for shadow paging and for NPT if the NX huge pages
+ * mitigation is enabled.
+ */
+ if (!boot_cpu_has(X86_FEATURE_NX)) {
+ pr_err_ratelimited("NX (Execute Disable) not supported\n");
+ return -EOPNOTSUPP;
+ }
+ kvm_enable_efer_bits(EFER_NX);
+
+ iopm_pages = alloc_pages(GFP_KERNEL, order);
+
+ if (!iopm_pages)
+ return -ENOMEM;
+
+ iopm_va = page_address(iopm_pages);
+ memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
+ iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
+
+ init_msrpm_offsets();
+
+ supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+ if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+ kvm_enable_efer_bits(EFER_FFXSR);
+
+ if (tsc_scaling) {
+ if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ tsc_scaling = false;
+ } else {
+ pr_info("TSC scaling supported\n");
+ kvm_has_tsc_control = true;
+ kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
+ kvm_tsc_scaling_ratio_frac_bits = 32;
+ }
+ }
+
+ tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
+
+ /* Check for pause filtering support */
+ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
+ pause_filter_count = 0;
+ pause_filter_thresh = 0;
+ } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
+ pause_filter_thresh = 0;
+ }
+
+ if (nested) {
+ printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
+ kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
+ }
+
+ /*
+ * KVM's MMU doesn't support using 2-level paging for itself, and thus
+ * NPT isn't supported if the host is using 2-level paging since host
+ * CR4 is unchanged on VMRUN.
+ */
+ if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
+ npt_enabled = false;
+
+ if (!boot_cpu_has(X86_FEATURE_NPT))
+ npt_enabled = false;
+
+ /* Force VM NPT level equal to the host's paging level */
+ kvm_configure_mmu(npt_enabled, get_npt_level(),
+ get_npt_level(), PG_LEVEL_1G);
+ pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
+
+ /* Note, SEV setup consumes npt_enabled. */
+ sev_hardware_setup();
+
+ svm_hv_hardware_setup();
+
+ svm_adjust_mmio_mask();
+
+ for_each_possible_cpu(cpu) {
+ r = svm_cpu_init(cpu);
+ if (r)
+ goto err;
+ }
+
+ if (nrips) {
+ if (!boot_cpu_has(X86_FEATURE_NRIPS))
+ nrips = false;
+ }
+
+ enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
+
+ if (enable_apicv) {
+ pr_info("AVIC enabled\n");
+
+ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+ } else {
+ svm_x86_ops.vcpu_blocking = NULL;
+ svm_x86_ops.vcpu_unblocking = NULL;
+ }
+
+ if (vls) {
+ if (!npt_enabled ||
+ !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
+ !IS_ENABLED(CONFIG_X86_64)) {
+ vls = false;
+ } else {
+ pr_info("Virtual VMLOAD VMSAVE supported\n");
+ }
+ }
+
+ if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
+ svm_gp_erratum_intercept = false;
+
+ if (vgif) {
+ if (!boot_cpu_has(X86_FEATURE_VGIF))
+ vgif = false;
+ else
+ pr_info("Virtual GIF supported\n");
+ }
+
+ if (lbrv) {
+ if (!boot_cpu_has(X86_FEATURE_LBRV))
+ lbrv = false;
+ else
+ pr_info("LBR virtualization supported\n");
+ }
+
+ if (!enable_pmu)
+ pr_info("PMU virtualization is disabled\n");
+
+ svm_set_cpu_caps();
+
+ /*
+ * It seems that on AMD processors PTE's accessed bit is
+ * being set by the CPU hardware before the NPF vmexit.
+ * This is not expected behaviour and our tests fail because
+ * of it.
+ * A workaround here is to disable support for
+ * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
+ * In this case userspace can know if there is support using
+ * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
+ * it
+ * If future AMD CPU models change the behaviour described above,
+ * this variable can be changed accordingly
+ */
+ allow_smaller_maxphyaddr = !npt_enabled;
+
+ return 0;
+
+err:
+ svm_hardware_teardown();
+ return r;
+}
+
+
static struct kvm_x86_init_ops svm_init_ops __initdata = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9f153c59f2c8..47ef8f4a9358 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -32,7 +32,6 @@
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern bool intercept_smi;
-extern bool pmu;
/*
* Clean bits in VMCB.
@@ -226,7 +225,6 @@ struct vcpu_svm {
u32 dfr_reg;
struct page *avic_backing_page;
u64 *avic_physical_id_cache;
- bool avic_is_running;
/*
* Per-vcpu list of struct amd_svm_iommu_ir:
@@ -574,17 +572,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- u64 *entry = svm->avic_physical_id_cache;
-
- if (!entry)
- return false;
-
- return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
-}
-
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
@@ -605,8 +592,8 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec);
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu);
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
-void svm_vcpu_blocking(struct kvm_vcpu *vcpu);
-void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
+void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
+void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
/* sev.c */