summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/svm/svm.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-15 11:12:21 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-15 11:12:21 -0800
commit8fa590bf344816c925810331eea8387627bbeb40 (patch)
tree86f3fe04b175e172ef2cd9089ba1b8a0f71434f1 /arch/x86/kvm/svm/svm.c
parent057b40f43ce429a02e793adf3cfbf2446a19a38e (diff)
parent549a715b98a13c6d05452be3ad37e980087bb081 (diff)
downloadlinux-8fa590bf344816c925810331eea8387627bbeb40.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM64: - Enable the per-vcpu dirty-ring tracking mechanism, together with an option to keep the good old dirty log around for pages that are dirtied by something other than a vcpu. - Switch to the relaxed parallel fault handling, using RCU to delay page table reclaim and giving better performance under load. - Relax the MTE ABI, allowing a VMM to use the MAP_SHARED mapping option, which multi-process VMMs such as crosvm rely on (see merge commit 382b5b87a97d: "Fix a number of issues with MTE, such as races on the tags being initialised vs the PG_mte_tagged flag as well as the lack of support for VM_SHARED when KVM is involved. Patches from Catalin Marinas and Peter Collingbourne"). - Merge the pKVM shadow vcpu state tracking that allows the hypervisor to have its own view of a vcpu, keeping that state private. - Add support for the PMUv3p5 architecture revision, bringing support for 64bit counters on systems that support it, and fix the no-quite-compliant CHAIN-ed counter support for the machines that actually exist out there. - Fix a handful of minor issues around 52bit VA/PA support (64kB pages only) as a prefix of the oncoming support for 4kB and 16kB pages. - Pick a small set of documentation and spelling fixes, because no good merge window would be complete without those. s390: - Second batch of the lazy destroy patches - First batch of KVM changes for kernel virtual != physical address support - Removal of a unused function x86: - Allow compiling out SMM support - Cleanup and documentation of SMM state save area format - Preserve interrupt shadow in SMM state save area - Respond to generic signals during slow page faults - Fixes and optimizations for the non-executable huge page errata fix. - Reprogram all performance counters on PMU filter change - Cleanups to Hyper-V emulation and tests - Process Hyper-V TLB flushes from a nested guest (i.e. from a L2 guest running on top of a L1 Hyper-V hypervisor) - Advertise several new Intel features - x86 Xen-for-KVM: - Allow the Xen runstate information to cross a page boundary - Allow XEN_RUNSTATE_UPDATE flag behaviour to be configured - Add support for 32-bit guests in SCHEDOP_poll - Notable x86 fixes and cleanups: - One-off fixes for various emulation flows (SGX, VMXON, NRIPS=0). - Reinstate IBPB on emulated VM-Exit that was incorrectly dropped a few years back when eliminating unnecessary barriers when switching between vmcs01 and vmcs02. - Clean up vmread_error_trampoline() to make it more obvious that params must be passed on the stack, even for x86-64. - Let userspace set all supported bits in MSR_IA32_FEAT_CTL irrespective of the current guest CPUID. - Fudge around a race with TSC refinement that results in KVM incorrectly thinking a guest needs TSC scaling when running on a CPU with a constant TSC, but no hardware-enumerated TSC frequency. - Advertise (on AMD) that the SMM_CTL MSR is not supported - Remove unnecessary exports Generic: - Support for responding to signals during page faults; introduces new FOLL_INTERRUPTIBLE flag that was reviewed by mm folks Selftests: - Fix an inverted check in the access tracking perf test, and restore support for asserting that there aren't too many idle pages when running on bare metal. - Fix build errors that occur in certain setups (unsure exactly what is unique about the problematic setup) due to glibc overriding static_assert() to a variant that requires a custom message. - Introduce actual atomics for clear/set_bit() in selftests - Add support for pinning vCPUs in dirty_log_perf_test. - Rename the so called "perf_util" framework to "memstress". - Add a lightweight psuedo RNG for guest use, and use it to randomize the access pattern and write vs. read percentage in the memstress tests. - Add a common ucall implementation; code dedup and pre-work for running SEV (and beyond) guests in selftests. - Provide a common constructor and arch hook, which will eventually be used by x86 to automatically select the right hypercall (AMD vs. Intel). - A bunch of added/enabled/fixed selftests for ARM64, covering memslots, breakpoints, stage-2 faults and access tracking. - x86-specific selftest changes: - Clean up x86's page table management. - Clean up and enhance the "smaller maxphyaddr" test, and add a related test to cover generic emulation failure. - Clean up the nEPT support checks. - Add X86_PROPERTY_* framework to retrieve multi-bit CPUID values. - Fix an ordering issue in the AMX test introduced by recent conversions to use kvm_cpu_has(), and harden the code to guard against similar bugs in the future. Anything that tiggers caching of KVM's supported CPUID, kvm_cpu_has() in this case, effectively hides opt-in XSAVE features if the caching occurs before the test opts in via prctl(). Documentation: - Remove deleted ioctls from documentation - Clean up the docs for the x86 MSR filter. - Various fixes" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (361 commits) KVM: x86: Add proper ReST tables for userspace MSR exits/flags KVM: selftests: Allocate ucall pool from MEM_REGION_DATA KVM: arm64: selftests: Align VA space allocator with TTBR0 KVM: arm64: Fix benign bug with incorrect use of VA_BITS KVM: arm64: PMU: Fix period computation for 64bit counters with 32bit overflow KVM: x86: Advertise that the SMM_CTL MSR is not supported KVM: x86: remove unnecessary exports KVM: selftests: Fix spelling mistake "probabalistic" -> "probabilistic" tools: KVM: selftests: Convert clear/set_bit() to actual atomics tools: Drop "atomic_" prefix from atomic test_and_set_bit() tools: Drop conflicting non-atomic test_and_{clear,set}_bit() helpers KVM: selftests: Use non-atomic clear/set bit helpers in KVM tests perf tools: Use dedicated non-atomic clear/set bit helpers tools: Take @bit as an "unsigned long" in {clear,set}_bit() helpers KVM: arm64: selftests: Enable single-step without a "full" ucall() KVM: x86: fix APICv/x2AVIC disabled when vm reboot by itself KVM: Remove stale comment about KVM_REQ_UNHALT KVM: Add missing arch for KVM_CREATE_DEVICE and KVM_{SET,GET}_DEVICE_ATTR KVM: Reference to kvm_userspace_memory_region in doc and comments KVM: Delete all references to removed KVM_SET_MEMORY_ALIAS ioctl ...
Diffstat (limited to 'arch/x86/kvm/svm/svm.c')
-rw-r--r--arch/x86/kvm/svm/svm.c63
1 files changed, 43 insertions, 20 deletions
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ce362e88a567..9a194aa1a75a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -6,6 +6,7 @@
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
+#include "smm.h"
#include "cpuid.h"
#include "pmu.h"
@@ -2708,8 +2709,6 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE;
break;
- case MSR_IA32_PERF_CAPABILITIES:
- return 0;
default:
return KVM_MSR_RET_INVALID;
}
@@ -3724,6 +3723,13 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
/*
+ * Unlike VMX, SVM doesn't provide a way to flush only NPT TLB entries.
+ * A TLB flush for the current ASID flushes both "host" and "guest" TLB
+ * entries, and thus is a superset of Hyper-V's fine grained flushing.
+ */
+ kvm_hv_vcpu_purge_flush_tlb(vcpu);
+
+ /*
* Flush only the current ASID even if the TLB flush was invoked via
* kvm_flush_remote_tlbs(). Although flushing remote TLBs requires all
* ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and
@@ -3889,8 +3895,14 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
- if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
- to_svm(vcpu)->vmcb->control.exit_info_1)
+ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+
+ /*
+ * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
+ * can't read guest memory (dereference memslots) to decode the WRMSR.
+ */
+ if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
+ nrips && control->next_rip)
return handle_fastpath_set_msr_irqoff(vcpu);
return EXIT_FASTPATH_NONE;
@@ -4102,6 +4114,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
case MSR_IA32_SMBASE:
+ if (!IS_ENABLED(CONFIG_KVM_SMM))
+ return false;
/* SEV-ES guests do not support SMM, so report false */
if (kvm && sev_es_guest(kvm))
return false;
@@ -4358,6 +4372,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
vcpu->arch.mcg_cap &= 0x1ff;
}
+#ifdef CONFIG_KVM_SMM
bool svm_smi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4385,7 +4400,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return 1;
}
-static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map_save;
@@ -4394,10 +4409,16 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
if (!is_guest_mode(vcpu))
return 0;
- /* FED8h - SVM Guest */
- put_smstate(u64, smstate, 0x7ed8, 1);
- /* FEE0h - SVM Guest VMCB Physical Address */
- put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+ /*
+ * 32-bit SMRAM format doesn't preserve EFER and SVM state. Userspace is
+ * responsible for ensuring nested SVM and SMIs are mutually exclusive.
+ */
+
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ return 1;
+
+ smram->smram64.svm_guest_flag = 1;
+ smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -4419,8 +4440,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
* that, see svm_prepare_switch_to_guest()) which must be
* preserved.
*/
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
- &map_save) == -EINVAL)
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
return 1;
BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
@@ -4432,34 +4452,33 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map, map_save;
- u64 saved_efer, vmcb12_gpa;
struct vmcb *vmcb12;
int ret;
+ const struct kvm_smram_state_64 *smram64 = &smram->smram64;
+
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
return 0;
/* Non-zero if SMI arrived while vCPU was in guest mode. */
- if (!GET_SMSTATE(u64, smstate, 0x7ed8))
+ if (!smram64->svm_guest_flag)
return 0;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
return 1;
- saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
- if (!(saved_efer & EFER_SVME))
+ if (!(smram64->efer & EFER_SVME))
return 1;
- vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram64->svm_guest_vmcb_gpa), &map))
return 1;
ret = 1;
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
goto unmap_map;
if (svm_allocate_nested(svm))
@@ -4481,7 +4500,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
vmcb12 = map.hva;
nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
- ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+ ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false);
if (ret)
goto unmap_save;
@@ -4507,6 +4526,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
/* We must be in SMM; RSM will cause a vmexit anyway. */
}
}
+#endif
static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len)
@@ -4782,10 +4802,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.pi_update_irte = avic_pi_update_irte,
.setup_mce = svm_setup_mce,
+#ifdef CONFIG_KVM_SMM
.smi_allowed = svm_smi_allowed,
.enter_smm = svm_enter_smm,
.leave_smm = svm_leave_smm,
.enable_smi_window = svm_enable_smi_window,
+#endif
.mem_enc_ioctl = sev_mem_enc_ioctl,
.mem_enc_register_region = sev_mem_enc_register_region,
@@ -4851,6 +4873,7 @@ static __init void svm_set_cpu_caps(void)
{
kvm_set_cpu_caps();
+ kvm_caps.supported_perf_cap = 0;
kvm_caps.supported_xss = 0;
/* CPUID 0x80000001 and 0x8000000A (SVM features) */