Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r--   arch/x86/kvm/svm/nested.c    23
-rw-r--r--   arch/x86/kvm/svm/sev.c       32
-rw-r--r--   arch/x86/kvm/svm/svm.c      105
-rw-r--r--   arch/x86/kvm/svm/svm.h       39
4 files changed, 70 insertions(+), 129 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 540d43ba2cf4..5e8d8443154e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
svm_switch_vmcb(svm, &svm->vmcb01);
- WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);
/*
* On vmexit the GIF is set to false and
@@ -872,6 +871,15 @@ void svm_free_nested(struct vcpu_svm *svm)
__free_page(virt_to_page(svm->nested.vmcb02.ptr));
svm->nested.vmcb02.ptr = NULL;
+ /*
+ * When last_vmcb12_gpa matches the current vmcb12 gpa,
+ * some vmcb12 fields are not loaded if they are marked clean
+ * in the vmcb12, since in this case they are up to date already.
+ *
+ * When the vmcb02 is freed, this optimization becomes invalid.
+ */
+ svm->nested.last_vmcb12_gpa = INVALID_GPA;
+
svm->nested.initialized = false;
}
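The comment added above describes a cache keyed by the vmcb12 GPA. A minimal userspace sketch of that pattern (hypothetical names, not kernel code), showing why freeing the cached object must also reset the key to a sentinel that can never match a real GPA:

#include <stdint.h>
#include <stdio.h>

#define INVALID_GPA (~0ULL)

struct cache {
	uint64_t last_gpa;	/* key of the state currently cached */
};

static void load_state(struct cache *c, uint64_t gpa, int clean)
{
	/* Reload is skipped only when the key matches and the field is clean. */
	if (c->last_gpa == gpa && clean)
		printf("gpa %#llx: skip reload (cache hit)\n", (unsigned long long)gpa);
	else
		printf("gpa %#llx: full reload\n", (unsigned long long)gpa);
	c->last_gpa = gpa;
}

static void free_cached_state(struct cache *c)
{
	/* Without this, the next load with the same gpa would skip the reload. */
	c->last_gpa = INVALID_GPA;
}

int main(void)
{
	struct cache c = { .last_gpa = INVALID_GPA };

	load_state(&c, 0x1000, 1);	/* miss: full reload */
	load_state(&c, 0x1000, 1);	/* hit: skip */
	free_cached_state(&c);
	load_state(&c, 0x1000, 1);	/* miss again, as required */
	return 0;
}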
@@ -884,9 +892,11 @@ void svm_leave_nested(struct vcpu_svm *svm)
if (is_guest_mode(vcpu)) {
svm->nested.nested_run_pending = 0;
+ svm->nested.vmcb12_gpa = INVALID_GPA;
+
leave_guest_mode(vcpu);
- svm_switch_vmcb(svm, &svm->nested.vmcb02);
+ svm_switch_vmcb(svm, &svm->vmcb01);
nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
@@ -1298,12 +1308,17 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
* L2 registers if needed are moved from the current VMCB to VMCB02.
*/
+ if (is_guest_mode(vcpu))
+ svm_leave_nested(svm);
+ else
+ svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
+ svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+
svm->nested.nested_run_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
- if (svm->current_vmcb == &svm->vmcb01)
- svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
svm->vmcb01.ptr->save.es = save->es;
svm->vmcb01.ptr->save.cs = save->cs;
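The reordered hunk above makes the ordering explicit: if the vCPU is still in guest mode, tear it down first so the load always starts from L1 state; otherwise seed vmcb02 from vmcb01 before overlaying the incoming fields. A standalone sketch of that reset-before-load shape (illustrative names only, not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct save_area { long regs[4]; };

struct vcpu {
	bool guest_mode;
	struct save_area vmcb01_save;
	struct save_area vmcb02_save;
};

static void leave_nested(struct vcpu *v)
{
	v->guest_mode = false;	/* back to L1; vmcb02 contents are now stale */
}

static void set_nested_state(struct vcpu *v, const long *l2_regs, int n)
{
	int i;

	if (v->guest_mode)
		leave_nested(v);			/* reset first, never merge into a live L2 */
	else
		v->vmcb02_save = v->vmcb01_save;	/* seed vmcb02 from L1 state */

	for (i = 0; i < n; i++)				/* overlay the incoming L2 fields */
		v->vmcb02_save.regs[i] = l2_regs[i];
	v->guest_mode = true;
}

int main(void)
{
	struct vcpu v = { .guest_mode = true };
	const long l2[2] = { 1, 2 };

	set_nested_state(&v, l2, 2);
	printf("guest_mode=%d reg0=%ld\n", v.guest_mode, v.vmcb02_save.regs[0]);
	return 0;
}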
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1356ee095cd5..5bc887e9a986 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -763,7 +763,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
}
static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
- unsigned long __user dst_uaddr,
+ void __user *dst_uaddr,
unsigned long dst_paddr,
int size, int *err)
{
@@ -787,8 +787,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
if (tpage) {
offset = paddr & 15;
- if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
- page_address(tpage) + offset, size))
+ if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
ret = -EFAULT;
}
@@ -800,9 +799,9 @@ e_free:
}
static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
- unsigned long __user vaddr,
+ void __user *vaddr,
unsigned long dst_paddr,
- unsigned long __user dst_vaddr,
+ void __user *dst_vaddr,
int size, int *error)
{
struct page *src_tpage = NULL;
@@ -810,13 +809,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
int ret, len = size;
/* If source buffer is not aligned then use an intermediate buffer */
- if (!IS_ALIGNED(vaddr, 16)) {
+ if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
src_tpage = alloc_page(GFP_KERNEL);
if (!src_tpage)
return -ENOMEM;
- if (copy_from_user(page_address(src_tpage),
- (void __user *)(uintptr_t)vaddr, size)) {
+ if (copy_from_user(page_address(src_tpage), vaddr, size)) {
__free_page(src_tpage);
return -EFAULT;
}
@@ -830,7 +828,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
* - copy the source buffer in an intermediate buffer
* - use the intermediate buffer as source buffer
*/
- if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+ if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
int dst_offset;
dst_tpage = alloc_page(GFP_KERNEL);
@@ -855,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
page_address(src_tpage), size);
else {
if (copy_from_user(page_address(dst_tpage) + dst_offset,
- (void __user *)(uintptr_t)vaddr, size)) {
+ vaddr, size)) {
ret = -EFAULT;
goto e_free;
}
@@ -935,15 +933,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
if (dec)
ret = __sev_dbg_decrypt_user(kvm,
__sme_page_pa(src_p[0]) + s_off,
- dst_vaddr,
+ (void __user *)dst_vaddr,
__sme_page_pa(dst_p[0]) + d_off,
len, &argp->error);
else
ret = __sev_dbg_encrypt_user(kvm,
__sme_page_pa(src_p[0]) + s_off,
- vaddr,
+ (void __user *)vaddr,
__sme_page_pa(dst_p[0]) + d_off,
- dst_vaddr,
+ (void __user *)dst_vaddr,
len, &argp->error);
sev_unpin_memory(kvm, src_p, n);
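The signature change in these hunks keeps the __user annotation on the prototype and performs the integer-to-pointer cast once, at the call site; the alignment checks then cast back to an integer explicitly. A self-contained sketch of that alignment test, using the kernel's IS_ALIGNED() definition minus the address-space typing:

#include <stdio.h>

/* Same definition as the kernel's IS_ALIGNED(), minus the __user typing. */
#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)

static int copy_region(void *dst, int size)
{
	/* Pointers must be converted to an integer type before the bit test. */
	if (!IS_ALIGNED((unsigned long)dst, 16) || !IS_ALIGNED(size, 16)) {
		printf("unaligned: bounce through an intermediate buffer\n");
		return 1;
	}
	printf("aligned: copy in place\n");
	return 0;
}

int main(void)
{
	char buf[64] __attribute__((aligned(16)));

	copy_region(buf, 32);		/* aligned path */
	copy_region(buf + 1, 32);	/* unaligned pointer -> bounce buffer */
	copy_region(buf, 20);		/* unaligned size -> bounce buffer */
	return 0;
}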
@@ -1764,7 +1762,8 @@ e_mirror_unlock:
e_source_unlock:
mutex_unlock(&source_kvm->lock);
e_source_put:
- fput(source_kvm_file);
+ if (source_kvm_file)
+ fput(source_kvm_file);
return ret;
}
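The guard added above matters because fput(), unlike kfree(), does not tolerate a NULL argument, and the e_source_put label is reachable before the file reference has been taken. A userspace sketch of the same goto-unwind shape, using fclose() (which likewise must not see NULL) as the stand-in:

#include <stdio.h>
#include <stdlib.h>

/* Cleanup labels run even for early failures, so each cleanup step must
 * tolerate the resource never having been acquired. */
static int do_work(const char *path)
{
	FILE *f = NULL;
	char *buf = NULL;
	int ret = -1;

	buf = malloc(4096);
	if (!buf)
		goto out;		/* f is still NULL here */

	f = fopen(path, "r");
	if (!f)
		goto out;

	ret = 0;			/* real work would go here */
out:
	if (f)				/* fclose(NULL), like fput(NULL), is not safe */
		fclose(f);
	free(buf);			/* free(NULL), like kfree(NULL), is fine */
	return ret;
}

int main(void)
{
	return do_work("/nonexistent") ? 1 : 0;
}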
@@ -2198,7 +2197,7 @@ vmgexit_err:
return -EINVAL;
}
-static void pre_sev_es_run(struct vcpu_svm *svm)
+void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
if (!svm->ghcb)
return;
@@ -2234,9 +2233,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
int asid = sev_get_asid(svm->vcpu.kvm);
- /* Perform any SEV-ES pre-run actions */
- pre_sev_es_run(svm);
-
/* Assign the asid allocated with this SEV guest */
svm->asid = asid;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9790c73f2a32..05eca131eaf2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
* RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
* defer the restoration of TSC_AUX until the CPU returns to userspace.
*/
-#define TSC_AUX_URET_SLOT 0
+static int tsc_aux_uret_slot __read_mostly = -1;
static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
@@ -447,6 +447,11 @@ static int has_svm(void)
return 0;
}
+ if (pgtable_l5_enabled()) {
+ pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
+ return 0;
+ }
+
return 1;
}
@@ -858,8 +863,8 @@ static __init void svm_adjust_mmio_mask(void)
return;
/* If memory encryption is not enabled, use existing mask */
- rdmsrl(MSR_K8_SYSCFG, msr);
- if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ rdmsrl(MSR_AMD64_SYSCFG, msr);
+ if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;
enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
@@ -959,8 +964,7 @@ static __init int svm_hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 32;
}
- if (boot_cpu_has(X86_FEATURE_RDTSCP))
- kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);
+ tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
/* Check for pause filtering support */
if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
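The setup change above replaces a compile-time slot number with one handed back at registration time, which is why later users must check for a negative slot (see the likely(tsc_aux_uret_slot >= 0) test further down). A minimal sketch of that registration pattern, with a hypothetical stand-in for kvm_add_user_return_msr():

#include <stdint.h>
#include <stdio.h>

#define MAX_SLOTS 16

static uint32_t slots[MAX_SLOTS];
static int nr_slots;

/* Returns the allocated slot index, or -1 if the table is full
 * (hypothetical stand-in for kvm_add_user_return_msr()). */
static int add_user_return_msr(uint32_t msr)
{
	if (nr_slots >= MAX_SLOTS)
		return -1;
	slots[nr_slots] = msr;
	return nr_slots++;
}

static int tsc_aux_uret_slot = -1;

int main(void)
{
	tsc_aux_uret_slot = add_user_return_msr(0xc0000103 /* MSR_TSC_AUX */);

	/* Guarded use, mirroring the vCPU-load path in the patch. */
	if (tsc_aux_uret_slot >= 0)
		printf("TSC_AUX tracked in slot %d\n", tsc_aux_uret_slot);
	else
		printf("TSC_AUX not tracked\n");
	return 0;
}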
@@ -1100,7 +1104,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
return svm->vmcb->control.tsc_offset;
}
-static void svm_check_invpcid(struct vcpu_svm *svm)
+/* Evaluate instruction intercepts that depend on guest CPUID features. */
+static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
+ struct vcpu_svm *svm)
{
/*
* Intercept INVPCID if shadow paging is enabled to sync/free shadow
@@ -1113,6 +1119,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm)
else
svm_clr_intercept(svm, INTERCEPT_INVPCID);
}
+
+ if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
+ if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+ svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+ else
+ svm_set_intercept(svm, INTERCEPT_RDTSCP);
+ }
}
static void init_vmcb(struct kvm_vcpu *vcpu)
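The renamed helper above now toggles one intercept per CPUID-dependent instruction rather than handling INVPCID alone. A standalone sketch of the underlying pattern, intercepting an instruction exactly when the host can virtualize it but the guest has not been given the feature:

#include <stdint.h>
#include <stdio.h>

#define INTERCEPT_INVPCID 0
#define INTERCEPT_RDTSCP  1

static uint32_t intercepts;

static void set_intercept(int bit) { intercepts |=  (1u << bit); }
static void clr_intercept(int bit) { intercepts &= ~(1u << bit); }

/* Mirrors the recalc helper: intercept the instruction when the host
 * supports it but the guest CPUID does not advertise it. */
static void recalc(int host_has_rdtscp, int guest_has_rdtscp)
{
	if (host_has_rdtscp) {
		if (guest_has_rdtscp)
			clr_intercept(INTERCEPT_RDTSCP);
		else
			set_intercept(INTERCEPT_RDTSCP);
	}
}

int main(void)
{
	recalc(1, 0);
	printf("RDTSCP intercepted: %u\n", (intercepts >> INTERCEPT_RDTSCP) & 1);
	recalc(1, 1);
	printf("RDTSCP intercepted: %u\n", (intercepts >> INTERCEPT_RDTSCP) & 1);
	return 0;
}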
@@ -1235,8 +1248,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm->current_vmcb->asid_generation = 0;
svm->asid = 0;
- svm->nested.vmcb12_gpa = 0;
- svm->nested.last_vmcb12_gpa = 0;
+ svm->nested.vmcb12_gpa = INVALID_GPA;
+ svm->nested.last_vmcb12_gpa = INVALID_GPA;
vcpu->arch.hflags = 0;
if (!kvm_pause_in_guest(vcpu->kvm)) {
@@ -1248,7 +1261,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_clr_intercept(svm, INTERCEPT_PAUSE);
}
- svm_check_invpcid(svm);
+ svm_recalc_instruction_intercepts(vcpu, svm);
/*
* If the host supports V_SPEC_CTRL then disable the interception
@@ -1424,6 +1437,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_unmap_ghcb(svm);
+
if (svm->guest_state_loaded)
return;
@@ -1445,8 +1461,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
}
}
- if (static_cpu_has(X86_FEATURE_RDTSCP))
- kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);
+ if (likely(tsc_aux_uret_slot >= 0))
+ kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
svm->guest_state_loaded = true;
}
@@ -2655,11 +2671,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
break;
case MSR_TSC_AUX:
- if (!boot_cpu_has(X86_FEATURE_RDTSCP))
- return 1;
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
- return 1;
msr_info->data = svm->tsc_aux;
break;
/*
@@ -2876,30 +2887,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
break;
case MSR_TSC_AUX:
- if (!boot_cpu_has(X86_FEATURE_RDTSCP))
- return 1;
-
- if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
- return 1;
-
- /*
- * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
- * incomplete and conflicting architectural behavior. Current
- * AMD CPUs completely ignore bits 63:32, i.e. they aren't
- * reserved and always read as zeros. Emulate AMD CPU behavior
- * to avoid explosions if the vCPU is migrated from an AMD host
- * to an Intel host.
- */
- data = (u32)data;
-
/*
* TSC_AUX is usually changed only during boot and never read
* directly. Intercept TSC_AUX instead of exposing it to the
* guest via direct_access_msrs, and switch it via user return.
*/
preempt_disable();
- r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);
+ r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
preempt_enable();
if (r)
return 1;
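This write path leans on the user-return MSR machinery: the guest value is installed eagerly under preempt_disable(), and the host value is restored lazily when the CPU next returns to userspace. A userspace model of that deferral (hypothetical names, no real WRMSR):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct uret_msr {
	uint64_t host;		/* value userspace expects to see */
	uint64_t curr;		/* value currently in the register */
	bool dirty;
};

static struct uret_msr tsc_aux = { .host = 0x1, .curr = 0x1 };

static void set_user_return_msr(struct uret_msr *m, uint64_t guest_val)
{
	if (m->curr == guest_val)
		return;			/* skip the expensive write */
	m->curr = guest_val;		/* stands in for wrmsr(MSR_TSC_AUX, ...) */
	m->dirty = true;
}

static void on_return_to_userspace(struct uret_msr *m)
{
	if (m->dirty) {
		m->curr = m->host;	/* restore the host value lazily */
		m->dirty = false;
	}
}

int main(void)
{
	set_user_return_msr(&tsc_aux, 0x2a);	/* guest wrote TSC_AUX */
	printf("in guest: %#llx\n", (unsigned long long)tsc_aux.curr);
	on_return_to_userspace(&tsc_aux);
	printf("back in userspace: %#llx\n", (unsigned long long)tsc_aux.curr);
	return 0;
}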
@@ -3084,6 +3078,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = skinit_interception,
+ [SVM_EXIT_RDTSCP] = kvm_handle_invalid_op,
[SVM_EXIT_WBINVD] = kvm_emulate_wbinvd,
[SVM_EXIT_MONITOR] = kvm_emulate_monitor,
[SVM_EXIT_MWAIT] = kvm_emulate_mwait,
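The table gains an entry so that an RDTSCP intercepted on behalf of a guest without the feature injects #UD via kvm_handle_invalid_op. The table itself is the classic sparse dispatch array; a self-contained sketch using the same SVM exit-code values:

#include <stdio.h>

#define EXIT_RDTSCP 0x87	/* SVM_EXIT_RDTSCP */
#define EXIT_WBINVD 0x89	/* SVM_EXIT_WBINVD */
#define NR_EXITS    0x100

static int handle_invalid_op(void) { puts("inject #UD"); return 1; }
static int handle_wbinvd(void)     { puts("emulate WBINVD"); return 1; }

/* Designated initializers keep the array sparse; unhandled exit codes
 * stay NULL and are rejected at dispatch time. */
static int (*const handlers[NR_EXITS])(void) = {
	[EXIT_RDTSCP] = handle_invalid_op,
	[EXIT_WBINVD] = handle_wbinvd,
};

static int dispatch(unsigned int exit_code)
{
	if (exit_code >= NR_EXITS || !handlers[exit_code])
		return -1;	/* unexpected exit */
	return handlers[exit_code]();
}

int main(void)
{
	dispatch(EXIT_RDTSCP);	/* intercepted RDTSCP without guest support */
	dispatch(EXIT_WBINVD);
	return 0;
}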
@@ -3710,25 +3705,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long vmcb_pa = svm->current_vmcb->pa;
- /*
- * VMENTER enables interrupts (host state), but the kernel state is
- * interrupts disabled when this is invoked. Also tell RCU about
- * it. This is the same logic as for exit_to_user_mode().
- *
- * This ensures that e.g. latency analysis on the host observes
- * guest mode as interrupt enabled.
- *
- * guest_enter_irqoff() informs context tracking about the
- * transition to guest mode and if enabled adjusts RCU state
- * accordingly.
- */
- instrumentation_begin();
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- instrumentation_end();
-
- guest_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
+ kvm_guest_enter_irqoff();
if (sev_es_guest(vcpu->kvm)) {
__svm_sev_es_vcpu_run(vmcb_pa);
@@ -3748,24 +3725,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
vmload(__sme_page_pa(sd->save_area));
}
- /*
- * VMEXIT disables interrupts (host state), but tracing and lockdep
- * have them in state 'on' as recorded before entering guest mode.
- * Same as enter_from_user_mode().
- *
- * guest_exit_irqoff() restores host context and reinstates RCU if
- * enabled and required.
- *
- * This needs to be done before the below as native_read_msr()
- * contains a tracepoint and x86_spec_ctrl_restore_host() calls
- * into world and some more.
- */
- lockdep_hardirqs_off(CALLER_ADDR0);
- guest_exit_irqoff();
-
- instrumentation_begin();
- trace_hardirqs_off_finish();
- instrumentation_end();
+ kvm_guest_exit_irqoff();
}
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
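The two deleted comment blocks described a sequence that is now shared rather than open-coded per vendor. A sketch of what the new kvm_guest_enter_irqoff() helper presumably contains, reconstructed purely from the lines removed above (the real definition lives outside this diff):

static __always_inline void kvm_guest_enter_irqoff(void)
{
	/*
	 * VMENTER enables interrupts (host state); tell tracing, lockdep
	 * and context tracking about it before entering guest mode,
	 * exactly as the removed open-coded sequence did.
	 */
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	instrumentation_end();

	guest_enter_irqoff();
	lockdep_hardirqs_on(CALLER_ADDR0);
}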
@@ -4007,8 +3967,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
- /* Check again if INVPCID interception if required */
- svm_check_invpcid(svm);
+ svm_recalc_instruction_intercepts(vcpu, svm);
/* For sev guests, the memory encryption bit is not reserved in CR3. */
if (sev_guest(vcpu->kvm)) {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 84b3133c2251..2c9ece618b29 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -20,6 +20,7 @@
#include <linux/bits.h>
#include <asm/svm.h>
+#include <asm/sev-common.h>
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
@@ -525,40 +526,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
/* sev.c */
-#define GHCB_VERSION_MAX 1ULL
-#define GHCB_VERSION_MIN 1ULL
-
-#define GHCB_MSR_INFO_POS 0
-#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1)
-
-#define GHCB_MSR_SEV_INFO_RESP 0x001
-#define GHCB_MSR_SEV_INFO_REQ 0x002
-#define GHCB_MSR_VER_MAX_POS 48
-#define GHCB_MSR_VER_MAX_MASK 0xffff
-#define GHCB_MSR_VER_MIN_POS 32
-#define GHCB_MSR_VER_MIN_MASK 0xffff
-#define GHCB_MSR_CBIT_POS 24
-#define GHCB_MSR_CBIT_MASK 0xff
-#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \
- ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \
- (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \
- (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \
- GHCB_MSR_SEV_INFO_RESP)
-
-#define GHCB_MSR_CPUID_REQ 0x004
-#define GHCB_MSR_CPUID_RESP 0x005
-#define GHCB_MSR_CPUID_FUNC_POS 32
-#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff
-#define GHCB_MSR_CPUID_VALUE_POS 32
-#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff
-#define GHCB_MSR_CPUID_REG_POS 30
-#define GHCB_MSR_CPUID_REG_MASK 0x3
-
-#define GHCB_MSR_TERM_REQ 0x100
-#define GHCB_MSR_TERM_REASON_SET_POS 12
-#define GHCB_MSR_TERM_REASON_SET_MASK 0xf
-#define GHCB_MSR_TERM_REASON_POS 16
-#define GHCB_MSR_TERM_REASON_MASK 0xff
+#define GHCB_VERSION_MAX 1ULL
+#define GHCB_VERSION_MIN 1ULL
+
extern unsigned int max_sev_asid;
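The GHCB MSR-protocol constants removed above now live in <asm/sev-common.h>, so the guest and KVM share one copy. A standalone sketch of the SEV_INFO response layout, taken from the removed macros (max version in bits 63:48, min version in 47:32, C-bit position in 31:24, response code in 11:0):

#include <stdint.h>
#include <stdio.h>

#define GHCB_MSR_SEV_INFO_RESP	0x001ULL
#define GHCB_MSR_VER_MAX_POS	48
#define GHCB_MSR_VER_MIN_POS	32
#define GHCB_MSR_CBIT_POS	24

static uint64_t ghcb_msr_sev_info(uint64_t max, uint64_t min, uint64_t cbit)
{
	return ((max & 0xffff) << GHCB_MSR_VER_MAX_POS) |
	       ((min & 0xffff) << GHCB_MSR_VER_MIN_POS) |
	       ((cbit & 0xff) << GHCB_MSR_CBIT_POS) |
	       GHCB_MSR_SEV_INFO_RESP;
}

int main(void)
{
	/* GHCB protocol versions 1..1, C-bit at physical address bit 51. */
	printf("SEV_INFO response: %#llx\n",
	       (unsigned long long)ghcb_msr_sev_info(1, 1, 51));
	return 0;
}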
@@ -581,6 +551,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
void sev_es_create_vcpu(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
+void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */