summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-11-27 09:08:40 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-11-27 09:08:40 -0800
commitbf82d38c91f857083f2d1b9770fa3df55db2ca3b (patch)
tree2026340a86d4d0198b749952bedd1d0ab549eb30
parent30a853c1bdede177adedd2de537ea16158125181 (diff)
parentfe08e36be9ecbf6b38714a77c97b1d25b7a6e4b0 (diff)
downloadlinux-bf82d38c91f857083f2d1b9770fa3df55db2ca3b.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "x86: - Fixes for Xen emulation. While nobody should be enabling it in the kernel (the only public users of the feature are the selftests), the bug effectively allows userspace to read arbitrary memory. - Correctness fixes for nested hypervisors that do not intercept INIT or SHUTDOWN on AMD; the subsequent CPU reset can cause a use-after-free when it disables virtualization extensions. While downgrading the panic to a WARN is quite easy, the full fix is a bit more laborious; there are also tests. This is the bulk of the pull request. - Fix race condition due to incorrect mmu_lock use around make_mmu_pages_available(). Generic: - Obey changes to the kvm.halt_poll_ns module parameter in VMs not using KVM_CAP_HALT_POLL, restoring behavior from before the introduction of the capability" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: Update gfn_to_pfn_cache khva when it moves within the same page KVM: x86/xen: Only do in-kernel acceleration of hypercalls for guest CPL0 KVM: x86/xen: Validate port number in SCHEDOP_poll KVM: x86/mmu: Fix race condition in direct_page_fault KVM: x86: remove exit_int_info warning in svm_handle_exit KVM: selftests: add svm part to triple_fault_test KVM: x86: allow L1 to not intercept triple fault kvm: selftests: add svm nested shutdown test KVM: selftests: move idt_entry to header KVM: x86: forcibly leave nested mode on vCPU reset KVM: x86: add kvm_leave_nested KVM: x86: nSVM: harden svm_free_nested against freeing vmcb02 while still in use KVM: x86: nSVM: leave nested mode on vCPU free KVM: Obey kvm.halt_poll_ns in VMs not using KVM_CAP_HALT_POLL KVM: Avoid re-reading kvm->max_halt_poll_ns during halt-polling KVM: Cap vcpu->halt_poll_ns before halting rather than after
-rw-r--r--arch/x86/kvm/mmu/mmu.c13
-rw-r--r--arch/x86/kvm/svm/nested.c12
-rw-r--r--arch/x86/kvm/svm/svm.c16
-rw-r--r--arch/x86/kvm/vmx/nested.c4
-rw-r--r--arch/x86/kvm/x86.c29
-rw-r--r--arch/x86/kvm/xen.c32
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--tools/testing/selftests/kvm/.gitignore1
-rw-r--r--tools/testing/selftests/kvm/Makefile1
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h13
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c13
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c67
-rw-r--r--tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c73
-rw-r--r--virt/kvm/kvm_main.c52
-rw-r--r--virt/kvm/pfncache.c7
15 files changed, 251 insertions, 83 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1ccb769f62af..b6f96d47e596 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2443,6 +2443,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
{
bool list_unstable, zapped_root = false;
+ lockdep_assert_held_write(&kvm->mmu_lock);
trace_kvm_mmu_prepare_zap_page(sp);
++kvm->stat.mmu_shadow_zapped;
*nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
@@ -4262,14 +4263,14 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
if (is_page_fault_stale(vcpu, fault, mmu_seq))
goto out_unlock;
- r = make_mmu_pages_available(vcpu);
- if (r)
- goto out_unlock;
-
- if (is_tdp_mmu_fault)
+ if (is_tdp_mmu_fault) {
r = kvm_tdp_mmu_map(vcpu, fault);
- else
+ } else {
+ r = make_mmu_pages_available(vcpu);
+ if (r)
+ goto out_unlock;
r = __direct_map(vcpu, fault);
+ }
out_unlock:
if (is_tdp_mmu_fault)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 4c620999d230..995bc0f90759 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1091,6 +1091,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN))
+ return;
+
+ kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
}
@@ -1125,6 +1131,9 @@ void svm_free_nested(struct vcpu_svm *svm)
if (!svm->nested.initialized)
return;
+ if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
+ svm_switch_vmcb(svm, &svm->vmcb01);
+
svm_vcpu_free_msrpm(svm->nested.msrpm);
svm->nested.msrpm = NULL;
@@ -1143,9 +1152,6 @@ void svm_free_nested(struct vcpu_svm *svm)
svm->nested.initialized = false;
}
-/*
- * Forcibly leave nested mode in order to be able to reset the VCPU later on.
- */
void svm_leave_nested(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4b6d2b050e57..ce362e88a567 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -346,12 +346,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
return 0;
}
-static int is_external_interrupt(u32 info)
-{
- info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
- return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
-}
-
static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1438,6 +1432,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
*/
svm_clear_current_vmcb(svm->vmcb);
+ svm_leave_nested(vcpu);
svm_free_nested(svm);
sev_free_vcpu(vcpu);
@@ -3425,15 +3420,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return 0;
}
- if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
- exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
- exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
- exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
- printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
- "exit_code 0x%x\n",
- __func__, svm->vmcb->control.exit_int_info,
- exit_code);
-
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0c62352dda6a..5b0d4859e4b7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4854,6 +4854,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu)
{
+ kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
}
@@ -6440,9 +6441,6 @@ out:
return kvm_state.size;
}
-/*
- * Forcibly leave nested mode in order to be able to reset the VCPU later on.
- */
void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu)) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 490ec23c8450..2835bd796639 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -628,6 +628,12 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto
ex->payload = payload;
}
+/* Forcibly leave the nested mode in cases like a vCPU reset */
+static void kvm_leave_nested(struct kvm_vcpu *vcpu)
+{
+ kvm_x86_ops.nested_ops->leave_nested(vcpu);
+}
+
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
bool has_payload, unsigned long payload, bool reinject)
@@ -5195,7 +5201,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
- kvm_x86_ops.nested_ops->leave_nested(vcpu);
+ kvm_leave_nested(vcpu);
kvm_smm_changed(vcpu, events->smi.smm);
}
@@ -9805,7 +9811,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
int kvm_check_nested_events(struct kvm_vcpu *vcpu)
{
- if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
+ if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
kvm_x86_ops.nested_ops->triple_fault(vcpu);
return 1;
}
@@ -10560,15 +10566,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = 0;
goto out;
}
- if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
- if (is_guest_mode(vcpu)) {
+ if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
+ if (is_guest_mode(vcpu))
kvm_x86_ops.nested_ops->triple_fault(vcpu);
- } else {
+
+ if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
vcpu->mmio_needed = 0;
r = 0;
- goto out;
}
+ goto out;
}
if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
/* Page is swapped out. Do synthetic halt */
@@ -11997,8 +12004,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
WARN_ON_ONCE(!init_event &&
(old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu)));
+ /*
+ * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's
+ * possible to INIT the vCPU while L2 is active. Force the vCPU back
+ * into L1 as EFER.SVME is cleared on INIT (along with all other EFER
+ * bits), i.e. virtualization is disabled.
+ */
+ if (is_guest_mode(vcpu))
+ kvm_leave_nested(vcpu);
+
kvm_lapic_reset(vcpu, init_event);
+ WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 2dae413bd62a..f3098c0e386a 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -954,6 +954,14 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
}
+static inline int max_evtchn_port(struct kvm *kvm)
+{
+ if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
+ return EVTCHN_2L_NR_CHANNELS;
+ else
+ return COMPAT_EVTCHN_2L_NR_CHANNELS;
+}
+
static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
evtchn_port_t *ports)
{
@@ -1042,6 +1050,10 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
*r = -EFAULT;
goto out;
}
+ if (ports[i] >= max_evtchn_port(vcpu->kvm)) {
+ *r = -EINVAL;
+ goto out;
+ }
}
if (sched_poll.nr_ports == 1)
@@ -1215,6 +1227,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
bool longmode;
u64 input, params[6], r = -ENOSYS;
bool handled = false;
+ u8 cpl;
input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
@@ -1242,9 +1255,17 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
params[5] = (u64)kvm_r9_read(vcpu);
}
#endif
+ cpl = static_call(kvm_x86_get_cpl)(vcpu);
trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
params[3], params[4], params[5]);
+ /*
+ * Only allow hypercall acceleration for CPL0. The rare hypercalls that
+ * are permitted in guest userspace can be handled by the VMM.
+ */
+ if (unlikely(cpl > 0))
+ goto handle_in_userspace;
+
switch (input) {
case __HYPERVISOR_xen_version:
if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) {
@@ -1279,10 +1300,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
if (handled)
return kvm_xen_hypercall_set_result(vcpu, r);
+handle_in_userspace:
vcpu->run->exit_reason = KVM_EXIT_XEN;
vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
vcpu->run->xen.u.hcall.longmode = longmode;
- vcpu->run->xen.u.hcall.cpl = static_call(kvm_x86_get_cpl)(vcpu);
+ vcpu->run->xen.u.hcall.cpl = cpl;
vcpu->run->xen.u.hcall.input = input;
vcpu->run->xen.u.hcall.params[0] = params[0];
vcpu->run->xen.u.hcall.params[1] = params[1];
@@ -1297,14 +1319,6 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
return 0;
}
-static inline int max_evtchn_port(struct kvm *kvm)
-{
- if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
- return EVTCHN_2L_NR_CHANNELS;
- else
- return COMPAT_EVTCHN_2L_NR_CHANNELS;
-}
-
static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
{
int poll_evtchn = vcpu->arch.xen.poll_evtchn;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 18592bdf4c1b..637a60607c7d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -776,6 +776,7 @@ struct kvm {
struct srcu_struct srcu;
struct srcu_struct irq_srcu;
pid_t userspace_pid;
+ bool override_halt_poll_ns;
unsigned int max_halt_poll_ns;
u32 dirty_ring_size;
bool vm_bugged;
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 2f0d705db9db..05d980fb083d 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -41,6 +41,7 @@
/x86_64/svm_vmcall_test
/x86_64/svm_int_ctl_test
/x86_64/svm_nested_soft_inject_test
+/x86_64/svm_nested_shutdown_test
/x86_64/sync_regs_test
/x86_64/tsc_msrs_test
/x86_64/tsc_scaling_sync
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 0172eb6cb6ee..4a2caef2c939 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -101,6 +101,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index e8ca0d8a6a7e..5da0c5e2a7af 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -748,6 +748,19 @@ struct ex_regs {
uint64_t rflags;
};
+struct idt_entry {
+ uint16_t offset0;
+ uint16_t selector;
+ uint16_t ist : 3;
+ uint16_t : 5;
+ uint16_t type : 4;
+ uint16_t : 1;
+ uint16_t dpl : 2;
+ uint16_t p : 1;
+ uint16_t offset1;
+ uint32_t offset2; uint32_t reserved;
+};
+
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 39c4409ef56a..41c1c73c464d 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1074,19 +1074,6 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
}
}
-struct idt_entry {
- uint16_t offset0;
- uint16_t selector;
- uint16_t ist : 3;
- uint16_t : 5;
- uint16_t type : 4;
- uint16_t : 1;
- uint16_t dpl : 2;
- uint16_t p : 1;
- uint16_t offset1;
- uint32_t offset2; uint32_t reserved;
-};
-
static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
int dpl, unsigned short selector)
{
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
new file mode 100644
index 000000000000..e73fcdef47bb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_nested_shutdown_test
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ __asm__ __volatile__("ud2");
+}
+
+static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ idt[6].p = 0; // #UD is intercepted but its injection will cause #NP
+ idt[11].p = 0; // #NP is not intercepted and will cause another
+ // #NP that will be converted to #DF
+ idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here */
+ GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ vcpu_args_set(vcpu, 2, svm_gva, vm->idt);
+ run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
index 70b44f0b52fe..ead5d878a71c 100644
--- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
+++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
@@ -3,6 +3,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -20,10 +21,11 @@ static void l2_guest_code(void)
: : [port] "d" (ARBITRARY_IO_PORT) : "rax");
}
-void l1_guest_code(struct vmx_pages *vmx)
-{
#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+void l1_guest_code_vmx(struct vmx_pages *vmx)
+{
GUEST_ASSERT(vmx->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx));
@@ -38,24 +40,53 @@ void l1_guest_code(struct vmx_pages *vmx)
GUEST_DONE();
}
+void l1_guest_code_svm(struct svm_test_data *svm)
+{
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* don't intercept shutdown to test the case of SVM allowing to do so */
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here, L1 should crash */
+ GUEST_ASSERT(0);
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vcpu_events events;
- vm_vaddr_t vmx_pages_gva;
struct ucall uc;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
+ bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
+
+ TEST_REQUIRE(has_vmx || has_svm);
TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
+ if (has_vmx) {
+ vm_vaddr_t vmx_pages_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ } else {
+ vm_vaddr_t svm_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+ }
+
+ vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
run = vcpu->run;
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -78,13 +109,21 @@ int main(void)
"No triple fault pending");
vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_DONE:
- break;
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
+ if (has_svm) {
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ } else {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+ return 0;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 25d7872b29c1..fab4d3790578 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1198,8 +1198,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
goto out_err_no_arch_destroy_vm;
}
- kvm->max_halt_poll_ns = halt_poll_ns;
-
r = kvm_arch_init_vm(kvm, type);
if (r)
goto out_err_no_arch_destroy_vm;
@@ -3377,9 +3375,6 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
if (val < grow_start)
val = grow_start;
- if (val > vcpu->kvm->max_halt_poll_ns)
- val = vcpu->kvm->max_halt_poll_ns;
-
vcpu->halt_poll_ns = val;
out:
trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
@@ -3483,6 +3478,24 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start,
}
}
+static unsigned int kvm_vcpu_max_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ if (kvm->override_halt_poll_ns) {
+ /*
+ * Ensure kvm->max_halt_poll_ns is not read before
+ * kvm->override_halt_poll_ns.
+ *
+ * Pairs with the smp_wmb() when enabling KVM_CAP_HALT_POLL.
+ */
+ smp_rmb();
+ return READ_ONCE(kvm->max_halt_poll_ns);
+ }
+
+ return READ_ONCE(halt_poll_ns);
+}
+
/*
* Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc... If halt
* polling is enabled, busy wait for a short time before blocking to avoid the
@@ -3491,12 +3504,18 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start,
*/
void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
{
+ unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu);
bool halt_poll_allowed = !kvm_arch_no_poll(vcpu);
- bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns;
ktime_t start, cur, poll_end;
bool waited = false;
+ bool do_halt_poll;
u64 halt_ns;
+ if (vcpu->halt_poll_ns > max_halt_poll_ns)
+ vcpu->halt_poll_ns = max_halt_poll_ns;
+
+ do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns;
+
start = cur = poll_end = ktime_get();
if (do_halt_poll) {
ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns);
@@ -3535,18 +3554,21 @@ out:
update_halt_poll_stats(vcpu, start, poll_end, !waited);
if (halt_poll_allowed) {
+ /* Recompute the max halt poll time in case it changed. */
+ max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu);
+
if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
- } else if (vcpu->kvm->max_halt_poll_ns) {
+ } else if (max_halt_poll_ns) {
if (halt_ns <= vcpu->halt_poll_ns)
;
/* we had a long block, shrink polling */
else if (vcpu->halt_poll_ns &&
- halt_ns > vcpu->kvm->max_halt_poll_ns)
+ halt_ns > max_halt_poll_ns)
shrink_halt_poll_ns(vcpu);
/* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
- halt_ns < vcpu->kvm->max_halt_poll_ns)
+ else if (vcpu->halt_poll_ns < max_halt_poll_ns &&
+ halt_ns < max_halt_poll_ns)
grow_halt_poll_ns(vcpu);
} else {
vcpu->halt_poll_ns = 0;
@@ -4581,6 +4603,16 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
return -EINVAL;
kvm->max_halt_poll_ns = cap->args[0];
+
+ /*
+ * Ensure kvm->override_halt_poll_ns does not become visible
+ * before kvm->max_halt_poll_ns.
+ *
+ * Pairs with the smp_rmb() in kvm_vcpu_max_halt_poll_ns().
+ */
+ smp_wmb();
+ kvm->override_halt_poll_ns = true;
+
return 0;
}
case KVM_CAP_DIRTY_LOG_RING:
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 346e47f15572..7c248193ca26 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -297,7 +297,12 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
if (!gpc->valid || old_uhva != gpc->uhva) {
ret = hva_to_pfn_retry(kvm, gpc);
} else {
- /* If the HVA→PFN mapping was already valid, don't unmap it. */
+ /*
+ * If the HVA→PFN mapping was already valid, don't unmap it.
+ * But do update gpc->khva because the offset within the page
+ * may have changed.
+ */
+ gpc->khva = old_khva + page_offset;
old_pfn = KVM_PFN_ERR_FAULT;
old_khva = NULL;
ret = 0;