From 13fb59276b47db556370bba53b5b55f3849dd8c9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 13 Nov 2019 15:47:06 +0100 Subject: kvm: x86: disable shattered huge page recovery for PREEMPT_RT. If a huge page is recovered (and becomes no executable) while another thread is executing it, the resulting contention on mmu_lock can cause latency spikes. Disabling recovery for PREEMPT_RT kernels fixes this issue. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fd6012eef9c9..cf718fa23dff 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -51,7 +51,12 @@ extern bool itlb_multihit_kvm_mitigation; static int __read_mostly nx_huge_pages = -1; +#ifdef CONFIG_PREEMPT_RT +/* Recovery can cause latency spikes, disable it for PREEMPT_RT. */ +static uint __read_mostly nx_huge_pages_recovery_ratio = 0; +#else static uint __read_mostly nx_huge_pages_recovery_ratio = 60; +#endif static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); -- cgit v1.2.3 From e37f9f139f62deddff90c7298ae3a85026a71067 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 13 Nov 2019 13:51:15 +0100 Subject: selftests: kvm: fix build with glibc >= 2.30 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Glibc-2.30 gained gettid() wrapper, selftests fail to compile: lib/assert.c:58:14: error: static declaration of ‘gettid’ follows non-static declaration 58 | static pid_t gettid(void) | ^~~~~~ In file included from /usr/include/unistd.h:1170, from include/test_util.h:18, from lib/assert.c:10: /usr/include/bits/unistd_ext.h:34:16: note: previous declaration of ‘gettid’ was here 34 | extern __pid_t gettid (void) __THROW; | ^~~~~~ Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/assert.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 4911fc77d0f6..d1cf9f6e0e6b 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -55,7 +55,7 @@ static void test_dump_stack(void) #pragma GCC diagnostic pop } -static pid_t gettid(void) +static pid_t _gettid(void) { return syscall(SYS_gettid); } @@ -72,7 +72,7 @@ test_assert(bool exp, const char *exp_str, fprintf(stderr, "==== Test Assertion Failure ====\n" " %s:%u: %s\n" " pid=%d tid=%d - %s\n", - file, line, exp_str, getpid(), gettid(), + file, line, exp_str, getpid(), _gettid(), strerror(errno)); test_dump_stack(); if (fmt) { -- cgit v1.2.3 From 6cbee2b9eccfc1c93a03aaa286ec93331f7c95e7 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 13 Nov 2019 09:15:21 +0800 Subject: KVM: X86: Reset the three MSR list number variables to 0 in kvm_init_msr_list() When applying commit 7a5ee6edb42e ("KVM: X86: Fix initialization of MSR lists"), it forgot to reset the three MSR lists number varialbes to 0 while removing the useless conditionals. Fixes: 7a5ee6edb42e (KVM: X86: Fix initialization of MSR lists) Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7db5c8ef35dd..5d530521f11d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5130,6 +5130,10 @@ static void kvm_init_msr_list(void) perf_get_x86_pmu_capability(&x86_pmu); + num_msrs_to_save = 0; + num_emulated_msrs = 0; + num_msr_based_features = 0; + for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) { if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0) continue; -- cgit v1.2.3 From b9876e6de123adb52ac693bac08c493e989bd93e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Nov 2019 16:05:23 +0000 Subject: KVM: Forbid /dev/kvm being opened by a compat task when CONFIG_KVM_COMPAT=n On a system without KVM_COMPAT, we prevent IOCTLs from being issued by a compat task. Although this prevents most silly things from happening, it can still confuse a 32bit userspace that is able to open the kvm device (the qemu test suite seems to be pretty mad with this behaviour). Take a more radical approach and return a -ENODEV to the compat task. Reported-by: Peter Maydell Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 524cff24a68d..6a65ed915c7a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -124,7 +124,13 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, #else static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { return -EINVAL; } -#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl + +static int kvm_no_compat_open(struct inode *inode, struct file *file) +{ + return is_compat_task() ? -ENODEV : 0; +} +#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \ + .open = kvm_no_compat_open #endif static int hardware_enable_all(void); static void hardware_disable_all(void); -- cgit v1.2.3 From ed69a6cb700880d052a0d085ff2e5bfc108ce238 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 13 Nov 2019 11:30:32 -0800 Subject: KVM: x86/mmu: Take slots_lock when using kvm_mmu_zap_all_fast() Acquire the per-VM slots_lock when zapping all shadow pages as part of toggling nx_huge_pages. The fast zap algorithm relies on exclusivity (via slots_lock) to identify obsolete vs. valid shadow pages, because it uses a single bit for its generation number. Holding slots_lock also obviates the need to acquire a read lock on the VM's srcu. Failing to take slots_lock when toggling nx_huge_pages allows multiple instances of kvm_mmu_zap_all_fast() to run concurrently, as the other user, KVM_SET_USER_MEMORY_REGION, does not take the global kvm_lock. (kvm_mmu_zap_all_fast() does take kvm->mmu_lock, but it can be temporarily dropped by kvm_zap_obsolete_pages(), so it is not enough to enforce exclusivity). Concurrent fast zap instances causes obsolete shadow pages to be incorrectly identified as valid due to the single bit generation number wrapping, which results in stale shadow pages being left in KVM's MMU and leads to all sorts of undesirable behavior. The bug is easily confirmed by running with CONFIG_PROVE_LOCKING and toggling nx_huge_pages via its module param. Note, until commit 4ae5acbc4936 ("KVM: x86/mmu: Take slots_lock when using kvm_mmu_zap_all_fast()", 2019-11-13) the fast zap algorithm used an ulong-sized generation instead of relying on exclusivity for correctness, but all callers except the recently added set_nx_huge_pages() needed to hold slots_lock anyways. Therefore, this patch does not have to be backported to stable kernels. Given that toggling nx_huge_pages is by no means a fast path, force it to conform to the current approach instead of reintroducing the previous generation count. Fixes: b8e8c8303ff28 ("kvm: mmu: ITLB_MULTIHIT mitigation", but NOT FOR STABLE) Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cf718fa23dff..2ce9da58611e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -6285,14 +6285,13 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) if (new_val != old_val) { struct kvm *kvm; - int idx; mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - idx = srcu_read_lock(&kvm->srcu); + mutex_lock(&kvm->slots_lock); kvm_mmu_zap_all_fast(kvm); - srcu_read_unlock(&kvm->srcu, idx); + mutex_unlock(&kvm->slots_lock); wake_up_process(kvm->arch.nx_lpage_recovery_thread); } -- cgit v1.2.3 From 9cb09e7c1c9af2968d5186ef9085f05641ab65d9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 14 Nov 2019 13:17:39 +0000 Subject: KVM: Add a comment describing the /dev/kvm no_compat handling Add a comment explaining the rational behind having both no_compat open and ioctl callbacks to fend off compat tasks. Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a65ed915c7a..13efc291b1c7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -122,6 +122,13 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); #define KVM_COMPAT(c) .compat_ioctl = (c) #else +/* + * For architectures that don't implement a compat infrastructure, + * adopt a double line of defense: + * - Prevent a compat task from opening /dev/kvm + * - If the open has been done by a 64bit task, and the KVM fd + * passed to a compat task, let the ioctls fail. + */ static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { return -EINVAL; } -- cgit v1.2.3