From f445f11eb2cc265dd47da5b2e864df46cd6e5a82 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 14 Mar 2013 17:13:46 -0700 Subject: KVM: allow host header to be included even for !CONFIG_KVM The new context tracking subsystem unconditionally includes kvm_host.h headers for the guest enter/exit macros. This causes a compile failure when KVM is not enabled. Fix by adding an IS_ENABLED(CONFIG_KVM) check to kvm_host so it can be included/compiled even when KVM is not enabled. Cc: Frederic Weisbecker Signed-off-by: Kevin Hilman Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cad77fe09d77..a9428635c9fd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1,6 +1,8 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H +#if IS_ENABLED(CONFIG_KVM) + /* * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -1055,5 +1057,8 @@ static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ +#else +static inline void __guest_enter(void) { return; } +static inline void __guest_exit(void) { return; } +#endif /* IS_ENABLED(CONFIG_KVM) */ #endif - -- cgit v1.2.3 From c09664bb44184b3846e8c5254db4eae4b932682a Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 18 Mar 2013 13:54:32 -0300 Subject: KVM: x86: fix deadlock in clock-in-progress request handling There is a deadlock in pvclock handling: cpu0: cpu1: kvm_gen_update_masterclock() kvm_guest_time_update() spin_lock(pvclock_gtod_sync_lock) local_irq_save(flags) spin_lock(pvclock_gtod_sync_lock) kvm_make_mclock_inprogress_request(kvm) make_all_cpus_request() smp_call_function_many() Now if smp_call_function_many() called by cpu0 tries to call function on cpu1 there will be a deadlock. Fix by moving pvclock_gtod_sync_lock protected section outside irq disabled section. Analyzed by Gleb Natapov Acked-by: Gleb Natapov Reported-and-Tested-by: Yongjie Ren Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f71500af1f81..f7c850b36910 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1416,15 +1416,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = 0; host_tsc = 0; - /* Keep irq disabled to prevent changes to the clock */ - local_irq_save(flags); - this_tsc_khz = __get_cpu_var(cpu_tsc_khz); - if (unlikely(this_tsc_khz == 0)) { - local_irq_restore(flags); - kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); - return 1; - } - /* * If the host uses TSC clock, then passthrough TSC as stable * to the guest. @@ -1436,6 +1427,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = ka->master_kernel_ns; } spin_unlock(&ka->pvclock_gtod_sync_lock); + + /* Keep irq disabled to prevent changes to the clock */ + local_irq_save(flags); + this_tsc_khz = __get_cpu_var(cpu_tsc_khz); + if (unlikely(this_tsc_khz == 0)) { + local_irq_restore(flags); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); + return 1; + } if (!use_master_clock) { host_tsc = native_read_tsc(); kernel_ns = get_kernel_ns(); -- cgit v1.2.3 From c300aa64ddf57d9c5d9c898a64b36877345dd4a9 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Mon, 11 Mar 2013 09:34:52 -0700 Subject: KVM: x86: fix for buffer overflow in handling of MSR_KVM_SYSTEM_TIME (CVE-2013-1796) If the guest sets the GPA of the time_page so that the request to update the time straddles a page then KVM will write onto an incorrect page. The write is done byusing kmap atomic to get a pointer to the page for the time structure and then performing a memcpy to that page starting at an offset that the guest controls. Well behaved guests always provide a 32-byte aligned address, however a malicious guest could use this to corrupt host kernel memory. Tested: Tested against kvmclock unit test. Signed-off-by: Andrew Honig Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f7c850b36910..2ade60c25402 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1959,6 +1959,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* ...but clean it before doing the actual write */ vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); + /* Check that the address is 32-byte aligned. */ + if (vcpu->arch.time_offset & + (sizeof(struct pvclock_vcpu_time_info) - 1)) + break; + vcpu->arch.time_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); -- cgit v1.2.3 From 0b79459b482e85cb7426aa7da683a9f2c97aeae1 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Wed, 20 Feb 2013 14:48:10 -0800 Subject: KVM: x86: Convert MSR_KVM_SYSTEM_TIME to use gfn_to_hva_cache functions (CVE-2013-1797) There is a potential use after free issue with the handling of MSR_KVM_SYSTEM_TIME. If the guest specifies a GPA in a movable or removable memory such as frame buffers then KVM might continue to write to that address even after it's removed via KVM_SET_USER_MEMORY_REGION. KVM pins the page in memory so it's unlikely to cause an issue, but if the user space component re-purposes the memory previously used for the guest, then the guest will be able to corrupt that memory. Tested: Tested against kvmclock unit test Signed-off-by: Andrew Honig Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/x86.c | 47 ++++++++++++++++++----------------------- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 635a74d22409..4979778cc7fb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -414,8 +414,8 @@ struct kvm_vcpu_arch { gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; - unsigned int time_offset; - struct page *time_page; + struct gfn_to_hva_cache pv_time; + bool pv_time_enabled; /* set guest stopped flag in pvclock flags field */ bool pvclock_set_guest_stopped_request; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2ade60c25402..f19ac0aca60d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1406,10 +1406,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) unsigned long flags, this_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; - void *shared_kaddr; s64 kernel_ns, max_kernel_ns; u64 tsc_timestamp, host_tsc; - struct pvclock_vcpu_time_info *guest_hv_clock; + struct pvclock_vcpu_time_info guest_hv_clock; u8 pvclock_flags; bool use_master_clock; @@ -1463,7 +1462,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_restore(flags); - if (!vcpu->time_page) + if (!vcpu->pv_time_enabled) return 0; /* @@ -1525,12 +1524,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) */ vcpu->hv_clock.version += 2; - shared_kaddr = kmap_atomic(vcpu->time_page); - - guest_hv_clock = shared_kaddr + vcpu->time_offset; + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, + &guest_hv_clock, sizeof(guest_hv_clock)))) + return 0; /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ - pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED); + pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); if (vcpu->pvclock_set_guest_stopped_request) { pvclock_flags |= PVCLOCK_GUEST_STOPPED; @@ -1543,12 +1542,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.flags = pvclock_flags; - memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); - - kunmap_atomic(shared_kaddr); - - mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); return 0; } @@ -1837,10 +1833,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { - if (vcpu->arch.time_page) { - kvm_release_page_dirty(vcpu->arch.time_page); - vcpu->arch.time_page = NULL; - } + vcpu->arch.pv_time_enabled = false; } static void accumulate_steal_time(struct kvm_vcpu *vcpu) @@ -1947,6 +1940,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_KVM_SYSTEM_TIME_NEW: case MSR_KVM_SYSTEM_TIME: { + u64 gpa_offset; kvmclock_reset(vcpu); vcpu->arch.time = data; @@ -1956,19 +1950,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & 1)) break; - /* ...but clean it before doing the actual write */ - vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); + gpa_offset = data & ~(PAGE_MASK | 1); /* Check that the address is 32-byte aligned. */ - if (vcpu->arch.time_offset & - (sizeof(struct pvclock_vcpu_time_info) - 1)) + if (gpa_offset & (sizeof(struct pvclock_vcpu_time_info) - 1)) break; - vcpu->arch.time_page = - gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); - - if (is_error_page(vcpu->arch.time_page)) - vcpu->arch.time_page = NULL; + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.pv_time, data & ~1ULL)) + vcpu->arch.pv_time_enabled = false; + else + vcpu->arch.pv_time_enabled = true; break; } @@ -2972,7 +2964,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, */ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) { - if (!vcpu->arch.time_page) + if (!vcpu->arch.pv_time_enabled) return -EINVAL; vcpu->arch.pvclock_set_guest_stopped_request = true; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -6723,6 +6715,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_wbinvd_dirty_mask; vcpu->arch.ia32_tsc_adjust_msr = 0x0; + vcpu->arch.pv_time_enabled = false; kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); -- cgit v1.2.3 From a2c118bfab8bc6b8bb213abfc35201e441693d55 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Wed, 20 Feb 2013 14:49:16 -0800 Subject: KVM: Fix bounds checking in ioapic indirect register reads (CVE-2013-1798) If the guest specifies a IOAPIC_REG_SELECT with an invalid value and follows that with a read of the IOAPIC_REG_WINDOW KVM does not properly validate that request. ioapic_read_indirect contains an ASSERT(redir_index < IOAPIC_NUM_PINS), but the ASSERT has no effect in non-debug builds. In recent kernels this allows a guest to cause a kernel oops by reading invalid memory. In older kernels (pre-3.3) this allows a guest to read from large ranges of host memory. Tested: tested against apic unit tests. Signed-off-by: Andrew Honig Signed-off-by: Marcelo Tosatti --- virt/kvm/ioapic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index ce82b9401958..5ba005c00e2f 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -74,9 +74,12 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; u64 redir_content; - ASSERT(redir_index < IOAPIC_NUM_PINS); + if (redir_index < IOAPIC_NUM_PINS) + redir_content = + ioapic->redirtbl[redir_index].bits; + else + redir_content = ~0ULL; - redir_content = ioapic->redirtbl[redir_index].bits; result = (ioapic->ioregsel & 0x1) ? (redir_content >> 32) & 0xffffffff : redir_content & 0xffffffff; -- cgit v1.2.3