From 4355c44f063d3de4f072d796604c7f4ba4085cc3 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 22 Apr 2016 10:38:45 +0100 Subject: MIPS: KVM: Fix timer IRQ race when freezing timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a particularly narrow and subtle race condition when the software emulated guest timer is frozen which can allow a guest timer interrupt to be missed. This happens due to the hrtimer expiry being inexact, so very occasionally the freeze time will be after the moment when the emulated CP0_Count transitions to the same value as CP0_Compare (so an IRQ should be generated), but before the moment when the hrtimer is due to expire (so no IRQ is generated). The IRQ won't be generated when the timer is resumed either, since the resume CP0_Count will already match CP0_Compare. With VZ guests in particular this is far more likely to happen, since the soft timer may be frozen frequently in order to restore the timer state to the hardware guest timer. This happens after 5-10 hours of guest soak testing, resulting in an overflow in guest kernel timekeeping calculations, hanging the guest. A more focussed test case to intentionally hit the race (with the help of a new hypcall to cause the timer state to migrated between hardware & software) hits the condition fairly reliably within around 30 seconds. Instead of relying purely on the inexact hrtimer expiry to determine whether an IRQ should be generated, read the guest CP0_Compare and directly check whether the freeze time is before or after it. Only if CP0_Count is on or after CP0_Compare do we check the hrtimer expiry to determine whether the last IRQ has already been generated (which will have pushed back the expiry by one timer period). Fixes: e30492bbe95a ("MIPS: KVM: Rewrite count/compare timer emulation") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: # 3.16.x- Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/mips') diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index b37954cc880d..ab5681de6ece 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -302,12 +302,31 @@ static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu) */ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) { - ktime_t expires; + struct mips_coproc *cop0 = vcpu->arch.cop0; + ktime_t expires, threshold; + uint32_t count, compare; int running; - /* Is the hrtimer pending? */ + /* Calculate the biased and scaled guest CP0_Count */ + count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now); + compare = kvm_read_c0_guest_compare(cop0); + + /* + * Find whether CP0_Count has reached the closest timer interrupt. If + * not, we shouldn't inject it. + */ + if ((int32_t)(count - compare) < 0) + return count; + + /* + * The CP0_Count we're going to return has already reached the closest + * timer interrupt. Quickly check if it really is a new interrupt by + * looking at whether the interval until the hrtimer expiry time is + * less than 1/4 of the timer period. + */ expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer); - if (ktime_compare(now, expires) >= 0) { + threshold = ktime_add_ns(now, vcpu->arch.count_period / 4); + if (ktime_before(expires, threshold)) { /* * Cancel it while we handle it so there's no chance of * interference with the timeout handler. @@ -329,8 +348,7 @@ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) } } - /* Return the biased and scaled guest CP0_Count */ - return vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now); + return count; } /** -- cgit v1.2.3 From b45bacd2d048f405c7760e5cc9b60dd67708734f Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 22 Apr 2016 10:38:46 +0100 Subject: MIPS: KVM: Fix timer IRQ race when writing CP0_Compare MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Writing CP0_Compare clears the timer interrupt pending bit (CP0_Cause.TI), but this wasn't being done atomically. If a timer interrupt raced with the write of the guest CP0_Compare, the timer interrupt could end up being pending even though the new CP0_Compare is nowhere near CP0_Count. We were already updating the hrtimer expiry with kvm_mips_update_hrtimer(), which used both kvm_mips_freeze_hrtimer() and kvm_mips_resume_hrtimer(). Close the race window by expanding out kvm_mips_update_hrtimer(), and clearing CP0_Cause.TI and setting CP0_Compare between the freeze and resume. Since the pending timer interrupt should not be cleared when CP0_Compare is written via the KVM user API, an ack argument is added to distinguish the source of the write. Fixes: e30492bbe95a ("MIPS: KVM: Rewrite count/compare timer emulation") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: # 3.16.x- Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 2 +- arch/mips/kvm/emulate.c | 61 ++++++++++++++++++---------------------- arch/mips/kvm/trap_emul.c | 2 +- 3 files changed, 29 insertions(+), 36 deletions(-) (limited to 'arch/mips') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index f6b12790716c..942b8f6bf35b 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -747,7 +747,7 @@ extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu); void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count); -void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare); +void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack); void kvm_mips_init_count(struct kvm_vcpu *vcpu); int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl); int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume); diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index ab5681de6ece..b8b7860ec1a8 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -437,32 +437,6 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu, hrtimer_start(&vcpu->arch.comparecount_timer, expire, HRTIMER_MODE_ABS); } -/** - * kvm_mips_update_hrtimer() - Update next expiry time of hrtimer. - * @vcpu: Virtual CPU. - * - * Recalculates and updates the expiry time of the hrtimer. This can be used - * after timer parameters have been altered which do not depend on the time that - * the change occurs (in those cases kvm_mips_freeze_hrtimer() and - * kvm_mips_resume_hrtimer() are used directly). - * - * It is guaranteed that no timer interrupts will be lost in the process. - * - * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running). - */ -static void kvm_mips_update_hrtimer(struct kvm_vcpu *vcpu) -{ - ktime_t now; - uint32_t count; - - /* - * freeze_hrtimer takes care of a timer interrupts <= count, and - * resume_hrtimer the hrtimer takes care of a timer interrupts > count. - */ - now = kvm_mips_freeze_hrtimer(vcpu, &count); - kvm_mips_resume_hrtimer(vcpu, now, count); -} - /** * kvm_mips_write_count() - Modify the count and update timer. * @vcpu: Virtual CPU. @@ -558,23 +532,42 @@ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz) * kvm_mips_write_compare() - Modify compare and update timer. * @vcpu: Virtual CPU. * @compare: New CP0_Compare value. + * @ack: Whether to acknowledge timer interrupt. * * Update CP0_Compare to a new value and update the timeout. + * If @ack, atomically acknowledge any pending timer interrupt, otherwise ensure + * any pending timer interrupt is preserved. */ -void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare) +void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack) { struct mips_coproc *cop0 = vcpu->arch.cop0; + int dc; + u32 old_compare = kvm_read_c0_guest_compare(cop0); + ktime_t now; + uint32_t count; /* if unchanged, must just be an ack */ - if (kvm_read_c0_guest_compare(cop0) == compare) + if (old_compare == compare) { + if (!ack) + return; + kvm_mips_callbacks->dequeue_timer_int(vcpu); + kvm_write_c0_guest_compare(cop0, compare); return; + } + + /* freeze_hrtimer() takes care of timer interrupts <= count */ + dc = kvm_mips_count_disabled(vcpu); + if (!dc) + now = kvm_mips_freeze_hrtimer(vcpu, &count); + + if (ack) + kvm_mips_callbacks->dequeue_timer_int(vcpu); - /* Update compare */ kvm_write_c0_guest_compare(cop0, compare); - /* Update timeout if count enabled */ - if (!kvm_mips_count_disabled(vcpu)) - kvm_mips_update_hrtimer(vcpu); + /* resume_hrtimer() takes care of timer interrupts > count */ + if (!dc) + kvm_mips_resume_hrtimer(vcpu, now, count); } /** @@ -1113,9 +1106,9 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, /* If we are writing to COMPARE */ /* Clear pending timer interrupt, if any */ - kvm_mips_callbacks->dequeue_timer_int(vcpu); kvm_mips_write_compare(vcpu, - vcpu->arch.gprs[rt]); + vcpu->arch.gprs[rt], + true); } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { unsigned int old_val, val, change; diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index c4038d2a724c..caa5ea1038a0 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -546,7 +546,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, kvm_mips_write_count(vcpu, v); break; case KVM_REG_MIPS_CP0_COMPARE: - kvm_mips_write_compare(vcpu, v); + kvm_mips_write_compare(vcpu, v, false); break; case KVM_REG_MIPS_CP0_CAUSE: /* -- cgit v1.2.3 From f049729c05cc5338174d52e2bee6678131da5e08 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 22 Apr 2016 10:38:47 +0100 Subject: MIPS: KVM: Fix preemptable kvm_mips_get_*_asid() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a couple of places in KVM fault handling code which implicitly use smp_processor_id() via kvm_mips_get_kernel_asid() and kvm_mips_get_user_asid() from preemptable context. This is unsafe as a preemption could cause the guest kernel ASID to be changed, resulting in a host TLB entry being written with the wrong ASID. Fix by disabling preemption around the kvm_mips_get_*_asid() call and the corresponding kvm_mips_host_tlb_write(). Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/tlb.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'arch/mips') diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index e0e1d0a611fc..60e4ad0016e7 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -268,6 +268,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, int even; struct kvm *kvm = vcpu->kvm; const int flush_dcache_mask = 0; + int ret; if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); @@ -299,14 +300,18 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, pfn1 = kvm->arch.guest_pmap[gfn]; } - entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu)); entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | (1 << 2) | (0x1 << 1); entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | (1 << 2) | (0x1 << 1); - return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, - flush_dcache_mask); + preempt_disable(); + entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu)); + ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, + flush_dcache_mask); + preempt_enable(); + + return ret; } EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault); @@ -361,6 +366,7 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; struct kvm *kvm = vcpu->kvm; kvm_pfn_t pfn0, pfn1; + int ret; if ((tlb->tlb_hi & VPN2_MASK) == 0) { pfn0 = 0; @@ -387,9 +393,6 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, *hpa1 = pfn1 << PAGE_SHIFT; /* Get attributes from the Guest TLB */ - entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ? - kvm_mips_get_kernel_asid(vcpu) : - kvm_mips_get_user_asid(vcpu)); entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V); entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | @@ -398,8 +401,15 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, tlb->tlb_lo0, tlb->tlb_lo1); - return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, - tlb->tlb_mask); + preempt_disable(); + entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ? + kvm_mips_get_kernel_asid(vcpu) : + kvm_mips_get_user_asid(vcpu)); + ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, + tlb->tlb_mask); + preempt_enable(); + + return ret; } EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault); -- cgit v1.2.3 From 556f2a5231bf3ec4ce98d46434920e31787f79d6 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 22 Apr 2016 10:38:48 +0100 Subject: MIPS: KVM: Fix preemption warning reading FPU capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reading the KVM_CAP_MIPS_FPU capability returns cpu_has_fpu, however this uses smp_processor_id() to read the current CPU capabilities (since some old MIPS systems could have FPUs present on only a subset of CPUs). We don't support any such systems, so work around the warning by using raw_cpu_has_fpu instead. We should probably instead claim not to support FPU at all if any one CPU is lacking an FPU, but this should do for now. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/mips') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 70ef1a43c114..ec3fe09ef15c 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1079,7 +1079,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; case KVM_CAP_MIPS_FPU: - r = !!cpu_has_fpu; + /* We don't handle systems with inconsistent cpu_has_fpu */ + r = !!raw_cpu_has_fpu; break; case KVM_CAP_MIPS_MSA: /* -- cgit v1.2.3 From 4ac334295e4f3b996e9feabe36e79e44bf77a06f Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 22 Apr 2016 10:38:49 +0100 Subject: MIPS: KVM: Add missing disable FPU hazard barriers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the necessary hazard barriers after disabling the FPU in kvm_lose_fpu(), just to be safe. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/mips') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index ec3fe09ef15c..23b209463238 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1556,8 +1556,10 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) /* Disable MSA & FPU */ disable_msa(); - if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) + if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { clear_c0_status(ST0_CU1 | ST0_FR); + disable_fpu_hazard(); + } vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA); } else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { set_c0_status(ST0_CU1); @@ -1568,6 +1570,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) /* Disable FPU */ clear_c0_status(ST0_CU1 | ST0_FR); + disable_fpu_hazard(); } preempt_enable(); } -- cgit v1.2.3 From 3491caf2755e9f312666712510d80b00c81ff247 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 13 May 2016 12:16:35 +0200 Subject: KVM: halt_polling: provide a way to qualify wakeups during poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some wakeups should not be considered a sucessful poll. For example on s390 I/O interrupts are usually floating, which means that _ALL_ CPUs would be considered runnable - letting all vCPUs poll all the time for transactional like workload, even if one vCPU would be enough. This can result in huge CPU usage for large guests. This patch lets architectures provide a way to qualify wakeups if they should be considered a good/bad wakeups in regard to polls. For s390 the implementation will fence of halt polling for anything but known good, single vCPU events. The s390 implementation for floating interrupts does a wakeup for one vCPU, but the interrupt will be delivered by whatever CPU checks first for a pending interrupt. We prefer the woken up CPU by marking the poll of this CPU as "good" poll. This code will also mark several other wakeup reasons like IPI or expired timers as "good". This will of course also mark some events as not sucessful. As KVM on z runs always as a 2nd level hypervisor, we prefer to not poll, unless we are really sure, though. This patch successfully limits the CPU usage for cases like uperf 1byte transactional ping pong workload or wakeup heavy workload like OLTP while still providing a proper speedup. This also introduced a new vcpu stat "halt_poll_no_tuning" that marks wakeups that are considered not good for polling. Signed-off-by: Christian Borntraeger Acked-by: Radim Krčmář (for an earlier version) Cc: David Matlack Cc: Wanpeng Li [Rename config symbol. - Paolo] Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 2 ++ arch/arm64/include/asm/kvm_host.h | 2 ++ arch/mips/include/asm/kvm_host.h | 2 ++ arch/mips/kvm/mips.c | 1 + arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/kvm/book3s.c | 1 + arch/powerpc/kvm/booke.c | 1 + arch/s390/include/asm/kvm_host.h | 3 +++ arch/s390/kvm/Kconfig | 1 + arch/s390/kvm/interrupt.c | 5 +++++ arch/s390/kvm/kvm-s390.c | 6 ++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 1 + include/linux/kvm_host.h | 15 +++++++++++++++ include/trace/events/kvm.h | 11 +++++++---- virt/kvm/Kconfig | 3 +++ virt/kvm/kvm_main.c | 8 ++++++-- 17 files changed, 60 insertions(+), 6 deletions(-) (limited to 'arch/mips') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 385070180c25..4cd8732796ab 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -187,6 +187,7 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 halt_successful_poll; u32 halt_attempted_poll; + u32 halt_poll_invalid; u32 halt_wakeup; u32 hvc_exit_stat; u64 wfe_exit_stat; @@ -282,6 +283,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f5c6bd2541ef..d49399d9890d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -293,6 +293,7 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 halt_successful_poll; u32 halt_attempted_poll; + u32 halt_poll_invalid; u32 halt_wakeup; u32 hvc_exit_stat; u64 wfe_exit_stat; @@ -357,6 +358,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} void kvm_arm_init_debug(void); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 942b8f6bf35b..9a37a1044032 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -122,6 +122,7 @@ struct kvm_vcpu_stat { u32 flush_dcache_exits; u32 halt_successful_poll; u32 halt_attempted_poll; + u32 halt_poll_invalid; u32 halt_wakeup; }; @@ -812,5 +813,6 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 23b209463238..dc052fb5c7a2 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -56,6 +56,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, + { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU }, { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, {NULL} }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a07645c17818..ec35af34a3fb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -116,6 +116,7 @@ struct kvm_vcpu_stat { u32 ext_intr_exits; u32 halt_successful_poll; u32 halt_attempted_poll; + u32 halt_poll_invalid; u32 halt_wakeup; u32 dbell_exits; u32 gdbell_exits; @@ -727,5 +728,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b34220d2aa42..47018fcbf7d6 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -54,6 +54,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "queue_intr", VCPU_STAT(queue_intr) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, + { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "pf_storage", VCPU_STAT(pf_storage) }, { "sp_storage", VCPU_STAT(sp_storage) }, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 4d66f44a1657..4afae695899a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -64,6 +64,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, + { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "doorbell", VCPU_STAT(dbell_exits) }, { "guest doorbell", VCPU_STAT(gdbell_exits) }, diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9282ccf1d136..53d794538067 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -247,6 +247,7 @@ struct kvm_vcpu_stat { u32 exit_instruction; u32 halt_successful_poll; u32 halt_attempted_poll; + u32 halt_poll_invalid; u32 halt_wakeup; u32 instruction_lctl; u32 instruction_lctlg; @@ -696,4 +697,6 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} +void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 5ea5af3c7db7..b1900239b0ab 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -28,6 +28,7 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_INVALID_WAKEUPS select SRCU select KVM_VFIO ---help--- diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index e55040467eb5..5a80af740d3e 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -977,6 +977,11 @@ no_timer: void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) { + /* + * We cannot move this into the if, as the CPU might be already + * in kvm_vcpu_block without having the waitqueue set (polling) + */ + vcpu->valid_wakeup = true; if (swait_active(&vcpu->wq)) { /* * The vcpu gave up the cpu voluntarily, mark it as a good diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c597201a5ca9..6d8ec3ac9dd8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -65,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, + { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, @@ -2992,6 +2993,11 @@ static inline unsigned long nonhyp_mask(int i) return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); } +void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) +{ + vcpu->valid_wakeup = false; +} + static int __init kvm_s390_init(void) { int i; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c66e26280707..c99494b4bdf7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -803,6 +803,7 @@ struct kvm_vcpu_stat { u32 halt_exits; u32 halt_successful_poll; u32 halt_attempted_poll; + u32 halt_poll_invalid; u32 halt_wakeup; u32 request_irq_exits; u32 irq_exits; @@ -1342,5 +1343,6 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c774cdf553c..bcef92fc41d8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -161,6 +161,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_exits", VCPU_STAT(halt_exits) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, + { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 92a0229044fb..bbcd921d7cb0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -229,6 +229,7 @@ struct kvm_vcpu { sigset_t sigset; struct kvm_vcpu_stat stat; unsigned int halt_poll_ns; + bool valid_wakeup; #ifdef CONFIG_HAS_IOMEM int mmio_needed; @@ -1196,4 +1197,18 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ +#ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS +/* If we wakeup during the poll time, was it a sucessful poll? */ +static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) +{ + return vcpu->valid_wakeup; +} + +#else +static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) +{ + return true; +} +#endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */ + #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index aa69253ecc7d..526fb3d2e43a 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -38,22 +38,25 @@ TRACE_EVENT(kvm_userspace_exit, ); TRACE_EVENT(kvm_vcpu_wakeup, - TP_PROTO(__u64 ns, bool waited), - TP_ARGS(ns, waited), + TP_PROTO(__u64 ns, bool waited, bool valid), + TP_ARGS(ns, waited, valid), TP_STRUCT__entry( __field( __u64, ns ) __field( bool, waited ) + __field( bool, valid ) ), TP_fast_assign( __entry->ns = ns; __entry->waited = waited; + __entry->valid = valid; ), - TP_printk("%s time %lld ns", + TP_printk("%s time %lld ns, polling %s", __entry->waited ? "wait" : "poll", - __entry->ns) + __entry->ns, + __entry->valid ? "valid" : "invalid") ); #if defined(CONFIG_HAVE_KVM_IRQFD) diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 7a79b6853583..e5d6108f5e85 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -41,6 +41,9 @@ config KVM_VFIO config HAVE_KVM_ARCH_TLB_FLUSH_ALL bool +config HAVE_KVM_INVALID_WAKEUPS + bool + config KVM_GENERIC_DIRTYLOG_READ_PROTECT bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ed3d9bb18a56..21f6498d52e3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2028,6 +2028,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) */ if (kvm_vcpu_check_block(vcpu) < 0) { ++vcpu->stat.halt_successful_poll; + if (!vcpu_valid_wakeup(vcpu)) + ++vcpu->stat.halt_poll_invalid; goto out; } cur = ktime_get(); @@ -2057,7 +2059,8 @@ out: if (block_ns <= vcpu->halt_poll_ns) ; /* we had a long block, shrink polling */ - else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) + else if (!vcpu_valid_wakeup(vcpu) || + (vcpu->halt_poll_ns && block_ns > halt_poll_ns)) shrink_halt_poll_ns(vcpu); /* we had a short halt and our poll time is too small */ else if (vcpu->halt_poll_ns < halt_poll_ns && @@ -2066,7 +2069,8 @@ out: } else vcpu->halt_poll_ns = 0; - trace_kvm_vcpu_wakeup(block_ns, waited); + trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu)); + kvm_arch_vcpu_block_finish(vcpu); } EXPORT_SYMBOL_GPL(kvm_vcpu_block); -- cgit v1.2.3