diff options
Diffstat (limited to 'arch/x86/kvm/vmx/posted_intr.c')
-rw-r--r-- | arch/x86/kvm/vmx/posted_intr.c | 159 |
1 files changed, 84 insertions, 75 deletions
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 1c94783b5a54..88c53c521094 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -11,10 +11,22 @@ #include "vmx.h" /* - * We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we - * can find which vCPU should be waken up. + * Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler() + * when a WAKEUP_VECTOR interrupted is posted. vCPUs are added to the list when + * the vCPU is scheduled out and is blocking (e.g. in HLT) with IRQs enabled. + * The vCPUs posted interrupt descriptor is updated at the same time to set its + * notification vector to WAKEUP_VECTOR, so that posted interrupt from devices + * wake the target vCPUs. vCPUs are removed from the list and the notification + * vector is reset when the vCPU is scheduled in. */ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); +/* + * Protect the per-CPU list with a per-CPU spinlock to handle task migration. + * When a blocking vCPU is awakened _and_ migrated to a different pCPU, the + * ->sched_in() path will need to take the vCPU off the list of the _previous_ + * CPU. IRQs must be disabled when taking this lock, otherwise deadlock will + * occur if a wakeup IRQ arrives and attempts to acquire the lock. + */ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) @@ -22,6 +34,20 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) return &(to_vmx(vcpu)->pi_desc); } +static int pi_try_set_control(struct pi_desc *pi_desc, u64 old, u64 new) +{ + /* + * PID.ON can be set at any time by a different vCPU or by hardware, + * e.g. a device. PID.control must be written atomically, and the + * update must be retried with a fresh snapshot an ON change causes + * the cmpxchg to fail. + */ + if (cmpxchg64(&pi_desc->control, old, new) != old) + return -EBUSY; + + return 0; +} + void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); @@ -29,11 +55,14 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) unsigned int dest; /* - * In case of hot-plug or hot-unplug, we may have to undo - * vmx_vcpu_pi_put even if there is no assigned device. And we - * always keep PI.NDST up to date for simplicity: it makes the - * code easier, and CPU migration is not a fast path. + * To simplify hot-plug and dynamic toggling of APICv, keep PI.NDST and + * PI.SN up-to-date even if there is no assigned device or if APICv is + * deactivated due to a dynamic inhibit bit, e.g. for Hyper-V's SyncIC. */ + if (!enable_apicv || !lapic_in_kernel(vcpu)) + return; + + /* Nothing to do if PI.SN and PI.NDST both have the desired value. */ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) return; @@ -49,20 +78,17 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) goto after_clear_sn; } - /* The full case. */ - do { - old.control = new.control = pi_desc->control; - - dest = cpu_physical_id(cpu); + /* The full case. Set the new destination and clear SN. */ + dest = cpu_physical_id(cpu); + if (!x2apic_mode) + dest = (dest << 8) & 0xFF00; - if (x2apic_mode) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; + do { + old.control = new.control = READ_ONCE(pi_desc->control); + new.ndst = dest; new.sn = 0; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); + } while (pi_try_set_control(pi_desc, old.control, new.control)); after_clear_sn: @@ -103,29 +129,31 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) struct pi_desc old, new; unsigned int dest; - do { - old.control = new.control = pi_desc->control; - WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, - "Wakeup handler not enabled while the VCPU is blocked\n"); + /* + * Remove the vCPU from the wakeup list of the _previous_ pCPU, which + * will not be the same as the current pCPU if the task was migrated. + */ + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - dest = cpu_physical_id(vcpu->cpu); + dest = cpu_physical_id(vcpu->cpu); + if (!x2apic_mode) + dest = (dest << 8) & 0xFF00; - if (x2apic_mode) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; + WARN(pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR, + "Wakeup handler not enabled while the vCPU was blocking"); + + do { + old.control = new.control = READ_ONCE(pi_desc->control); + + new.ndst = dest; /* set 'NV' to 'notification vector' */ new.nv = POSTED_INTR_VECTOR; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); - - if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - vcpu->pre_pcpu = -1; - } + } while (pi_try_set_control(pi_desc, old.control, new.control)); + + vcpu->pre_pcpu = -1; } /* @@ -134,7 +162,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) * - Store the vCPU to the wakeup list, so when interrupts happen * we can find the right vCPU to wake up. * - Change the Posted-interrupt descriptor as below: - * 'NDST' <-- vcpu->pre_pcpu * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR * - If 'ON' is set during this process, which means at least one * interrupt is posted for this vCPU, we cannot block it, in @@ -143,68 +170,50 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) */ int pi_pre_block(struct kvm_vcpu *vcpu) { - unsigned int dest; struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + unsigned long flags; - if (!vmx_can_use_vtd_pi(vcpu->kvm)) + if (!vmx_can_use_vtd_pi(vcpu->kvm) || + vmx_interrupt_blocked(vcpu)) return 0; - WARN_ON(irqs_disabled()); - local_irq_disable(); - if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { - vcpu->pre_pcpu = vcpu->cpu; - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, - vcpu->pre_pcpu)); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - } - - do { - old.control = new.control = pi_desc->control; + local_irq_save(flags); - WARN((pi_desc->sn == 1), - "Warning: SN field of posted-interrupts " - "is set before blocking\n"); + vcpu->pre_pcpu = vcpu->cpu; + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu)); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, vcpu->cpu)); + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu)); - /* - * Since vCPU can be preempted during this process, - * vcpu->cpu could be different with pre_pcpu, we - * need to set pre_pcpu as the destination of wakeup - * notification event, then we can find the right vCPU - * to wakeup in wakeup handler if interrupts happen - * when the vCPU is in blocked state. - */ - dest = cpu_physical_id(vcpu->pre_pcpu); + WARN(pi_desc->sn == 1, + "Posted Interrupt Suppress Notification set before blocking"); - if (x2apic_mode) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; + do { + old.control = new.control = READ_ONCE(pi_desc->control); /* set 'NV' to 'wakeup vector' */ new.nv = POSTED_INTR_WAKEUP_VECTOR; - } while (cmpxchg64(&pi_desc->control, old.control, - new.control) != old.control); + } while (pi_try_set_control(pi_desc, old.control, new.control)); /* We should not block the vCPU if an interrupt is posted for it. */ - if (pi_test_on(pi_desc) == 1) + if (pi_test_on(pi_desc)) __pi_post_block(vcpu); - local_irq_enable(); + local_irq_restore(flags); return (vcpu->pre_pcpu == -1); } void pi_post_block(struct kvm_vcpu *vcpu) { + unsigned long flags; + if (vcpu->pre_pcpu == -1) return; - WARN_ON(irqs_disabled()); - local_irq_disable(); + local_irq_save(flags); __pi_post_block(vcpu); - local_irq_enable(); + local_irq_restore(flags); } /* @@ -220,7 +229,7 @@ void pi_wakeup_handler(void) blocked_vcpu_list) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - if (pi_test_on(pi_desc) == 1) + if (pi_test_on(pi_desc)) kvm_vcpu_kick(vcpu); } spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); |