| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-05 14:47:31 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-05 14:47:31 -0700 |
| commit | 01227a889ed56ae53aeebb9f93be9d54dd8b2de8 | |
| tree | d5eba9359a9827e84d4112b84d48c54df5c5acde /virt | |
| parent | 9e6879460c8edb0cd3c24c09b83d06541b5af0dc | |
| parent | db6ae6158186a17165ef990bda2895ae7594b039 | |
| download | linux-01227a889ed56ae53aeebb9f93be9d54dd8b2de8.tar.bz2 | |
Merge tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Gleb Natapov:
 "Highlights of the updates are:
  general:
   - new emulated device API
   - legacy device assignment is now optional
   - irqfd interface is more generic and can be shared between arches
  x86:
   - VMCS shadow support and other nested VMX improvements
   - APIC virtualization and Posted Interrupt hardware support
   - Optimize mmio spte zapping
  ppc:
   - BookE: in-kernel MPIC emulation with irqfd support
   - Book3S: in-kernel XICS emulation (incomplete)
   - Book3S HV: migration fixes
   - BookE: more debug support preparation
   - BookE: e6500 support
  ARM:
   - reworking of Hyp idmaps
  s390:
   - ioeventfd for virtio-ccw
  And many other bug fixes, cleanups and improvements"
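The "new emulated device API" called out above is the device control API this pull introduces (see kvm_ioctl_create_device() in the kvm_main.c hunk of the diff further down). A minimal sketch of the userspace side, assuming a ppc host kernel built with CONFIG_KVM_MPIC; the 0xe0000000 base address is an arbitrary example value, not anything mandated by this merge:

```c
/* Hedged sketch: probe for and create an in-kernel MPIC through the new
 * device control API, then configure it with KVM_SET_DEVICE_ATTR.
 * Assumes a ppc host with CONFIG_KVM_MPIC; the base address is made up. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int create_mpic(int vm_fd)
{
	struct kvm_create_device cd = {
		.type  = KVM_DEV_TYPE_FSL_MPIC_20,
		.flags = KVM_CREATE_DEVICE_TEST,	/* probe only */
	};

	/* KVM_CREATE_DEVICE_TEST asks whether the device type is
	 * supported without instantiating anything. */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	cd.flags = 0;					/* now really create it */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	/* Configure the device through its own fd; group/attr values here
	 * follow the MPIC device documentation added by this merge. */
	uint64_t base = 0xe0000000;			/* example value */
	struct kvm_device_attr attr = {
		.group = KVM_DEV_MPIC_GRP_MISC,
		.attr  = KVM_DEV_MPIC_BASE_ADDR,
		.addr  = (uintptr_t)&base,
	};
	if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
		return -1;

	return cd.fd;					/* device handle */
}
```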
* tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
  kvm: Add compat_ioctl for device control API
  KVM: x86: Account for failing enable_irq_window for NMI window request
  KVM: PPC: Book3S: Add API for in-kernel XICS emulation
  kvm/ppc/mpic: fix missing unlock in set_base_addr()
  kvm/ppc: Hold srcu lock when calling kvm_io_bus_read/write
  kvm/ppc/mpic: remove users
  kvm/ppc/mpic: fix mmio region lists when multiple guests used
  kvm/ppc/mpic: remove default routes from documentation
  kvm: KVM_CAP_IOMMU only available with device assignment
  ARM: KVM: iterate over all CPUs for CPU compatibility check
  KVM: ARM: Fix spelling in error message
  ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally
  KVM: ARM: Fix API documentation for ONE_REG encoding
  ARM: KVM: promote vfp_host pointer to generic host cpu context
  ARM: KVM: add architecture specific hook for capabilities
  ARM: KVM: perform HYP initialization for hotplugged CPUs
  ARM: KVM: switch to a dual-step HYP init code
  ARM: KVM: rework HYP page table freeing
  ARM: KVM: enforce maximum size for identity mapped code
  ARM: KVM: move to a KVM provided HYP idmap
  ...
Diffstat (limited to 'virt')
| -rw-r--r-- | virt/kvm/Kconfig | 3 |
| -rw-r--r-- | virt/kvm/assigned-dev.c | 43 |
| -rw-r--r-- | virt/kvm/eventfd.c | 53 |
| -rw-r--r-- | virt/kvm/ioapic.c | 163 |
| -rw-r--r-- | virt/kvm/ioapic.h | 27 |
| -rw-r--r-- | virt/kvm/irq_comm.c | 215 |
| -rw-r--r-- | virt/kvm/irqchip.c | 237 |
| -rw-r--r-- | virt/kvm/kvm_main.c | 258 |
8 files changed, 659 insertions, 340 deletions
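Much of the churn in the diff below threads a new line_status argument through kvm_set_irq() and the ioapic code so that level-triggered RTC injection can report coalescing (the rtc_status bookkeeping added to ioapic.c). Userspace sees this through the pre-existing KVM_IRQ_LINE_STATUS ioctl; a minimal sketch, assuming a VM fd with an in-kernel irqchip on x86, where GSI 8 is the RTC pin the new tracking targets:

```c
/* Hedged sketch: pulse a GSI and learn whether the interrupt was
 * delivered or coalesced. Assumes an x86 VM with in-kernel irqchip. */
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns >0 if delivered, 0 if coalesced, <0 on error. */
int pulse_gsi(int vm_fd, unsigned int gsi)
{
	struct kvm_irq_level irq = { .irq = gsi, .level = 1 };

	/* KVM_IRQ_LINE_STATUS behaves like KVM_IRQ_LINE but fills in
	 * irq.status with the delivery result. */
	if (ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &irq) < 0)
		return -1;
	int delivered = irq.status;

	irq.irq = gsi;		/* status is a union overlaying irq */
	irq.level = 0;		/* drop the line again */
	if (ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &irq) < 0)
		return -1;

	return delivered;
}
```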
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index d01b24b72c61..779262f59e25 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -6,6 +6,9 @@ config HAVE_KVM
 config HAVE_KVM_IRQCHIP
        bool
 
+config HAVE_KVM_IRQ_ROUTING
+       bool
+
 config HAVE_KVM_EVENTFD
        bool
        select EVENTFD
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 3642239252b0..8db43701016f 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -80,11 +80,12 @@ kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
 		spin_lock(&assigned_dev->intx_mask_lock);
 		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
 			kvm_set_irq(assigned_dev->kvm,
-				    assigned_dev->irq_source_id, vector, 1);
+				    assigned_dev->irq_source_id, vector, 1,
+				    false);
 		spin_unlock(&assigned_dev->intx_mask_lock);
 	} else
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-			    vector, 1);
+			    vector, 1, false);
 }
 
 static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
@@ -165,7 +166,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 		container_of(kian, struct kvm_assigned_dev_kernel,
 			     ack_notifier);
 
-	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
+	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
 
 	spin_lock(&dev->intx_mask_lock);
@@ -188,7 +189,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 
 		if (reassert)
 			kvm_set_irq(dev->kvm, dev->irq_source_id,
-				    dev->guest_irq, 1);
+				    dev->guest_irq, 1, false);
 	}
 
 	spin_unlock(&dev->intx_mask_lock);
@@ -202,7 +203,7 @@ static void deassign_guest_irq(struct kvm *kvm,
 						&assigned_dev->ack_notifier);
 
 	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 0);
+		    assigned_dev->guest_irq, 0, false);
 
 	if (assigned_dev->irq_source_id != -1)
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
@@ -901,7 +902,7 @@ static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
 	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
 		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
 			kvm_set_irq(match->kvm, match->irq_source_id,
-				    match->guest_irq, 0);
+				    match->guest_irq, 0, false);
 			/*
 			 * Masking at hardware-level is performed on demand,
 			 * i.e. when an IRQ actually arrives at the host.
@@ -982,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 		break;
 	}
-#ifdef KVM_CAP_IRQ_ROUTING
-	case KVM_SET_GSI_ROUTING: {
-		struct kvm_irq_routing routing;
-		struct kvm_irq_routing __user *urouting;
-		struct kvm_irq_routing_entry *entries;
-
-		r = -EFAULT;
-		if (copy_from_user(&routing, argp, sizeof(routing)))
-			goto out;
-		r = -EINVAL;
-		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
-			goto out;
-		if (routing.flags)
-			goto out;
-		r = -ENOMEM;
-		entries = vmalloc(routing.nr * sizeof(*entries));
-		if (!entries)
-			goto out;
-		r = -EFAULT;
-		urouting = argp;
-		if (copy_from_user(entries, urouting->entries,
-				   routing.nr * sizeof(*entries)))
-			goto out_free_irq_routing;
-		r = kvm_set_irq_routing(kvm, entries, routing.nr,
-					routing.flags);
-	out_free_irq_routing:
-		vfree(entries);
-		break;
-	}
-#endif /* KVM_CAP_IRQ_ROUTING */
 #ifdef __KVM_HAVE_MSIX
 	case KVM_ASSIGN_SET_MSIX_NR: {
 		struct kvm_assigned_msix_nr entry_nr;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index adb17f266b28..64ee720b75c7 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,7 +35,7 @@
 
 #include "iodev.h"
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 /*
  * --------------------------------------------------------------------
  * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -100,11 +100,13 @@ irqfd_inject(struct work_struct *work)
 	struct kvm *kvm = irqfd->kvm;
 
 	if (!irqfd->resampler) {
-		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
-		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
+				false);
+		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
+				false);
 	} else
 		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-			    irqfd->gsi, 1);
+			    irqfd->gsi, 1, false);
 }
 
 /*
@@ -121,7 +123,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 	resampler = container_of(kian, struct _irqfd_resampler, notifier);
 
 	kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-		    resampler->notifier.gsi, 0);
+		    resampler->notifier.gsi, 0, false);
 
 	rcu_read_lock();
 
@@ -146,7 +148,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
 		list_del(&resampler->link);
 		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
 		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-			    resampler->notifier.gsi, 0);
+			    resampler->notifier.gsi, 0, false);
 		kfree(resampler);
 	}
 
@@ -225,7 +227,8 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 		irq = rcu_dereference(irqfd->irq_entry);
 		/* An event has been signaled, inject an interrupt */
 		if (irq)
-			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+					false);
 		else
 			schedule_work(&irqfd->inject);
 		rcu_read_unlock();
@@ -430,7 +433,7 @@ fail:
 void
 kvm_eventfd_init(struct kvm *kvm)
 {
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
 	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -439,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 /*
  * shutdown any irqfd's that match fd+gsi
 */
@@ -543,7 +546,7 @@ void kvm_irq_routing_update(struct kvm *kvm,
 * aggregated from all vm* instances. We need our own isolated single-thread
 * queue to prevent deadlock against flushing the normal work-queue.
 */
-static int __init irqfd_module_init(void)
+int kvm_irqfd_init(void)
 {
	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
	if (!irqfd_cleanup_wq)
@@ -552,13 +555,10 @@ static int __init irqfd_module_init(void)
	return 0;
 }
 
-static void __exit irqfd_module_exit(void)
+void kvm_irqfd_exit(void)
 {
	destroy_workqueue(irqfd_cleanup_wq);
 }
-
-module_init(irqfd_module_init);
-module_exit(irqfd_module_exit);
 #endif
 
 /*
@@ -577,6 +577,7 @@ struct _ioeventfd {
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
+	u8                   bus_idx;
	bool                 wildcard;
 };
 
@@ -669,7 +670,8 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
	struct _ioeventfd *_p;
 
	list_for_each_entry(_p, &kvm->ioeventfds, list)
-		if (_p->addr == p->addr && _p->length == p->length &&
+		if (_p->bus_idx == p->bus_idx &&
+		    _p->addr == p->addr && _p->length == p->length &&
		    (_p->wildcard || p->wildcard ||
		     _p->datamatch == p->datamatch))
			return true;
@@ -677,15 +679,24 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
	return false;
 }
 
+static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
+{
+	if (flags & KVM_IOEVENTFD_FLAG_PIO)
+		return KVM_PIO_BUS;
+	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
+		return KVM_VIRTIO_CCW_NOTIFY_BUS;
+	return KVM_MMIO_BUS;
+}
+
 static int
 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
-	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
+	enum kvm_bus              bus_idx;
	struct _ioeventfd        *p;
	struct eventfd_ctx       *eventfd;
	int                       ret;
 
+	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized */
	switch (args->len) {
	case 1:
@@ -717,6 +728,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
+	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;
 
@@ -760,12 +772,12 @@ fail:
 static int
 kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
-	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
+	enum kvm_bus              bus_idx;
	struct _ioeventfd        *p, *tmp;
	struct eventfd_ctx       *eventfd;
	int                       ret = -ENOENT;
 
+	bus_idx = ioeventfd_bus_from_flags(args->flags);
	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);
@@ -775,7 +787,8 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
 
-		if (p->eventfd != eventfd  ||
+		if (p->bus_idx != bus_idx ||
+		    p->eventfd != eventfd  ||
		    p->addr != args->addr  ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 5ba005c00e2f..2d682977ce82 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,8 @@
 #else
 #define ioapic_debug(fmt, arg...)
 #endif
 
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
+static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+		bool line_status);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
					  unsigned long addr,
@@ -90,7 +91,80 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
	return result;
 }
 
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
+{
+	ioapic->rtc_status.pending_eoi = 0;
+	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
+}
+
+static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+	bool new_val, old_val;
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+	union kvm_ioapic_redirect_entry *e;
+
+	e = &ioapic->redirtbl[RTC_GSI];
+	if (!kvm_apic_match_dest(vcpu, NULL, 0,	e->fields.dest_id,
+				e->fields.dest_mode))
+		return;
+
+	new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
+	old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+
+	if (new_val == old_val)
+		return;
+
+	if (new_val) {
+		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi++;
+	} else {
+		__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi--;
+	}
+
+	WARN_ON(ioapic->rtc_status.pending_eoi < 0);
+}
+
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+
+	spin_lock(&ioapic->lock);
+	__rtc_irq_eoi_tracking_restore_one(vcpu);
+	spin_unlock(&ioapic->lock);
+}
+
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	if (RTC_GSI >= IOAPIC_NUM_PINS)
+		return;
+
+	rtc_irq_eoi_tracking_reset(ioapic);
+	kvm_for_each_vcpu(i, vcpu, ioapic->kvm)
+	    __rtc_irq_eoi_tracking_restore_one(vcpu);
+}
+
+static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
+{
+	if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map))
+		--ioapic->rtc_status.pending_eoi;
+
+	WARN_ON(ioapic->rtc_status.pending_eoi < 0);
+}
+
+static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
+{
+	if (ioapic->rtc_status.pending_eoi > 0)
+		return true; /* coalesced */
+
+	return false;
+}
+
+static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
+		bool line_status)
 {
	union kvm_ioapic_redirect_entry *pent;
	int injected = -1;
@@ -98,7 +172,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
	pent = &ioapic->redirtbl[idx];
 
	if (!pent->fields.mask) {
-		injected = ioapic_deliver(ioapic, idx);
+		injected = ioapic_deliver(ioapic, idx, line_status);
		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
			pent->fields.remote_irr = 1;
	}
@@ -119,41 +193,48 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic)
	smp_wmb();
 }
 
-void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-					u64 *eoi_exit_bitmap)
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+			u32 *tmr)
 {
	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
	union kvm_ioapic_redirect_entry *e;
-	struct kvm_lapic_irq irqe;
	int index;
 
	spin_lock(&ioapic->lock);
-	/* traverse ioapic entry to set eoi exit bitmap*/
	for (index = 0; index < IOAPIC_NUM_PINS; index++) {
		e = &ioapic->redirtbl[index];
		if (!e->fields.mask &&
			(e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
			 kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
-				 index))) {
-			irqe.dest_id = e->fields.dest_id;
-			irqe.vector = e->fields.vector;
-			irqe.dest_mode = e->fields.dest_mode;
-			irqe.delivery_mode = e->fields.delivery_mode << 8;
-			kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap);
+				 index) || index == RTC_GSI)) {
+			if (kvm_apic_match_dest(vcpu, NULL, 0,
+				e->fields.dest_id, e->fields.dest_mode)) {
+				__set_bit(e->fields.vector,
+					(unsigned long *)eoi_exit_bitmap);
+				if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+					__set_bit(e->fields.vector,
+						(unsigned long *)tmr);
+			}
		}
	}
	spin_unlock(&ioapic->lock);
 }
-EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap);
 
-void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm)
+#ifdef CONFIG_X86
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 
-	if (!kvm_apic_vid_enabled(kvm) || !ioapic)
+	if (!ioapic)
		return;
 
-	kvm_make_update_eoibitmap_request(kvm);
+	kvm_make_scan_ioapic_request(kvm);
 }
+#else
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+{
+	return;
+}
+#endif
 
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
@@ -195,16 +276,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
		    && ioapic->irr & (1 << index))
-			ioapic_service(ioapic, index);
-		kvm_ioapic_make_eoibitmap_request(ioapic->kvm);
+			ioapic_service(ioapic, index, false);
+		kvm_vcpu_request_scan_ioapic(ioapic->kvm);
		break;
	}
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
 {
	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
	struct kvm_lapic_irq irqe;
+	int ret;
 
	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
		     "vector=%x trig_mode=%x\n",
@@ -220,11 +302,19 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
	irqe.level = 1;
	irqe.shorthand = 0;
 
-	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
+	if (irq == RTC_GSI && line_status) {
+		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
+		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
+				ioapic->rtc_status.dest_map);
+		ioapic->rtc_status.pending_eoi = ret;
+	} else
+		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+
+	return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
-		       int level)
+		       int level, bool line_status)
 {
	u32 old_irr;
	u32 mask = 1 << irq;
@@ -244,13 +334,20 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
		ret = 1;
	} else {
		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+		if (irq == RTC_GSI && line_status &&
+			rtc_irq_check_coalesced(ioapic)) {
+			ret = 0; /* coalesced */
+			goto out;
+		}
		ioapic->irr |= mask;
		if ((edge && old_irr != ioapic->irr) ||
		    (!edge && !entry.fields.remote_irr))
-			ret = ioapic_service(ioapic, irq);
+			ret = ioapic_service(ioapic, irq, line_status);
		else
			ret = 0; /* report coalesced interrupt */
	}
+out:
	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
	spin_unlock(&ioapic->lock);
 
@@ -267,8 +364,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
	spin_unlock(&ioapic->lock);
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
-				     int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
+			struct kvm_ioapic *ioapic, int vector, int trigger_mode)
 {
	int i;
 
@@ -278,6 +375,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
		if (ent->fields.vector != vector)
			continue;
 
+		if (i == RTC_GSI)
+			rtc_irq_eoi(ioapic, vcpu);
		/*
		 * We are dropping lock while calling ack notifiers because ack
		 * notifier callbacks for assigned devices call into IOAPIC
@@ -296,7 +395,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
		ent->fields.remote_irr = 0;
		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
-			ioapic_service(ioapic, i);
+			ioapic_service(ioapic, i, false);
	}
 }
 
@@ -307,12 +406,12 @@ bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
	return test_bit(vector, ioapic->handled_vectors);
 }
 
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
-	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
 
	spin_lock(&ioapic->lock);
-	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+	__kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
	spin_unlock(&ioapic->lock);
 }
 
@@ -410,7 +509,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
		break;
 #ifdef	CONFIG_IA64
	case IOAPIC_REG_EOI:
-		__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
+		__kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
		break;
 #endif
 
@@ -431,6 +530,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
	ioapic->ioregsel = 0;
	ioapic->irr = 0;
	ioapic->id = 0;
+	rtc_irq_eoi_tracking_reset(ioapic);
	update_handled_vectors(ioapic);
 }
 
@@ -496,7 +596,8 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
	spin_lock(&ioapic->lock);
	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
	update_handled_vectors(ioapic);
-	kvm_ioapic_make_eoibitmap_request(kvm);
+	kvm_vcpu_request_scan_ioapic(kvm);
+	kvm_rtc_eoi_tracking_restore_all(ioapic);
	spin_unlock(&ioapic->lock);
	return 0;
 }
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 0400a466c50c..615d8c995c3c 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -34,6 +34,17 @@ struct kvm_vcpu;
 #define	IOAPIC_INIT			0x5
 #define	IOAPIC_EXTINT			0x7
 
+#ifdef CONFIG_X86
+#define RTC_GSI 8
+#else
+#define RTC_GSI -1U
+#endif
+
+struct rtc_status {
+	int pending_eoi;
+	DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
+};
+
 struct kvm_ioapic {
	u64 base_address;
	u32 ioregsel;
@@ -47,6 +58,7 @@ struct kvm_ioapic {
	void (*ack_notifier)(void *opaque, int irq);
	spinlock_t lock;
	DECLARE_BITMAP(handled_vectors, 256);
+	struct rtc_status rtc_status;
 };
 
 #ifdef DEBUG
@@ -67,24 +79,25 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
	return kvm->arch.vioapic;
 }
 
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
		int short_hand, int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
+			int trigger_mode);
 bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
-		       int level);
+		       int level, bool line_status);
 void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq);
+		struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm);
-void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-					u64 *eoi_exit_bitmap);
-
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+			u32 *tmr);
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e9073cf4d040..e2e6b4473a96 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -35,7 +35,8 @@
 
 #include "ioapic.h"
 
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
-			   struct kvm *kvm, int irq_source_id, int level)
+			   struct kvm *kvm, int irq_source_id, int level,
+			   bool line_status)
 {
 #ifdef CONFIG_X86
	struct kvm_pic *pic = pic_irqchip(kvm);
@@ -46,10 +47,12 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
 }
 
 static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
-			      struct kvm *kvm, int irq_source_id, int level)
+			      struct kvm *kvm, int irq_source_id, int level,
+			      bool line_status)
 {
	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level);
+	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
+				line_status);
 }
 
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
@@ -63,7 +66,7 @@ inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
 }
 
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq)
+		struct kvm_lapic_irq *irq, unsigned long *dest_map)
 {
	int i, r = -1;
	struct kvm_vcpu *vcpu, *lowest = NULL;
@@ -74,7 +77,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
		irq->delivery_mode = APIC_DM_FIXED;
	}
 
-	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r))
+	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
		return r;
 
	kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -88,7 +91,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
		if (!kvm_is_dm_lowest_prio(irq)) {
			if (r < 0)
				r = 0;
-			r += kvm_apic_set_irq(vcpu, irq);
+			r += kvm_apic_set_irq(vcpu, irq, dest_map);
		} else if (kvm_lapic_enabled(vcpu)) {
			if (!lowest)
				lowest = vcpu;
@@ -98,7 +101,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
	}
 
	if (lowest)
-		r = kvm_apic_set_irq(lowest, irq);
+		r = kvm_apic_set_irq(lowest, irq, dest_map);
 
	return r;
 }
@@ -121,7 +124,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 }
 
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-		struct kvm *kvm, int irq_source_id, int level)
+		struct kvm *kvm, int irq_source_id, int level, bool line_status)
 {
	struct kvm_lapic_irq irq;
 
@@ -130,7 +133,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 
	kvm_set_msi_irq(e, &irq);
 
-	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
+	return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
 }
 
 
@@ -142,63 +145,12 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
 
	kvm_set_msi_irq(e, &irq);
 
-	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r))
+	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
		return r;
	else
		return -EWOULDBLOCK;
 }
 
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
-{
-	struct kvm_kernel_irq_routing_entry route;
-
-	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
-		return -EINVAL;
-
-	route.msi.address_lo = msi->address_lo;
-	route.msi.address_hi = msi->address_hi;
-	route.msi.data = msi->data;
-
-	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
-}
-
-/*
- * Return value:
- *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
- *  = 0   Interrupt was coalesced (previous irq is still pending)
- *  > 0   Number of CPUs interrupt was delivered to
- */
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
-{
-	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0;
-	struct kvm_irq_routing_table *irq_rt;
-
-	trace_kvm_set_irq(irq, level, irq_source_id);
-
-	/* Not possible to detect if the guest uses the PIC or the
-	 * IOAPIC.  So set the bit in both. The guest will ignore
-	 * writes to the unused one.
-	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
-	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, &irq_rt->map[irq], link)
-			irq_set[i++] = *e;
-	rcu_read_unlock();
-
-	while(i--) {
-		int r;
-		r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level);
-		if (r < 0)
-			continue;
-
-		ret = r + ((ret < 0) ? 0 : ret);
-	}
-
-	return ret;
-}
-
 /*
  * Deliver an IRQ in an atomic context if we can, or return a failure,
  * user can retry in a process context.
@@ -236,63 +188,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
	return ret;
 }
 
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi;
-
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi) {
-				rcu_read_unlock();
-				return true;
-			}
-
-	rcu_read_unlock();
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi;
-
-	trace_kvm_ack_irq(irqchip, pin);
-
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
-	rcu_read_unlock();
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
-	kvm_ioapic_make_eoibitmap_request(kvm);
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				    struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_del_init_rcu(&kian->link);
-	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
-	kvm_ioapic_make_eoibitmap_request(kvm);
-}
-
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
@@ -376,34 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
	rcu_read_unlock();
 }
 
-void kvm_free_irq_routing(struct kvm *kvm)
-{
-	/* Called only during vm destruction. Nobody can use the pointer
-	   at this stage */
-	kfree(kvm->irq_routing);
-}
-
-static int setup_routing_entry(struct kvm_irq_routing_table *rt,
-			       struct kvm_kernel_irq_routing_entry *e,
-			       const struct kvm_irq_routing_entry *ue)
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
 {
	int r = -EINVAL;
	int delta;
	unsigned max_pin;
-	struct kvm_kernel_irq_routing_entry *ei;
 
-	/*
-	 * Do not allow GSI to be mapped to the same irqchip more than once.
-	 * Allow only one to one mapping between GSI and MSI.
-	 */
-	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
-		if (ei->type == KVM_IRQ_ROUTING_MSI ||
-		    ue->type == KVM_IRQ_ROUTING_MSI ||
-		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
-			return r;
-
-	e->gsi = ue->gsi;
-	e->type = ue->type;
	switch (ue->type) {
	case KVM_IRQ_ROUTING_IRQCHIP:
		delta = 0;
@@ -440,69 +315,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
		goto out;
	}
 
-	hlist_add_head(&e->link, &rt->map[e->gsi]);
	r = 0;
 out:
	return r;
 }
-
-int kvm_set_irq_routing(struct kvm *kvm,
-			const struct kvm_irq_routing_entry *ue,
-			unsigned nr,
-			unsigned flags)
-{
-	struct kvm_irq_routing_table *new, *old;
-	u32 i, j, nr_rt_entries = 0;
-	int r;
-
-	for (i = 0; i < nr; ++i) {
-		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
-			return -EINVAL;
-		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
-	}
-
-	nr_rt_entries += 1;
-
-	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
-		      + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
-		      GFP_KERNEL);
-
-	if (!new)
-		return -ENOMEM;
-
-	new->rt_entries = (void *)&new->map[nr_rt_entries];
-
-	new->nr_rt_entries = nr_rt_entries;
-	for (i = 0; i < 3; i++)
-		for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
-			new->chip[i][j] = -1;
-
-	for (i = 0; i < nr; ++i) {
-		r = -EINVAL;
-		if (ue->flags)
-			goto out;
-		r = setup_routing_entry(new, &new->rt_entries[i], ue);
-		if (r)
-			goto out;
-		++ue;
-	}
-
-	mutex_lock(&kvm->irq_lock);
-	old = kvm->irq_routing;
-	kvm_irq_routing_update(kvm, new);
-	mutex_unlock(&kvm->irq_lock);
-
-	synchronize_rcu();
-
-	new = old;
-	r = 0;
-
-out:
-	kfree(new);
-	return r;
-}
-
 #define IOAPIC_ROUTING_ENTRY(irq) \
	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
	  .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
new file mode 100644
index 000000000000..20dc9e4a8f6c
--- /dev/null
+++ b/virt/kvm/irqchip.c
@@ -0,0 +1,237 @@
+/*
+ * irqchip.c: Common API for in kernel interrupt controllers
+ * Copyright (c) 2007, Intel Corporation.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (c) 2013, Alexander Graf <agraf@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * This file is derived from virt/kvm/irq_comm.c.
+ *
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *   Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <trace/events/kvm.h>
+#include "irq.h"
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi;
+
+	rcu_read_lock();
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi) {
+				rcu_read_unlock();
+				return true;
+			}
+
+	rcu_read_unlock();
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi;
+
+	trace_kvm_ack_irq(irqchip, pin);
+
+	rcu_read_lock();
+	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
+	rcu_read_unlock();
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_del_init_rcu(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
+	synchronize_rcu();
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+	struct kvm_kernel_irq_routing_entry route;
+
+	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+		return -EINVAL;
+
+	route.msi.address_lo = msi->address_lo;
+	route.msi.address_hi = msi->address_hi;
+	route.msi.data = msi->data;
+
+	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
+}
+
+/*
+ * Return value:
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ */
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status)
+{
+	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
+	int ret = -1, i = 0;
+	struct kvm_irq_routing_table *irq_rt;
+
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	/* Not possible to detect if the guest uses the PIC or the
+	 * IOAPIC.  So set the bit in both. The guest will ignore
+	 * writes to the unused one.
+	 */
+	rcu_read_lock();
+	irq_rt = rcu_dereference(kvm->irq_routing);
+	if (irq < irq_rt->nr_rt_entries)
+		hlist_for_each_entry(e, &irq_rt->map[irq], link)
+			irq_set[i++] = *e;
+	rcu_read_unlock();
+
+	while(i--) {
+		int r;
+		r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
+				   line_status);
+		if (r < 0)
+			continue;
+
+		ret = r + ((ret < 0) ? 0 : ret);
+	}
+
+	return ret;
+}
+
+void kvm_free_irq_routing(struct kvm *kvm)
+{
+	/* Called only during vm destruction. Nobody can use the pointer
+	   at this stage */
+	kfree(kvm->irq_routing);
+}
+
+static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+			       struct kvm_kernel_irq_routing_entry *e,
+			       const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+	struct kvm_kernel_irq_routing_entry *ei;
+
+	/*
+	 * Do not allow GSI to be mapped to the same irqchip more than once.
+	 * Allow only one to one mapping between GSI and MSI.
+	 */
+	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
+		if (ei->type == KVM_IRQ_ROUTING_MSI ||
+		    ue->type == KVM_IRQ_ROUTING_MSI ||
+		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
+			return r;
+
+	e->gsi = ue->gsi;
+	e->type = ue->type;
+	r = kvm_set_routing_entry(rt, e, ue);
+	if (r)
+		goto out;
+
+	hlist_add_head(&e->link, &rt->map[e->gsi]);
+	r = 0;
+out:
+	return r;
+}
+
+int kvm_set_irq_routing(struct kvm *kvm,
+			const struct kvm_irq_routing_entry *ue,
+			unsigned nr,
+			unsigned flags)
+{
+	struct kvm_irq_routing_table *new, *old;
+	u32 i, j, nr_rt_entries = 0;
+	int r;
+
+	for (i = 0; i < nr; ++i) {
+		if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
+			return -EINVAL;
+		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
+	}
+
+	nr_rt_entries += 1;
+
+	new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+		      + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
+		      GFP_KERNEL);
+
+	if (!new)
+		return -ENOMEM;
+
+	new->rt_entries = (void *)&new->map[nr_rt_entries];
+
+	new->nr_rt_entries = nr_rt_entries;
+	for (i = 0; i < KVM_NR_IRQCHIPS; i++)
+		for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++)
+			new->chip[i][j] = -1;
+
+	for (i = 0; i < nr; ++i) {
+		r = -EINVAL;
+		if (ue->flags)
+			goto out;
+		r = setup_routing_entry(new, &new->rt_entries[i], ue);
+		if (r)
+			goto out;
+		++ue;
+	}
+
+	mutex_lock(&kvm->irq_lock);
+	old = kvm->irq_routing;
+	kvm_irq_routing_update(kvm, new);
+	mutex_unlock(&kvm->irq_lock);
+
+	synchronize_rcu();
+
+	new = old;
+	r = 0;
+
+out:
+	kfree(new);
+	return r;
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f18013f09e68..45f09362ee7b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -217,9 +217,9 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm)
	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
-void kvm_make_update_eoibitmap_request(struct kvm *kvm)
+void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP);
+	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@@ -244,6 +244,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
	kvm_vcpu_set_in_spin_loop(vcpu, false);
	kvm_vcpu_set_dy_eligible(vcpu, false);
+	vcpu->preempted = false;
 
	r = kvm_arch_vcpu_init(vcpu);
	if (r < 0)
@@ -503,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
	mutex_init(&kvm->irq_lock);
	mutex_init(&kvm->slots_lock);
	atomic_set(&kvm->users_count, 1);
+	INIT_LIST_HEAD(&kvm->devices);
 
	r = kvm_init_mmu_notifier(kvm);
	if (r)
@@ -580,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm)
	kfree(kvm->memslots);
 }
 
+static void kvm_destroy_devices(struct kvm *kvm)
+{
+	struct list_head *node, *tmp;
+
+	list_for_each_safe(node, tmp, &kvm->devices) {
+		struct kvm_device *dev =
+			list_entry(node, struct kvm_device, vm_node);
+
+		list_del(node);
+		dev->ops->destroy(dev);
+	}
+}
+
 static void kvm_destroy_vm(struct kvm *kvm)
 {
	int i;
@@ -599,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
	kvm_arch_flush_shadow_all(kvm);
 #endif
	kvm_arch_destroy_vm(kvm);
+	kvm_destroy_devices(kvm);
	kvm_free_physmem(kvm);
	cleanup_srcu_struct(&kvm->srcu);
	kvm_arch_free_vm(kvm);
@@ -719,24 +735,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 }
 
 /*
- * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
- * - create a new memory slot
- * - delete an existing memory slot
- * - modify an existing memory slot
- *   -- move it in the guest physical memory space
- *   -- just change its flags
- *
- * Since flags can be changed by some of these operations, the following
- * differentiation is the best we can do for __kvm_set_memory_region():
- */
-enum kvm_mr_change {
-	KVM_MR_CREATE,
-	KVM_MR_DELETE,
-	KVM_MR_MOVE,
-	KVM_MR_FLAGS_ONLY,
-};
-
-/*
  * Allocate some memory and give it an address in the guest physical address
  * space.
  *
@@ -745,8 +743,7 @@ enum kvm_mr_change {
  * Must be called holding mmap_sem for write.
  */
 int __kvm_set_memory_region(struct kvm *kvm,
-			    struct kvm_userspace_memory_region *mem,
-			    bool user_alloc)
+			    struct kvm_userspace_memory_region *mem)
 {
	int r;
	gfn_t base_gfn;
@@ -767,7 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	/* We can read the guest memory with __xxx_user() later on. */
-	if (user_alloc &&
+	if ((mem->slot < KVM_USER_MEM_SLOTS) &&
	    ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
	     !access_ok(VERIFY_WRITE,
			(void __user *)(unsigned long)mem->userspace_addr,
@@ -875,7 +872,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
		slots = old_memslots;
	}
 
-	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+	r = kvm_arch_prepare_memory_region(kvm, &new, mem, change);
	if (r)
		goto out_slots;
 
@@ -915,7 +912,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
	old_memslots = install_new_memslots(kvm, slots, &new);
 
-	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+	kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
	kvm_free_physmem_slot(&old, &new);
	kfree(old_memslots);
@@ -932,26 +929,23 @@ out:
 EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
 
 int kvm_set_memory_region(struct kvm *kvm,
-			  struct kvm_userspace_memory_region *mem,
-			  bool user_alloc)
+			  struct kvm_userspace_memory_region *mem)
 {
	int r;
 
	mutex_lock(&kvm->slots_lock);
-	r = __kvm_set_memory_region(kvm, mem, user_alloc);
+	r = __kvm_set_memory_region(kvm, mem);
	mutex_unlock(&kvm->slots_lock);
	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
 
 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-				   struct
-				   kvm_userspace_memory_region *mem,
-				   bool user_alloc)
+				   struct kvm_userspace_memory_region *mem)
 {
	if (mem->slot >= KVM_USER_MEM_SLOTS)
		return -EINVAL;
-	return kvm_set_memory_region(kvm, mem, user_alloc);
+	return kvm_set_memory_region(kvm, mem);
 }
 
 int kvm_get_dirty_log(struct kvm *kvm,
@@ -1099,7 +1093,7 @@ static int kvm_read_hva_atomic(void *data, void __user *hva, int len)
	return __copy_from_user_inatomic(data, hva, len);
 }
 
-int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
+static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
	unsigned long start, int write, struct page **page)
 {
	int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET;
@@ -1719,6 +1713,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
			smp_send_reschedule(cpu);
	put_cpu();
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
 void kvm_resched(struct kvm_vcpu *vcpu)
@@ -1816,6 +1811,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
				continue;
			} else if (pass && i > last_boosted_vcpu)
				break;
+			if (!ACCESS_ONCE(vcpu->preempted))
+				continue;
			if (vcpu == me)
				continue;
			if (waitqueue_active(&vcpu->wq))
@@ -2204,6 +2201,119 @@ out:
 }
 #endif
 
+static int kvm_device_ioctl_attr(struct kvm_device *dev,
+				 int (*accessor)(struct kvm_device *dev,
+						 struct kvm_device_attr *attr),
+				 unsigned long arg)
+{
+	struct kvm_device_attr attr;
+
+	if (!accessor)
+		return -EPERM;
+
+	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+		return -EFAULT;
+
+	return accessor(dev, &attr);
+}
+
+static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
+			     unsigned long arg)
+{
+	struct kvm_device *dev = filp->private_data;
+
+	switch (ioctl) {
+	case KVM_SET_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
+	case KVM_GET_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);
+	case KVM_HAS_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);
+	default:
+		if (dev->ops->ioctl)
+			return dev->ops->ioctl(dev, ioctl, arg);
+
+		return -ENOTTY;
+	}
+}
+
+static int kvm_device_release(struct inode *inode, struct file *filp)
+{
+	struct kvm_device *dev = filp->private_data;
+	struct kvm *kvm = dev->kvm;
+
+	kvm_put_kvm(kvm);
+	return 0;
+}
+
+static const struct file_operations kvm_device_fops = {
+	.unlocked_ioctl = kvm_device_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = kvm_device_ioctl,
+#endif
+	.release = kvm_device_release,
+};
+
+struct kvm_device *kvm_device_from_filp(struct file *filp)
+{
+	if (filp->f_op != &kvm_device_fops)
+		return NULL;
+
+	return filp->private_data;
+}
+
+static int kvm_ioctl_create_device(struct kvm *kvm,
+				   struct kvm_create_device *cd)
+{
+	struct kvm_device_ops *ops = NULL;
+	struct kvm_device *dev;
+	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+	int ret;
+
+	switch (cd->type) {
+#ifdef CONFIG_KVM_MPIC
+	case KVM_DEV_TYPE_FSL_MPIC_20:
+	case KVM_DEV_TYPE_FSL_MPIC_42:
+		ops = &kvm_mpic_ops;
+		break;
+#endif
+#ifdef CONFIG_KVM_XICS
+	case KVM_DEV_TYPE_XICS:
+		ops = &kvm_xics_ops;
+		break;
+#endif
+	default:
+		return -ENODEV;
+	}
+
+	if (test)
+		return 0;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->ops = ops;
+	dev->kvm = kvm;
+
+	ret = ops->create(dev, cd->type);
+	if (ret < 0) {
+		kfree(dev);
+		return ret;
+	}
+
+	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+	if (ret < 0) {
+		ops->destroy(dev);
+		return ret;
+	}
+
+	list_add(&dev->vm_node, &kvm->devices);
+	kvm_get_kvm(kvm);
+	cd->fd = ret;
+	return 0;
+}
+
 static long kvm_vm_ioctl(struct file *filp,
			   unsigned int ioctl, unsigned long arg)
 {
@@ -2225,7 +2335,7 @@ static long kvm_vm_ioctl(struct file *filp,
						sizeof kvm_userspace_mem))
			goto out;
 
-		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true);
+		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
		break;
	}
	case KVM_GET_DIRTY_LOG: {
@@ -2304,7 +2414,8 @@ static long kvm_vm_ioctl(struct file *filp,
		if (copy_from_user(&irq_event, argp, sizeof irq_event))
			goto out;
 
-		r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
+		r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
+					ioctl == KVM_IRQ_LINE_STATUS);
		if (r)
			goto out;
 
@@ -2318,6 +2429,54 @@ static long kvm_vm_ioctl(struct file *filp,
		break;
	}
 #endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_SET_GSI_ROUTING: {
+		struct kvm_irq_routing routing;
+		struct kvm_irq_routing __user *urouting;
+		struct kvm_irq_routing_entry *entries;
+
+		r = -EFAULT;
+		if (copy_from_user(&routing, argp, sizeof(routing)))
+			goto out;
+		r = -EINVAL;
+		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
+			goto out;
+		if (routing.flags)
+			goto out;
+		r = -ENOMEM;
+		entries = vmalloc(routing.nr * sizeof(*entries));
+		if (!entries)
+			goto out;
+		r = -EFAULT;
+		urouting = argp;
+		if (copy_from_user(entries, urouting->entries,
+				   routing.nr * sizeof(*entries)))
+			goto out_free_irq_routing;
+		r = kvm_set_irq_routing(kvm, entries, routing.nr,
+					routing.flags);
+	out_free_irq_routing:
+		vfree(entries);
+		break;
+	}
+#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
+	case KVM_CREATE_DEVICE: {
+		struct kvm_create_device cd;
+
+		r = -EFAULT;
+		if (copy_from_user(&cd, argp, sizeof(cd)))
+			goto out;
+
+		r = kvm_ioctl_create_device(kvm, &cd);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &cd, sizeof(cd)))
+			goto out;
+
+		r = 0;
+		break;
+	}
	default:
		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
		if (r == -ENOTTY)
@@ -2447,8 +2606,11 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 #ifdef CONFIG_HAVE_KVM_MSI
	case KVM_CAP_SIGNAL_MSI:
 #endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQFD_RESAMPLE:
+#endif
		return 1;
-#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
	case KVM_CAP_IRQ_ROUTING:
		return KVM_MAX_IRQ_ROUTES;
 #endif
@@ -2618,14 +2780,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
	return NOTIFY_OK;
 }
 
-
-asmlinkage void kvm_spurious_fault(void)
-{
-	/* Fault while not rebooting.  We want the trace. */
-	BUG();
-}
-EXPORT_SYMBOL_GPL(kvm_spurious_fault);
-
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
		      void *v)
 {
@@ -2658,7 +2812,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
	kfree(bus);
 }
 
-int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
+static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
 {
	const struct kvm_io_range *r1 = p1;
	const struct kvm_io_range *r2 = p2;
@@ -2670,7 +2824,7 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
	return 0;
 }
 
-int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
+static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
			  gpa_t addr, int len)
 {
	bus->range[bus->dev_count++] = (struct kvm_io_range) {
@@ -2685,7 +2839,7 @@ int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
	return 0;
 }
 
-int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
+static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
			     gpa_t addr, int len)
 {
	struct kvm_io_range *range, key;
@@ -2929,6 +3083,8 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
 static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+	if (vcpu->preempted)
+		vcpu->preempted = false;
 
	kvm_arch_vcpu_load(vcpu, cpu);
 }
@@ -2938,6 +3094,8 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 {
	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
+	if (current->state == TASK_RUNNING)
+		vcpu->preempted = true;
	kvm_arch_vcpu_put(vcpu);
 }
 
@@ -2947,6 +3105,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
	int r;
	int cpu;
 
+	r = kvm_irqfd_init();
+	if (r)
+		goto out_irqfd;
	r = kvm_arch_init(opaque);
	if (r)
		goto out_fail;
@@ -3027,6 +3188,8 @@ out_free_0a:
 out_free_0:
	kvm_arch_exit();
 out_fail:
+	kvm_irqfd_exit();
+out_irqfd:
	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init);
@@ -3043,6 +3206,7 @@ void kvm_exit(void)
	on_each_cpu(hardware_disable_nolock, NULL, 1);
	kvm_arch_hardware_unsetup();
	kvm_arch_exit();
+	kvm_irqfd_exit();
	free_cpumask_var(cpus_hardware_enabled);
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
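The ioeventfd changes above replace the old implicit PIO/MMIO choice with ioeventfd_bus_from_flags(), adding KVM_VIRTIO_CCW_NOTIFY_BUS for the s390 virtio-ccw ioeventfd support called out in the pull message. For reference, a hedged sketch of registering a wildcard MMIO ioeventfd from userspace; the doorbell address and 4-byte length are illustrative assumptions:

```c
/* Hedged sketch: guest writes to an MMIO doorbell complete in-kernel and
 * merely signal an eventfd. The doorbell address/length are made up. */
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int register_doorbell(int vm_fd, __u64 doorbell_gpa)
{
	int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (efd < 0)
		return -1;

	struct kvm_ioeventfd io = {
		.addr  = doorbell_gpa,
		.len   = 4,	/* 32-bit doorbell register */
		.fd    = efd,
		.flags = 0,	/* no flags: KVM_MMIO_BUS, wildcard match.
				 * KVM_IOEVENTFD_FLAG_PIO or, on s390,
				 * KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY
				 * would select the other buses. */
	};

	if (ioctl(vm_fd, KVM_IOEVENTFD, &io) < 0)
		return -1;	/* caller should close(efd) on failure */

	return efd;	/* poll/read this fd to observe guest kicks */
}
```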