From 6c7caebc26c5f0b618f0ef6b851e9f5f27c3812f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 13 Jun 2016 14:48:25 +0200 Subject: KVM: introduce kvm->created_vcpus The race between creating the irqchip and the first VCPU is currently fixed by checking the presence of an irqchip before updating kvm->online_vcpus, and undoing the whole VCPU creation if someone created the irqchip in the meanwhile. Instead, introduce a new field in struct kvm that will count VCPUs under a mutex, without the atomic access and memory ordering that we need elsewhere to protect the vcpus array. This also plugs the race and is more easily applicable in all similar circumstances. Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 02e98f3131bd..15b757ae64e1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2346,9 +2346,20 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (id >= KVM_MAX_VCPU_ID) return -EINVAL; + mutex_lock(&kvm->lock); + if (kvm->created_vcpus == KVM_MAX_VCPUS) { + mutex_unlock(&kvm->lock); + return -EINVAL; + } + + kvm->created_vcpus++; + mutex_unlock(&kvm->lock); + vcpu = kvm_arch_vcpu_create(kvm, id); - if (IS_ERR(vcpu)) - return PTR_ERR(vcpu); + if (IS_ERR(vcpu)) { + r = PTR_ERR(vcpu); + goto vcpu_decrement; + } preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); @@ -2361,10 +2372,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) r = -EINVAL; goto unlock_vcpu_destroy; } - if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { - r = -EINVAL; - goto unlock_vcpu_destroy; - } if (kvm_get_vcpu_by_id(kvm, id)) { r = -EEXIST; goto unlock_vcpu_destroy; @@ -2397,6 +2404,10 @@ unlock_vcpu_destroy: mutex_unlock(&kvm->lock); vcpu_destroy: kvm_arch_vcpu_destroy(vcpu); +vcpu_decrement: + mutex_lock(&kvm->lock); + 
kvm->created_vcpus--; + mutex_unlock(&kvm->lock); return r; } -- cgit v1.2.3 From 557abc40d121358883d2da8bc8bf976d6e8ec332 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 13 Jun 2016 14:50:04 +0200 Subject: KVM: remove kvm_vcpu_compatible The new created_vcpus field makes it possible to avoid the race between irqchip and VCPU creation in a much nicer way; just check under kvm->lock whether a VCPU has already been created. We can then remove KVM_APIC_ARCHITECTURE too, because at this point the symbol is only governing the default definition of kvm_vcpu_compatible. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 1 - arch/x86/kvm/x86.c | 11 +++-------- include/linux/kvm_host.h | 6 ------ virt/kvm/Kconfig | 3 --- virt/kvm/kvm_main.c | 4 ---- 5 files changed, 3 insertions(+), 22 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 639a6e34500c..ab8e32f7b9a8 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -32,7 +32,6 @@ config KVM select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD - select KVM_APIC_ARCHITECTURE select KVM_ASYNC_PF select USER_RETURN_NOTIFIER select KVM_MMIO diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bf227212aebb..ab2f45a50bb5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3774,7 +3774,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = -EEXIST; if (irqchip_in_kernel(kvm)) goto split_irqchip_unlock; - if (atomic_read(&kvm->online_vcpus)) + if (kvm->created_vcpus) goto split_irqchip_unlock; r = kvm_setup_empty_irq_routing(kvm); if (r) @@ -3839,7 +3839,7 @@ long kvm_arch_vm_ioctl(struct file *filp, if (kvm->arch.vpic) goto create_irqchip_unlock; r = -EINVAL; - if (atomic_read(&kvm->online_vcpus)) + if (kvm->created_vcpus) goto create_irqchip_unlock; r = -ENOMEM; vpic = kvm_create_pic(kvm); @@ -3995,7 +3995,7 @@ long kvm_arch_vm_ioctl(struct file *filp, case KVM_SET_BOOT_CPU_ID: r = 0; 
mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus) != 0) + if (kvm->created_vcpus) r = -EBUSY; else kvm->arch.bsp_vcpu_id = arg; @@ -7639,11 +7639,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; } -bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) -{ - return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu); -} - struct static_key kvm_no_apic_vcpu __read_mostly; EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 63c6ab30bc81..0640ee92b978 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1105,12 +1105,6 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) #endif /* CONFIG_HAVE_KVM_EVENTFD */ -#ifdef CONFIG_KVM_APIC_ARCHITECTURE -bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu); -#else -static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } -#endif - static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) { /* diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index e5d6108f5e85..b0cc1a34db27 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -16,9 +16,6 @@ config HAVE_KVM_EVENTFD bool select EVENTFD -config KVM_APIC_ARCHITECTURE - bool - config KVM_MMIO bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 15b757ae64e1..ef54b4c31792 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2368,10 +2368,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) goto vcpu_destroy; mutex_lock(&kvm->lock); - if (!kvm_vcpu_compatible(vcpu)) { - r = -EINVAL; - goto unlock_vcpu_destroy; - } if (kvm_get_vcpu_by_id(kvm, id)) { r = -EEXIST; goto unlock_vcpu_destroy; -- cgit v1.2.3 From 92176a8ede577d0ff78ab3298e06701f67ad5f51 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jun 2016 16:22:47 +0200 Subject: KVM: MMU: prepare to support mapping of VM_IO and VM_PFNMAP frames MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Handle VM_IO like VM_PFNMAP, as is common in the rest of Linux; extract the formula to convert hva->pfn into a new function, which will soon gain more capabilities. Cc: Xiao Guangrong Cc: Andrea Arcangeli Cc: Radim Krčmář Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ef54b4c31792..5aae59e00bef 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1442,6 +1442,16 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) return true; } +static int hva_to_pfn_remapped(struct vm_area_struct *vma, + unsigned long addr, bool *async, + bool write_fault, kvm_pfn_t *p_pfn) +{ + *p_pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff; + BUG_ON(!kvm_is_reserved_pfn(*p_pfn)); + return 0; +} + /* * Pin guest page in memory and return its pfn. * @addr: host virtual address which maps memory to the guest @@ -1461,7 +1471,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, { struct vm_area_struct *vma; kvm_pfn_t pfn = 0; - int npages; + int npages, r; /* we can do it either atomically or asynchronously, not both */ BUG_ON(atomic && async); @@ -1487,10 +1497,10 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, if (vma == NULL) pfn = KVM_PFN_ERR_FAULT; - else if ((vma->vm_flags & VM_PFNMAP)) { - pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - BUG_ON(!kvm_is_reserved_pfn(pfn)); + else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { + r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn); + if (r < 0) + pfn = KVM_PFN_ERR_FAULT; } else { if (async && vma_is_valid(vma, write_fault)) *async = true; -- cgit v1.2.3 From add6a0cd1c5ba51b201e1361b05a5df817083618 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jun 2016 17:51:18 +0200 Subject: KVM: MMU: try 
to fix up page faults before giving up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vGPU folks would like to trap the first access to a BAR by setting vm_ops on the VMAs produced by mmap-ing a VFIO device. The fault handler then can use remap_pfn_range to place some non-reserved pages in the VMA. This kind of VM_PFNMAP mapping is not handled by KVM, but follow_pfn and fixup_user_fault together help support it. The patch also supports VM_MIXEDMAP vmas where the pfns are not reserved and thus subject to reference counting. Cc: Xiao Guangrong Cc: Andrea Arcangeli Cc: Radim Krčmář Tested-by: Neo Jia Reported-by: Kirti Wankhede Signed-off-by: Paolo Bonzini --- mm/gup.c | 1 + virt/kvm/kvm_main.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 3 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/mm/gup.c b/mm/gup.c index c057784c8444..e3ac22f90fa4 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -720,6 +720,7 @@ retry: } return 0; } +EXPORT_SYMBOL_GPL(fixup_user_fault); static __always_inline long __get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5aae59e00bef..154b9ab459b0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1446,9 +1446,45 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool *async, bool write_fault, kvm_pfn_t *p_pfn) { - *p_pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - BUG_ON(!kvm_is_reserved_pfn(*p_pfn)); + unsigned long pfn; + int r; + + r = follow_pfn(vma, addr, &pfn); + if (r) { + /* + * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does + * not call the fault handler, so do it here. + */ + bool unlocked = false; + r = fixup_user_fault(current, current->mm, addr, + (write_fault ? 
FAULT_FLAG_WRITE : 0), + &unlocked); + if (unlocked) + return -EAGAIN; + if (r) + return r; + + r = follow_pfn(vma, addr, &pfn); + if (r) + return r; + + } + + + /* + * Get a reference here because callers of *hva_to_pfn* and + * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the + * returned pfn. This is only needed if the VMA has VM_MIXEDMAP + * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will + * simply do nothing for reserved pfns. + * + * Whoever called remap_pfn_range is also going to call e.g. + * unmap_mapping_range before the underlying pages are freed, + * causing a call to our MMU notifier. + */ + kvm_get_pfn(pfn); + + *p_pfn = pfn; return 0; } @@ -1493,12 +1529,15 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, goto exit; } +retry: vma = find_vma_intersection(current->mm, addr, addr + 1); if (vma == NULL) pfn = KVM_PFN_ERR_FAULT; else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn); + if (r == -EAGAIN) + goto retry; if (r < 0) pfn = KVM_PFN_ERR_FAULT; } else { -- cgit v1.2.3 From 8a39d00670f0792c1186e442e1dd28fe0326f2ee Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 15 Jul 2016 12:43:26 +0100 Subject: KVM: kvm_io_bus: Add kvm_io_bus_get_dev() call The kvm_io_bus framework is a nice place for holding information about various MMIO regions for kernel emulated devices. Add a call to retrieve the kvm_io_device structure which is associated with a certain MMIO address. This avoids duplicating kvm_io_bus' knowledge of MMIO regions without having to fake MMIO calls if a user needs the device a certain MMIO address belongs to. This will be used by the ITS emulation to get the associated ITS device when someone triggers an MSI via an ioctl from userspace. 
Signed-off-by: Andre Przywara Reviewed-by: Eric Auger Reviewed-by: Marc Zyngier Acked-by: Christoffer Dall Acked-by: Paolo Bonzini Tested-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0640ee92b978..614a98137c5f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -164,6 +164,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev); +struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, + gpa_t addr); #ifdef CONFIG_KVM_ASYNC_PF struct kvm_async_pf { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ef54b4c31792..bd2eb92c5d0e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3496,6 +3496,30 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, return r; } +struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, + gpa_t addr) +{ + struct kvm_io_bus *bus; + int dev_idx, srcu_idx; + struct kvm_io_device *iodev = NULL; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + + dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); + if (dev_idx < 0) + goto out_unlock; + + iodev = bus->range[dev_idx].dev; + +out_unlock: + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return iodev; +} +EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev); + static struct notifier_block kvm_cpu_notifier = { .notifier_call = kvm_cpu_hotplug, }; -- cgit v1.2.3