diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2017-01-30 09:00:02 +0100 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2017-01-30 09:00:02 +0100 |
commit | 858a0d7eb5300b5f620d98ab3c4b96c9d5f19131 (patch) | |
tree | 79ad2ecb357183384b172155e44df71c86e24e49 /virt | |
parent | e326ce013a8e851193eb337aafb1aa396c533a61 (diff) | |
parent | 47087eeb744c83482774e8f6dc20cf2b11fff53f (diff) | |
download | linux-858a0d7eb5300b5f620d98ab3c4b96c9d5f19131.tar.bz2 |
Merge back earlier suspend/hibernation changes for v4.11.
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 51 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/timer-sr.c | 33 | ||||
-rw-r--r-- | virt/kvm/arm/pmu.c | 8 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-init.c | 20 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c | 11 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-kvm-device.c | 2 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v2.c | 3 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c | 2 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c | 41 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.h | 14 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v2.c | 8 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 8 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 12 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h | 26 | ||||
-rw-r--r-- | virt/kvm/async_pf.c | 23 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 22 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 124 | ||||
-rw-r--r-- | virt/kvm/vfio.c | 18 | ||||
-rw-r--r-- | virt/lib/irqbypass.c | 4 |
19 files changed, 296 insertions, 134 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 27a1f6341d41..6a084cd57b88 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -24,6 +24,7 @@ #include <clocksource/arm_arch_timer.h> #include <asm/arch_timer.h> +#include <asm/kvm_hyp.h> #include <kvm/arm_vgic.h> #include <kvm/arm_arch_timer.h> @@ -39,7 +40,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.timer_cpu.active_cleared_last = false; } -static cycle_t kvm_phys_timer_read(void) +static u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); } @@ -89,9 +90,6 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) struct kvm_vcpu *vcpu; vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); - vcpu->arch.timer_cpu.armed = false; - - WARN_ON(!kvm_timer_should_fire(vcpu)); /* * If the vcpu is blocked we want to wake it up so that it will see @@ -102,7 +100,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) { - cycle_t cval, now; + u64 cval, now; cval = vcpu->arch.timer_cpu.cntv_cval; now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; @@ -155,7 +153,7 @@ static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - cycle_t cval, now; + u64 cval, now; if (!kvm_timer_irq_can_fire(vcpu)) return false; @@ -425,6 +423,11 @@ int kvm_timer_hyp_init(void) info = arch_timer_get_kvm_info(); timecounter = &info->timecounter; + if (!timecounter->cc) { + kvm_err("kvm_arch_timer: uninitialized timecounter\n"); + return -ENODEV; + } + if (info->virtual_irq <= 0) { kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", info->virtual_irq); @@ -451,7 +454,7 @@ int kvm_timer_hyp_init(void) kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, - "AP_KVM_ARM_TIMER_STARTING", kvm_timer_starting_cpu, + "kvm/arm/timer:starting", kvm_timer_starting_cpu, kvm_timer_dying_cpu); return err; } @@ -498,17 +501,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (ret) return ret; - - /* - * There is a potential race here between VCPUs starting for the first - * time, which may be enabling the timer multiple times. That doesn't - * hurt though, because we're just setting a variable to the same - * variable that it already was. The important thing is that all - * VCPUs have the enabled variable set, before entering the guest, if - * the arch timers are enabled. - */ - if (timecounter) - timer->enabled = 1; + timer->enabled = 1; return 0; } @@ -517,3 +510,25 @@ void kvm_timer_init(struct kvm *kvm) { kvm->arch.timer.cntvoff = kvm_phys_timer_read(); } + +/* + * On VHE system, we only need to configure trap on physical timer and counter + * accesses in EL0 and EL1 once, not for every world switch. + * The host kernel runs at EL2 with HCR_EL2.TGE == 1, + * and this makes those bits have no effect for the host kernel execution. + */ +void kvm_timer_init_vhe(void) +{ + /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */ + u32 cnthctl_shift = 10; + u64 val; + + /* + * Disallow physical timer access for the guest. + * Physical counter access is allowed. + */ + val = read_sysreg(cnthctl_el2); + val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift); + val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); + write_sysreg(val, cnthctl_el2); +} diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index 798866a8d875..63e28dd18bb0 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -35,10 +35,16 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) /* Disable the virtual timer */ write_sysreg_el0(0, cntv_ctl); - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); + /* + * We don't need to do this for VHE since the host kernel runs in EL2 + * with HCR_EL2.TGE ==1, which makes those bits have no impact. + */ + if (!has_vhe()) { + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); + } /* Clear cntvoff for the host */ write_sysreg(0, cntvoff_el2); @@ -50,14 +56,17 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; u64 val; - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); + /* Those bits are already configured at boot on VHE-system */ + if (!has_vhe()) { + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); + } if (timer->enabled) { write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 6e9c40eea208..69ccce308458 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) continue; type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) && (enable & BIT(i))) { reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; reg = lower_32_bits(reg); @@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, eventsel = data & ARMV8_PMU_EVTYPE_EVENT; /* Software increment event does't need to be backed by a perf event */ - if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && + select_idx != ARMV8_PMU_CYCLE_IDX) return; memset(&attr, 0, sizeof(struct perf_event_attr)); @@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ - attr.config = eventsel; + attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ? + ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; counter = kvm_pmu_get_counter_value(vcpu, select_idx); /* The initial sample period (overflow count) of an event. */ diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 8cebfbc19e90..c737ea0a310a 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -268,15 +268,11 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - mutex_lock(&kvm->lock); - dist->ready = false; dist->initialized = false; kfree(dist->spis); dist->nr_spis = 0; - - mutex_unlock(&kvm->lock); } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -286,7 +282,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) INIT_LIST_HEAD(&vgic_cpu->ap_list_head); } -void kvm_vgic_destroy(struct kvm *kvm) +/* To be called with kvm->lock held */ +static void __kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; int i; @@ -297,6 +294,13 @@ void kvm_vgic_destroy(struct kvm *kvm) kvm_vgic_vcpu_destroy(vcpu); } +void kvm_vgic_destroy(struct kvm *kvm) +{ + mutex_lock(&kvm->lock); + __kvm_vgic_destroy(kvm); + mutex_unlock(&kvm->lock); +} + /** * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest * is a GICv2. A GICv3 must be explicitly initialized by the guest using the @@ -348,6 +352,10 @@ int kvm_vgic_map_resources(struct kvm *kvm) ret = vgic_v2_map_resources(kvm); else ret = vgic_v3_map_resources(kvm); + + if (ret) + __kvm_vgic_destroy(kvm); + out: mutex_unlock(&kvm->lock); return ret; @@ -428,7 +436,7 @@ int kvm_vgic_hyp_init(void) } ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, - "AP_KVM_ARM_VGIC_INIT_STARTING", + "kvm/arm/vgic:starting", vgic_init_cpu_starting, vgic_init_cpu_dying); if (ret) { kvm_err("Cannot register vgic CPU notifier\n"); diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 4660a7d04eea..8c2b3cdcb2c5 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -632,21 +632,22 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) int index; u64 indirect_ptr; gfn_t gfn; + int esz = GITS_BASER_ENTRY_SIZE(baser); if (!(baser & GITS_BASER_INDIRECT)) { phys_addr_t addr; - if (id >= (l1_tbl_size / GITS_BASER_ENTRY_SIZE(baser))) + if (id >= (l1_tbl_size / esz)) return false; - addr = BASER_ADDRESS(baser) + id * GITS_BASER_ENTRY_SIZE(baser); + addr = BASER_ADDRESS(baser) + id * esz; gfn = addr >> PAGE_SHIFT; return kvm_is_visible_gfn(its->dev->kvm, gfn); } /* calculate and check the index into the 1st level */ - index = id / (SZ_64K / GITS_BASER_ENTRY_SIZE(baser)); + index = id / (SZ_64K / esz); if (index >= (l1_tbl_size / sizeof(u64))) return false; @@ -670,8 +671,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) indirect_ptr &= GENMASK_ULL(51, 16); /* Find the address of the actual entry */ - index = id % (SZ_64K / GITS_BASER_ENTRY_SIZE(baser)); - indirect_ptr += index * GITS_BASER_ENTRY_SIZE(baser); + index = id % (SZ_64K / esz); + indirect_ptr += index * esz; gfn = indirect_ptr >> PAGE_SHIFT; return kvm_is_visible_gfn(its->dev->kvm, gfn); diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index ce1f4ed9daf4..fbe87a63d250 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -221,11 +221,9 @@ int kvm_register_vgic_device(unsigned long type) ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3); -#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS if (ret) break; ret = kvm_vgic_register_its_device(); -#endif break; } diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index b44b359cbbad..78e34bc4d89b 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -129,6 +129,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, unsigned long val) { u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + u8 cpu_mask = GENMASK(atomic_read(&vcpu->kvm->online_vcpus) - 1, 0); int i; /* GICD_ITARGETSR[0-7] are read-only */ @@ -141,7 +142,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, spin_lock(&irq->irq_lock); - irq->targets = (val >> (i * 8)) & 0xff; + irq->targets = (val >> (i * 8)) & cpu_mask; target = irq->targets ? __ffs(irq->targets) : 0; irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 0d3c76a4208b..50f42f0f8c4f 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -42,7 +42,6 @@ u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, return reg | ((u64)val << lower); } -#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS bool vgic_has_its(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; @@ -52,7 +51,6 @@ bool vgic_has_its(struct kvm *kvm) return dist->has_its; } -#endif static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index e18b30ddcdce..ebe1b9fa3c4d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -453,17 +453,33 @@ struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) return container_of(dev, struct vgic_io_device, dev); } -static bool check_region(const struct vgic_register_region *region, +static bool check_region(const struct kvm *kvm, + const struct vgic_register_region *region, gpa_t addr, int len) { - if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) - return true; - if ((region->access_flags & VGIC_ACCESS_32bit) && - len == sizeof(u32) && !(addr & 3)) - return true; - if ((region->access_flags & VGIC_ACCESS_64bit) && - len == sizeof(u64) && !(addr & 7)) - return true; + int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + switch (len) { + case sizeof(u8): + flags = VGIC_ACCESS_8bit; + break; + case sizeof(u32): + flags = VGIC_ACCESS_32bit; + break; + case sizeof(u64): + flags = VGIC_ACCESS_64bit; + break; + default: + return false; + } + + if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) { + if (!region->bits_per_irq) + return true; + + /* Do we access a non-allocated IRQ? */ + return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs; + } return false; } @@ -477,7 +493,7 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, addr - iodev->base_addr); - if (!region || !check_region(region, addr, len)) { + if (!region || !check_region(vcpu->kvm, region, addr, len)) { memset(val, 0, len); return 0; } @@ -510,10 +526,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, addr - iodev->base_addr); - if (!region) - return 0; - - if (!check_region(region, addr, len)) + if (!region || !check_region(vcpu->kvm, region, addr, len)) return 0; switch (iodev->iodev_type) { diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 4c34d39d44a0..84961b4e4422 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -50,15 +50,15 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; #define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1) /* - * (addr & mask) gives us the byte offset for the INT ID, so we want to - * divide this with 'bytes per irq' to get the INT ID, which is given - * by '(bits) / 8'. But we do this with fixed-point-arithmetic and - * take advantage of the fact that division by a fraction equals - * multiplication with the inverted fraction, and scale up both the - * numerator and denominator with 8 to support at most 64 bits per IRQ: + * (addr & mask) gives us the _byte_ offset for the INT ID. + * We multiply this by 8 the get the _bit_ offset, then divide this by + * the number of bits to learn the actual INT ID. + * But instead of a division (which requires a "long long div" implementation), + * we shift by the binary logarithm of <bits>. + * This assumes that <bits> is a power of two. */ #define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ - 64 / (bits) / 8) + 8 >> ilog2(bits)) /* * Some VGIC registers store per-IRQ information, with a different number diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 0a063af40565..834137e7b83f 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -50,8 +50,10 @@ void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE); - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); + /* Only SPIs require notification */ + if (vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); } } @@ -291,8 +293,6 @@ int vgic_v2_map_resources(struct kvm *kvm) dist->ready = true; out: - if (ret) - kvm_vgic_destroy(kvm); return ret; } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 9f0dae397d9c..e6b03fd8c374 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -41,8 +41,10 @@ void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE); - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); + /* Only SPIs require notification */ + if (vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); } /* @@ -300,8 +302,6 @@ int vgic_v3_map_resources(struct kvm *kvm) dist->ready = true; out: - if (ret) - kvm_vgic_destroy(kvm); return ret; } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 2893d5ba523a..6440b56ec90e 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -273,6 +273,18 @@ retry: * no more work for us to do. */ spin_unlock(&irq->irq_lock); + + /* + * We have to kick the VCPU here, because we could be + * queueing an edge-triggered interrupt for which we + * get no EOI maintenance interrupt. In that case, + * while the IRQ is already on the VCPU's AP list, the + * VCPU could have EOI'ed the original interrupt and + * won't see this one until it exits for some other + * reason. + */ + if (vcpu) + kvm_vcpu_kick(vcpu); return false; } diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 9d9e014765a2..859f65c6e056 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -84,37 +84,11 @@ int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); -#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS int vgic_register_its_iodevs(struct kvm *kvm); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); void vgic_enable_lpis(struct kvm_vcpu *vcpu); int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); -#else -static inline int vgic_register_its_iodevs(struct kvm *kvm) -{ - return -ENODEV; -} - -static inline bool vgic_has_its(struct kvm *kvm) -{ - return false; -} - -static inline int kvm_vgic_register_its_device(void) -{ - return -ENODEV; -} - -static inline void vgic_enable_lpis(struct kvm_vcpu *vcpu) -{ -} - -static inline int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - return -ENODEV; -} -#endif int kvm_register_vgic_device(unsigned long type); int vgic_lazy_init(struct kvm *kvm); diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8035cc1eb955..3815e940fbea 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -76,21 +76,26 @@ static void async_pf_execute(struct work_struct *work) struct kvm_vcpu *vcpu = apf->vcpu; unsigned long addr = apf->addr; gva_t gva = apf->gva; + int locked = 1; might_sleep(); /* * This work is run asynchromously to the task which owns * mm and might be done in another context, so we must - * use FOLL_REMOTE. + * access remotely. */ - __get_user_pages_unlocked(NULL, mm, addr, 1, NULL, - FOLL_WRITE | FOLL_REMOTE); + down_read(&mm->mmap_sem); + get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL, + &locked); + if (locked) + up_read(&mm->mmap_sem); kvm_async_page_present_sync(vcpu, apf); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); + apf->vcpu = NULL; spin_unlock(&vcpu->async_pf.lock); /* @@ -113,6 +118,8 @@ static void async_pf_execute(struct work_struct *work) void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) { + spin_lock(&vcpu->async_pf.lock); + /* cancel outstanding work queue item */ while (!list_empty(&vcpu->async_pf.queue)) { struct kvm_async_pf *work = @@ -120,6 +127,14 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) typeof(*work), queue); list_del(&work->queue); + /* + * We know it's present in vcpu->async_pf.done, do + * nothing here. + */ + if (!work->vcpu) + continue; + + spin_unlock(&vcpu->async_pf.lock); #ifdef CONFIG_KVM_ASYNC_PF_SYNC flush_work(&work->work); #else @@ -129,9 +144,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) kmem_cache_free(async_pf_cache, work); } #endif + spin_lock(&vcpu->async_pf.lock); } - spin_lock(&vcpu->async_pf.lock); while (!list_empty(&vcpu->async_pf.done)) { struct kvm_async_pf *work = list_first_entry(&vcpu->async_pf.done, diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f397e9b20370..a29786dd9522 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -42,6 +42,7 @@ #ifdef CONFIG_HAVE_KVM_IRQFD +static struct workqueue_struct *irqfd_cleanup_wq; static void irqfd_inject(struct work_struct *work) @@ -167,7 +168,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) list_del_init(&irqfd->list); - schedule_work(&irqfd->shutdown); + queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } int __attribute__((weak)) kvm_arch_set_irq_inatomic( @@ -554,7 +555,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) * so that we guarantee there will not be any more interrupts on this * gsi once this deassign function returns. */ - flush_work(&irqfd->shutdown); + flush_workqueue(irqfd_cleanup_wq); return 0; } @@ -591,7 +592,7 @@ kvm_irqfd_release(struct kvm *kvm) * Block until we know all outstanding shutdown jobs have completed * since we do not take a kvm* reference. */ - flush_work(&irqfd->shutdown); + flush_workqueue(irqfd_cleanup_wq); } @@ -621,8 +622,23 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_unlock_irq(&kvm->irqfds.lock); } +/* + * create a host-wide workqueue for issuing deferred shutdown requests + * aggregated from all vm* instances. We need our own isolated + * queue to ease flushing work items when a VM exits. + */ +int kvm_irqfd_init(void) +{ + irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0); + if (!irqfd_cleanup_wq) + return -ENOMEM; + + return 0; +} + void kvm_irqfd_exit(void) { + destroy_workqueue(irqfd_cleanup_wq); } #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2907b7b78654..482612b4e496 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -53,7 +53,7 @@ #include <asm/processor.h> #include <asm/io.h> #include <asm/ioctl.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/pgtable.h> #include "coalesced_mmio.h" @@ -70,16 +70,19 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); /* Architectures should define their poll value according to the halt latency */ -static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; +unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); +EXPORT_SYMBOL_GPL(halt_poll_ns); /* Default doubles per-vcpu halt_poll_ns. */ -static unsigned int halt_poll_ns_grow = 2; +unsigned int halt_poll_ns_grow = 2; module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); +EXPORT_SYMBOL_GPL(halt_poll_ns_grow); /* Default resets per-vcpu halt_poll_ns . */ -static unsigned int halt_poll_ns_shrink; +unsigned int halt_poll_ns_shrink; module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); +EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); /* * Ordering of locks: @@ -595,7 +598,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) stat_data->kvm = kvm; stat_data->offset = p->offset; kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - if (!debugfs_create_file(p->name, 0444, + if (!debugfs_create_file(p->name, 0644, kvm->debugfs_dentry, stat_data, stat_fops_per_vm[p->kind])) @@ -1415,13 +1418,12 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, npages = get_user_page_nowait(addr, write_fault, page); up_read(¤t->mm->mmap_sem); } else { - unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON; + unsigned int flags = FOLL_HWPOISON; if (write_fault) flags |= FOLL_WRITE; - npages = __get_user_pages_unlocked(current, current->mm, addr, 1, - page, flags); + npages = get_user_pages_unlocked(addr, 1, page, flags); } if (npages != 1) return npages; @@ -1972,30 +1974,38 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, } EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); -int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); int r; + gpa_t gpa = ghc->gpa + offset; - BUG_ON(len > ghc->len); + BUG_ON(len + offset > ghc->len); if (slots->generation != ghc->generation) kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, ghc->gpa, data, len); + return kvm_write_guest(kvm, gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; - r = __copy_to_user((void __user *)ghc->hva, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; - mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT); + mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT); return 0; } +EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); + +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) +{ + return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); +} EXPORT_SYMBOL_GPL(kvm_write_guest_cached); int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, @@ -2889,10 +2899,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm, ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); if (ret < 0) { - ops->destroy(dev); mutex_lock(&kvm->lock); list_del(&dev->vm_node); mutex_unlock(&kvm->lock); + ops->destroy(dev); return ret; } @@ -3661,11 +3671,23 @@ static int vm_stat_get_per_vm(void *data, u64 *val) return 0; } +static int vm_stat_clear_per_vm(void *data, u64 val) +{ + struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; + + if (val) + return -EINVAL; + + *(ulong *)((void *)stat_data->kvm + stat_data->offset) = 0; + + return 0; +} + static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file) { __simple_attr_check_format("%llu\n", 0ull); return kvm_debugfs_open(inode, file, vm_stat_get_per_vm, - NULL, "%llu\n"); + vm_stat_clear_per_vm, "%llu\n"); } static const struct file_operations vm_stat_get_per_vm_fops = { @@ -3691,11 +3713,26 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val) return 0; } +static int vcpu_stat_clear_per_vm(void *data, u64 val) +{ + int i; + struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; + struct kvm_vcpu *vcpu; + + if (val) + return -EINVAL; + + kvm_for_each_vcpu(i, vcpu, stat_data->kvm) + *(u64 *)((void *)vcpu + stat_data->offset) = 0; + + return 0; +} + static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file) { __simple_attr_check_format("%llu\n", 0ull); return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm, - NULL, "%llu\n"); + vcpu_stat_clear_per_vm, "%llu\n"); } static const struct file_operations vcpu_stat_get_per_vm_fops = { @@ -3730,7 +3767,26 @@ static int vm_stat_get(void *_offset, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); +static int vm_stat_clear(void *_offset, u64 val) +{ + unsigned offset = (long)_offset; + struct kvm *kvm; + struct kvm_stat_data stat_tmp = {.offset = offset}; + + if (val) + return -EINVAL; + + spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vm_stat_clear_per_vm((void *)&stat_tmp, 0); + } + spin_unlock(&kvm_lock); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, "%llu\n"); static int vcpu_stat_get(void *_offset, u64 *val) { @@ -3750,7 +3806,27 @@ static int vcpu_stat_get(void *_offset, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); +static int vcpu_stat_clear(void *_offset, u64 val) +{ + unsigned offset = (long)_offset; + struct kvm *kvm; + struct kvm_stat_data stat_tmp = {.offset = offset}; + + if (val) + return -EINVAL; + + spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); + } + spin_unlock(&kvm_lock); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear, + "%llu\n"); static const struct file_operations *stat_fops[] = { [KVM_STAT_VCPU] = &vcpu_stat_fops, @@ -3768,7 +3844,7 @@ static int kvm_init_debug(void) kvm_debugfs_num_entries = 0; for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir, + if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind])) goto out_dir; @@ -3844,7 +3920,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, * kvm_arch_init makes sure there's at most one caller * for architectures that support multiple implementations, * like intel and amd on x86. + * kvm_arch_init must be called before kvm_irqfd_init to avoid creating + * conflicts in case kvm is already setup for another implementation. */ + r = kvm_irqfd_init(); + if (r) + goto out_irqfd; if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { r = -ENOMEM; @@ -3863,7 +3944,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, goto out_free_1; } - r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING", + r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting", kvm_starting_cpu, kvm_dying_cpu); if (r) goto out_free_2; @@ -3926,6 +4007,7 @@ out_free_0a: free_cpumask_var(cpus_hardware_enabled); out_free_0: kvm_irqfd_exit(); +out_irqfd: kvm_arch_exit(); out_fail: return r; diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 1dd087da6f31..d32f239eb471 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -60,6 +60,19 @@ static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) symbol_put(vfio_group_put_external_user); } +static void kvm_vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm) +{ + void (*fn)(struct vfio_group *, struct kvm *); + + fn = symbol_get(vfio_group_set_kvm); + if (!fn) + return; + + fn(group, kvm); + + symbol_put(vfio_group_set_kvm); +} + static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group) { long (*fn)(struct vfio_group *, unsigned long); @@ -159,6 +172,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); + kvm_vfio_group_set_kvm(vfio_group, dev->kvm); + kvm_vfio_update_coherency(dev); return 0; @@ -196,6 +211,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); + kvm_vfio_group_set_kvm(vfio_group, NULL); + kvm_vfio_group_put_external_user(vfio_group); kvm_vfio_update_coherency(dev); @@ -240,6 +257,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) struct kvm_vfio_group *kvg, *tmp; list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { + kvm_vfio_group_set_kvm(kvg->vfio_group, NULL); kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node); kfree(kvg); diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c index 52abac4bb6a2..6d2fcd6fcb25 100644 --- a/virt/lib/irqbypass.c +++ b/virt/lib/irqbypass.c @@ -195,7 +195,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) mutex_lock(&lock); list_for_each_entry(tmp, &consumers, node) { - if (tmp->token == consumer->token) { + if (tmp->token == consumer->token || tmp == consumer) { mutex_unlock(&lock); module_put(THIS_MODULE); return -EBUSY; @@ -245,7 +245,7 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) mutex_lock(&lock); list_for_each_entry(tmp, &consumers, node) { - if (tmp->token != consumer->token) + if (tmp != consumer) continue; list_for_each_entry(producer, &producers, node) { |