summaryrefslogtreecommitdiffstats
path: root/virt
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2017-01-30 09:00:02 +0100
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2017-01-30 09:00:02 +0100
commit858a0d7eb5300b5f620d98ab3c4b96c9d5f19131 (patch)
tree79ad2ecb357183384b172155e44df71c86e24e49 /virt
parente326ce013a8e851193eb337aafb1aa396c533a61 (diff)
parent47087eeb744c83482774e8f6dc20cf2b11fff53f (diff)
downloadlinux-858a0d7eb5300b5f620d98ab3c4b96c9d5f19131.tar.bz2
Merge back earlier suspend/hibernation changes for v4.11.
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/arm/arch_timer.c51
-rw-r--r--virt/kvm/arm/hyp/timer-sr.c33
-rw-r--r--virt/kvm/arm/pmu.c8
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c20
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c11
-rw-r--r--virt/kvm/arm/vgic/vgic-kvm-device.c2
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v2.c3
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c2
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c41
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.h14
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c8
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c8
-rw-r--r--virt/kvm/arm/vgic/vgic.c12
-rw-r--r--virt/kvm/arm/vgic/vgic.h26
-rw-r--r--virt/kvm/async_pf.c23
-rw-r--r--virt/kvm/eventfd.c22
-rw-r--r--virt/kvm/kvm_main.c124
-rw-r--r--virt/kvm/vfio.c18
-rw-r--r--virt/lib/irqbypass.c4
19 files changed, 296 insertions, 134 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 27a1f6341d41..6a084cd57b88 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -24,6 +24,7 @@
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
+#include <asm/kvm_hyp.h>
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
@@ -39,7 +40,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->arch.timer_cpu.active_cleared_last = false;
}
-static cycle_t kvm_phys_timer_read(void)
+static u64 kvm_phys_timer_read(void)
{
return timecounter->cc->read(timecounter->cc);
}
@@ -89,9 +90,6 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
struct kvm_vcpu *vcpu;
vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
- vcpu->arch.timer_cpu.armed = false;
-
- WARN_ON(!kvm_timer_should_fire(vcpu));
/*
* If the vcpu is blocked we want to wake it up so that it will see
@@ -102,7 +100,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
{
- cycle_t cval, now;
+ u64 cval, now;
cval = vcpu->arch.timer_cpu.cntv_cval;
now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
@@ -155,7 +153,7 @@ static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- cycle_t cval, now;
+ u64 cval, now;
if (!kvm_timer_irq_can_fire(vcpu))
return false;
@@ -425,6 +423,11 @@ int kvm_timer_hyp_init(void)
info = arch_timer_get_kvm_info();
timecounter = &info->timecounter;
+ if (!timecounter->cc) {
+ kvm_err("kvm_arch_timer: uninitialized timecounter\n");
+ return -ENODEV;
+ }
+
if (info->virtual_irq <= 0) {
kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
info->virtual_irq);
@@ -451,7 +454,7 @@ int kvm_timer_hyp_init(void)
kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
- "AP_KVM_ARM_TIMER_STARTING", kvm_timer_starting_cpu,
+ "kvm/arm/timer:starting", kvm_timer_starting_cpu,
kvm_timer_dying_cpu);
return err;
}
@@ -498,17 +501,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (ret)
return ret;
-
- /*
- * There is a potential race here between VCPUs starting for the first
- * time, which may be enabling the timer multiple times. That doesn't
- * hurt though, because we're just setting a variable to the same
- * variable that it already was. The important thing is that all
- * VCPUs have the enabled variable set, before entering the guest, if
- * the arch timers are enabled.
- */
- if (timecounter)
- timer->enabled = 1;
+ timer->enabled = 1;
return 0;
}
@@ -517,3 +510,25 @@ void kvm_timer_init(struct kvm *kvm)
{
kvm->arch.timer.cntvoff = kvm_phys_timer_read();
}
+
+/*
+ * On VHE system, we only need to configure trap on physical timer and counter
+ * accesses in EL0 and EL1 once, not for every world switch.
+ * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
+ * and this makes those bits have no effect for the host kernel execution.
+ */
+void kvm_timer_init_vhe(void)
+{
+ /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */
+ u32 cnthctl_shift = 10;
+ u64 val;
+
+ /*
+ * Disallow physical timer access for the guest.
+ * Physical counter access is allowed.
+ */
+ val = read_sysreg(cnthctl_el2);
+ val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift);
+ val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
+ write_sysreg(val, cnthctl_el2);
+}
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index 798866a8d875..63e28dd18bb0 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -35,10 +35,16 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
/* Disable the virtual timer */
write_sysreg_el0(0, cntv_ctl);
- /* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
- write_sysreg(val, cnthctl_el2);
+ /*
+ * We don't need to do this for VHE since the host kernel runs in EL2
+ * with HCR_EL2.TGE ==1, which makes those bits have no impact.
+ */
+ if (!has_vhe()) {
+ /* Allow physical timer/counter access for the host */
+ val = read_sysreg(cnthctl_el2);
+ val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+ write_sysreg(val, cnthctl_el2);
+ }
/* Clear cntvoff for the host */
write_sysreg(0, cntvoff_el2);
@@ -50,14 +56,17 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
u64 val;
- /*
- * Disallow physical timer access for the guest
- * Physical counter access is allowed
- */
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
+ /* Those bits are already configured at boot on VHE-system */
+ if (!has_vhe()) {
+ /*
+ * Disallow physical timer access for the guest
+ * Physical counter access is allowed
+ */
+ val = read_sysreg(cnthctl_el2);
+ val &= ~CNTHCTL_EL1PCEN;
+ val |= CNTHCTL_EL1PCTEN;
+ write_sysreg(val, cnthctl_el2);
+ }
if (timer->enabled) {
write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 6e9c40eea208..69ccce308458 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
continue;
type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
& ARMV8_PMU_EVTYPE_EVENT;
- if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+ if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
&& (enable & BIT(i))) {
reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
reg = lower_32_bits(reg);
@@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
/* Software increment event does't need to be backed by a perf event */
- if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+ if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
+ select_idx != ARMV8_PMU_CYCLE_IDX)
return;
memset(&attr, 0, sizeof(struct perf_event_attr));
@@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
- attr.config = eventsel;
+ attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
+ ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
counter = kvm_pmu_get_counter_value(vcpu, select_idx);
/* The initial sample period (overflow count) of an event. */
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 8cebfbc19e90..c737ea0a310a 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -268,15 +268,11 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- mutex_lock(&kvm->lock);
-
dist->ready = false;
dist->initialized = false;
kfree(dist->spis);
dist->nr_spis = 0;
-
- mutex_unlock(&kvm->lock);
}
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -286,7 +282,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
}
-void kvm_vgic_destroy(struct kvm *kvm)
+/* To be called with kvm->lock held */
+static void __kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
int i;
@@ -297,6 +294,13 @@ void kvm_vgic_destroy(struct kvm *kvm)
kvm_vgic_vcpu_destroy(vcpu);
}
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ __kvm_vgic_destroy(kvm);
+ mutex_unlock(&kvm->lock);
+}
+
/**
* vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
* is a GICv2. A GICv3 must be explicitly initialized by the guest using the
@@ -348,6 +352,10 @@ int kvm_vgic_map_resources(struct kvm *kvm)
ret = vgic_v2_map_resources(kvm);
else
ret = vgic_v3_map_resources(kvm);
+
+ if (ret)
+ __kvm_vgic_destroy(kvm);
+
out:
mutex_unlock(&kvm->lock);
return ret;
@@ -428,7 +436,7 @@ int kvm_vgic_hyp_init(void)
}
ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
- "AP_KVM_ARM_VGIC_INIT_STARTING",
+ "kvm/arm/vgic:starting",
vgic_init_cpu_starting, vgic_init_cpu_dying);
if (ret) {
kvm_err("Cannot register vgic CPU notifier\n");
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 4660a7d04eea..8c2b3cdcb2c5 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -632,21 +632,22 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
int index;
u64 indirect_ptr;
gfn_t gfn;
+ int esz = GITS_BASER_ENTRY_SIZE(baser);
if (!(baser & GITS_BASER_INDIRECT)) {
phys_addr_t addr;
- if (id >= (l1_tbl_size / GITS_BASER_ENTRY_SIZE(baser)))
+ if (id >= (l1_tbl_size / esz))
return false;
- addr = BASER_ADDRESS(baser) + id * GITS_BASER_ENTRY_SIZE(baser);
+ addr = BASER_ADDRESS(baser) + id * esz;
gfn = addr >> PAGE_SHIFT;
return kvm_is_visible_gfn(its->dev->kvm, gfn);
}
/* calculate and check the index into the 1st level */
- index = id / (SZ_64K / GITS_BASER_ENTRY_SIZE(baser));
+ index = id / (SZ_64K / esz);
if (index >= (l1_tbl_size / sizeof(u64)))
return false;
@@ -670,8 +671,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
indirect_ptr &= GENMASK_ULL(51, 16);
/* Find the address of the actual entry */
- index = id % (SZ_64K / GITS_BASER_ENTRY_SIZE(baser));
- indirect_ptr += index * GITS_BASER_ENTRY_SIZE(baser);
+ index = id % (SZ_64K / esz);
+ indirect_ptr += index * esz;
gfn = indirect_ptr >> PAGE_SHIFT;
return kvm_is_visible_gfn(its->dev->kvm, gfn);
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index ce1f4ed9daf4..fbe87a63d250 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -221,11 +221,9 @@ int kvm_register_vgic_device(unsigned long type)
ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
KVM_DEV_TYPE_ARM_VGIC_V3);
-#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS
if (ret)
break;
ret = kvm_vgic_register_its_device();
-#endif
break;
}
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index b44b359cbbad..78e34bc4d89b 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -129,6 +129,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
+ u8 cpu_mask = GENMASK(atomic_read(&vcpu->kvm->online_vcpus) - 1, 0);
int i;
/* GICD_ITARGETSR[0-7] are read-only */
@@ -141,7 +142,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
spin_lock(&irq->irq_lock);
- irq->targets = (val >> (i * 8)) & 0xff;
+ irq->targets = (val >> (i * 8)) & cpu_mask;
target = irq->targets ? __ffs(irq->targets) : 0;
irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 0d3c76a4208b..50f42f0f8c4f 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -42,7 +42,6 @@ u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len,
return reg | ((u64)val << lower);
}
-#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS
bool vgic_has_its(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
@@ -52,7 +51,6 @@ bool vgic_has_its(struct kvm *kvm)
return dist->has_its;
}
-#endif
static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index e18b30ddcdce..ebe1b9fa3c4d 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -453,17 +453,33 @@ struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
return container_of(dev, struct vgic_io_device, dev);
}
-static bool check_region(const struct vgic_register_region *region,
+static bool check_region(const struct kvm *kvm,
+ const struct vgic_register_region *region,
gpa_t addr, int len)
{
- if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
- return true;
- if ((region->access_flags & VGIC_ACCESS_32bit) &&
- len == sizeof(u32) && !(addr & 3))
- return true;
- if ((region->access_flags & VGIC_ACCESS_64bit) &&
- len == sizeof(u64) && !(addr & 7))
- return true;
+ int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+
+ switch (len) {
+ case sizeof(u8):
+ flags = VGIC_ACCESS_8bit;
+ break;
+ case sizeof(u32):
+ flags = VGIC_ACCESS_32bit;
+ break;
+ case sizeof(u64):
+ flags = VGIC_ACCESS_64bit;
+ break;
+ default:
+ return false;
+ }
+
+ if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) {
+ if (!region->bits_per_irq)
+ return true;
+
+ /* Do we access a non-allocated IRQ? */
+ return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs;
+ }
return false;
}
@@ -477,7 +493,7 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
addr - iodev->base_addr);
- if (!region || !check_region(region, addr, len)) {
+ if (!region || !check_region(vcpu->kvm, region, addr, len)) {
memset(val, 0, len);
return 0;
}
@@ -510,10 +526,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
addr - iodev->base_addr);
- if (!region)
- return 0;
-
- if (!check_region(region, addr, len))
+ if (!region || !check_region(vcpu->kvm, region, addr, len))
return 0;
switch (iodev->iodev_type) {
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 4c34d39d44a0..84961b4e4422 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -50,15 +50,15 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
/*
- * (addr & mask) gives us the byte offset for the INT ID, so we want to
- * divide this with 'bytes per irq' to get the INT ID, which is given
- * by '(bits) / 8'. But we do this with fixed-point-arithmetic and
- * take advantage of the fact that division by a fraction equals
- * multiplication with the inverted fraction, and scale up both the
- * numerator and denominator with 8 to support at most 64 bits per IRQ:
+ * (addr & mask) gives us the _byte_ offset for the INT ID.
+ * We multiply this by 8 the get the _bit_ offset, then divide this by
+ * the number of bits to learn the actual INT ID.
+ * But instead of a division (which requires a "long long div" implementation),
+ * we shift by the binary logarithm of <bits>.
+ * This assumes that <bits> is a power of two.
*/
#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
- 64 / (bits) / 8)
+ 8 >> ilog2(bits))
/*
* Some VGIC registers store per-IRQ information, with a different number
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 0a063af40565..834137e7b83f 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -50,8 +50,10 @@ void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE);
- kvm_notify_acked_irq(vcpu->kvm, 0,
- intid - VGIC_NR_PRIVATE_IRQS);
+ /* Only SPIs require notification */
+ if (vgic_valid_spi(vcpu->kvm, intid))
+ kvm_notify_acked_irq(vcpu->kvm, 0,
+ intid - VGIC_NR_PRIVATE_IRQS);
}
}
@@ -291,8 +293,6 @@ int vgic_v2_map_resources(struct kvm *kvm)
dist->ready = true;
out:
- if (ret)
- kvm_vgic_destroy(kvm);
return ret;
}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 9f0dae397d9c..e6b03fd8c374 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -41,8 +41,10 @@ void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE);
- kvm_notify_acked_irq(vcpu->kvm, 0,
- intid - VGIC_NR_PRIVATE_IRQS);
+ /* Only SPIs require notification */
+ if (vgic_valid_spi(vcpu->kvm, intid))
+ kvm_notify_acked_irq(vcpu->kvm, 0,
+ intid - VGIC_NR_PRIVATE_IRQS);
}
/*
@@ -300,8 +302,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
dist->ready = true;
out:
- if (ret)
- kvm_vgic_destroy(kvm);
return ret;
}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 2893d5ba523a..6440b56ec90e 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -273,6 +273,18 @@ retry:
* no more work for us to do.
*/
spin_unlock(&irq->irq_lock);
+
+ /*
+ * We have to kick the VCPU here, because we could be
+ * queueing an edge-triggered interrupt for which we
+ * get no EOI maintenance interrupt. In that case,
+ * while the IRQ is already on the VCPU's AP list, the
+ * VCPU could have EOI'ed the original interrupt and
+ * won't see this one until it exits for some other
+ * reason.
+ */
+ if (vcpu)
+ kvm_vcpu_kick(vcpu);
return false;
}
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 9d9e014765a2..859f65c6e056 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -84,37 +84,11 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
-#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS
int vgic_register_its_iodevs(struct kvm *kvm);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
void vgic_enable_lpis(struct kvm_vcpu *vcpu);
int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
-#else
-static inline int vgic_register_its_iodevs(struct kvm *kvm)
-{
- return -ENODEV;
-}
-
-static inline bool vgic_has_its(struct kvm *kvm)
-{
- return false;
-}
-
-static inline int kvm_vgic_register_its_device(void)
-{
- return -ENODEV;
-}
-
-static inline void vgic_enable_lpis(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
-{
- return -ENODEV;
-}
-#endif
int kvm_register_vgic_device(unsigned long type);
int vgic_lazy_init(struct kvm *kvm);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8035cc1eb955..3815e940fbea 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -76,21 +76,26 @@ static void async_pf_execute(struct work_struct *work)
struct kvm_vcpu *vcpu = apf->vcpu;
unsigned long addr = apf->addr;
gva_t gva = apf->gva;
+ int locked = 1;
might_sleep();
/*
* This work is run asynchromously to the task which owns
* mm and might be done in another context, so we must
- * use FOLL_REMOTE.
+ * access remotely.
*/
- __get_user_pages_unlocked(NULL, mm, addr, 1, NULL,
- FOLL_WRITE | FOLL_REMOTE);
+ down_read(&mm->mmap_sem);
+ get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL,
+ &locked);
+ if (locked)
+ up_read(&mm->mmap_sem);
kvm_async_page_present_sync(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
+ apf->vcpu = NULL;
spin_unlock(&vcpu->async_pf.lock);
/*
@@ -113,6 +118,8 @@ static void async_pf_execute(struct work_struct *work)
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
+ spin_lock(&vcpu->async_pf.lock);
+
/* cancel outstanding work queue item */
while (!list_empty(&vcpu->async_pf.queue)) {
struct kvm_async_pf *work =
@@ -120,6 +127,14 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
typeof(*work), queue);
list_del(&work->queue);
+ /*
+ * We know it's present in vcpu->async_pf.done, do
+ * nothing here.
+ */
+ if (!work->vcpu)
+ continue;
+
+ spin_unlock(&vcpu->async_pf.lock);
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
flush_work(&work->work);
#else
@@ -129,9 +144,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
kmem_cache_free(async_pf_cache, work);
}
#endif
+ spin_lock(&vcpu->async_pf.lock);
}
- spin_lock(&vcpu->async_pf.lock);
while (!list_empty(&vcpu->async_pf.done)) {
struct kvm_async_pf *work =
list_first_entry(&vcpu->async_pf.done,
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index f397e9b20370..a29786dd9522 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -42,6 +42,7 @@
#ifdef CONFIG_HAVE_KVM_IRQFD
+static struct workqueue_struct *irqfd_cleanup_wq;
static void
irqfd_inject(struct work_struct *work)
@@ -167,7 +168,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
list_del_init(&irqfd->list);
- schedule_work(&irqfd->shutdown);
+ queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}
int __attribute__((weak)) kvm_arch_set_irq_inatomic(
@@ -554,7 +555,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
* so that we guarantee there will not be any more interrupts on this
* gsi once this deassign function returns.
*/
- flush_work(&irqfd->shutdown);
+ flush_workqueue(irqfd_cleanup_wq);
return 0;
}
@@ -591,7 +592,7 @@ kvm_irqfd_release(struct kvm *kvm)
* Block until we know all outstanding shutdown jobs have completed
* since we do not take a kvm* reference.
*/
- flush_work(&irqfd->shutdown);
+ flush_workqueue(irqfd_cleanup_wq);
}
@@ -621,8 +622,23 @@ void kvm_irq_routing_update(struct kvm *kvm)
spin_unlock_irq(&kvm->irqfds.lock);
}
+/*
+ * create a host-wide workqueue for issuing deferred shutdown requests
+ * aggregated from all vm* instances. We need our own isolated
+ * queue to ease flushing work items when a VM exits.
+ */
+int kvm_irqfd_init(void)
+{
+ irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
+ if (!irqfd_cleanup_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
void kvm_irqfd_exit(void)
{
+ destroy_workqueue(irqfd_cleanup_wq);
}
#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2907b7b78654..482612b4e496 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -53,7 +53,7 @@
#include <asm/processor.h>
#include <asm/io.h>
#include <asm/ioctl.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/pgtable.h>
#include "coalesced_mmio.h"
@@ -70,16 +70,19 @@ MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
/* Architectures should define their poll value according to the halt latency */
-static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
+unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(halt_poll_ns);
/* Default doubles per-vcpu halt_poll_ns. */
-static unsigned int halt_poll_ns_grow = 2;
+unsigned int halt_poll_ns_grow = 2;
module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
/* Default resets per-vcpu halt_poll_ns . */
-static unsigned int halt_poll_ns_shrink;
+unsigned int halt_poll_ns_shrink;
module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
/*
* Ordering of locks:
@@ -595,7 +598,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
stat_data->kvm = kvm;
stat_data->offset = p->offset;
kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
- if (!debugfs_create_file(p->name, 0444,
+ if (!debugfs_create_file(p->name, 0644,
kvm->debugfs_dentry,
stat_data,
stat_fops_per_vm[p->kind]))
@@ -1415,13 +1418,12 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
npages = get_user_page_nowait(addr, write_fault, page);
up_read(&current->mm->mmap_sem);
} else {
- unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON;
+ unsigned int flags = FOLL_HWPOISON;
if (write_fault)
flags |= FOLL_WRITE;
- npages = __get_user_pages_unlocked(current, current->mm, addr, 1,
- page, flags);
+ npages = get_user_pages_unlocked(addr, 1, page, flags);
}
if (npages != 1)
return npages;
@@ -1972,30 +1974,38 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
-int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, int offset, unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
+ gpa_t gpa = ghc->gpa + offset;
- BUG_ON(len > ghc->len);
+ BUG_ON(len + offset > ghc->len);
if (slots->generation != ghc->generation)
kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
if (unlikely(!ghc->memslot))
- return kvm_write_guest(kvm, ghc->gpa, data, len);
+ return kvm_write_guest(kvm, gpa, data, len);
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
- r = __copy_to_user((void __user *)ghc->hva, data, len);
+ r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
if (r)
return -EFAULT;
- mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+ mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
return 0;
}
+EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached);
+
+int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
+{
+ return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len);
+}
EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
@@ -2889,10 +2899,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
if (ret < 0) {
- ops->destroy(dev);
mutex_lock(&kvm->lock);
list_del(&dev->vm_node);
mutex_unlock(&kvm->lock);
+ ops->destroy(dev);
return ret;
}
@@ -3661,11 +3671,23 @@ static int vm_stat_get_per_vm(void *data, u64 *val)
return 0;
}
+static int vm_stat_clear_per_vm(void *data, u64 val)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+
+ if (val)
+ return -EINVAL;
+
+ *(ulong *)((void *)stat_data->kvm + stat_data->offset) = 0;
+
+ return 0;
+}
+
static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
{
__simple_attr_check_format("%llu\n", 0ull);
return kvm_debugfs_open(inode, file, vm_stat_get_per_vm,
- NULL, "%llu\n");
+ vm_stat_clear_per_vm, "%llu\n");
}
static const struct file_operations vm_stat_get_per_vm_fops = {
@@ -3691,11 +3713,26 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val)
return 0;
}
+static int vcpu_stat_clear_per_vm(void *data, u64 val)
+{
+ int i;
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+ struct kvm_vcpu *vcpu;
+
+ if (val)
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
+ *(u64 *)((void *)vcpu + stat_data->offset) = 0;
+
+ return 0;
+}
+
static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
{
__simple_attr_check_format("%llu\n", 0ull);
return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
- NULL, "%llu\n");
+ vcpu_stat_clear_per_vm, "%llu\n");
}
static const struct file_operations vcpu_stat_get_per_vm_fops = {
@@ -3730,7 +3767,26 @@ static int vm_stat_get(void *_offset, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
+static int vm_stat_clear(void *_offset, u64 val)
+{
+ unsigned offset = (long)_offset;
+ struct kvm *kvm;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+
+ if (val)
+ return -EINVAL;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vm_stat_clear_per_vm((void *)&stat_tmp, 0);
+ }
+ spin_unlock(&kvm_lock);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, "%llu\n");
static int vcpu_stat_get(void *_offset, u64 *val)
{
@@ -3750,7 +3806,27 @@ static int vcpu_stat_get(void *_offset, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
+static int vcpu_stat_clear(void *_offset, u64 val)
+{
+ unsigned offset = (long)_offset;
+ struct kvm *kvm;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+
+ if (val)
+ return -EINVAL;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
+ }
+ spin_unlock(&kvm_lock);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear,
+ "%llu\n");
static const struct file_operations *stat_fops[] = {
[KVM_STAT_VCPU] = &vcpu_stat_fops,
@@ -3768,7 +3844,7 @@ static int kvm_init_debug(void)
kvm_debugfs_num_entries = 0;
for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
- if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
+ if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]))
goto out_dir;
@@ -3844,7 +3920,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
* kvm_arch_init makes sure there's at most one caller
* for architectures that support multiple implementations,
* like intel and amd on x86.
+ * kvm_arch_init must be called before kvm_irqfd_init to avoid creating
+ * conflicts in case kvm is already setup for another implementation.
*/
+ r = kvm_irqfd_init();
+ if (r)
+ goto out_irqfd;
if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
@@ -3863,7 +3944,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
goto out_free_1;
}
- r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING",
+ r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
kvm_starting_cpu, kvm_dying_cpu);
if (r)
goto out_free_2;
@@ -3926,6 +4007,7 @@ out_free_0a:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
kvm_irqfd_exit();
+out_irqfd:
kvm_arch_exit();
out_fail:
return r;
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 1dd087da6f31..d32f239eb471 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -60,6 +60,19 @@ static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
symbol_put(vfio_group_put_external_user);
}
+static void kvm_vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
+{
+ void (*fn)(struct vfio_group *, struct kvm *);
+
+ fn = symbol_get(vfio_group_set_kvm);
+ if (!fn)
+ return;
+
+ fn(group, kvm);
+
+ symbol_put(vfio_group_set_kvm);
+}
+
static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
{
long (*fn)(struct vfio_group *, unsigned long);
@@ -159,6 +172,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
mutex_unlock(&kv->lock);
+ kvm_vfio_group_set_kvm(vfio_group, dev->kvm);
+
kvm_vfio_update_coherency(dev);
return 0;
@@ -196,6 +211,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
mutex_unlock(&kv->lock);
+ kvm_vfio_group_set_kvm(vfio_group, NULL);
+
kvm_vfio_group_put_external_user(vfio_group);
kvm_vfio_update_coherency(dev);
@@ -240,6 +257,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
struct kvm_vfio_group *kvg, *tmp;
list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+ kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
kvm_vfio_group_put_external_user(kvg->vfio_group);
list_del(&kvg->node);
kfree(kvg);
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
index 52abac4bb6a2..6d2fcd6fcb25 100644
--- a/virt/lib/irqbypass.c
+++ b/virt/lib/irqbypass.c
@@ -195,7 +195,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
mutex_lock(&lock);
list_for_each_entry(tmp, &consumers, node) {
- if (tmp->token == consumer->token) {
+ if (tmp->token == consumer->token || tmp == consumer) {
mutex_unlock(&lock);
module_put(THIS_MODULE);
return -EBUSY;
@@ -245,7 +245,7 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
mutex_lock(&lock);
list_for_each_entry(tmp, &consumers, node) {
- if (tmp->token != consumer->token)
+ if (tmp != consumer)
continue;
list_for_each_entry(producer, &producers, node) {