summaryrefslogtreecommitdiffstats
path: root/virt
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2017-05-02 09:48:26 -0700
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2017-05-02 09:48:26 -0700
commit0337966d121ebebf73a1c346123e8112796e684e (patch)
treec0d4388591e72dc5a26ee976a9cbca70f6bafbbd /virt
parent7c5bb4ac2b76d2a09256aec8a7d584bf3e2b0466 (diff)
parent8a038b83e012097a7ac6cfb9f6c5fac1da8fad6e (diff)
downloadlinux-0337966d121ebebf73a1c346123e8112796e684e.tar.bz2
Merge branch 'next' into for-linus
Prepare input updates for 4.12 merge window.
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/arm/arch_timer.c201
-rw-r--r--virt/kvm/arm/hyp/timer-sr.c13
-rw-r--r--virt/kvm/arm/vgic/vgic-debug.c283
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c4
-rw-r--r--virt/kvm/arm/vgic/vgic-irqfd.c3
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c115
-rw-r--r--virt/kvm/arm/vgic/vgic-kvm-device.c231
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v2.c87
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c203
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c199
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.h24
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c12
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c45
-rw-r--r--virt/kvm/arm/vgic/vgic.c66
-rw-r--r--virt/kvm/arm/vgic/vgic.h83
-rw-r--r--virt/kvm/async_pf.c3
-rw-r--r--virt/kvm/eventfd.c3
-rw-r--r--virt/kvm/kvm_main.c175
18 files changed, 1369 insertions, 381 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 6a084cd57b88..35d7100e0815 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -37,10 +37,10 @@ static u32 host_vtimer_irq_flags;
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
- vcpu->arch.timer_cpu.active_cleared_last = false;
+ vcpu_vtimer(vcpu)->active_cleared_last = false;
}
-static u64 kvm_phys_timer_read(void)
+u64 kvm_phys_timer_read(void)
{
return timecounter->cc->read(timecounter->cc);
}
@@ -98,12 +98,12 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
kvm_vcpu_kick(vcpu);
}
-static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
+static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
u64 cval, now;
- cval = vcpu->arch.timer_cpu.cntv_cval;
- now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+ cval = timer_ctx->cnt_cval;
+ now = kvm_phys_timer_read() - timer_ctx->cntvoff;
if (now < cval) {
u64 ns;
@@ -118,6 +118,35 @@ static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
return 0;
}
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
+{
+ return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+ (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
+}
+
+/*
+ * Returns the earliest expiration time in ns among guest timers.
+ * Note that it will return 0 if none of timers can fire.
+ */
+static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
+{
+ u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+ if (kvm_timer_irq_can_fire(vtimer))
+ min_virt = kvm_timer_compute_delta(vtimer);
+
+ if (kvm_timer_irq_can_fire(ptimer))
+ min_phys = kvm_timer_compute_delta(ptimer);
+
+ /* If none of timers can fire, then return 0 */
+ if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX))
+ return 0;
+
+ return min(min_virt, min_phys);
+}
+
static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
{
struct arch_timer_cpu *timer;
@@ -132,7 +161,7 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
* PoV (NTP on the host may have forced it to expire
* early). If we should have slept longer, restart it.
*/
- ns = kvm_timer_compute_delta(vcpu);
+ ns = kvm_timer_earliest_exp(vcpu);
if (unlikely(ns)) {
hrtimer_forward_now(hrt, ns_to_ktime(ns));
return HRTIMER_RESTART;
@@ -142,42 +171,33 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
return HRTIMER_NORESTART;
}
-static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
- (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
-}
-
-bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
+bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
u64 cval, now;
- if (!kvm_timer_irq_can_fire(vcpu))
+ if (!kvm_timer_irq_can_fire(timer_ctx))
return false;
- cval = timer->cntv_cval;
- now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+ cval = timer_ctx->cnt_cval;
+ now = kvm_phys_timer_read() - timer_ctx->cntvoff;
return cval <= now;
}
-static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+ struct arch_timer_context *timer_ctx)
{
int ret;
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
BUG_ON(!vgic_initialized(vcpu->kvm));
- timer->active_cleared_last = false;
- timer->irq.level = new_level;
- trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq,
- timer->irq.level);
- ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
- timer->irq.irq,
- timer->irq.level);
+ timer_ctx->active_cleared_last = false;
+ timer_ctx->irq.level = new_level;
+ trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
+ timer_ctx->irq.level);
+
+ ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq,
+ timer_ctx->irq.level);
WARN_ON(ret);
}
@@ -188,22 +208,43 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/*
* If userspace modified the timer registers via SET_ONE_REG before
- * the vgic was initialized, we mustn't set the timer->irq.level value
+ * the vgic was initialized, we mustn't set the vtimer->irq.level value
* because the guest would never see the interrupt. Instead wait
* until we call this function from kvm_timer_flush_hwstate.
*/
if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
return -ENODEV;
- if (kvm_timer_should_fire(vcpu) != timer->irq.level)
- kvm_timer_update_irq(vcpu, !timer->irq.level);
+ if (kvm_timer_should_fire(vtimer) != vtimer->irq.level)
+ kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer);
+
+ if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+ kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
return 0;
}
+/* Schedule the background timer for the emulated timer. */
+static void kvm_timer_emulate(struct kvm_vcpu *vcpu,
+ struct arch_timer_context *timer_ctx)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ if (kvm_timer_should_fire(timer_ctx))
+ return;
+
+ if (!kvm_timer_irq_can_fire(timer_ctx))
+ return;
+
+ /* The timer has not yet expired, schedule a background timer */
+ timer_arm(timer, kvm_timer_compute_delta(timer_ctx));
+}
+
/*
* Schedule the background timer before calling kvm_vcpu_block, so that this
* thread is removed from its waitqueue and made runnable when there's a timer
@@ -212,26 +253,31 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
void kvm_timer_schedule(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
BUG_ON(timer_is_armed(timer));
/*
- * No need to schedule a background timer if the guest timer has
+ * No need to schedule a background timer if any guest timer has
* already expired, because kvm_vcpu_block will return before putting
* the thread to sleep.
*/
- if (kvm_timer_should_fire(vcpu))
+ if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
return;
/*
- * If the timer is not capable of raising interrupts (disabled or
+ * If both timers are not capable of raising interrupts (disabled or
* masked), then there's no more work for us to do.
*/
- if (!kvm_timer_irq_can_fire(vcpu))
+ if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer))
return;
- /* The timer has not yet expired, schedule a background timer */
- timer_arm(timer, kvm_timer_compute_delta(vcpu));
+ /*
+ * The guest timers have not yet expired, schedule a background timer.
+ * Set the earliest expiration time among the guest timers.
+ */
+ timer_arm(timer, kvm_timer_earliest_exp(vcpu));
}
void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
@@ -249,13 +295,16 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
*/
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
bool phys_active;
int ret;
if (kvm_timer_update_state(vcpu))
return;
+ /* Set the background timer for the physical timer emulation. */
+ kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+
/*
* If we enter the guest with the virtual input level to the VGIC
* asserted, then we have already told the VGIC what we need to, and
@@ -273,8 +322,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
* to ensure that hardware interrupts from the timer triggers a guest
* exit.
*/
- phys_active = timer->irq.level ||
- kvm_vgic_map_is_active(vcpu, timer->irq.irq);
+ phys_active = vtimer->irq.level ||
+ kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
/*
* We want to avoid hitting the (re)distributor as much as
@@ -296,7 +345,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
* - cached value is "active clear"
* - value to be programmed is "active clear"
*/
- if (timer->active_cleared_last && !phys_active)
+ if (vtimer->active_cleared_last && !phys_active)
return;
ret = irq_set_irqchip_state(host_vtimer_irq,
@@ -304,7 +353,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
phys_active);
WARN_ON(ret);
- timer->active_cleared_last = !phys_active;
+ vtimer->active_cleared_last = !phys_active;
}
/**
@@ -318,7 +367,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- BUG_ON(timer_is_armed(timer));
+ /*
+ * This is to cancel the background timer for the physical timer
+ * emulation if it is set.
+ */
+ timer_disarm(timer);
/*
* The guest could have modified the timer registers or the timer
@@ -328,9 +381,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *irq)
+ const struct kvm_irq_level *virt_irq,
+ const struct kvm_irq_level *phys_irq)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/*
* The vcpu timer irq number cannot be determined in
@@ -338,7 +393,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
* kvm_vcpu_set_target(). To handle this, we determine
* vcpu timer irq number when the vcpu is reset.
*/
- timer->irq.irq = irq->irq;
+ vtimer->irq.irq = virt_irq->irq;
+ ptimer->irq.irq = phys_irq->irq;
/*
* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -346,16 +402,40 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
* resets the timer to be disabled and unmasked and is compliant with
* the ARMv7 architecture.
*/
- timer->cntv_ctl = 0;
+ vtimer->cnt_ctl = 0;
+ ptimer->cnt_ctl = 0;
kvm_timer_update_state(vcpu);
return 0;
}
+/* Make the updates of cntvoff for all vtimer contexts atomic */
+static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
+{
+ int i;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tmp;
+
+ mutex_lock(&kvm->lock);
+ kvm_for_each_vcpu(i, tmp, kvm)
+ vcpu_vtimer(tmp)->cntvoff = cntvoff;
+
+ /*
+ * When called from the vcpu create path, the CPU being created is not
+ * included in the loop above, so we just set it here as well.
+ */
+ vcpu_vtimer(vcpu)->cntvoff = cntvoff;
+ mutex_unlock(&kvm->lock);
+}
+
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ /* Synchronize cntvoff across all vtimers of a VM. */
+ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
+ vcpu_ptimer(vcpu)->cntvoff = 0;
+
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
timer->timer.function = kvm_timer_expire;
@@ -368,17 +448,17 @@ static void kvm_timer_init_interrupt(void *info)
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
switch (regid) {
case KVM_REG_ARM_TIMER_CTL:
- timer->cntv_ctl = value;
+ vtimer->cnt_ctl = value;
break;
case KVM_REG_ARM_TIMER_CNT:
- vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
+ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
break;
case KVM_REG_ARM_TIMER_CVAL:
- timer->cntv_cval = value;
+ vtimer->cnt_cval = value;
break;
default:
return -1;
@@ -390,15 +470,15 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
switch (regid) {
case KVM_REG_ARM_TIMER_CTL:
- return timer->cntv_ctl;
+ return vtimer->cnt_ctl;
case KVM_REG_ARM_TIMER_CNT:
- return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+ return kvm_phys_timer_read() - vtimer->cntvoff;
case KVM_REG_ARM_TIMER_CVAL:
- return timer->cntv_cval;
+ return vtimer->cnt_cval;
}
return (u64)-1;
}
@@ -462,14 +542,16 @@ int kvm_timer_hyp_init(void)
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
timer_disarm(timer);
- kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq);
+ kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
}
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct irq_desc *desc;
struct irq_data *data;
int phys_irq;
@@ -497,7 +579,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
* Tell the VGIC that the virtual interrupt is tied to a
* physical interrupt. We do that once per VCPU.
*/
- ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq);
+ ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq);
if (ret)
return ret;
@@ -506,11 +588,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return 0;
}
-void kvm_timer_init(struct kvm *kvm)
-{
- kvm->arch.timer.cntvoff = kvm_phys_timer_read();
-}
-
/*
* On VHE system, we only need to configure trap on physical timer and counter
* accesses in EL0 and EL1 once, not for every world switch.
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index 63e28dd18bb0..4734915ab71f 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -25,11 +25,12 @@
void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
u64 val;
if (timer->enabled) {
- timer->cntv_ctl = read_sysreg_el0(cntv_ctl);
- timer->cntv_cval = read_sysreg_el0(cntv_cval);
+ vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
}
/* Disable the virtual timer */
@@ -52,8 +53,8 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
u64 val;
/* Those bits are already configured at boot on VHE-system */
@@ -69,9 +70,9 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
}
if (timer->enabled) {
- write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
- write_sysreg_el0(timer->cntv_cval, cntv_cval);
+ write_sysreg(vtimer->cntvoff, cntvoff_el2);
+ write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
isb();
- write_sysreg_el0(timer->cntv_ctl, cntv_ctl);
+ write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
}
}
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c
new file mode 100644
index 000000000000..7072ab743332
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2016 Linaro
+ * Author: Christoffer Dall <christoffer.dall@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/kvm_host.h>
+#include <linux/seq_file.h>
+#include <kvm/arm_vgic.h>
+#include <asm/kvm_mmu.h>
+#include "vgic.h"
+
+/*
+ * Structure to control looping through the entire vgic state. We start at
+ * zero for each field and move upwards. So, if dist_id is 0 we print the
+ * distributor info. When dist_id is 1, we have already printed it and move
+ * on.
+ *
+ * When vcpu_id < nr_cpus we print the vcpu info until vcpu_id == nr_cpus and
+ * so on.
+ */
+struct vgic_state_iter {
+ int nr_cpus;
+ int nr_spis;
+ int dist_id;
+ int vcpu_id;
+ int intid;
+};
+
+static void iter_next(struct vgic_state_iter *iter)
+{
+ if (iter->dist_id == 0) {
+ iter->dist_id++;
+ return;
+ }
+
+ iter->intid++;
+ if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
+ ++iter->vcpu_id < iter->nr_cpus)
+ iter->intid = 0;
+}
+
+static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
+ loff_t pos)
+{
+ int nr_cpus = atomic_read(&kvm->online_vcpus);
+
+ memset(iter, 0, sizeof(*iter));
+
+ iter->nr_cpus = nr_cpus;
+ iter->nr_spis = kvm->arch.vgic.nr_spis;
+
+ /* Fast forward to the right position if needed */
+ while (pos--)
+ iter_next(iter);
+}
+
+static bool end_of_vgic(struct vgic_state_iter *iter)
+{
+ return iter->dist_id > 0 &&
+ iter->vcpu_id == iter->nr_cpus &&
+ (iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis;
+}
+
+static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
+{
+ struct kvm *kvm = (struct kvm *)s->private;
+ struct vgic_state_iter *iter;
+
+ mutex_lock(&kvm->lock);
+ iter = kvm->arch.vgic.iter;
+ if (iter) {
+ iter = ERR_PTR(-EBUSY);
+ goto out;
+ }
+
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter) {
+ iter = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ iter_init(kvm, iter, *pos);
+ kvm->arch.vgic.iter = iter;
+
+ if (end_of_vgic(iter))
+ iter = NULL;
+out:
+ mutex_unlock(&kvm->lock);
+ return iter;
+}
+
+static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct kvm *kvm = (struct kvm *)s->private;
+ struct vgic_state_iter *iter = kvm->arch.vgic.iter;
+
+ ++*pos;
+ iter_next(iter);
+ if (end_of_vgic(iter))
+ iter = NULL;
+ return iter;
+}
+
+static void vgic_debug_stop(struct seq_file *s, void *v)
+{
+ struct kvm *kvm = (struct kvm *)s->private;
+ struct vgic_state_iter *iter;
+
+ /*
+ * If the seq file wasn't properly opened, there's nothing to clearn
+ * up.
+ */
+ if (IS_ERR(v))
+ return;
+
+ mutex_lock(&kvm->lock);
+ iter = kvm->arch.vgic.iter;
+ kfree(iter);
+ kvm->arch.vgic.iter = NULL;
+ mutex_unlock(&kvm->lock);
+}
+
+static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
+{
+ seq_printf(s, "Distributor\n");
+ seq_printf(s, "===========\n");
+ seq_printf(s, "vgic_model:\t%s\n",
+ (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ?
+ "GICv3" : "GICv2");
+ seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
+ seq_printf(s, "enabled:\t%d\n", dist->enabled);
+ seq_printf(s, "\n");
+
+ seq_printf(s, "P=pending_latch, L=line_level, A=active\n");
+ seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n");
+}
+
+static void print_header(struct seq_file *s, struct vgic_irq *irq,
+ struct kvm_vcpu *vcpu)
+{
+ int id = 0;
+ char *hdr = "SPI ";
+
+ if (vcpu) {
+ hdr = "VCPU";
+ id = vcpu->vcpu_id;
+ }
+
+ seq_printf(s, "\n");
+ seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHC HWID TARGET SRC PRI VCPU_ID\n", hdr, id);
+ seq_printf(s, "---------------------------------------------------------------\n");
+}
+
+static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
+ struct kvm_vcpu *vcpu)
+{
+ char *type;
+ if (irq->intid < VGIC_NR_SGIS)
+ type = "SGI";
+ else if (irq->intid < VGIC_NR_PRIVATE_IRQS)
+ type = "PPI";
+ else
+ type = "SPI";
+
+ if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS)
+ print_header(s, irq, vcpu);
+
+ seq_printf(s, " %s %4d "
+ " %2d "
+ "%d%d%d%d%d%d "
+ "%8d "
+ "%8x "
+ " %2x "
+ "%3d "
+ " %2d "
+ "\n",
+ type, irq->intid,
+ (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1,
+ irq->pending_latch,
+ irq->line_level,
+ irq->active,
+ irq->enabled,
+ irq->hw,
+ irq->config == VGIC_CONFIG_LEVEL,
+ irq->hwintid,
+ irq->mpidr,
+ irq->source,
+ irq->priority,
+ (irq->vcpu) ? irq->vcpu->vcpu_id : -1);
+
+}
+
+static int vgic_debug_show(struct seq_file *s, void *v)
+{
+ struct kvm *kvm = (struct kvm *)s->private;
+ struct vgic_state_iter *iter = (struct vgic_state_iter *)v;
+ struct vgic_irq *irq;
+ struct kvm_vcpu *vcpu = NULL;
+
+ if (iter->dist_id == 0) {
+ print_dist_state(s, &kvm->arch.vgic);
+ return 0;
+ }
+
+ if (!kvm->arch.vgic.initialized)
+ return 0;
+
+ if (iter->vcpu_id < iter->nr_cpus) {
+ vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
+ irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid];
+ } else {
+ irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
+ }
+
+ spin_lock(&irq->irq_lock);
+ print_irq_state(s, irq, vcpu);
+ spin_unlock(&irq->irq_lock);
+
+ return 0;
+}
+
+static struct seq_operations vgic_debug_seq_ops = {
+ .start = vgic_debug_start,
+ .next = vgic_debug_next,
+ .stop = vgic_debug_stop,
+ .show = vgic_debug_show
+};
+
+static int debug_open(struct inode *inode, struct file *file)
+{
+ int ret;
+ ret = seq_open(file, &vgic_debug_seq_ops);
+ if (!ret) {
+ struct seq_file *seq;
+ /* seq_open will have modified file->private_data */
+ seq = file->private_data;
+ seq->private = inode->i_private;
+ }
+
+ return ret;
+};
+
+static struct file_operations vgic_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+int vgic_debug_init(struct kvm *kvm)
+{
+ if (!kvm->debugfs_dentry)
+ return -ENOENT;
+
+ if (!debugfs_create_file("vgic-state", 0444,
+ kvm->debugfs_dentry,
+ kvm,
+ &vgic_debug_fops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+int vgic_debug_destroy(struct kvm *kvm)
+{
+ return 0;
+}
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index c737ea0a310a..276139a24e6f 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -259,6 +259,8 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;
+ vgic_debug_init(kvm);
+
dist->initialized = true;
out:
return ret;
@@ -288,6 +290,8 @@ static void __kvm_vgic_destroy(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i;
+ vgic_debug_destroy(kvm);
+
kvm_vgic_dist_destroy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index d918dcf26a5a..f138ed2e9c63 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -99,6 +99,9 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
if (!vgic_has_its(kvm))
return -ENODEV;
+ if (!level)
+ return -1;
+
return vgic_its_inject_msi(kvm, &msi);
}
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 8c2b3cdcb2c5..8d1da1af4b09 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -350,7 +350,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
spin_lock(&irq->irq_lock);
- irq->pending = pendmask & (1U << bit_nr);
+ irq->pending_latch = pendmask & (1U << bit_nr);
vgic_queue_irq_unlock(vcpu->kvm, irq);
vgic_put_irq(vcpu->kvm, irq);
}
@@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
return ret;
}
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- u32 reg = 0;
-
- mutex_lock(&its->cmd_lock);
- if (its->creadr == its->cwriter)
- reg |= GITS_CTLR_QUIESCENT;
- if (its->enabled)
- reg |= GITS_CTLR_ENABLE;
- mutex_unlock(&its->cmd_lock);
-
- return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
struct vgic_its *its,
gpa_t addr, unsigned int len)
@@ -465,7 +442,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
return -EBUSY;
spin_lock(&itte->irq->irq_lock);
- itte->irq->pending = true;
+ itte->irq->pending_latch = true;
vgic_queue_irq_unlock(kvm, itte->irq);
return 0;
@@ -913,7 +890,7 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
if (!itte)
return E_ITS_CLEAR_UNMAPPED_INTERRUPT;
- itte->irq->pending = false;
+ itte->irq->pending_latch = false;
return 0;
}
@@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
#define ITS_CMD_SIZE 32
#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5))
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
{
gpa_t cbaser;
u64 cmd_buf[4];
- u32 reg;
- if (!its)
- return;
-
- mutex_lock(&its->cmd_lock);
-
- reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
- reg = ITS_CMD_OFFSET(reg);
- if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
- mutex_unlock(&its->cmd_lock);
+ /* Commands are only processed when the ITS is enabled. */
+ if (!its->enabled)
return;
- }
- its->cwriter = reg;
cbaser = CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) {
@@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
its->creadr = 0;
}
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u64 reg;
+
+ if (!its)
+ return;
+
+ mutex_lock(&its->cmd_lock);
+
+ reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+ reg = ITS_CMD_OFFSET(reg);
+ if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+ mutex_unlock(&its->cmd_lock);
+ return;
+ }
+ its->cwriter = reg;
+
+ vgic_its_process_commands(kvm, its);
mutex_unlock(&its->cmd_lock);
}
@@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
*regptr = reg;
}
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+ struct vgic_its *its,
+ gpa_t addr, unsigned int len)
+{
+ u32 reg = 0;
+
+ mutex_lock(&its->cmd_lock);
+ if (its->creadr == its->cwriter)
+ reg |= GITS_CTLR_QUIESCENT;
+ if (its->enabled)
+ reg |= GITS_CTLR_ENABLE;
+ mutex_unlock(&its->cmd_lock);
+
+ return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ mutex_lock(&its->cmd_lock);
+
+ its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+ /*
+ * Try to process any pending commands. This function bails out early
+ * if the ITS is disabled or no commands have been queued.
+ */
+ vgic_its_process_commands(kvm, its);
+
+ mutex_unlock(&its->cmd_lock);
+}
+
#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \
{ \
.reg_offset = off, \
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index fbe87a63d250..d181d2baee9c 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -17,6 +17,7 @@
#include <kvm/arm_vgic.h>
#include <linux/uaccess.h>
#include <asm/kvm_mmu.h>
+#include <asm/cputype.h>
#include "vgic.h"
/* common helpers */
@@ -230,14 +231,8 @@ int kvm_register_vgic_device(unsigned long type)
return ret;
}
-struct vgic_reg_attr {
- struct kvm_vcpu *vcpu;
- gpa_t addr;
-};
-
-static int parse_vgic_v2_attr(struct kvm_device *dev,
- struct kvm_device_attr *attr,
- struct vgic_reg_attr *reg_attr)
+int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+ struct vgic_reg_attr *reg_attr)
{
int cpuid;
@@ -292,14 +287,14 @@ static bool lock_all_vcpus(struct kvm *kvm)
}
/**
- * vgic_attr_regs_access_v2 - allows user space to access VGIC v2 state
+ * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state
*
* @dev: kvm device handle
* @attr: kvm device attribute
* @reg: address the value is read or written
* @is_write: true if userspace is writing a register
*/
-static int vgic_attr_regs_access_v2(struct kvm_device *dev,
+static int vgic_v2_attr_regs_access(struct kvm_device *dev,
struct kvm_device_attr *attr,
u32 *reg, bool is_write)
{
@@ -308,7 +303,7 @@ static int vgic_attr_regs_access_v2(struct kvm_device *dev,
struct kvm_vcpu *vcpu;
int ret;
- ret = parse_vgic_v2_attr(dev, attr, &reg_attr);
+ ret = vgic_v2_parse_attr(dev, attr, &reg_attr);
if (ret)
return ret;
@@ -362,7 +357,7 @@ static int vgic_v2_set_attr(struct kvm_device *dev,
if (get_user(reg, uaddr))
return -EFAULT;
- return vgic_attr_regs_access_v2(dev, attr, &reg, true);
+ return vgic_v2_attr_regs_access(dev, attr, &reg, true);
}
}
@@ -384,7 +379,7 @@ static int vgic_v2_get_attr(struct kvm_device *dev,
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
u32 reg = 0;
- ret = vgic_attr_regs_access_v2(dev, attr, &reg, false);
+ ret = vgic_v2_attr_regs_access(dev, attr, &reg, false);
if (ret)
return ret;
return put_user(reg, uaddr);
@@ -428,16 +423,211 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
.has_attr = vgic_v2_has_attr,
};
+int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+ struct vgic_reg_attr *reg_attr)
+{
+ unsigned long vgic_mpidr, mpidr_reg;
+
+ /*
+ * For KVM_DEV_ARM_VGIC_GRP_DIST_REGS group,
+ * attr might not hold MPIDR. Hence assume vcpu0.
+ */
+ if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS) {
+ vgic_mpidr = (attr->attr & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) >>
+ KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT;
+
+ mpidr_reg = VGIC_TO_MPIDR(vgic_mpidr);
+ reg_attr->vcpu = kvm_mpidr_to_vcpu(dev->kvm, mpidr_reg);
+ } else {
+ reg_attr->vcpu = kvm_get_vcpu(dev->kvm, 0);
+ }
+
+ if (!reg_attr->vcpu)
+ return -EINVAL;
+
+ reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+
+ return 0;
+}
+
+/*
+ * vgic_v3_attr_regs_access - allows user space to access VGIC v3 state
+ *
+ * @dev: kvm device handle
+ * @attr: kvm device attribute
+ * @reg: address the value is read or written
+ * @is_write: true if userspace is writing a register
+ */
+static int vgic_v3_attr_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ u64 *reg, bool is_write)
+{
+ struct vgic_reg_attr reg_attr;
+ gpa_t addr;
+ struct kvm_vcpu *vcpu;
+ int ret;
+ u32 tmp32;
+
+ ret = vgic_v3_parse_attr(dev, attr, &reg_attr);
+ if (ret)
+ return ret;
+
+ vcpu = reg_attr.vcpu;
+ addr = reg_attr.addr;
+
+ mutex_lock(&dev->kvm->lock);
+
+ if (unlikely(!vgic_initialized(dev->kvm))) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (!lock_all_vcpus(dev->kvm)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ if (is_write)
+ tmp32 = *reg;
+
+ ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32);
+ if (!is_write)
+ *reg = tmp32;
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
+ if (is_write)
+ tmp32 = *reg;
+
+ ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32);
+ if (!is_write)
+ *reg = tmp32;
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+ u64 regid;
+
+ regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
+ ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write,
+ regid, reg);
+ break;
+ }
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
+ unsigned int info, intid;
+
+ info = (attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >>
+ KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT;
+ if (info == VGIC_LEVEL_INFO_LINE_LEVEL) {
+ intid = attr->attr &
+ KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK;
+ ret = vgic_v3_line_level_info_uaccess(vcpu, is_write,
+ intid, reg);
+ } else {
+ ret = -EINVAL;
+ }
+ break;
+ }
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ unlock_all_vcpus(dev->kvm);
+out:
+ mutex_unlock(&dev->kvm->lock);
+ return ret;
+}
+
static int vgic_v3_set_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
- return vgic_set_common_attr(dev, attr);
+ int ret;
+
+ ret = vgic_set_common_attr(dev, attr);
+ if (ret != -ENXIO)
+ return ret;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 tmp32;
+ u64 reg;
+
+ if (get_user(tmp32, uaddr))
+ return -EFAULT;
+
+ reg = tmp32;
+ return vgic_v3_attr_regs_access(dev, attr, &reg, true);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 reg;
+
+ if (get_user(reg, uaddr))
+ return -EFAULT;
+
+ return vgic_v3_attr_regs_access(dev, attr, &reg, true);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u64 reg;
+ u32 tmp32;
+
+ if (get_user(tmp32, uaddr))
+ return -EFAULT;
+
+ reg = tmp32;
+ return vgic_v3_attr_regs_access(dev, attr, &reg, true);
+ }
+ }
+ return -ENXIO;
}
static int vgic_v3_get_attr(struct kvm_device *dev,
struct kvm_device_attr *attr)
{
- return vgic_get_common_attr(dev, attr);
+ int ret;
+
+ ret = vgic_get_common_attr(dev, attr);
+ if (ret != -ENXIO)
+ return ret;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u64 reg;
+ u32 tmp32;
+
+ ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
+ if (ret)
+ return ret;
+ tmp32 = reg;
+ return put_user(tmp32, uaddr);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 reg;
+
+ ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
+ if (ret)
+ return ret;
+ return put_user(reg, uaddr);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u64 reg;
+ u32 tmp32;
+
+ ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
+ if (ret)
+ return ret;
+ tmp32 = reg;
+ return put_user(tmp32, uaddr);
+ }
+ }
+ return -ENXIO;
}
static int vgic_v3_has_attr(struct kvm_device *dev,
@@ -451,8 +641,19 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
return 0;
}
break;
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+ return vgic_v3_has_attr_regs(dev, attr);
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
return 0;
+ case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
+ if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >>
+ KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) ==
+ VGIC_LEVEL_INFO_LINE_LEVEL)
+ return 0;
+ break;
+ }
case KVM_DEV_ARM_VGIC_GRP_CTRL:
switch (attr->attr) {
case KVM_DEV_ARM_VGIC_CTRL_INIT:
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index 78e34bc4d89b..a3ad7ff95c9b 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -98,7 +98,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid);
spin_lock(&irq->irq_lock);
- irq->pending = true;
+ irq->pending_latch = true;
irq->source |= 1U << source_vcpu->vcpu_id;
vgic_queue_irq_unlock(source_vcpu->kvm, irq);
@@ -182,7 +182,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
irq->source &= ~((val >> (i * 8)) & 0xff);
if (!irq->source)
- irq->pending = false;
+ irq->pending_latch = false;
spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
@@ -204,7 +204,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
irq->source |= (val >> (i * 8)) & 0xff;
if (irq->source) {
- irq->pending = true;
+ irq->pending_latch = true;
vgic_queue_irq_unlock(vcpu->kvm, irq);
} else {
spin_unlock(&irq->irq_lock);
@@ -213,22 +213,6 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
}
}
-static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_set_vmcr(vcpu, vmcr);
- else
- vgic_v3_set_vmcr(vcpu, vmcr);
-}
-
-static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
-{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_get_vmcr(vcpu, vmcr);
- else
- vgic_v3_get_vmcr(vcpu, vmcr);
-}
-
#define GICC_ARCH_VERSION_V2 0x2
/* These are for userland accesses only, there is no guest-facing emulation. */
@@ -369,21 +353,30 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
{
- int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
- const struct vgic_register_region *regions;
+ const struct vgic_register_region *region;
+ struct vgic_io_device iodev;
+ struct vgic_reg_attr reg_attr;
+ struct kvm_vcpu *vcpu;
gpa_t addr;
- int nr_regions, i, len;
+ int ret;
+
+ ret = vgic_v2_parse_attr(dev, attr, &reg_attr);
+ if (ret)
+ return ret;
- addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ vcpu = reg_attr.vcpu;
+ addr = reg_attr.addr;
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
- regions = vgic_v2_dist_registers;
- nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
+ iodev.regions = vgic_v2_dist_registers;
+ iodev.nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
+ iodev.base_addr = 0;
break;
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
- regions = vgic_v2_cpu_registers;
- nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
+ iodev.regions = vgic_v2_cpu_registers;
+ iodev.nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
+ iodev.base_addr = 0;
break;
default:
return -ENXIO;
@@ -393,43 +386,11 @@ int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
if (addr & 3)
return -ENXIO;
- for (i = 0; i < nr_regions; i++) {
- if (regions[i].bits_per_irq)
- len = (regions[i].bits_per_irq * nr_irqs) / 8;
- else
- len = regions[i].len;
-
- if (regions[i].reg_offset <= addr &&
- regions[i].reg_offset + len > addr)
- return 0;
- }
-
- return -ENXIO;
-}
-
-/*
- * When userland tries to access the VGIC register handlers, we need to
- * create a usable struct vgic_io_device to be passed to the handlers and we
- * have to set up a buffer similar to what would have happened if a guest MMIO
- * access occurred, including doing endian conversions on BE systems.
- */
-static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
- bool is_write, int offset, u32 *val)
-{
- unsigned int len = 4;
- u8 buf[4];
- int ret;
-
- if (is_write) {
- vgic_data_host_to_mmio_bus(buf, len, *val);
- ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf);
- } else {
- ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf);
- if (!ret)
- *val = vgic_data_mmio_bus_to_host(buf, len);
- }
+ region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32));
+ if (!region)
+ return -ENXIO;
- return ret;
+ return 0;
}
int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 50f42f0f8c4f..6afb3b484886 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -18,6 +18,8 @@
#include <kvm/arm_vgic.h>
#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
#include "vgic.h"
#include "vgic-mmio.h"
@@ -207,6 +209,60 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
return 0;
}
+static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ u32 value = 0;
+ int i;
+
+ /*
+ * pending state of interrupt is latched in pending_latch variable.
+ * Userspace will save and restore pending state and line_level
+ * separately.
+ * Refer to Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+ * for handling of ISPENDR and ICPENDR.
+ */
+ for (i = 0; i < len * 8; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ if (irq->pending_latch)
+ value |= (1U << i);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return value;
+}
+
+static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
+ for (i = 0; i < len * 8; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock(&irq->irq_lock);
+ if (test_bit(i, &val)) {
+ /*
+ * pending_latch is set irrespective of irq type
+ * (level or edge) to avoid dependency that VM should
+ * restore irq config before pending info.
+ */
+ irq->pending_latch = true;
+ vgic_queue_irq_unlock(vcpu->kvm, irq);
+ } else {
+ irq->pending_latch = false;
+ spin_unlock(&irq->irq_lock);
+ }
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+}
+
/* We want to avoid outer shareable. */
u64 vgic_sanitise_shareability(u64 field)
{
@@ -356,7 +412,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
* We take some special care here to fix the calculation of the register
* offset.
*/
-#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \
+#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, ur, uw, bpi, acc) \
{ \
.reg_offset = off, \
.bits_per_irq = bpi, \
@@ -371,47 +427,54 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
.access_flags = acc, \
.read = rd, \
.write = wr, \
+ .uaccess_read = ur, \
+ .uaccess_write = uw, \
}
static const struct vgic_register_region vgic_v3_dist_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICD_CTLR,
vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16,
VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICD_STATUSR,
+ vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
- vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
+ vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
- vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
+ vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER,
- vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
+ vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
- vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
+ vgic_mmio_read_pending, vgic_mmio_write_spending,
+ vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
- vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
+ vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
+ vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
- vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
+ 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 8,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 8,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR,
- vgic_mmio_read_config, vgic_mmio_write_config, 2,
+ vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR,
- vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER,
- vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64,
+ vgic_mmio_read_irouter, vgic_mmio_write_irouter, NULL, NULL, 64,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICD_IDREGS,
vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
@@ -422,12 +485,18 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4,
VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_STATUSR,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER,
vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
@@ -449,11 +518,13 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0,
- vgic_mmio_read_pending, vgic_mmio_write_spending, 4,
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0,
+ vgic_mmio_read_pending, vgic_mmio_write_spending,
+ vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0,
- vgic_mmio_read_pending, vgic_mmio_write_cpending, 4,
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0,
vgic_mmio_read_active, vgic_mmio_write_sactive, 4,
@@ -546,6 +617,54 @@ int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address)
return ret;
}
+int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ const struct vgic_register_region *region;
+ struct vgic_io_device iodev;
+ struct vgic_reg_attr reg_attr;
+ struct kvm_vcpu *vcpu;
+ gpa_t addr;
+ int ret;
+
+ ret = vgic_v3_parse_attr(dev, attr, &reg_attr);
+ if (ret)
+ return ret;
+
+ vcpu = reg_attr.vcpu;
+ addr = reg_attr.addr;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ iodev.regions = vgic_v3_dist_registers;
+ iodev.nr_regions = ARRAY_SIZE(vgic_v3_dist_registers);
+ iodev.base_addr = 0;
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{
+ iodev.regions = vgic_v3_rdbase_registers;
+ iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+ iodev.base_addr = 0;
+ break;
+ }
+ case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
+ u64 reg, id;
+
+ id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
+ return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, &reg);
+ }
+ default:
+ return -ENXIO;
+ }
+
+ /* We only support aligned 32-bit accesses. */
+ if (addr & 3)
+ return -ENXIO;
+
+ region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32));
+ if (!region)
+ return -ENXIO;
+
+ return 0;
+}
/*
* Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
* generation register ICC_SGI1R_EL1) with a given VCPU.
@@ -646,9 +765,55 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
spin_lock(&irq->irq_lock);
- irq->pending = true;
+ irq->pending_latch = true;
vgic_queue_irq_unlock(vcpu->kvm, irq);
vgic_put_irq(vcpu->kvm, irq);
}
}
+
+int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val)
+{
+ struct vgic_io_device dev = {
+ .regions = vgic_v3_dist_registers,
+ .nr_regions = ARRAY_SIZE(vgic_v3_dist_registers),
+ };
+
+ return vgic_uaccess(vcpu, &dev, is_write, offset, val);
+}
+
+int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val)
+{
+ struct vgic_io_device rd_dev = {
+ .regions = vgic_v3_rdbase_registers,
+ .nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers),
+ };
+
+ struct vgic_io_device sgi_dev = {
+ .regions = vgic_v3_sgibase_registers,
+ .nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers),
+ };
+
+ /* SGI_base is the next 64K frame after RD_base */
+ if (offset >= SZ_64K)
+ return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K,
+ val);
+ else
+ return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
+}
+
+int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ u32 intid, u64 *val)
+{
+ if (intid % 32)
+ return -EINVAL;
+
+ if (is_write)
+ vgic_write_irq_line_level_info(vcpu, intid, *val);
+ else
+ *val = vgic_read_irq_line_level_info(vcpu, intid);
+
+ return 0;
+}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index ebe1b9fa3c4d..2a5db1352722 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -111,7 +111,7 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
for (i = 0; i < len * 8; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- if (irq->pending)
+ if (irq_is_pending(irq))
value |= (1U << i);
vgic_put_irq(vcpu->kvm, irq);
@@ -131,9 +131,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
spin_lock(&irq->irq_lock);
- irq->pending = true;
- if (irq->config == VGIC_CONFIG_LEVEL)
- irq->soft_pending = true;
+ irq->pending_latch = true;
vgic_queue_irq_unlock(vcpu->kvm, irq);
vgic_put_irq(vcpu->kvm, irq);
@@ -152,12 +150,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
spin_lock(&irq->irq_lock);
- if (irq->config == VGIC_CONFIG_LEVEL) {
- irq->soft_pending = false;
- irq->pending = irq->line_level;
- } else {
- irq->pending = false;
- }
+ irq->pending_latch = false;
spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
@@ -187,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool new_active_state)
{
+ struct kvm_vcpu *requester_vcpu;
spin_lock(&irq->irq_lock);
+
+ /*
+ * The vcpu parameter here can mean multiple things depending on how
+ * this function is called; when handling a trap from the kernel it
+ * depends on the GIC version, and these functions are also called as
+ * part of save/restore from userspace.
+ *
+ * Therefore, we have to figure out the requester in a reliable way.
+ *
+ * When accessing VGIC state from user space, the requester_vcpu is
+ * NULL, which is fine, because we guarantee that no VCPUs are running
+ * when accessing VGIC state from user space so irq->vcpu->cpu is
+ * always -1.
+ */
+ requester_vcpu = kvm_arm_get_running_vcpu();
+
/*
* If this virtual IRQ was written into a list register, we
* have to make sure the CPU that runs the VCPU thread has
- * synced back LR state to the struct vgic_irq. We can only
- * know this for sure, when either this irq is not assigned to
- * anyone's AP list anymore, or the VCPU thread is not
- * running on any CPUs.
+ * synced back the LR state to the struct vgic_irq.
*
- * In the opposite case, we know the VCPU thread may be on its
- * way back from the guest and still has to sync back this
- * IRQ, so we release and re-acquire the spin_lock to let the
- * other thread sync back the IRQ.
+ * As long as the conditions below are true, we know the VCPU thread
+ * may be on its way back from the guest (we kicked the VCPU thread in
+ * vgic_change_active_prepare) and still has to sync back this IRQ,
+ * so we release and re-acquire the spin_lock to let the other thread
+ * sync back the IRQ.
*/
while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+ irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
irq->vcpu->cpu != -1) /* VCPU thread is running */
cond_resched_lock(&irq->irq_lock);
@@ -359,18 +368,70 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
spin_lock(&irq->irq_lock);
- if (test_bit(i * 2 + 1, &val)) {
+ if (test_bit(i * 2 + 1, &val))
irq->config = VGIC_CONFIG_EDGE;
- } else {
+ else
irq->config = VGIC_CONFIG_LEVEL;
- irq->pending = irq->line_level | irq->soft_pending;
- }
spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
}
}
+u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
+{
+ int i;
+ u64 val = 0;
+ int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+
+ for (i = 0; i < 32; i++) {
+ struct vgic_irq *irq;
+
+ if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs)
+ continue;
+
+ irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level)
+ val |= (1U << i);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return val;
+}
+
+void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
+ const u64 val)
+{
+ int i;
+ int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+
+ for (i = 0; i < 32; i++) {
+ struct vgic_irq *irq;
+ bool new_level;
+
+ if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs)
+ continue;
+
+ irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ /*
+ * Line level is set irrespective of irq type
+ * (level or edge) to avoid dependency that VM should
+ * restore irq config before line level.
+ */
+ new_level = !!(val & (1U << i));
+ spin_lock(&irq->irq_lock);
+ irq->line_level = new_level;
+ if (new_level)
+ vgic_queue_irq_unlock(vcpu->kvm, irq);
+ else
+ spin_unlock(&irq->irq_lock);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+}
+
static int match_region(const void *key, const void *elt)
{
const unsigned int offset = (unsigned long)key;
@@ -394,6 +455,22 @@ vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
sizeof(region[0]), match_region);
}
+void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_set_vmcr(vcpu, vmcr);
+ else
+ vgic_v3_set_vmcr(vcpu, vmcr);
+}
+
+void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_get_vmcr(vcpu, vmcr);
+ else
+ vgic_v3_get_vmcr(vcpu, vmcr);
+}
+
/*
* kvm_mmio_read_buf() returns a value in a format where it can be converted
* to a byte array and be directly observed as the guest wanted it to appear
@@ -484,6 +561,74 @@ static bool check_region(const struct kvm *kvm,
return false;
}
+const struct vgic_register_region *
+vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
+ gpa_t addr, int len)
+{
+ const struct vgic_register_region *region;
+
+ region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
+ addr - iodev->base_addr);
+ if (!region || !check_region(vcpu->kvm, region, addr, len))
+ return NULL;
+
+ return region;
+}
+
+static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, u32 *val)
+{
+ struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
+ const struct vgic_register_region *region;
+ struct kvm_vcpu *r_vcpu;
+
+ region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
+ if (!region) {
+ *val = 0;
+ return 0;
+ }
+
+ r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
+ if (region->uaccess_read)
+ *val = region->uaccess_read(r_vcpu, addr, sizeof(u32));
+ else
+ *val = region->read(r_vcpu, addr, sizeof(u32));
+
+ return 0;
+}
+
+static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, const u32 *val)
+{
+ struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
+ const struct vgic_register_region *region;
+ struct kvm_vcpu *r_vcpu;
+
+ region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
+ if (!region)
+ return 0;
+
+ r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
+ if (region->uaccess_write)
+ region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
+ else
+ region->write(r_vcpu, addr, sizeof(u32), *val);
+
+ return 0;
+}
+
+/*
+ * Userland access to VGIC registers.
+ */
+int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
+ bool is_write, int offset, u32 *val)
+{
+ if (is_write)
+ return vgic_uaccess_write(vcpu, &dev->dev, offset, val);
+ else
+ return vgic_uaccess_read(vcpu, &dev->dev, offset, val);
+}
+
static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
@@ -491,9 +636,8 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
const struct vgic_register_region *region;
unsigned long data = 0;
- region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
- addr - iodev->base_addr);
- if (!region || !check_region(vcpu->kvm, region, addr, len)) {
+ region = vgic_get_mmio_region(vcpu, iodev, addr, len);
+ if (!region) {
memset(val, 0, len);
return 0;
}
@@ -524,9 +668,8 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
const struct vgic_register_region *region;
unsigned long data = vgic_data_mmio_bus_to_host(val, len);
- region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
- addr - iodev->base_addr);
- if (!region || !check_region(vcpu->kvm, region, addr, len))
+ region = vgic_get_mmio_region(vcpu, iodev, addr, len);
+ if (!region)
return 0;
switch (iodev->iodev_type) {
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 84961b4e4422..98bb566b660a 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -34,6 +34,10 @@ struct vgic_register_region {
gpa_t addr, unsigned int len,
unsigned long val);
};
+ unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len);
+ void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len, unsigned long val);
};
extern struct kvm_io_device_ops kvm_io_gic_ops;
@@ -86,6 +90,18 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
.write = wr, \
}
+#define REGISTER_DESC_WITH_LENGTH_UACCESS(off, rd, wr, urd, uwr, length, acc) \
+ { \
+ .reg_offset = off, \
+ .bits_per_irq = 0, \
+ .len = length, \
+ .access_flags = acc, \
+ .read = rd, \
+ .write = wr, \
+ .uaccess_read = urd, \
+ .uaccess_write = uwr, \
+ }
+
int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
struct vgic_register_region *reg_desc,
struct vgic_io_device *region,
@@ -158,6 +174,14 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
+int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
+ bool is_write, int offset, u32 *val);
+
+u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid);
+
+void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
+ const u64 val);
+
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev);
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 834137e7b83f..b834ecdf3225 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -104,7 +104,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
/* Edge is the only case where we preserve the pending bit */
if (irq->config == VGIC_CONFIG_EDGE &&
(val & GICH_LR_PENDING_BIT)) {
- irq->pending = true;
+ irq->pending_latch = true;
if (vgic_irq_is_sgi(intid)) {
u32 cpuid = val & GICH_LR_PHYSID_CPUID;
@@ -120,9 +120,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
*/
if (irq->config == VGIC_CONFIG_LEVEL) {
if (!(val & GICH_LR_PENDING_BIT))
- irq->soft_pending = false;
-
- irq->pending = irq->line_level || irq->soft_pending;
+ irq->pending_latch = false;
}
spin_unlock(&irq->irq_lock);
@@ -145,11 +143,11 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
u32 val = irq->intid;
- if (irq->pending) {
+ if (irq_is_pending(irq)) {
val |= GICH_LR_PENDING_BIT;
if (irq->config == VGIC_CONFIG_EDGE)
- irq->pending = false;
+ irq->pending_latch = false;
if (vgic_irq_is_sgi(irq->intid)) {
u32 src = ffs(irq->source);
@@ -158,7 +156,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source)
- irq->pending = true;
+ irq->pending_latch = true;
}
}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index e6b03fd8c374..be0f4c3e0142 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -94,7 +94,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
/* Edge is the only case where we preserve the pending bit */
if (irq->config == VGIC_CONFIG_EDGE &&
(val & ICH_LR_PENDING_BIT)) {
- irq->pending = true;
+ irq->pending_latch = true;
if (vgic_irq_is_sgi(intid) &&
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
@@ -111,9 +111,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
*/
if (irq->config == VGIC_CONFIG_LEVEL) {
if (!(val & ICH_LR_PENDING_BIT))
- irq->soft_pending = false;
-
- irq->pending = irq->line_level || irq->soft_pending;
+ irq->pending_latch = false;
}
spin_unlock(&irq->irq_lock);
@@ -127,11 +125,11 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
u32 model = vcpu->kvm->arch.vgic.vgic_model;
u64 val = irq->intid;
- if (irq->pending) {
+ if (irq_is_pending(irq)) {
val |= ICH_LR_PENDING_BIT;
if (irq->config == VGIC_CONFIG_EDGE)
- irq->pending = false;
+ irq->pending_latch = false;
if (vgic_irq_is_sgi(irq->intid) &&
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
@@ -141,7 +139,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source)
- irq->pending = true;
+ irq->pending_latch = true;
}
}
@@ -177,10 +175,18 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
u32 vmcr;
- vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
+ /*
+ * Ignore the FIQen bit, because GIC emulation always implies
+ * SRE=1 which means the vFIQEn bit is also RES1.
+ */
+ vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) <<
+ ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
+ vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK;
vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
+ vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK;
+ vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK;
vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
}
@@ -189,10 +195,18 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
- vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
+ /*
+ * Ignore the FIQen bit, because GIC emulation always implies
+ * SRE=1 which means the vFIQEn bit is also RES1.
+ */
+ vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) <<
+ ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
+ vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+ vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT;
+ vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT;
}
#define INITIAL_PENDBASER_VALUE \
@@ -215,15 +229,25 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/*
* If we are emulating a GICv3, we do it in an non-GICv2-compatible
* way, so we force SRE to 1 to demonstrate this to the guest.
+ * Also, we don't support any form of IRQ/FIQ bypass.
* This goes with the spec allowing the value to be RAO/WI.
*/
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+ vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
+ ICC_SRE_EL1_DFB |
+ ICC_SRE_EL1_SRE);
vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
} else {
vgic_v3->vgic_sre = 0;
}
+ vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_ID_BITS_MASK) >>
+ ICH_VTR_ID_BITS_SHIFT;
+ vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 &
+ ICH_VTR_PRI_BITS_MASK) >>
+ ICH_VTR_PRI_BITS_SHIFT) + 1;
+
/* Get the show on the road... */
vgic_v3->vgic_hcr = ICH_HCR_EN;
}
@@ -322,6 +346,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
*/
kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
kvm_vgic_global_state.can_emulate_gicv2 = false;
+ kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2;
if (!info->vcpu.start) {
kvm_info("GICv3: no GICV resource entry\n");
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 6440b56ec90e..654dfd40e449 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -160,7 +160,7 @@ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
* If the distributor is disabled, pending interrupts shouldn't be
* forwarded.
*/
- if (irq->enabled && irq->pending) {
+ if (irq->enabled && irq_is_pending(irq)) {
if (unlikely(irq->target_vcpu &&
!irq->target_vcpu->kvm->arch.vgic.enabled))
return NULL;
@@ -204,8 +204,8 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
goto out;
}
- penda = irqa->enabled && irqa->pending;
- pendb = irqb->enabled && irqb->pending;
+ penda = irqa->enabled && irq_is_pending(irqa);
+ pendb = irqb->enabled && irq_is_pending(irqb);
if (!penda || !pendb) {
ret = (int)pendb - (int)penda;
@@ -335,9 +335,22 @@ retry:
return true;
}
-static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
- unsigned int intid, bool level,
- bool mapped_irq)
+/**
+ * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
+ * @kvm: The VM structure pointer
+ * @cpuid: The CPU for PPIs
+ * @intid: The INTID to inject a new state to.
+ * @level: Edge-triggered: true: to trigger the interrupt
+ * false: to ignore the call
+ * Level-sensitive true: raise the input signal
+ * false: lower the input signal
+ *
+ * The VGIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts. You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
+ bool level)
{
struct kvm_vcpu *vcpu;
struct vgic_irq *irq;
@@ -357,11 +370,6 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
if (!irq)
return -EINVAL;
- if (irq->hw != mapped_irq) {
- vgic_put_irq(kvm, irq);
- return -EINVAL;
- }
-
spin_lock(&irq->irq_lock);
if (!vgic_validate_injection(irq, level)) {
@@ -371,12 +379,10 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
return 0;
}
- if (irq->config == VGIC_CONFIG_LEVEL) {
+ if (irq->config == VGIC_CONFIG_LEVEL)
irq->line_level = level;
- irq->pending = level || irq->soft_pending;
- } else {
- irq->pending = true;
- }
+ else
+ irq->pending_latch = true;
vgic_queue_irq_unlock(kvm, irq);
vgic_put_irq(kvm, irq);
@@ -384,32 +390,6 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
return 0;
}
-/**
- * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
- * @kvm: The VM structure pointer
- * @cpuid: The CPU for PPIs
- * @intid: The INTID to inject a new state to.
- * @level: Edge-triggered: true: to trigger the interrupt
- * false: to ignore the call
- * Level-sensitive true: raise the input signal
- * false: lower the input signal
- *
- * The VGIC is not concerned with devices being active-LOW or active-HIGH for
- * level-sensitive interrupts. You can think of the level parameter as 1
- * being HIGH and 0 being LOW and all devices being active-HIGH.
- */
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level)
-{
- return vgic_update_irq_pending(kvm, cpuid, intid, level, false);
-}
-
-int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level)
-{
- return vgic_update_irq_pending(kvm, cpuid, intid, level, true);
-}
-
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
@@ -689,7 +669,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
spin_lock(&irq->irq_lock);
- pending = irq->pending && irq->enabled;
+ pending = irq_is_pending(irq) && irq->enabled;
spin_unlock(&irq->irq_lock);
if (pending)
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 859f65c6e056..db28f7cadab2 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -30,13 +30,79 @@
#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
+#define VGIC_AFFINITY_0_SHIFT 0
+#define VGIC_AFFINITY_0_MASK (0xffUL << VGIC_AFFINITY_0_SHIFT)
+#define VGIC_AFFINITY_1_SHIFT 8
+#define VGIC_AFFINITY_1_MASK (0xffUL << VGIC_AFFINITY_1_SHIFT)
+#define VGIC_AFFINITY_2_SHIFT 16
+#define VGIC_AFFINITY_2_MASK (0xffUL << VGIC_AFFINITY_2_SHIFT)
+#define VGIC_AFFINITY_3_SHIFT 24
+#define VGIC_AFFINITY_3_MASK (0xffUL << VGIC_AFFINITY_3_SHIFT)
+
+#define VGIC_AFFINITY_LEVEL(reg, level) \
+ ((((reg) & VGIC_AFFINITY_## level ##_MASK) \
+ >> VGIC_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
+
+/*
+ * The Userspace encodes the affinity differently from the MPIDR,
+ * Below macro converts vgic userspace format to MPIDR reg format.
+ */
+#define VGIC_TO_MPIDR(val) (VGIC_AFFINITY_LEVEL(val, 0) | \
+ VGIC_AFFINITY_LEVEL(val, 1) | \
+ VGIC_AFFINITY_LEVEL(val, 2) | \
+ VGIC_AFFINITY_LEVEL(val, 3))
+
+/*
+ * As per Documentation/virtual/kvm/devices/arm-vgic-v3.txt,
+ * below macros are defined for CPUREG encoding.
+ */
+#define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000
+#define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT 14
+#define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK 0x0000000000003800
+#define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT 11
+#define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK 0x0000000000000780
+#define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT 7
+#define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK 0x0000000000000078
+#define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT 3
+#define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK 0x0000000000000007
+#define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT 0
+
+#define KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \
+ KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \
+ KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \
+ KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
+ KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
+
+static inline bool irq_is_pending(struct vgic_irq *irq)
+{
+ if (irq->config == VGIC_CONFIG_EDGE)
+ return irq->pending_latch;
+ else
+ return irq->pending_latch || irq->line_level;
+}
+
struct vgic_vmcr {
u32 ctlr;
u32 abpr;
u32 bpr;
u32 pmr;
+ /* Below member variable are valid only for GICv3 */
+ u32 grpen0;
+ u32 grpen1;
+};
+
+struct vgic_reg_attr {
+ struct kvm_vcpu *vcpu;
+ gpa_t addr;
};
+int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+ struct vgic_reg_attr *reg_attr);
+int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
+ struct vgic_reg_attr *reg_attr);
+const struct vgic_register_region *
+vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
+ gpa_t addr, int len);
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
u32 intid);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
@@ -89,9 +155,24 @@ bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
void vgic_enable_lpis(struct kvm_vcpu *vcpu);
int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
-
+int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
+int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val);
+int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val);
+int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ u64 id, u64 *val);
+int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
+ u64 *reg);
+int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ u32 intid, u64 *val);
int kvm_register_vgic_device(unsigned long type);
+void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
int vgic_lazy_init(struct kvm *kvm);
int vgic_init(struct kvm *kvm);
+int vgic_debug_init(struct kvm *kvm);
+int vgic_debug_destroy(struct kvm *kvm);
+
#endif
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 3815e940fbea..bb298a200cd3 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mmu_context.h>
+#include <linux/sched/mm.h>
#include "async_pf.h"
#include <trace/events/kvm.h>
@@ -204,7 +205,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
- atomic_inc(&work->mm->mm_users);
+ mmget(work->mm);
kvm_get_kvm(work->vcpu->kvm);
/* this can't really happen otherwise gfn_to_pfn_async
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a29786dd9522..4d28a9ddbee0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
continue;
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
- kvm->buses[bus_idx]->ioeventfd_count--;
+ if (kvm->buses[bus_idx])
+ kvm->buses[bus_idx]->ioeventfd_count--;
ioeventfd_release(p);
ret = 0;
break;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 482612b4e496..88257b311cb5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -32,7 +32,9 @@
#include <linux/file.h>
#include <linux/syscore_ops.h>
#include <linux/cpu.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/stat.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
@@ -506,11 +508,6 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
if (!slots)
return NULL;
- /*
- * Init kvm generation close to the maximum to easily test the
- * code of handling generation number wrap-around.
- */
- slots->generation = -150;
for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
slots->id_to_index[i] = slots->memslots[i].id = i;
@@ -616,13 +613,13 @@ static struct kvm *kvm_create_vm(unsigned long type)
return ERR_PTR(-ENOMEM);
spin_lock_init(&kvm->mmu_lock);
- atomic_inc(&current->mm->mm_count);
+ mmgrab(current->mm);
kvm->mm = current->mm;
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
- atomic_set(&kvm->users_count, 1);
+ refcount_set(&kvm->users_count, 1);
INIT_LIST_HEAD(&kvm->devices);
r = kvm_arch_init_vm(kvm, type);
@@ -641,9 +638,16 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = -ENOMEM;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- kvm->memslots[i] = kvm_alloc_memslots();
- if (!kvm->memslots[i])
+ struct kvm_memslots *slots = kvm_alloc_memslots();
+ if (!slots)
goto out_err_no_srcu;
+ /*
+ * Generations must be different for each address space.
+ * Init kvm generation close to the maximum to easily test the
+ * code of handling generation number wrap-around.
+ */
+ slots->generation = i * 2 - 150;
+ rcu_assign_pointer(kvm->memslots[i], slots);
}
if (init_srcu_struct(&kvm->srcu))
@@ -723,8 +727,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
- for (i = 0; i < KVM_NR_BUSES; i++)
- kvm_io_bus_destroy(kvm->buses[i]);
+ for (i = 0; i < KVM_NR_BUSES; i++) {
+ if (kvm->buses[i])
+ kvm_io_bus_destroy(kvm->buses[i]);
+ kvm->buses[i] = NULL;
+ }
kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -745,13 +752,13 @@ static void kvm_destroy_vm(struct kvm *kvm)
void kvm_get_kvm(struct kvm *kvm)
{
- atomic_inc(&kvm->users_count);
+ refcount_inc(&kvm->users_count);
}
EXPORT_SYMBOL_GPL(kvm_get_kvm);
void kvm_put_kvm(struct kvm *kvm)
{
- if (atomic_dec_and_test(&kvm->users_count))
+ if (refcount_dec_and_test(&kvm->users_count))
kvm_destroy_vm(kvm);
}
EXPORT_SYMBOL_GPL(kvm_put_kvm);
@@ -870,8 +877,14 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
* Increment the new memslot generation a second time. This prevents
* vm exits that race with memslot updates from caching a memslot
* generation that will (potentially) be valid forever.
+ *
+ * Generations must be unique even across address spaces. We do not need
+ * a global counter for that, instead the generation space is evenly split
+ * across address spaces. For example, with two address spaces, address
+ * space 0 will use generations 0, 4, 8, ... while * address space 1 will
+ * use generations 2, 6, 10, 14, ...
*/
- slots->generation++;
+ slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1;
kvm_arch_memslots_updated(kvm, slots);
@@ -1052,7 +1065,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
* changes) is disallowed above, so any other attribute changes getting
* here can be skipped.
*/
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+ if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) {
r = kvm_iommu_map_pages(kvm, &new);
return r;
}
@@ -1094,37 +1107,31 @@ int kvm_get_dirty_log(struct kvm *kvm,
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int r, i, as_id, id;
+ int i, as_id, id;
unsigned long n;
unsigned long any = 0;
- r = -EINVAL;
as_id = log->slot >> 16;
id = (u16)log->slot;
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
- goto out;
+ return -EINVAL;
slots = __kvm_memslots(kvm, as_id);
memslot = id_to_memslot(slots, id);
- r = -ENOENT;
if (!memslot->dirty_bitmap)
- goto out;
+ return -ENOENT;
n = kvm_dirty_bitmap_bytes(memslot);
for (i = 0; !any && i < n/sizeof(long); ++i)
any = memslot->dirty_bitmap[i];
- r = -EFAULT;
if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
- goto out;
+ return -EFAULT;
if (any)
*is_dirty = 1;
-
- r = 0;
-out:
- return r;
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
@@ -1156,24 +1163,22 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int r, i, as_id, id;
+ int i, as_id, id;
unsigned long n;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_buffer;
- r = -EINVAL;
as_id = log->slot >> 16;
id = (u16)log->slot;
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
- goto out;
+ return -EINVAL;
slots = __kvm_memslots(kvm, as_id);
memslot = id_to_memslot(slots, id);
dirty_bitmap = memslot->dirty_bitmap;
- r = -ENOENT;
if (!dirty_bitmap)
- goto out;
+ return -ENOENT;
n = kvm_dirty_bitmap_bytes(memslot);
@@ -1202,14 +1207,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
}
spin_unlock(&kvm->mmu_lock);
-
- r = -EFAULT;
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
- goto out;
-
- r = 0;
-out:
- return r;
+ return -EFAULT;
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
#endif
@@ -1937,10 +1937,10 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
}
EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest);
-int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- gpa_t gpa, unsigned long len)
+static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots,
+ struct gfn_to_hva_cache *ghc,
+ gpa_t gpa, unsigned long len)
{
- struct kvm_memslots *slots = kvm_memslots(kvm);
int offset = offset_in_page(gpa);
gfn_t start_gfn = gpa >> PAGE_SHIFT;
gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT;
@@ -1950,7 +1950,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
ghc->gpa = gpa;
ghc->generation = slots->generation;
ghc->len = len;
- ghc->memslot = gfn_to_memslot(kvm, start_gfn);
+ ghc->memslot = __gfn_to_memslot(slots, start_gfn);
ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
ghc->hva += offset;
@@ -1960,7 +1960,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
* verify that the entire region is valid here.
*/
while (start_gfn <= end_gfn) {
- ghc->memslot = gfn_to_memslot(kvm, start_gfn);
+ ghc->memslot = __gfn_to_memslot(slots, start_gfn);
ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
&nr_pages_avail);
if (kvm_is_error_hva(ghc->hva))
@@ -1972,22 +1972,29 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
-int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, int offset, unsigned long len)
+int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+ gpa_t gpa, unsigned long len)
+{
+ struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
+ return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init);
+
+int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+ void *data, int offset, unsigned long len)
{
- struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
int r;
gpa_t gpa = ghc->gpa + offset;
BUG_ON(len + offset > ghc->len);
if (slots->generation != ghc->generation)
- kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
+ __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
if (unlikely(!ghc->memslot))
- return kvm_write_guest(kvm, gpa, data, len);
+ return kvm_vcpu_write_guest(vcpu, gpa, data, len);
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
@@ -1999,28 +2006,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached);
+EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached);
-int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
{
- return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len);
+ return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len);
}
-EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
+EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached);
-int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
{
- struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
int r;
BUG_ON(len > ghc->len);
if (slots->generation != ghc->generation)
- kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
+ __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
if (unlikely(!ghc->memslot))
- return kvm_read_guest(kvm, ghc->gpa, data, len);
+ return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len);
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
@@ -2031,7 +2038,7 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
+EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
@@ -2348,9 +2355,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
-static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int kvm_vcpu_fault(struct vm_fault *vmf)
{
- struct kvm_vcpu *vcpu = vma->vm_file->private_data;
+ struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
struct page *page;
if (vmf->pgoff == 0)
@@ -3133,10 +3140,9 @@ static long kvm_vm_compat_ioctl(struct file *filp,
struct compat_kvm_dirty_log compat_log;
struct kvm_dirty_log log;
- r = -EFAULT;
if (copy_from_user(&compat_log, (void __user *)arg,
sizeof(compat_log)))
- goto out;
+ return -EFAULT;
log.slot = compat_log.slot;
log.padding1 = compat_log.padding1;
log.padding2 = compat_log.padding2;
@@ -3148,8 +3154,6 @@ static long kvm_vm_compat_ioctl(struct file *filp,
default:
r = kvm_vm_ioctl(filp, ioctl, arg);
}
-
-out:
return r;
}
#endif
@@ -3473,6 +3477,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_write(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3490,6 +3496,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
/* First try the device referenced by cookie. */
if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3540,6 +3548,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_read(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3552,6 +3562,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
+ if (!bus)
+ return -ENOMEM;
+
/* exclude ioeventfd which is limited by maximum fd */
if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
return -ENOSPC;
@@ -3571,37 +3584,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
}
/* Caller must hold slots_lock. */
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev)
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- int i, r;
+ int i;
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
- r = -ENOENT;
+ if (!bus)
+ return;
+
for (i = 0; i < bus->dev_count; i++)
if (bus->range[i].dev == dev) {
- r = 0;
break;
}
- if (r)
- return r;
+ if (i == bus->dev_count)
+ return;
new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
sizeof(struct kvm_io_range)), GFP_KERNEL);
- if (!new_bus)
- return -ENOMEM;
+ if (!new_bus) {
+ pr_err("kvm: failed to shrink bus, removing it completely\n");
+ goto broken;
+ }
memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
new_bus->dev_count--;
memcpy(new_bus->range + i, bus->range + i + 1,
(new_bus->dev_count - i) * sizeof(struct kvm_io_range));
+broken:
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
- return r;
+ return;
}
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -3614,6 +3631,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
srcu_idx = srcu_read_lock(&kvm->srcu);
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ if (!bus)
+ goto out_unlock;
dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
if (dev_idx < 0)
@@ -3640,7 +3659,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
* To avoid the race between open and the removal of the debugfs
* directory we test against the users count.
*/
- if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0))
+ if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
return -ENOENT;
if (simple_attr_open(inode, file, get, set, fmt)) {