author    | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2017-05-02 09:48:26 -0700
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2017-05-02 09:48:26 -0700
commit    | 0337966d121ebebf73a1c346123e8112796e684e (patch)
tree      | c0d4388591e72dc5a26ee976a9cbca70f6bafbbd /virt
parent    | 7c5bb4ac2b76d2a09256aec8a7d584bf3e2b0466 (diff)
parent    | 8a038b83e012097a7ac6cfb9f6c5fac1da8fad6e (diff)
download  | linux-0337966d121ebebf73a1c346123e8112796e684e.tar.bz2
Merge branch 'next' into for-linus
Prepare input updates for 4.12 merge window.
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/arch_timer.c           | 201
-rw-r--r-- | virt/kvm/arm/hyp/timer-sr.c         |  13
-rw-r--r-- | virt/kvm/arm/vgic/vgic-debug.c      | 283
-rw-r--r-- | virt/kvm/arm/vgic/vgic-init.c       |   4
-rw-r--r-- | virt/kvm/arm/vgic/vgic-irqfd.c      |   3
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c        | 115
-rw-r--r-- | virt/kvm/arm/vgic/vgic-kvm-device.c | 231
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v2.c    |  87
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c    | 203
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c       | 199
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.h       |  24
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v2.c         |  12
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c         |  45
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c            |  66
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h            |  83
-rw-r--r-- | virt/kvm/async_pf.c                 |   3
-rw-r--r-- | virt/kvm/eventfd.c                  |   3
-rw-r--r-- | virt/kvm/kvm_main.c                 | 175
18 files changed, 1369 insertions, 381 deletions
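Several hunks below replace the old irq->pending bookkeeping with irq->pending_latch and query it through irq_is_pending(). The helper itself is added to virt/kvm/arm/vgic/vgic.h (listed in the diffstat above, but its hunk is not part of this excerpt). As a rough, self-contained sketch of the intended semantics — struct and function names here are illustrative, not the kernel's — an edge-triggered interrupt is pending only via the latch, while a level-triggered one is pending if either the latch or the sampled line level is high:

```c
/*
 * Illustrative sketch only -- the real helper lives in
 * virt/kvm/arm/vgic/vgic.h, whose hunk is not shown in this excerpt.
 * Names here are made up for the example.
 */
#include <stdbool.h>

enum vgic_irq_config_sketch { VGIC_CONFIG_EDGE_SKETCH, VGIC_CONFIG_LEVEL_SKETCH };

struct vgic_irq_sketch {
	enum vgic_irq_config_sketch config;
	bool pending_latch;	/* latched pending state (ISPENDR writes, LR sync, ITS, ...) */
	bool line_level;	/* sampled input line, meaningful for level interrupts only */
};

/* Edge IRQs are pending only via the latch; level IRQs also follow the line. */
static inline bool irq_is_pending_sketch(const struct vgic_irq_sketch *irq)
{
	if (irq->config == VGIC_CONFIG_EDGE_SKETCH)
		return irq->pending_latch;

	return irq->pending_latch || irq->line_level;
}
```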
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 6a084cd57b88..35d7100e0815 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -37,10 +37,10 @@ static u32 host_vtimer_irq_flags; void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { - vcpu->arch.timer_cpu.active_cleared_last = false; + vcpu_vtimer(vcpu)->active_cleared_last = false; } -static u64 kvm_phys_timer_read(void) +u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); } @@ -98,12 +98,12 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) kvm_vcpu_kick(vcpu); } -static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) +static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) { u64 cval, now; - cval = vcpu->arch.timer_cpu.cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + cval = timer_ctx->cnt_cval; + now = kvm_phys_timer_read() - timer_ctx->cntvoff; if (now < cval) { u64 ns; @@ -118,6 +118,35 @@ static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) return 0; } +static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) +{ + return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && + (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); +} + +/* + * Returns the earliest expiration time in ns among guest timers. + * Note that it will return 0 if none of timers can fire. + */ +static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) +{ + u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + if (kvm_timer_irq_can_fire(vtimer)) + min_virt = kvm_timer_compute_delta(vtimer); + + if (kvm_timer_irq_can_fire(ptimer)) + min_phys = kvm_timer_compute_delta(ptimer); + + /* If none of timers can fire, then return 0 */ + if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX)) + return 0; + + return min(min_virt, min_phys); +} + static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) { struct arch_timer_cpu *timer; @@ -132,7 +161,7 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) * PoV (NTP on the host may have forced it to expire * early). If we should have slept longer, restart it. 
*/ - ns = kvm_timer_compute_delta(vcpu); + ns = kvm_timer_earliest_exp(vcpu); if (unlikely(ns)) { hrtimer_forward_now(hrt, ns_to_ktime(ns)); return HRTIMER_RESTART; @@ -142,42 +171,33 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } -static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); -} - -bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) +bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; u64 cval, now; - if (!kvm_timer_irq_can_fire(vcpu)) + if (!kvm_timer_irq_can_fire(timer_ctx)) return false; - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + cval = timer_ctx->cnt_cval; + now = kvm_phys_timer_read() - timer_ctx->cntvoff; return cval <= now; } -static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, + struct arch_timer_context *timer_ctx) { int ret; - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; BUG_ON(!vgic_initialized(vcpu->kvm)); - timer->active_cleared_last = false; - timer->irq.level = new_level; - trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq, - timer->irq.level); - ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, - timer->irq.irq, - timer->irq.level); + timer_ctx->active_cleared_last = false; + timer_ctx->irq.level = new_level; + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, + timer_ctx->irq.level); + + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, + timer_ctx->irq.level); WARN_ON(ret); } @@ -188,22 +208,43 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) static int kvm_timer_update_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* * If userspace modified the timer registers via SET_ONE_REG before - * the vgic was initialized, we mustn't set the timer->irq.level value + * the vgic was initialized, we mustn't set the vtimer->irq.level value * because the guest would never see the interrupt. Instead wait * until we call this function from kvm_timer_flush_hwstate. */ if (!vgic_initialized(vcpu->kvm) || !timer->enabled) return -ENODEV; - if (kvm_timer_should_fire(vcpu) != timer->irq.level) - kvm_timer_update_irq(vcpu, !timer->irq.level); + if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) + kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer); + + if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) + kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); return 0; } +/* Schedule the background timer for the emulated timer. 
*/ +static void kvm_timer_emulate(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer_ctx) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + if (kvm_timer_should_fire(timer_ctx)) + return; + + if (!kvm_timer_irq_can_fire(timer_ctx)) + return; + + /* The timer has not yet expired, schedule a background timer */ + timer_arm(timer, kvm_timer_compute_delta(timer_ctx)); +} + /* * Schedule the background timer before calling kvm_vcpu_block, so that this * thread is removed from its waitqueue and made runnable when there's a timer @@ -212,26 +253,31 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) void kvm_timer_schedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); BUG_ON(timer_is_armed(timer)); /* - * No need to schedule a background timer if the guest timer has + * No need to schedule a background timer if any guest timer has * already expired, because kvm_vcpu_block will return before putting * the thread to sleep. */ - if (kvm_timer_should_fire(vcpu)) + if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer)) return; /* - * If the timer is not capable of raising interrupts (disabled or + * If both timers are not capable of raising interrupts (disabled or * masked), then there's no more work for us to do. */ - if (!kvm_timer_irq_can_fire(vcpu)) + if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer)) return; - /* The timer has not yet expired, schedule a background timer */ - timer_arm(timer, kvm_timer_compute_delta(vcpu)); + /* + * The guest timers have not yet expired, schedule a background timer. + * Set the earliest expiration time among the guest timers. + */ + timer_arm(timer, kvm_timer_earliest_exp(vcpu)); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) @@ -249,13 +295,16 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu) */ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); bool phys_active; int ret; if (kvm_timer_update_state(vcpu)) return; + /* Set the background timer for the physical timer emulation. */ + kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); + /* * If we enter the guest with the virtual input level to the VGIC * asserted, then we have already told the VGIC what we need to, and @@ -273,8 +322,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * to ensure that hardware interrupts from the timer triggers a guest * exit. 
*/ - phys_active = timer->irq.level || - kvm_vgic_map_is_active(vcpu, timer->irq.irq); + phys_active = vtimer->irq.level || + kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); /* * We want to avoid hitting the (re)distributor as much as @@ -296,7 +345,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * - cached value is "active clear" * - value to be programmed is "active clear" */ - if (timer->active_cleared_last && !phys_active) + if (vtimer->active_cleared_last && !phys_active) return; ret = irq_set_irqchip_state(host_vtimer_irq, @@ -304,7 +353,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) phys_active); WARN_ON(ret); - timer->active_cleared_last = !phys_active; + vtimer->active_cleared_last = !phys_active; } /** @@ -318,7 +367,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - BUG_ON(timer_is_armed(timer)); + /* + * This is to cancel the background timer for the physical timer + * emulation if it is set. + */ + timer_disarm(timer); /* * The guest could have modified the timer registers or the timer @@ -328,9 +381,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq) + const struct kvm_irq_level *virt_irq, + const struct kvm_irq_level *phys_irq) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* * The vcpu timer irq number cannot be determined in @@ -338,7 +393,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * kvm_vcpu_set_target(). To handle this, we determine * vcpu timer irq number when the vcpu is reset. */ - timer->irq.irq = irq->irq; + vtimer->irq.irq = virt_irq->irq; + ptimer->irq.irq = phys_irq->irq; /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 @@ -346,16 +402,40 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - timer->cntv_ctl = 0; + vtimer->cnt_ctl = 0; + ptimer->cnt_ctl = 0; kvm_timer_update_state(vcpu); return 0; } +/* Make the updates of cntvoff for all vtimer contexts atomic */ +static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) +{ + int i; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, tmp, kvm) + vcpu_vtimer(tmp)->cntvoff = cntvoff; + + /* + * When called from the vcpu create path, the CPU being created is not + * included in the loop above, so we just set it here as well. + */ + vcpu_vtimer(vcpu)->cntvoff = cntvoff; + mutex_unlock(&kvm->lock); +} + void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + /* Synchronize cntvoff across all vtimers of a VM. 
*/ + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); + vcpu_ptimer(vcpu)->cntvoff = 0; + INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->timer.function = kvm_timer_expire; @@ -368,17 +448,17 @@ static void kvm_timer_init_interrupt(void *info) int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); switch (regid) { case KVM_REG_ARM_TIMER_CTL: - timer->cntv_ctl = value; + vtimer->cnt_ctl = value; break; case KVM_REG_ARM_TIMER_CNT: - vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value; + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); break; case KVM_REG_ARM_TIMER_CVAL: - timer->cntv_cval = value; + vtimer->cnt_cval = value; break; default: return -1; @@ -390,15 +470,15 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); switch (regid) { case KVM_REG_ARM_TIMER_CTL: - return timer->cntv_ctl; + return vtimer->cnt_ctl; case KVM_REG_ARM_TIMER_CNT: - return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + return kvm_phys_timer_read() - vtimer->cntvoff; case KVM_REG_ARM_TIMER_CVAL: - return timer->cntv_cval; + return vtimer->cnt_cval; } return (u64)-1; } @@ -462,14 +542,16 @@ int kvm_timer_hyp_init(void) void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); timer_disarm(timer); - kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq); + kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct irq_desc *desc; struct irq_data *data; int phys_irq; @@ -497,7 +579,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) * Tell the VGIC that the virtual interrupt is tied to a * physical interrupt. We do that once per VCPU. */ - ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq); + ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq); if (ret) return ret; @@ -506,11 +588,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return 0; } -void kvm_timer_init(struct kvm *kvm) -{ - kvm->arch.timer.cntvoff = kvm_phys_timer_read(); -} - /* * On VHE system, we only need to configure trap on physical timer and counter * accesses in EL0 and EL1 once, not for every world switch. 
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index 63e28dd18bb0..4734915ab71f 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -25,11 +25,12 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); u64 val; if (timer->enabled) { - timer->cntv_ctl = read_sysreg_el0(cntv_ctl); - timer->cntv_cval = read_sysreg_el0(cntv_cval); + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + vtimer->cnt_cval = read_sysreg_el0(cntv_cval); } /* Disable the virtual timer */ @@ -52,8 +53,8 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) { - struct kvm *kvm = kern_hyp_va(vcpu->kvm); struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); u64 val; /* Those bits are already configured at boot on VHE-system */ @@ -69,9 +70,9 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) } if (timer->enabled) { - write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); - write_sysreg_el0(timer->cntv_cval, cntv_cval); + write_sysreg(vtimer->cntvoff, cntvoff_el2); + write_sysreg_el0(vtimer->cnt_cval, cntv_cval); isb(); - write_sysreg_el0(timer->cntv_ctl, cntv_ctl); + write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); } } diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c new file mode 100644 index 000000000000..7072ab743332 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -0,0 +1,283 @@ +/* + * Copyright (C) 2016 Linaro + * Author: Christoffer Dall <christoffer.dall@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/cpu.h> +#include <linux/debugfs.h> +#include <linux/interrupt.h> +#include <linux/kvm_host.h> +#include <linux/seq_file.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_mmu.h> +#include "vgic.h" + +/* + * Structure to control looping through the entire vgic state. We start at + * zero for each field and move upwards. So, if dist_id is 0 we print the + * distributor info. When dist_id is 1, we have already printed it and move + * on. + * + * When vcpu_id < nr_cpus we print the vcpu info until vcpu_id == nr_cpus and + * so on. 
+ */ +struct vgic_state_iter { + int nr_cpus; + int nr_spis; + int dist_id; + int vcpu_id; + int intid; +}; + +static void iter_next(struct vgic_state_iter *iter) +{ + if (iter->dist_id == 0) { + iter->dist_id++; + return; + } + + iter->intid++; + if (iter->intid == VGIC_NR_PRIVATE_IRQS && + ++iter->vcpu_id < iter->nr_cpus) + iter->intid = 0; +} + +static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, + loff_t pos) +{ + int nr_cpus = atomic_read(&kvm->online_vcpus); + + memset(iter, 0, sizeof(*iter)); + + iter->nr_cpus = nr_cpus; + iter->nr_spis = kvm->arch.vgic.nr_spis; + + /* Fast forward to the right position if needed */ + while (pos--) + iter_next(iter); +} + +static bool end_of_vgic(struct vgic_state_iter *iter) +{ + return iter->dist_id > 0 && + iter->vcpu_id == iter->nr_cpus && + (iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis; +} + +static void *vgic_debug_start(struct seq_file *s, loff_t *pos) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter; + + mutex_lock(&kvm->lock); + iter = kvm->arch.vgic.iter; + if (iter) { + iter = ERR_PTR(-EBUSY); + goto out; + } + + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { + iter = ERR_PTR(-ENOMEM); + goto out; + } + + iter_init(kvm, iter, *pos); + kvm->arch.vgic.iter = iter; + + if (end_of_vgic(iter)) + iter = NULL; +out: + mutex_unlock(&kvm->lock); + return iter; +} + +static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter = kvm->arch.vgic.iter; + + ++*pos; + iter_next(iter); + if (end_of_vgic(iter)) + iter = NULL; + return iter; +} + +static void vgic_debug_stop(struct seq_file *s, void *v) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter; + + /* + * If the seq file wasn't properly opened, there's nothing to clearn + * up. + */ + if (IS_ERR(v)) + return; + + mutex_lock(&kvm->lock); + iter = kvm->arch.vgic.iter; + kfree(iter); + kvm->arch.vgic.iter = NULL; + mutex_unlock(&kvm->lock); +} + +static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) +{ + seq_printf(s, "Distributor\n"); + seq_printf(s, "===========\n"); + seq_printf(s, "vgic_model:\t%s\n", + (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ? + "GICv3" : "GICv2"); + seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); + seq_printf(s, "enabled:\t%d\n", dist->enabled); + seq_printf(s, "\n"); + + seq_printf(s, "P=pending_latch, L=line_level, A=active\n"); + seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n"); +} + +static void print_header(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + int id = 0; + char *hdr = "SPI "; + + if (vcpu) { + hdr = "VCPU"; + id = vcpu->vcpu_id; + } + + seq_printf(s, "\n"); + seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHC HWID TARGET SRC PRI VCPU_ID\n", hdr, id); + seq_printf(s, "---------------------------------------------------------------\n"); +} + +static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + char *type; + if (irq->intid < VGIC_NR_SGIS) + type = "SGI"; + else if (irq->intid < VGIC_NR_PRIVATE_IRQS) + type = "PPI"; + else + type = "SPI"; + + if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS) + print_header(s, irq, vcpu); + + seq_printf(s, " %s %4d " + " %2d " + "%d%d%d%d%d%d " + "%8d " + "%8x " + " %2x " + "%3d " + " %2d " + "\n", + type, irq->intid, + (irq->target_vcpu) ? 
irq->target_vcpu->vcpu_id : -1, + irq->pending_latch, + irq->line_level, + irq->active, + irq->enabled, + irq->hw, + irq->config == VGIC_CONFIG_LEVEL, + irq->hwintid, + irq->mpidr, + irq->source, + irq->priority, + (irq->vcpu) ? irq->vcpu->vcpu_id : -1); + +} + +static int vgic_debug_show(struct seq_file *s, void *v) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter = (struct vgic_state_iter *)v; + struct vgic_irq *irq; + struct kvm_vcpu *vcpu = NULL; + + if (iter->dist_id == 0) { + print_dist_state(s, &kvm->arch.vgic); + return 0; + } + + if (!kvm->arch.vgic.initialized) + return 0; + + if (iter->vcpu_id < iter->nr_cpus) { + vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); + irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid]; + } else { + irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS]; + } + + spin_lock(&irq->irq_lock); + print_irq_state(s, irq, vcpu); + spin_unlock(&irq->irq_lock); + + return 0; +} + +static struct seq_operations vgic_debug_seq_ops = { + .start = vgic_debug_start, + .next = vgic_debug_next, + .stop = vgic_debug_stop, + .show = vgic_debug_show +}; + +static int debug_open(struct inode *inode, struct file *file) +{ + int ret; + ret = seq_open(file, &vgic_debug_seq_ops); + if (!ret) { + struct seq_file *seq; + /* seq_open will have modified file->private_data */ + seq = file->private_data; + seq->private = inode->i_private; + } + + return ret; +}; + +static struct file_operations vgic_debug_fops = { + .owner = THIS_MODULE, + .open = debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +int vgic_debug_init(struct kvm *kvm) +{ + if (!kvm->debugfs_dentry) + return -ENOENT; + + if (!debugfs_create_file("vgic-state", 0444, + kvm->debugfs_dentry, + kvm, + &vgic_debug_fops)) + return -ENOMEM; + + return 0; +} + +int vgic_debug_destroy(struct kvm *kvm) +{ + return 0; +} diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index c737ea0a310a..276139a24e6f 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -259,6 +259,8 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; + vgic_debug_init(kvm); + dist->initialized = true; out: return ret; @@ -288,6 +290,8 @@ static void __kvm_vgic_destroy(struct kvm *kvm) struct kvm_vcpu *vcpu; int i; + vgic_debug_destroy(kvm); + kvm_vgic_dist_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index d918dcf26a5a..f138ed2e9c63 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -99,6 +99,9 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, if (!vgic_has_its(kvm)) return -ENODEV; + if (!level) + return -1; + return vgic_its_inject_msi(kvm, &msi); } diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 8c2b3cdcb2c5..8d1da1af4b09 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -350,7 +350,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); spin_lock(&irq->irq_lock); - irq->pending = pendmask & (1U << bit_nr); + irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); } @@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) return ret; } -static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - u32 reg = 0; - - mutex_lock(&its->cmd_lock); - if 
(its->creadr == its->cwriter) - reg |= GITS_CTLR_QUIESCENT; - if (its->enabled) - reg |= GITS_CTLR_ENABLE; - mutex_unlock(&its->cmd_lock); - - return reg; -} - -static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - its->enabled = !!(val & GITS_CTLR_ENABLE); -} - static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len) @@ -465,7 +442,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, return -EBUSY; spin_lock(&itte->irq->irq_lock); - itte->irq->pending = true; + itte->irq->pending_latch = true; vgic_queue_irq_unlock(kvm, itte->irq); return 0; @@ -913,7 +890,7 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, if (!itte) return E_ITS_CLEAR_UNMAPPED_INTERRUPT; - itte->irq->pending = false; + itte->irq->pending_latch = false; return 0; } @@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, #define ITS_CMD_SIZE 32 #define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) -/* - * By writing to CWRITER the guest announces new commands to be processed. - * To avoid any races in the first place, we take the its_cmd lock, which - * protects our ring buffer variables, so that there is only one user - * per ITS handling commands at a given time. - */ -static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) +/* Must be called with the cmd_lock held. */ +static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) { gpa_t cbaser; u64 cmd_buf[4]; - u32 reg; - if (!its) - return; - - mutex_lock(&its->cmd_lock); - - reg = update_64bit_reg(its->cwriter, addr & 7, len, val); - reg = ITS_CMD_OFFSET(reg); - if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { - mutex_unlock(&its->cmd_lock); + /* Commands are only processed when the ITS is enabled. */ + if (!its->enabled) return; - } - its->cwriter = reg; cbaser = CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { @@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) its->creadr = 0; } +} + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. 
+ */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + its->cwriter = reg; + + vgic_its_process_commands(kvm, its); mutex_unlock(&its->cmd_lock); } @@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, *regptr = reg; } +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + mutex_lock(&its->cmd_lock); + + its->enabled = !!(val & GITS_CTLR_ENABLE); + + /* + * Try to process any pending commands. This function bails out early + * if the ITS is disabled or no commands have been queued. + */ + vgic_its_process_commands(kvm, its); + + mutex_unlock(&its->cmd_lock); +} + #define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ { \ .reg_offset = off, \ diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index fbe87a63d250..d181d2baee9c 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -17,6 +17,7 @@ #include <kvm/arm_vgic.h> #include <linux/uaccess.h> #include <asm/kvm_mmu.h> +#include <asm/cputype.h> #include "vgic.h" /* common helpers */ @@ -230,14 +231,8 @@ int kvm_register_vgic_device(unsigned long type) return ret; } -struct vgic_reg_attr { - struct kvm_vcpu *vcpu; - gpa_t addr; -}; - -static int parse_vgic_v2_attr(struct kvm_device *dev, - struct kvm_device_attr *attr, - struct vgic_reg_attr *reg_attr) +int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr) { int cpuid; @@ -292,14 +287,14 @@ static bool lock_all_vcpus(struct kvm *kvm) } /** - * vgic_attr_regs_access_v2 - allows user space to access VGIC v2 state + * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state * * @dev: kvm device handle * @attr: kvm device attribute * @reg: address the value is read or written * @is_write: true if userspace is writing a register */ -static int vgic_attr_regs_access_v2(struct kvm_device *dev, +static int vgic_v2_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, u32 *reg, bool is_write) { @@ -308,7 +303,7 @@ static int vgic_attr_regs_access_v2(struct kvm_device *dev, struct kvm_vcpu *vcpu; int ret; - ret = parse_vgic_v2_attr(dev, attr, ®_attr); + ret = vgic_v2_parse_attr(dev, attr, ®_attr); if (ret) return ret; @@ -362,7 +357,7 @@ static int vgic_v2_set_attr(struct kvm_device *dev, if (get_user(reg, uaddr)) return -EFAULT; - return vgic_attr_regs_access_v2(dev, attr, ®, true); + return vgic_v2_attr_regs_access(dev, attr, ®, true); } } @@ -384,7 +379,7 @@ static int vgic_v2_get_attr(struct kvm_device *dev, u32 __user *uaddr = (u32 __user *)(long)attr->addr; u32 reg = 0; - ret = vgic_attr_regs_access_v2(dev, attr, ®, false); + ret = vgic_v2_attr_regs_access(dev, attr, ®, false); if (ret) return ret; return put_user(reg, uaddr); @@ -428,16 +423,211 @@ struct kvm_device_ops 
kvm_arm_vgic_v2_ops = { .has_attr = vgic_v2_has_attr, }; +int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr) +{ + unsigned long vgic_mpidr, mpidr_reg; + + /* + * For KVM_DEV_ARM_VGIC_GRP_DIST_REGS group, + * attr might not hold MPIDR. Hence assume vcpu0. + */ + if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS) { + vgic_mpidr = (attr->attr & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) >> + KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT; + + mpidr_reg = VGIC_TO_MPIDR(vgic_mpidr); + reg_attr->vcpu = kvm_mpidr_to_vcpu(dev->kvm, mpidr_reg); + } else { + reg_attr->vcpu = kvm_get_vcpu(dev->kvm, 0); + } + + if (!reg_attr->vcpu) + return -EINVAL; + + reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + + return 0; +} + +/* + * vgic_v3_attr_regs_access - allows user space to access VGIC v3 state + * + * @dev: kvm device handle + * @attr: kvm device attribute + * @reg: address the value is read or written + * @is_write: true if userspace is writing a register + */ +static int vgic_v3_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u64 *reg, bool is_write) +{ + struct vgic_reg_attr reg_attr; + gpa_t addr; + struct kvm_vcpu *vcpu; + int ret; + u32 tmp32; + + ret = vgic_v3_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + mutex_lock(&dev->kvm->lock); + + if (unlikely(!vgic_initialized(dev->kvm))) { + ret = -EBUSY; + goto out; + } + + if (!lock_all_vcpus(dev->kvm)) { + ret = -EBUSY; + goto out; + } + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + if (is_write) + tmp32 = *reg; + + ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32); + if (!is_write) + *reg = tmp32; + break; + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + if (is_write) + tmp32 = *reg; + + ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32); + if (!is_write) + *reg = tmp32; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 regid; + + regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); + ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write, + regid, reg); + break; + } + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + unsigned int info, intid; + + info = (attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT; + if (info == VGIC_LEVEL_INFO_LINE_LEVEL) { + intid = attr->attr & + KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK; + ret = vgic_v3_line_level_info_uaccess(vcpu, is_write, + intid, reg); + } else { + ret = -EINVAL; + } + break; + } + default: + ret = -EINVAL; + break; + } + + unlock_all_vcpus(dev->kvm); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + static int vgic_v3_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return vgic_set_common_attr(dev, attr); + int ret; + + ret = vgic_set_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 tmp32; + u64 reg; + + if (get_user(tmp32, uaddr)) + return -EFAULT; + + reg = tmp32; + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u64 reg; + u32 tmp32; + + if (get_user(tmp32, uaddr)) + 
return -EFAULT; + + reg = tmp32; + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } + } + return -ENXIO; } static int vgic_v3_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return vgic_get_common_attr(dev, attr); + int ret; + + ret = vgic_get_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u64 reg; + u32 tmp32; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + tmp32 = reg; + return put_user(tmp32, uaddr); + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u64 reg; + u32 tmp32; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + tmp32 = reg; + return put_user(tmp32, uaddr); + } + } + return -ENXIO; } static int vgic_v3_has_attr(struct kvm_device *dev, @@ -451,8 +641,19 @@ static int vgic_v3_has_attr(struct kvm_device *dev, return 0; } break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + return vgic_v3_has_attr_regs(dev, attr); case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: return 0; + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) == + VGIC_LEVEL_INFO_LINE_LEVEL) + return 0; + break; + } case KVM_DEV_ARM_VGIC_GRP_CTRL: switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 78e34bc4d89b..a3ad7ff95c9b 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -98,7 +98,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); spin_lock(&irq->irq_lock); - irq->pending = true; + irq->pending_latch = true; irq->source |= 1U << source_vcpu->vcpu_id; vgic_queue_irq_unlock(source_vcpu->kvm, irq); @@ -182,7 +182,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, irq->source &= ~((val >> (i * 8)) & 0xff); if (!irq->source) - irq->pending = false; + irq->pending_latch = false; spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); @@ -204,7 +204,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, irq->source |= (val >> (i * 8)) & 0xff; if (irq->source) { - irq->pending = true; + irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq); } else { spin_unlock(&irq->irq_lock); @@ -213,22 +213,6 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, } } -static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_set_vmcr(vcpu, vmcr); - else - vgic_v3_set_vmcr(vcpu, vmcr); -} - -static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_get_vmcr(vcpu, vmcr); - else - vgic_v3_get_vmcr(vcpu, vmcr); -} - #define GICC_ARCH_VERSION_V2 0x2 /* These are for userland accesses only, there is no guest-facing emulation. 
*/ @@ -369,21 +353,30 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) { - int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; - const struct vgic_register_region *regions; + const struct vgic_register_region *region; + struct vgic_io_device iodev; + struct vgic_reg_attr reg_attr; + struct kvm_vcpu *vcpu; gpa_t addr; - int nr_regions, i, len; + int ret; + + ret = vgic_v2_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; - addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - regions = vgic_v2_dist_registers; - nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); + iodev.regions = vgic_v2_dist_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); + iodev.base_addr = 0; break; case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - regions = vgic_v2_cpu_registers; - nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); + iodev.regions = vgic_v2_cpu_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); + iodev.base_addr = 0; break; default: return -ENXIO; @@ -393,43 +386,11 @@ int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) if (addr & 3) return -ENXIO; - for (i = 0; i < nr_regions; i++) { - if (regions[i].bits_per_irq) - len = (regions[i].bits_per_irq * nr_irqs) / 8; - else - len = regions[i].len; - - if (regions[i].reg_offset <= addr && - regions[i].reg_offset + len > addr) - return 0; - } - - return -ENXIO; -} - -/* - * When userland tries to access the VGIC register handlers, we need to - * create a usable struct vgic_io_device to be passed to the handlers and we - * have to set up a buffer similar to what would have happened if a guest MMIO - * access occurred, including doing endian conversions on BE systems. - */ -static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, - bool is_write, int offset, u32 *val) -{ - unsigned int len = 4; - u8 buf[4]; - int ret; - - if (is_write) { - vgic_data_host_to_mmio_bus(buf, len, *val); - ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf); - } else { - ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf); - if (!ret) - *val = vgic_data_mmio_bus_to_host(buf, len); - } + region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); + if (!region) + return -ENXIO; - return ret; + return 0; } int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 50f42f0f8c4f..6afb3b484886 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -18,6 +18,8 @@ #include <kvm/arm_vgic.h> #include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> #include "vgic.h" #include "vgic-mmio.h" @@ -207,6 +209,60 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, return 0; } +static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* + * pending state of interrupt is latched in pending_latch variable. + * Userspace will save and restore pending state and line_level + * separately. + * Refer to Documentation/virtual/kvm/devices/arm-vgic-v3.txt + * for handling of ISPENDR and ICPENDR. 
+ */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->pending_latch) + value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + spin_lock(&irq->irq_lock); + if (test_bit(i, &val)) { + /* + * pending_latch is set irrespective of irq type + * (level or edge) to avoid dependency that VM should + * restore irq config before pending info. + */ + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq); + } else { + irq->pending_latch = false; + spin_unlock(&irq->irq_lock); + } + + vgic_put_irq(vcpu->kvm, irq); + } +} + /* We want to avoid outer shareable. */ u64 vgic_sanitise_shareability(u64 field) { @@ -356,7 +412,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, * We take some special care here to fix the calculation of the register * offset. */ -#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \ +#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, ur, uw, bpi, acc) \ { \ .reg_offset = off, \ .bits_per_irq = bpi, \ @@ -371,47 +427,54 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, .access_flags = acc, \ .read = rd, \ .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ } static const struct vgic_register_region vgic_v3_dist_registers[] = { REGISTER_DESC_WITH_LENGTH(GICD_CTLR, vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICD_STATUSR, + vgic_mmio_read_rao, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, - vgic_mmio_read_rao, vgic_mmio_write_wi, 1, + vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, - vgic_mmio_read_enable, vgic_mmio_write_senable, 1, + vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, - vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, + vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, - vgic_mmio_read_pending, vgic_mmio_write_spending, 1, + vgic_mmio_read_pending, vgic_mmio_write_spending, + vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, - vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + vgic_mmio_read_raz, vgic_mmio_write_wi, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, - vgic_mmio_read_active, vgic_mmio_write_sactive, 1, + vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, - vgic_mmio_read_active, vgic_mmio_write_cactive, 1, + vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, - vgic_mmio_read_priority, vgic_mmio_write_priority, 8, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), 
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR, - vgic_mmio_read_raz, vgic_mmio_write_wi, 8, + vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR, - vgic_mmio_read_config, vgic_mmio_write_config, 2, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR, - vgic_mmio_read_raz, vgic_mmio_write_wi, 1, + vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER, - vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64, + vgic_mmio_read_irouter, vgic_mmio_write_irouter, NULL, NULL, 64, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, @@ -422,12 +485,18 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_CTLR, vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_STATUSR, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_IIDR, vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_TYPER, vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_WAKER, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), @@ -449,11 +518,13 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0, vgic_mmio_read_enable, vgic_mmio_write_cenable, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0, - vgic_mmio_read_pending, vgic_mmio_write_spending, 4, + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0, + vgic_mmio_read_pending, vgic_mmio_write_spending, + vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0, - vgic_mmio_read_pending, vgic_mmio_write_cpending, 4, + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, vgic_mmio_read_active, vgic_mmio_write_sactive, 4, @@ -546,6 +617,54 @@ int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) return ret; } +int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + struct vgic_io_device iodev; + struct vgic_reg_attr reg_attr; + struct kvm_vcpu *vcpu; + gpa_t addr; + int ret; + + ret = vgic_v3_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + iodev.regions = vgic_v3_dist_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); + iodev.base_addr = 0; + break; + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{ + iodev.regions = vgic_v3_rdbase_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); + iodev.base_addr = 0; + break; + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 reg, id; + + id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); + return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, ®); + } + default: + 
return -ENXIO; + } + + /* We only support aligned 32-bit accesses. */ + if (addr & 3) + return -ENXIO; + + region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); + if (!region) + return -ENXIO; + + return 0; +} /* * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI * generation register ICC_SGI1R_EL1) with a given VCPU. @@ -646,9 +765,55 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); spin_lock(&irq->irq_lock); - irq->pending = true; + irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); } } + +int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v3_dist_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_dist_registers), + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} + +int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device rd_dev = { + .regions = vgic_v3_rdbase_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers), + }; + + struct vgic_io_device sgi_dev = { + .regions = vgic_v3_sgibase_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers), + }; + + /* SGI_base is the next 64K frame after RD_base */ + if (offset >= SZ_64K) + return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K, + val); + else + return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val); +} + +int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u32 intid, u64 *val) +{ + if (intid % 32) + return -EINVAL; + + if (is_write) + vgic_write_irq_line_level_info(vcpu, intid, *val); + else + *val = vgic_read_irq_line_level_info(vcpu, intid); + + return 0; +} diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index ebe1b9fa3c4d..2a5db1352722 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -111,7 +111,7 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - if (irq->pending) + if (irq_is_pending(irq)) value |= (1U << i); vgic_put_irq(vcpu->kvm, irq); @@ -131,9 +131,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); spin_lock(&irq->irq_lock); - irq->pending = true; - if (irq->config == VGIC_CONFIG_LEVEL) - irq->soft_pending = true; + irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); @@ -152,12 +150,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, spin_lock(&irq->irq_lock); - if (irq->config == VGIC_CONFIG_LEVEL) { - irq->soft_pending = false; - irq->pending = irq->line_level; - } else { - irq->pending = false; - } + irq->pending_latch = false; spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); @@ -187,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool new_active_state) { + struct kvm_vcpu *requester_vcpu; spin_lock(&irq->irq_lock); + + /* + * The vcpu parameter here can mean multiple things depending on how + * this function is called; when handling a trap from the kernel it + * depends on the GIC version, and these functions are also called as + * part of save/restore from userspace. + * + * Therefore, we have to figure out the requester in a reliable way. 
+ * + * When accessing VGIC state from user space, the requester_vcpu is + * NULL, which is fine, because we guarantee that no VCPUs are running + * when accessing VGIC state from user space so irq->vcpu->cpu is + * always -1. + */ + requester_vcpu = kvm_arm_get_running_vcpu(); + /* * If this virtual IRQ was written into a list register, we * have to make sure the CPU that runs the VCPU thread has - * synced back LR state to the struct vgic_irq. We can only - * know this for sure, when either this irq is not assigned to - * anyone's AP list anymore, or the VCPU thread is not - * running on any CPUs. + * synced back the LR state to the struct vgic_irq. * - * In the opposite case, we know the VCPU thread may be on its - * way back from the guest and still has to sync back this - * IRQ, so we release and re-acquire the spin_lock to let the - * other thread sync back the IRQ. + * As long as the conditions below are true, we know the VCPU thread + * may be on its way back from the guest (we kicked the VCPU thread in + * vgic_change_active_prepare) and still has to sync back this IRQ, + * so we release and re-acquire the spin_lock to let the other thread + * sync back the IRQ. */ while (irq->vcpu && /* IRQ may have state in an LR somewhere */ + irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */ irq->vcpu->cpu != -1) /* VCPU thread is running */ cond_resched_lock(&irq->irq_lock); @@ -359,18 +368,70 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); spin_lock(&irq->irq_lock); - if (test_bit(i * 2 + 1, &val)) { + if (test_bit(i * 2 + 1, &val)) irq->config = VGIC_CONFIG_EDGE; - } else { + else irq->config = VGIC_CONFIG_LEVEL; - irq->pending = irq->line_level | irq->soft_pending; - } spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); } } +u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) +{ + int i; + u64 val = 0; + int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + for (i = 0; i < 32; i++) { + struct vgic_irq *irq; + + if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level) + val |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, + const u64 val) +{ + int i; + int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + for (i = 0; i < 32; i++) { + struct vgic_irq *irq; + bool new_level; + + if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* + * Line level is set irrespective of irq type + * (level or edge) to avoid dependency that VM should + * restore irq config before line level. 
+ */ + new_level = !!(val & (1U << i)); + spin_lock(&irq->irq_lock); + irq->line_level = new_level; + if (new_level) + vgic_queue_irq_unlock(vcpu->kvm, irq); + else + spin_unlock(&irq->irq_lock); + + vgic_put_irq(vcpu->kvm, irq); + } +} + static int match_region(const void *key, const void *elt) { const unsigned int offset = (unsigned long)key; @@ -394,6 +455,22 @@ vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, sizeof(region[0]), match_region); } +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_vmcr(vcpu, vmcr); + else + vgic_v3_set_vmcr(vcpu, vmcr); +} + +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_get_vmcr(vcpu, vmcr); + else + vgic_v3_get_vmcr(vcpu, vmcr); +} + /* * kvm_mmio_read_buf() returns a value in a format where it can be converted * to a byte array and be directly observed as the guest wanted it to appear @@ -484,6 +561,74 @@ static bool check_region(const struct kvm *kvm, return false; } +const struct vgic_register_region * +vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, int len) +{ + const struct vgic_register_region *region; + + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, + addr - iodev->base_addr); + if (!region || !check_region(vcpu->kvm, region, addr, len)) + return NULL; + + return region; +} + +static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, u32 *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + + region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); + if (!region) { + *val = 0; + return 0; + } + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + if (region->uaccess_read) + *val = region->uaccess_read(r_vcpu, addr, sizeof(u32)); + else + *val = region->read(r_vcpu, addr, sizeof(u32)); + + return 0; +} + +static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, const u32 *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + + region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); + if (!region) + return 0; + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + if (region->uaccess_write) + region->uaccess_write(r_vcpu, addr, sizeof(u32), *val); + else + region->write(r_vcpu, addr, sizeof(u32), *val); + + return 0; +} + +/* + * Userland access to VGIC registers. 
+ */ +int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, + bool is_write, int offset, u32 *val) +{ + if (is_write) + return vgic_uaccess_write(vcpu, &dev->dev, offset, val); + else + return vgic_uaccess_read(vcpu, &dev->dev, offset, val); +} + static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { @@ -491,9 +636,8 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, const struct vgic_register_region *region; unsigned long data = 0; - region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, - addr - iodev->base_addr); - if (!region || !check_region(vcpu->kvm, region, addr, len)) { + region = vgic_get_mmio_region(vcpu, iodev, addr, len); + if (!region) { memset(val, 0, len); return 0; } @@ -524,9 +668,8 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, const struct vgic_register_region *region; unsigned long data = vgic_data_mmio_bus_to_host(val, len); - region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, - addr - iodev->base_addr); - if (!region || !check_region(vcpu->kvm, region, addr, len)) + region = vgic_get_mmio_region(vcpu, iodev, addr, len); + if (!region) return 0; switch (iodev->iodev_type) { diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 84961b4e4422..98bb566b660a 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -34,6 +34,10 @@ struct vgic_register_region { gpa_t addr, unsigned int len, unsigned long val); }; + unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); }; extern struct kvm_io_device_ops kvm_io_gic_ops; @@ -86,6 +90,18 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; .write = wr, \ } +#define REGISTER_DESC_WITH_LENGTH_UACCESS(off, rd, wr, urd, uwr, length, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = 0, \ + .len = length, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = urd, \ + .uaccess_write = uwr, \ + } + int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, struct vgic_register_region *reg_desc, struct vgic_io_device *region, @@ -158,6 +174,14 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); +int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, + bool is_write, int offset, u32 *val); + +u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); + +void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, + const u64 val); + unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 834137e7b83f..b834ecdf3225 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -104,7 +104,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) /* Edge is the only case where we preserve the pending bit */ if (irq->config == VGIC_CONFIG_EDGE && (val & GICH_LR_PENDING_BIT)) { - irq->pending = true; + irq->pending_latch = true; if (vgic_irq_is_sgi(intid)) { u32 cpuid = val & GICH_LR_PHYSID_CPUID; @@ -120,9 +120,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) */ if (irq->config == VGIC_CONFIG_LEVEL) { if (!(val & GICH_LR_PENDING_BIT)) - irq->soft_pending = false; - - irq->pending = irq->line_level || 
irq->soft_pending; + irq->pending_latch = false; } spin_unlock(&irq->irq_lock); @@ -145,11 +143,11 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { u32 val = irq->intid; - if (irq->pending) { + if (irq_is_pending(irq)) { val |= GICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) - irq->pending = false; + irq->pending_latch = false; if (vgic_irq_is_sgi(irq->intid)) { u32 src = ffs(irq->source); @@ -158,7 +156,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); if (irq->source) - irq->pending = true; + irq->pending_latch = true; } } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index e6b03fd8c374..be0f4c3e0142 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -94,7 +94,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) /* Edge is the only case where we preserve the pending bit */ if (irq->config == VGIC_CONFIG_EDGE && (val & ICH_LR_PENDING_BIT)) { - irq->pending = true; + irq->pending_latch = true; if (vgic_irq_is_sgi(intid) && model == KVM_DEV_TYPE_ARM_VGIC_V2) { @@ -111,9 +111,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) */ if (irq->config == VGIC_CONFIG_LEVEL) { if (!(val & ICH_LR_PENDING_BIT)) - irq->soft_pending = false; - - irq->pending = irq->line_level || irq->soft_pending; + irq->pending_latch = false; } spin_unlock(&irq->irq_lock); @@ -127,11 +125,11 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) u32 model = vcpu->kvm->arch.vgic.vgic_model; u64 val = irq->intid; - if (irq->pending) { + if (irq_is_pending(irq)) { val |= ICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) - irq->pending = false; + irq->pending_latch = false; if (vgic_irq_is_sgi(irq->intid) && model == KVM_DEV_TYPE_ARM_VGIC_V2) { @@ -141,7 +139,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); if (irq->source) - irq->pending = true; + irq->pending_latch = true; } } @@ -177,10 +175,18 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { u32 vmcr; - vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; + /* + * Ignore the FIQen bit, because GIC emulation always implies + * SRE=1 which means the vFIQEn bit is also RES1. + */ + vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) << + ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; + vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; + vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; } @@ -189,10 +195,18 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; - vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; + /* + * Ignore the FIQen bit, because GIC emulation always implies + * SRE=1 which means the vFIQEn bit is also RES1. 
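The VMCR accessors above assemble and take apart a single 32-bit value with one shift-and-mask pair per field. The sketch below shows the round-trip property that vgic_v3_set_vmcr()/vgic_v3_get_vmcr() rely on; the FLD_* positions are made-up placeholders, not the architectural ICH_VMCR_EL2 layout.

#include <assert.h>
#include <stdint.h>

#define FLD_PMR_SHIFT	24
#define FLD_PMR_MASK	(0xffU << FLD_PMR_SHIFT)
#define FLD_BPR0_SHIFT	21
#define FLD_BPR0_MASK	(0x7U << FLD_BPR0_SHIFT)
#define FLD_ENG1_SHIFT	1
#define FLD_ENG1_MASK	(0x1U << FLD_ENG1_SHIFT)

/* Pack: shift each field into place, then trim it with its mask. */
static uint32_t pack_vmcr(uint32_t pmr, uint32_t bpr0, uint32_t grpen1)
{
	uint32_t v = 0;

	v |= (pmr << FLD_PMR_SHIFT) & FLD_PMR_MASK;
	v |= (bpr0 << FLD_BPR0_SHIFT) & FLD_BPR0_MASK;
	v |= (grpen1 << FLD_ENG1_SHIFT) & FLD_ENG1_MASK;
	return v;
}

int main(void)
{
	uint32_t v = pack_vmcr(0xf0, 0x3, 1);

	/* Unpack mirrors vgic_v3_get_vmcr(): mask first, then shift down. */
	assert(((v & FLD_PMR_MASK) >> FLD_PMR_SHIFT) == 0xf0);
	assert(((v & FLD_BPR0_MASK) >> FLD_BPR0_SHIFT) == 0x3);
	assert(((v & FLD_ENG1_MASK) >> FLD_ENG1_SHIFT) == 1);
	return 0;
}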
+ */ + vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) << + ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK; + vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; + vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; } #define INITIAL_PENDBASER_VALUE \ @@ -215,15 +229,25 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible * way, so we force SRE to 1 to demonstrate this to the guest. + * Also, we don't support any form of IRQ/FIQ bypass. * This goes with the spec allowing the value to be RAO/WI. */ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; + vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; } else { vgic_v3->vgic_sre = 0; } + vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_ID_BITS_MASK) >> + ICH_VTR_ID_BITS_SHIFT; + vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_PRI_BITS_MASK) >> + ICH_VTR_PRI_BITS_SHIFT) + 1; + /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; } @@ -322,6 +346,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) */ kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; kvm_vgic_global_state.can_emulate_gicv2 = false; + kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; if (!info->vcpu.start) { kvm_info("GICv3: no GICV resource entry\n"); diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 6440b56ec90e..654dfd40e449 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -160,7 +160,7 @@ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) * If the distributor is disabled, pending interrupts shouldn't be * forwarded. */ - if (irq->enabled && irq->pending) { + if (irq->enabled && irq_is_pending(irq)) { if (unlikely(irq->target_vcpu && !irq->target_vcpu->kvm->arch.vgic.enabled)) return NULL; @@ -204,8 +204,8 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) goto out; } - penda = irqa->enabled && irqa->pending; - pendb = irqb->enabled && irqb->pending; + penda = irqa->enabled && irq_is_pending(irqa); + pendb = irqb->enabled && irq_is_pending(irqb); if (!penda || !pendb) { ret = (int)pendb - (int)penda; @@ -335,9 +335,22 @@ retry: return true; } -static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, - unsigned int intid, bool level, - bool mapped_irq) +/** + * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic + * @kvm: The VM structure pointer + * @cpuid: The CPU for PPIs + * @intid: The INTID to inject a new state to. + * @level: Edge-triggered: true: to trigger the interrupt + * false: to ignore the call + * Level-sensitive true: raise the input signal + * false: lower the input signal + * + * The VGIC is not concerned with devices being active-LOW or active-HIGH for + * level-sensitive interrupts. You can think of the level parameter as 1 + * being HIGH and 0 being LOW and all devices being active-HIGH. 
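The kerneldoc above gives the level parameter different meanings for edge-triggered and level-sensitive interrupts, and the reworked injection path folds that into the new pending_latch/line_level split. A toy model of those semantics follows; it is an illustration only, not the kernel code.

#include <assert.h>
#include <stdbool.h>

enum cfg { EDGE, LEVEL };

struct virq {
	enum cfg config;
	bool line_level;	/* state of the input line (level-sensitive) */
	bool pending_latch;	/* latched pending state */
};

static void inject(struct virq *irq, bool level)
{
	if (irq->config == LEVEL)
		irq->line_level = level;	/* raise or lower the line */
	else if (level)
		irq->pending_latch = true;	/* edge: only 'true' latches */
}

/* Mirrors irq_is_pending() introduced later in this series. */
static bool is_pending(const struct virq *irq)
{
	if (irq->config == EDGE)
		return irq->pending_latch;
	return irq->pending_latch || irq->line_level;
}

int main(void)
{
	struct virq e = { .config = EDGE }, l = { .config = LEVEL };

	inject(&e, false);		/* ignored for an edge interrupt */
	assert(!is_pending(&e));
	inject(&e, true);
	assert(is_pending(&e));

	inject(&l, true);		/* line raised */
	assert(is_pending(&l));
	inject(&l, false);		/* line lowered again */
	assert(!is_pending(&l));
	return 0;
}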
+ */ +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, + bool level) { struct kvm_vcpu *vcpu; struct vgic_irq *irq; @@ -357,11 +370,6 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, if (!irq) return -EINVAL; - if (irq->hw != mapped_irq) { - vgic_put_irq(kvm, irq); - return -EINVAL; - } - spin_lock(&irq->irq_lock); if (!vgic_validate_injection(irq, level)) { @@ -371,12 +379,10 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, return 0; } - if (irq->config == VGIC_CONFIG_LEVEL) { + if (irq->config == VGIC_CONFIG_LEVEL) irq->line_level = level; - irq->pending = level || irq->soft_pending; - } else { - irq->pending = true; - } + else + irq->pending_latch = true; vgic_queue_irq_unlock(kvm, irq); vgic_put_irq(kvm, irq); @@ -384,32 +390,6 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, return 0; } -/** - * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic - * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs - * @intid: The INTID to inject a new state to. - * @level: Edge-triggered: true: to trigger the interrupt - * false: to ignore the call - * Level-sensitive true: raise the input signal - * false: lower the input signal - * - * The VGIC is not concerned with devices being active-LOW or active-HIGH for - * level-sensitive interrupts. You can think of the level parameter as 1 - * being HIGH and 0 being LOW and all devices being active-HIGH. - */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level) -{ - return vgic_update_irq_pending(kvm, cpuid, intid, level, false); -} - -int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level) -{ - return vgic_update_irq_pending(kvm, cpuid, intid, level, true); -} - int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); @@ -689,7 +669,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); - pending = irq->pending && irq->enabled; + pending = irq_is_pending(irq) && irq->enabled; spin_unlock(&irq->irq_lock); if (pending) diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 859f65c6e056..db28f7cadab2 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -30,13 +30,79 @@ #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) +#define VGIC_AFFINITY_0_SHIFT 0 +#define VGIC_AFFINITY_0_MASK (0xffUL << VGIC_AFFINITY_0_SHIFT) +#define VGIC_AFFINITY_1_SHIFT 8 +#define VGIC_AFFINITY_1_MASK (0xffUL << VGIC_AFFINITY_1_SHIFT) +#define VGIC_AFFINITY_2_SHIFT 16 +#define VGIC_AFFINITY_2_MASK (0xffUL << VGIC_AFFINITY_2_SHIFT) +#define VGIC_AFFINITY_3_SHIFT 24 +#define VGIC_AFFINITY_3_MASK (0xffUL << VGIC_AFFINITY_3_SHIFT) + +#define VGIC_AFFINITY_LEVEL(reg, level) \ + ((((reg) & VGIC_AFFINITY_## level ##_MASK) \ + >> VGIC_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) + +/* + * The Userspace encodes the affinity differently from the MPIDR, + * Below macro converts vgic userspace format to MPIDR reg format. + */ +#define VGIC_TO_MPIDR(val) (VGIC_AFFINITY_LEVEL(val, 0) | \ + VGIC_AFFINITY_LEVEL(val, 1) | \ + VGIC_AFFINITY_LEVEL(val, 2) | \ + VGIC_AFFINITY_LEVEL(val, 3)) + +/* + * As per Documentation/virtual/kvm/devices/arm-vgic-v3.txt, + * below macros are defined for CPUREG encoding. 
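The VGIC_AFFINITY_LEVEL()/VGIC_TO_MPIDR() macros a few lines up repack the userspace affinity encoding into MPIDR form, where Aff0..Aff2 stay in bits 23:0 but Aff3 moves up to bits 39:32. A standalone version of that conversion, assuming the usual ARMv8 MPIDR_EL1 field placement:

#include <assert.h>
#include <stdint.h>

static uint64_t vgic_uapi_to_mpidr(uint32_t val)
{
	uint64_t aff0 = (val >>  0) & 0xff;
	uint64_t aff1 = (val >>  8) & 0xff;
	uint64_t aff2 = (val >> 16) & 0xff;
	uint64_t aff3 = (val >> 24) & 0xff;

	/* Aff0..Aff2 keep their byte positions, Aff3 jumps to bits 39:32. */
	return aff0 | (aff1 << 8) | (aff2 << 16) | (aff3 << 32);
}

int main(void)
{
	/* Userspace layout: Aff3=0x01, Aff2=0x02, Aff1=0x03, Aff0=0x04. */
	assert(vgic_uapi_to_mpidr(0x01020304) == 0x100020304ULL);
	return 0;
}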
+ */ +#define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000 +#define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK 0x0000000000003800 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT 11 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK 0x0000000000000780 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT 7 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK 0x0000000000000078 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT 3 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK 0x0000000000000007 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT 0 + +#define KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) + +static inline bool irq_is_pending(struct vgic_irq *irq) +{ + if (irq->config == VGIC_CONFIG_EDGE) + return irq->pending_latch; + else + return irq->pending_latch || irq->line_level; +} + struct vgic_vmcr { u32 ctlr; u32 abpr; u32 bpr; u32 pmr; + /* Below member variable are valid only for GICv3 */ + u32 grpen0; + u32 grpen1; +}; + +struct vgic_reg_attr { + struct kvm_vcpu *vcpu; + gpa_t addr; }; +int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr); +int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr); +const struct vgic_register_region * +vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, int len); struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); @@ -89,9 +155,24 @@ bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); void vgic_enable_lpis(struct kvm_vcpu *vcpu); int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); - +int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); +int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u64 id, u64 *val); +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, + u64 *reg); +int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u32 intid, u64 *val); int kvm_register_vgic_device(unsigned long type); +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); int vgic_lazy_init(struct kvm *kvm); int vgic_init(struct kvm *kvm); +int vgic_debug_init(struct kvm *kvm); +int vgic_debug_destroy(struct kvm *kvm); + #endif diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 3815e940fbea..bb298a200cd3 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/mmu_context.h> +#include <linux/sched/mm.h> #include "async_pf.h" #include <trace/events/kvm.h> @@ -204,7 +205,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, work->addr = hva; work->arch = *arch; work->mm = current->mm; - atomic_inc(&work->mm->mm_users); + mmget(work->mm); kvm_get_kvm(work->vcpu->kvm); /* this can't really happen otherwise gfn_to_pfn_async diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a29786dd9522..4d28a9ddbee0 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -870,7 +870,8 
@@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - kvm->buses[bus_idx]->ioeventfd_count--; + if (kvm->buses[bus_idx]) + kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 482612b4e496..88257b311cb5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -32,7 +32,9 @@ #include <linux/file.h> #include <linux/syscore_ops.h> #include <linux/cpu.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/mm.h> +#include <linux/sched/stat.h> #include <linux/cpumask.h> #include <linux/smp.h> #include <linux/anon_inodes.h> @@ -506,11 +508,6 @@ static struct kvm_memslots *kvm_alloc_memslots(void) if (!slots) return NULL; - /* - * Init kvm generation close to the maximum to easily test the - * code of handling generation number wrap-around. - */ - slots->generation = -150; for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) slots->id_to_index[i] = slots->memslots[i].id = i; @@ -616,13 +613,13 @@ static struct kvm *kvm_create_vm(unsigned long type) return ERR_PTR(-ENOMEM); spin_lock_init(&kvm->mmu_lock); - atomic_inc(¤t->mm->mm_count); + mmgrab(current->mm); kvm->mm = current->mm; kvm_eventfd_init(kvm); mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); - atomic_set(&kvm->users_count, 1); + refcount_set(&kvm->users_count, 1); INIT_LIST_HEAD(&kvm->devices); r = kvm_arch_init_vm(kvm, type); @@ -641,9 +638,16 @@ static struct kvm *kvm_create_vm(unsigned long type) r = -ENOMEM; for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - kvm->memslots[i] = kvm_alloc_memslots(); - if (!kvm->memslots[i]) + struct kvm_memslots *slots = kvm_alloc_memslots(); + if (!slots) goto out_err_no_srcu; + /* + * Generations must be different for each address space. + * Init kvm generation close to the maximum to easily test the + * code of handling generation number wrap-around. + */ + slots->generation = i * 2 - 150; + rcu_assign_pointer(kvm->memslots[i], slots); } if (init_srcu_struct(&kvm->srcu)) @@ -723,8 +727,11 @@ static void kvm_destroy_vm(struct kvm *kvm) list_del(&kvm->vm_list); spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); - for (i = 0; i < KVM_NR_BUSES; i++) - kvm_io_bus_destroy(kvm->buses[i]); + for (i = 0; i < KVM_NR_BUSES; i++) { + if (kvm->buses[i]) + kvm_io_bus_destroy(kvm->buses[i]); + kvm->buses[i] = NULL; + } kvm_coalesced_mmio_free(kvm); #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); @@ -745,13 +752,13 @@ static void kvm_destroy_vm(struct kvm *kvm) void kvm_get_kvm(struct kvm *kvm) { - atomic_inc(&kvm->users_count); + refcount_inc(&kvm->users_count); } EXPORT_SYMBOL_GPL(kvm_get_kvm); void kvm_put_kvm(struct kvm *kvm) { - if (atomic_dec_and_test(&kvm->users_count)) + if (refcount_dec_and_test(&kvm->users_count)) kvm_destroy_vm(kvm); } EXPORT_SYMBOL_GPL(kvm_put_kvm); @@ -870,8 +877,14 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, * Increment the new memslot generation a second time. This prevents * vm exits that race with memslot updates from caching a memslot * generation that will (potentially) be valid forever. + * + * Generations must be unique even across address spaces. We do not need + * a global counter for that, instead the generation space is evenly split + * across address spaces. For example, with two address spaces, address + * space 0 will use generations 0, 4, 8, ... 
while * address space 1 will + * use generations 2, 6, 10, 14, ... */ - slots->generation++; + slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; kvm_arch_memslots_updated(kvm, slots); @@ -1052,7 +1065,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * changes) is disallowed above, so any other attribute changes getting * here can be skipped. */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) { r = kvm_iommu_map_pages(kvm, &new); return r; } @@ -1094,37 +1107,31 @@ int kvm_get_dirty_log(struct kvm *kvm, { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - int r, i, as_id, id; + int i, as_id, id; unsigned long n; unsigned long any = 0; - r = -EINVAL; as_id = log->slot >> 16; id = (u16)log->slot; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) - goto out; + return -EINVAL; slots = __kvm_memslots(kvm, as_id); memslot = id_to_memslot(slots, id); - r = -ENOENT; if (!memslot->dirty_bitmap) - goto out; + return -ENOENT; n = kvm_dirty_bitmap_bytes(memslot); for (i = 0; !any && i < n/sizeof(long); ++i) any = memslot->dirty_bitmap[i]; - r = -EFAULT; if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) - goto out; + return -EFAULT; if (any) *is_dirty = 1; - - r = 0; -out: - return r; + return 0; } EXPORT_SYMBOL_GPL(kvm_get_dirty_log); @@ -1156,24 +1163,22 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - int r, i, as_id, id; + int i, as_id, id; unsigned long n; unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_buffer; - r = -EINVAL; as_id = log->slot >> 16; id = (u16)log->slot; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) - goto out; + return -EINVAL; slots = __kvm_memslots(kvm, as_id); memslot = id_to_memslot(slots, id); dirty_bitmap = memslot->dirty_bitmap; - r = -ENOENT; if (!dirty_bitmap) - goto out; + return -ENOENT; n = kvm_dirty_bitmap_bytes(memslot); @@ -1202,14 +1207,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, } spin_unlock(&kvm->mmu_lock); - - r = -EFAULT; if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) - goto out; - - r = 0; -out: - return r; + return -EFAULT; + return 0; } EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); #endif @@ -1937,10 +1937,10 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, } EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); -int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa, unsigned long len) +static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, + struct gfn_to_hva_cache *ghc, + gpa_t gpa, unsigned long len) { - struct kvm_memslots *slots = kvm_memslots(kvm); int offset = offset_in_page(gpa); gfn_t start_gfn = gpa >> PAGE_SHIFT; gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; @@ -1950,7 +1950,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, ghc->gpa = gpa; ghc->generation = slots->generation; ghc->len = len; - ghc->memslot = gfn_to_memslot(kvm, start_gfn); + ghc->memslot = __gfn_to_memslot(slots, start_gfn); ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { ghc->hva += offset; @@ -1960,7 +1960,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, * verify that the entire region is valid here. 
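Stepping back to the memslot generation comment in the install_new_memslots() hunk above: with KVM_ADDRESS_SPACE_NUM address spaces, each completed update advances a slot set's generation by 2 * KVM_ADDRESS_SPACE_NUM in total, as the comment's 0, 4, 8, ... versus 2, 6, 10, ... example implies for two spaces, so two address spaces can never hand out the same value. A small numeric check of that property; the real code additionally starts near the wrap-around point (i * 2 - 150) to exercise overflow handling, which this sketch omits.

#include <assert.h>

#define NR_AS	2			/* stands in for KVM_ADDRESS_SPACE_NUM */
#define STEP	(2 * NR_AS)		/* net generation advance per update */

int main(void)
{
	int gen[NR_AS];
	int as, i;

	for (as = 0; as < NR_AS; as++)
		gen[as] = as * 2;	/* per-address-space offset */

	for (i = 0; i < 1000; i++) {
		for (as = 0; as < NR_AS; as++) {
			gen[as] += STEP;
			/* generation mod STEP identifies the address space */
			assert(gen[as] % STEP == as * 2);
		}
	}
	assert(gen[0] != gen[1]);
	return 0;
}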
*/ while (start_gfn <= end_gfn) { - ghc->memslot = gfn_to_memslot(kvm, start_gfn); + ghc->memslot = __gfn_to_memslot(slots, start_gfn); ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); if (kvm_is_error_hva(ghc->hva)) @@ -1972,22 +1972,29 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, } return 0; } -EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); -int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len) +int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, + gpa_t gpa, unsigned long len) +{ + struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init); + +int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len) { - struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); int r; gpa_t gpa = ghc->gpa + offset; BUG_ON(len + offset > ghc->len); if (slots->generation != ghc->generation) - kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); + __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, gpa, data, len); + return kvm_vcpu_write_guest(vcpu, gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -1999,28 +2006,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, return 0; } -EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); +EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached); -int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); + return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len); } -EXPORT_SYMBOL_GPL(kvm_write_guest_cached); +EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached); -int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); int r; BUG_ON(len > ghc->len); if (slots->generation != ghc->generation) - kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); + __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); + return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -2031,7 +2038,7 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, return 0; } -EXPORT_SYMBOL_GPL(kvm_read_guest_cached); +EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { @@ -2348,9 +2355,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int kvm_vcpu_fault(struct vm_fault *vmf) { - struct kvm_vcpu *vcpu = vma->vm_file->private_data; + struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; struct page *page; if (vmf->pgoff == 0) @@ -3133,10 
+3140,9 @@ static long kvm_vm_compat_ioctl(struct file *filp, struct compat_kvm_dirty_log compat_log; struct kvm_dirty_log log; - r = -EFAULT; if (copy_from_user(&compat_log, (void __user *)arg, sizeof(compat_log))) - goto out; + return -EFAULT; log.slot = compat_log.slot; log.padding1 = compat_log.padding1; log.padding2 = compat_log.padding2; @@ -3148,8 +3154,6 @@ static long kvm_vm_compat_ioctl(struct file *filp, default: r = kvm_vm_ioctl(filp, ioctl, arg); } - -out: return r; } #endif @@ -3473,6 +3477,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3490,6 +3496,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && @@ -3540,6 +3548,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3552,6 +3562,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + if (!bus) + return -ENOMEM; + /* exclude ioeventfd which is limited by maximum fd */ if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; @@ -3571,37 +3584,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, } /* Caller must hold slots_lock. */ -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev) +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev) { - int i, r; + int i; struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - r = -ENOENT; + if (!bus) + return; + for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { - r = 0; break; } - if (r) - return r; + if (i == bus->dev_count) + return; new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); - if (!new_bus) - return -ENOMEM; + if (!new_bus) { + pr_err("kvm: failed to shrink bus, removing it completely\n"); + goto broken; + } memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); new_bus->dev_count--; memcpy(new_bus->range + i, bus->range + i + 1, (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); +broken: rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); - return r; + return; } struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, @@ -3614,6 +3631,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, srcu_idx = srcu_read_lock(&kvm->srcu); bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + if (!bus) + goto out_unlock; dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); if (dev_idx < 0) @@ -3640,7 +3659,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, * To avoid the race between open and the removal of the debugfs * directory we test against the users count. 
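The race described in this comment is why the line below switches to refcount_inc_not_zero(): a reference is granted only while the count is still non-zero, so an open() cannot resurrect a VM whose last user has already dropped it. A user-space approximation of that primitive with C11 atomics (not the kernel implementation):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Increment *rc unless it is already zero; returns true on success. */
static bool inc_not_zero(atomic_uint *rc)
{
	unsigned int old = atomic_load(rc);

	while (old != 0) {
		/* On failure, 'old' is reloaded with the current value. */
		if (atomic_compare_exchange_weak(rc, &old, old + 1))
			return true;
	}
	return false;
}

int main(void)
{
	atomic_uint live = 1, dying = 0;

	assert(inc_not_zero(&live));	/* 1 -> 2, reference granted */
	assert(!inc_not_zero(&dying));	/* stays 0, open must fail (-ENOENT) */
	return 0;
}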
 */
-	if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0))
+	if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
 		return -ENOENT;
 	if (simple_attr_open(inode, file, get, set, fmt)) {
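One more note, on the kvm_io_bus_unregister_dev() rework earlier in this diff: the bus is shrunk by allocating a copy with one range fewer and splicing around the removed index with two memcpy() calls (and, when that allocation fails, the patch now logs an error and drops the whole bus rather than returning -ENOMEM). A self-contained sketch of the splice itself, with plain ints standing in for struct kvm_io_range:

#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* Copy 'src' (n elements) into a new array of n-1 elements, skipping index i. */
static int *remove_index(const int *src, size_t n, size_t i)
{
	int *dst = malloc((n - 1) * sizeof(*dst));

	if (!dst)
		return NULL;
	memcpy(dst, src, i * sizeof(*src));
	memcpy(dst + i, src + i + 1, (n - 1 - i) * sizeof(*src));
	return dst;
}

int main(void)
{
	int ranges[] = { 10, 20, 30, 40 };
	int *shrunk = remove_index(ranges, 4, 1);	/* drop element 20 */

	assert(shrunk && shrunk[0] == 10 && shrunk[1] == 30 && shrunk[2] == 40);
	free(shrunk);
	return 0;
}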