From 250be9d61cf8898b1eea140fa31fe7713c49e989 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 19 Feb 2018 09:38:07 -0600 Subject: KVM: arm/arm64: No need to zero CNTVOFF in kvm_timer_vcpu_put() for VHE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In AArch64/AArch32, the virtual counter uses a fixed virtual offset of zero in the following situations as per ARMv8 specifications: 1) HCR_EL2.E2H is 1, and CNTVCT_EL0/CNTVCT are read from EL2. 2) HCR_EL2.{E2H, TGE} is {1, 1}, and either: — CNTVCT_EL0 is read from Non-secure EL0 or EL2. — CNTVCT is read from Non-secure EL0. So, no need to zero CNTVOFF_EL2/CNTVOFF for VHE case. Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Shanker Donthineni Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 70268c0bec79..86eca32474cc 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -541,9 +541,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * The kernel may decide to run userspace after calling vcpu_put, so * we reset cntvoff to 0 to ensure a consistent read between user * accesses to the virtual counter and kernel access to the physical - * counter. + * counter of non-VHE case. For VHE, the virtual counter uses a fixed + * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. */ - set_cntvoff(0); + if (!has_vhe()) + set_cntvoff(0); } /* -- cgit v1.2.3 From 62b06f8f429cd233e4e2e7bbd21081ad60c9018f Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 6 Mar 2018 09:21:06 +0000 Subject: KVM: arm/arm64: vgic: Add missing irq_lock to vgic_mmio_read_pending Our irq_is_pending() helper function accesses multiple members of the vgic_irq struct, so we need to hold the lock when calling it. Add that requirement as a comment to the definition and take the lock around the call in vgic_mmio_read_pending(), where we were missing it before. Fixes: 96b298000db4 ("KVM: arm/arm64: vgic-new: Add PENDING registers handlers") Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio.c | 3 +++ virt/kvm/arm/vgic/vgic.h | 1 + 2 files changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 83d82bd7dc4e..dbe99d635c80 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -113,9 +113,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + unsigned long flags; + spin_lock_irqsave(&irq->irq_lock, flags); if (irq_is_pending(irq)) value |= (1U << i); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 12c37b89f7a3..5b11859a1a1e 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -96,6 +96,7 @@ /* we only support 64 kB translation table page size */ #define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) +/* Requires the irq_lock to be held by the caller. 
*/ static inline bool irq_is_pending(struct vgic_irq *irq) { if (irq->config == VGIC_CONFIG_EDGE) -- cgit v1.2.3 From e21a4f3a930cda6e4902cb5b3213365e5ff3ce7c Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 27 Feb 2018 12:33:50 +0100 Subject: KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls than KVM_RUN Calling vcpu_load() registers preempt notifiers for this vcpu and calls kvm_arch_vcpu_load(). The latter will soon be doing a lot of heavy lifting on arm/arm64 and will try to do things such as enabling the virtual timer and setting us up to handle interrupts from the timer hardware. Loading state onto hardware registers and enabling hardware to signal interrupts can be problematic when we're not actually about to run the VCPU, because it makes it difficult to establish the right context when handling interrupts from the timer, and it makes the register access code difficult to reason about. Luckily, now when we call vcpu_load in each ioctl implementation, we can simply remove the call from the non-KVM_RUN vcpu ioctls, and our kvm_arch_vcpu_load() is only used for loading vcpu content to the physical CPU when we're actually going to run the vcpu. Cc: stable@vger.kernel.org Fixes: 9b062471e52a ("KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl") Reviewed-by: Julien Grall Reviewed-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 3 --- virt/kvm/arm/arm.c | 9 --------- 2 files changed, 12 deletions(-) (limited to 'virt') diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d7e3299a7734..959e50d2588c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -363,8 +363,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, { int ret = 0; - vcpu_load(vcpu); - trace_kvm_set_guest_debug(vcpu, dbg->control); if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) { @@ -386,7 +384,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, } out: - vcpu_put(vcpu); return ret; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 86941f6181bb..53572304843b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -384,14 +384,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - vcpu_load(vcpu); - if (vcpu->arch.power_off) mp_state->mp_state = KVM_MP_STATE_STOPPED; else mp_state->mp_state = KVM_MP_STATE_RUNNABLE; - vcpu_put(vcpu); return 0; } @@ -400,8 +397,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; - vcpu_load(vcpu); - switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: vcpu->arch.power_off = false; @@ -413,7 +408,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, ret = -EINVAL; } - vcpu_put(vcpu); return ret; } @@ -1036,8 +1030,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_device_attr attr; long r; - vcpu_load(vcpu); - switch (ioctl) { case KVM_ARM_VCPU_INIT: { struct kvm_vcpu_init init; @@ -1114,7 +1106,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; } - vcpu_put(vcpu); return r; } -- cgit v1.2.3 From 413aa807ae39fed7e387c175d2d0ae9fcf6c0c9d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 5 Mar 2018 11:36:38 +0100 Subject: KVM: arm/arm64: Reset mapped IRQs on VM reset We currently don't allow resetting mapped IRQs from userspace, because their state is controlled by the hardware. 
But we do need to reset the state when the VM is reset, so we provide a function for the 'owner' of the mapped interrupt to reset the interrupt state. Currently only the timer uses mapped interrupts, so we call this function from the timer reset logic. Cc: stable@vger.kernel.org Fixes: 4c60e360d6df ("KVM: arm/arm64: Provide a get_input_level for the arch timer") Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/arch_timer.c | 4 ++++ virt/kvm/arm/vgic/vgic.c | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index cdbd142ca7f2..02924ae2527e 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -360,6 +360,7 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu); bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); +void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 70f4c30918eb..3945021510a9 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -581,6 +581,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); @@ -594,6 +595,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) ptimer->cnt_ctl = 0; kvm_timer_update_state(vcpu); + if (timer->enabled && irqchip_in_kernel(vcpu->kvm)) + kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq); + return 0; } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index c7c5ef190afa..0001858a2c23 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -495,6 +495,32 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, return ret; } +/** + * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ + * @vcpu: The VCPU pointer + * @vintid: The INTID of the interrupt + * + * Reset the active and pending states of a mapped interrupt. Kernel + * subsystems injecting mapped interrupts should reset their interrupt lines + * when we are doing a reset of the VM. 
+ */
+void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
+{
+	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
+	unsigned long flags;
+
+	if (!irq->hw)
+		goto out;
+
+	spin_lock_irqsave(&irq->irq_lock, flags);
+	irq->active = false;
+	irq->pending_latch = false;
+	irq->line_level = false;
+	spin_unlock_irqrestore(&irq->irq_lock, flags);
+out:
+	vgic_put_irq(vcpu->kvm, irq);
+}
+
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 {
 	struct vgic_irq *irq;
-- cgit v1.2.3


From 76600428c3677659e3c3633bb4f2ea302220a275 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel 
Date: Fri, 2 Mar 2018 08:16:30 +0000
Subject: KVM: arm/arm64: Reduce verbosity of KVM init log

On my GICv3 system, the following is printed to the kernel log at boot:

  kvm [1]: 8-bit VMID
  kvm [1]: IDMAP page: d20e35000
  kvm [1]: HYP VA range: 800000000000:ffffffffffff
  kvm [1]: vgic-v2@2c020000
  kvm [1]: GIC system register CPU interface enabled
  kvm [1]: vgic interrupt IRQ1
  kvm [1]: virtual timer IRQ4
  kvm [1]: Hyp mode initialized successfully

The KVM IDMAP is a mapping of a statically allocated kernel structure,
and so printing its physical address leaks the physical placement of
the kernel when physical KASLR is in effect. So change the kvm_info()
to kvm_debug() to remove it from the log output.

While at it, trim the output a bit more: IRQ numbers can be found in
/proc/interrupts, and the HYP VA and vgic-v2 lines are not highly
informational either.

Cc: 
Acked-by: Will Deacon 
Acked-by: Christoffer Dall 
Signed-off-by: Ard Biesheuvel 
Signed-off-by: Marc Zyngier 
---
 virt/kvm/arm/arch_timer.c   | 2 +-
 virt/kvm/arm/mmu.c          | 6 +++---
 virt/kvm/arm/vgic/vgic-v2.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'virt')

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 3945021510a9..282389eb204f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -771,7 +771,7 @@ int kvm_timer_hyp_init(bool has_gic)
 		static_branch_enable(&has_gic_active_state);
 	}

-	kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
+	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

 	cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
			  "kvm/arm/timer:starting", kvm_timer_starting_cpu,
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ec62d1cccab7..b960acdd0c05 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1810,9 +1810,9 @@ int kvm_mmu_init(void)
 	 */
 	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);

-	kvm_info("IDMAP page: %lx\n", hyp_idmap_start);
-	kvm_info("HYP VA range: %lx:%lx\n",
-		 kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
+	kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
+	kvm_debug("HYP VA range: %lx:%lx\n",
+		  kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));

 	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
 	    hyp_idmap_start <  kern_hyp_va(~0UL) &&
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index c32d7b93ffd1..e9d840a75e7b 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -410,7 +410,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
 	kvm_vgic_global_state.type = VGIC_V2;
 	kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;

-	kvm_info("vgic-v2@%llx\n", info->vctrl.start);
+	kvm_debug("vgic-v2@%llx\n", info->vctrl.start);

 	return 0;
 out:
-- cgit v1.2.3


From 16ca6a607d84bef0129698d8d808f501afd08d43 Mon Sep 17 00:00:00 2001
From: Marc Zyngier 
Date: Tue, 6 Mar 2018 21:48:01 +0000
Subject: KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid

The vgic code is
trying to be clever when injecting GICv2 SGIs, and will happily
populate LRs with the same interrupt number if they come from multiple
vcpus (after all, they are distinct interrupt sources).

Unfortunately, this is against the letter of the architecture, and the
GICv2 architecture spec says "Each valid interrupt stored in the List
registers must have a unique VirtualID for that virtual CPU interface."
GICv3 has similar (although slightly ambiguous) restrictions.

This results in guests locking up when using GICv2-on-GICv3, for
example.

The obvious fix is to stop trying so hard, and inject a single source
vcpu per SGI per guest entry. After all, pending SGIs with multiple
source vcpus are pretty rare, and are mostly seen in scenarios where
the physical CPUs are severely overcommitted.

But as we now only inject a single instance of a multi-source SGI per
vcpu entry, we may delay those interrupts for longer than strictly
necessary, and run the risk of injecting lower priority interrupts in
the meantime.

In order to address this, we adopt a three-stage strategy:

- If we encounter a multi-source SGI in the AP list while computing
  its depth, we force the list to be sorted

- When populating the LRs, we prevent the injection of any interrupt
  of lower priority than that of the first multi-source SGI we've
  injected.

- Finally, the injection of a multi-source SGI triggers the request of
  a maintenance interrupt when there will be no pending interrupt in
  the LRs (HCR_NPIE).

At the point where the last pending interrupt in the LRs switches from
Pending to Active, the maintenance interrupt will be delivered,
allowing us to add the remaining SGIs using the same process.

Cc: stable@vger.kernel.org
Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework")
Acked-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
 include/linux/irqchip/arm-gic-v3.h |  1 +
 include/linux/irqchip/arm-gic.h    |  1 +
 virt/kvm/arm/vgic/vgic-v2.c        |  9 +++++-
 virt/kvm/arm/vgic/vgic-v3.c        |  9 +++++-
 virt/kvm/arm/vgic/vgic.c           | 61 +++++++++++++++++++++++++++++---------
 virt/kvm/arm/vgic/vgic.h           |  2 ++
 6 files changed, 67 insertions(+), 16 deletions(-)

(limited to 'virt')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index c00c4c33e432..b26eccc78fb1 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -503,6 +503,7 @@

 #define ICH_HCR_EN			(1 << 0)
 #define ICH_HCR_UIE			(1 << 1)
+#define ICH_HCR_NPIE			(1 << 3)
 #define ICH_HCR_TC			(1 << 10)
 #define ICH_HCR_TALL0			(1 << 11)
 #define ICH_HCR_TALL1			(1 << 12)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index d3453ee072fc..68d8b1f73682 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -84,6 +84,7 @@

 #define GICH_HCR_EN			(1 << 0)
 #define GICH_HCR_UIE			(1 << 1)
+#define GICH_HCR_NPIE			(1 << 3)

 #define GICH_LR_VIRTUALID		(0x3ff << 0)
 #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index e9d840a75e7b..29556f71b691 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void)
 		vgic_v2_write_lr(i, 0);
 }

+void vgic_v2_set_npie(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+
+	cpuif->vgic_hcr |= GICH_HCR_NPIE;
+}
+
 void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -64,7 +71,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 	int lr;
unsigned long flags; - cpuif->vgic_hcr &= ~GICH_HCR_UIE; + cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE); for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u32 val = cpuif->vgic_lr[lr]; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 6b329414e57a..0ff2006f3781 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -26,6 +26,13 @@ static bool group1_trap; static bool common_trap; static bool gicv4_enable; +void vgic_v3_set_npie(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; + + cpuif->vgic_hcr |= ICH_HCR_NPIE; +} + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; @@ -47,7 +54,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) int lr; unsigned long flags; - cpuif->vgic_hcr &= ~ICH_HCR_UIE; + cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE); for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u64 val = cpuif->vgic_lr[lr]; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 0001858a2c23..8201899126f6 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -710,22 +710,37 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) vgic_v3_set_underflow(vcpu); } +static inline void vgic_set_npie(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_npie(vcpu); + else + vgic_v3_set_npie(vcpu); +} + /* Requires the ap_list_lock to be held. */ -static int compute_ap_list_depth(struct kvm_vcpu *vcpu) +static int compute_ap_list_depth(struct kvm_vcpu *vcpu, + bool *multi_sgi) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq; int count = 0; + *multi_sgi = false; + DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); /* GICv2 SGIs can count for more than one... */ - if (vgic_irq_is_sgi(irq->intid) && irq->source) - count += hweight8(irq->source); - else + if (vgic_irq_is_sgi(irq->intid) && irq->source) { + int w = hweight8(irq->source); + + count += w; + *multi_sgi |= (w > 1); + } else { count++; + } spin_unlock(&irq->irq_lock); } return count; @@ -736,28 +751,43 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq; - int count = 0; + int count; + bool npie = false; + bool multi_sgi; + u8 prio = 0xff; DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); - if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) + count = compute_ap_list_depth(vcpu, &multi_sgi); + if (count > kvm_vgic_global_state.nr_lr || multi_sgi) vgic_sort_ap_list(vcpu); + count = 0; + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); - if (unlikely(vgic_target_oracle(irq) != vcpu)) - goto next; - /* - * If we get an SGI with multiple sources, try to get - * them in all at once. + * If we have multi-SGIs in the pipeline, we need to + * guarantee that they are all seen before any IRQ of + * lower priority. In that case, we need to filter out + * these interrupts by exiting early. This is easy as + * the AP list has been sorted already. 
 		 */
-		do {
+		if (multi_sgi && irq->priority > prio) {
+			spin_unlock(&irq->irq_lock);
+			break;
+		}
+
+		if (likely(vgic_target_oracle(irq) == vcpu)) {
 			vgic_populate_lr(vcpu, irq, count++);
-		} while (irq->source && count < kvm_vgic_global_state.nr_lr);
-next:
+			if (irq->source) {
+				npie = true;
+				prio = irq->priority;
+			}
+		}
+
 		spin_unlock(&irq->irq_lock);

 		if (count == kvm_vgic_global_state.nr_lr) {
@@ -768,6 +798,9 @@ next:
 		}
 	}

+	if (npie)
+		vgic_set_npie(vcpu);
+
 	vcpu->arch.vgic_cpu.used_lrs = count;

 	/* Nuke remaining LRs */
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 5b11859a1a1e..f5b8519e5546 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -160,6 +160,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
+void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
 			 int offset, u32 *val);
@@ -189,6 +190,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
+void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_enable(struct kvm_vcpu *vcpu);
-- cgit v1.2.3


From 27e91ad1e746e341ca2312f29bccb9736be7b476 Mon Sep 17 00:00:00 2001
From: Marc Zyngier 
Date: Tue, 6 Mar 2018 21:44:37 +0000
Subject: kvm: arm/arm64: vgic-v3: Tighten synchronization for guests using v2 on v3

On guest exit, and when using GICv2 on GICv3, we use a dsb(st) to
force synchronization between the memory-mapped guest view and the
system-register view that the hypervisor uses.

This is incorrect, as the spec calls out the need for "a DSB whose
required access type is both loads and stores with any Shareability
attribute", while we're only synchronizing stores.

We also lack an isb after the dsb to ensure that the latter has
actually been executed before we start reading stuff from the sysregs.

The fix is pretty easy: turn dsb(st) into dsb(sy), and slap an isb()
just after.

Cc: stable@vger.kernel.org
Fixes: f68d2b1b73cc ("arm64: KVM: Implement vgic-v3 save/restore")
Acked-by: Christoffer Dall 
Reviewed-by: Andre Przywara 
Signed-off-by: Marc Zyngier 
---
 virt/kvm/arm/hyp/vgic-v3-sr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'virt')

diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index f5c3d6d7019e..b89ce5432214 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -215,7 +215,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 	 * are now visible to the system register interface.
 	 */
 	if (!cpu_if->vgic_sre) {
-		dsb(st);
+		dsb(sy);
+		isb();
 		cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
 	}
-- cgit v1.2.3
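
For reference, the resulting exit-path synchronization, shown as a
straight-line sketch assembled from the hunk above (not the complete
__vgic_v3_save_state() function):

	/* Guest exit, GICv2-on-GICv3 */
	if (!cpu_if->vgic_sre) {
		/*
		 * Full-system barrier: orders the guest's loads *and*
		 * stores to the memory-mapped interface, as the spec
		 * requires; dsb(st) only ordered the stores.
		 */
		dsb(sy);
		/*
		 * Make sure the dsb has actually executed before we
		 * read state back through the sysreg interface.
		 */
		isb();
		cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
	}
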
From 829a58635497d7cc668133ac17daca9b78652661 Mon Sep 17 00:00:00 2001
From: Christoffer Dall 
Date: Wed, 29 Nov 2017 16:37:53 +0100
Subject: KVM: arm/arm64: Move vcpu_load call after kvm_vcpu_first_run_init

Moving the call to vcpu_load() in kvm_arch_vcpu_ioctl_run() to after
we've called kvm_vcpu_first_run_init() simplifies some of the vgic code
and there is also no need to do vcpu_load() for things such as handling
the immediate_exit flag.
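
For reference, the resulting entry path of kvm_arch_vcpu_ioctl_run(),
assembled from the diff below as a sketch (surrounding code and the
run loop omitted):

	ret = kvm_vcpu_first_run_init(vcpu);
	if (ret)
		return ret;

	if (run->exit_reason == KVM_EXIT_MMIO) {
		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
		if (ret)
			return ret;
		if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
			return 0;
	}

	if (run->immediate_exit)
		return -EINTR;

	/* Only now is vcpu state loaded onto the physical CPU. */
	vcpu_load(vcpu);
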
Reviewed-by: Julien Grall Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 4 ---- virt/kvm/arm/arm.c | 22 ++++++++-------------- virt/kvm/arm/vgic/vgic-init.c | 11 ----------- 3 files changed, 8 insertions(+), 29 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 86eca32474cc..7dd8d9677dc5 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -842,11 +842,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return ret; no_vgic: - preempt_disable(); timer->enabled = 1; - kvm_timer_vcpu_load(vcpu); - preempt_enable(); - return 0; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 53572304843b..932e61858c55 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -632,27 +632,22 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; - vcpu_load(vcpu); - ret = kvm_vcpu_first_run_init(vcpu); if (ret) - goto out; + return ret; if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu, vcpu->run); if (ret) - goto out; - if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) { - ret = 0; - goto out; - } - + return ret; + if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) + return 0; } - if (run->immediate_exit) { - ret = -EINTR; - goto out; - } + if (run->immediate_exit) + return -EINTR; + + vcpu_load(vcpu); kvm_sigset_activate(vcpu); @@ -811,7 +806,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); -out: vcpu_put(vcpu); return ret; } diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 743ca5cb05ef..3e8209a07585 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -302,17 +302,6 @@ int vgic_init(struct kvm *kvm) dist->initialized = true; - /* - * If we're initializing GICv2 on-demand when first running the VCPU - * then we need to load the VGIC state onto the CPU. We can detect - * this easily by checking if we are in between vcpu_load and vcpu_put - * when we just initialized the VGIC. - */ - preempt_disable(); - vcpu = kvm_arm_get_running_vcpu(); - if (vcpu) - kvm_vgic_load(vcpu); - preempt_enable(); out: return ret; } -- cgit v1.2.3 From 3df59d8dd3c2526b33d51af9e6f66e61262de71b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 3 Aug 2017 12:09:05 +0200 Subject: KVM: arm/arm64: Get rid of vcpu->arch.irq_lines We currently have a separate read-modify-write of the HCR_EL2 on entry to the guest for the sole purpose of setting the VF and VI bits, if set. Since this is most rarely the case (only when using userspace IRQ chip and interrupts are in flight), let's get rid of this operation and instead modify the bits in the vcpu->arch.hcr[_el2] directly when needed. 
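
For reference, the resulting access pattern, drawn from the diffs
below as a sketch (not verbatim source):

	/* HCR bits are now set and cleared in place via the accessor: */
	*vcpu_hcr(vcpu) |= HCR_VA;	/* e.g. kvm_inject_vabt() */
	*vcpu_hcr(vcpu) &= ~HCR_TVM;	/* e.g. kvm_toggle_cache() */

	/*
	 * The VI/VF lines now live in the hcr word itself, so the
	 * world switch writes a single value to hardware:
	 */
	write_sysreg(vcpu->arch.hcr, HCR);
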
Acked-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Julien Thierry Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_emulate.h | 9 ++------- arch/arm/include/asm/kvm_host.h | 3 --- arch/arm/kvm/emulate.c | 2 +- arch/arm/kvm/hyp/switch.c | 2 +- arch/arm64/include/asm/kvm_emulate.h | 9 ++------- arch/arm64/include/asm/kvm_host.h | 3 --- arch/arm64/kvm/hyp/switch.c | 6 ------ arch/arm64/kvm/inject_fault.c | 2 +- virt/kvm/arm/arm.c | 11 ++++++----- virt/kvm/arm/mmu.c | 6 +++--- 10 files changed, 16 insertions(+), 37 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 9003bd19cb70..e27caa4b47a1 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -92,14 +92,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr = HCR_GUEST_MASK; } -static inline unsigned long vcpu_get_hcr(const struct kvm_vcpu *vcpu) +static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu) { - return vcpu->arch.hcr; -} - -static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) -{ - vcpu->arch.hcr = hcr; + return (unsigned long *)&vcpu->arch.hcr; } static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 248b930563e5..6137195ab815 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -155,9 +155,6 @@ struct kvm_vcpu_arch { /* HYP trapping configuration */ u32 hcr; - /* Interrupt related fields */ - u32 irq_lines; /* IRQ and FIQ levels */ - /* Exception Information */ struct kvm_vcpu_fault_info fault; diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index cdff963f133a..fa501bf437f3 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -174,5 +174,5 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) */ void kvm_inject_vabt(struct kvm_vcpu *vcpu) { - vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VA); + *vcpu_hcr(vcpu) |= HCR_VA; } diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index ae45ae96aac2..e86679daddff 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -44,7 +44,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host) isb(); } - write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR); + write_sysreg(vcpu->arch.hcr, HCR); /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ write_sysreg(HSTR_T(15), HSTR); write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 4610bc818097..9ee316b962c8 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -69,14 +69,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TID3; } -static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) +static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) { - return vcpu->arch.hcr_el2; -} - -static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) -{ - vcpu->arch.hcr_el2 = hcr; + return (unsigned long *)&vcpu->arch.hcr_el2; } static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 618cfee7206a..b027a7f025d4 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -272,9 +272,6 @@ struct 
kvm_vcpu_arch { /* IO related fields */ struct kvm_decode mmio_decode; - /* Interrupt related fields */ - u64 irq_lines; /* IRQ and FIQ levels */ - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 4117717548b0..80bf38ccc8a4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -178,12 +178,6 @@ static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) { - u64 val; - - val = read_sysreg(hcr_el2); - val |= vcpu->arch.irq_lines; - write_sysreg(val, hcr_el2); - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_restore_state(vcpu); else diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 60666a056944..c1e179d34e6a 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -167,7 +167,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) { vcpu_set_vsesr(vcpu, esr); - vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE); + *vcpu_hcr(vcpu) |= HCR_VSE; } /** diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 932e61858c55..49d13510e9c2 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -420,7 +420,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v)) + bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) && !v->arch.power_off && !v->arch.pause); } @@ -814,18 +815,18 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) { int bit_index; bool set; - unsigned long *ptr; + unsigned long *hcr; if (number == KVM_ARM_IRQ_CPU_IRQ) bit_index = __ffs(HCR_VI); else /* KVM_ARM_IRQ_CPU_FIQ */ bit_index = __ffs(HCR_VF); - ptr = (unsigned long *)&vcpu->arch.irq_lines; + hcr = vcpu_hcr(vcpu); if (level) - set = test_and_set_bit(bit_index, ptr); + set = test_and_set_bit(bit_index, hcr); else - set = test_and_clear_bit(bit_index, ptr); + set = test_and_clear_bit(bit_index, hcr); /* * If we didn't change anything, no need to wake up or kick other CPUs diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ec62d1cccab7..9ebff8e530f9 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -2035,7 +2035,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, */ void kvm_set_way_flush(struct kvm_vcpu *vcpu) { - unsigned long hcr = vcpu_get_hcr(vcpu); + unsigned long hcr = *vcpu_hcr(vcpu); /* * If this is the first time we do a S/W operation @@ -2050,7 +2050,7 @@ void kvm_set_way_flush(struct kvm_vcpu *vcpu) trace_kvm_set_way_flush(*vcpu_pc(vcpu), vcpu_has_cache_enabled(vcpu)); stage2_flush_vm(vcpu->kvm); - vcpu_set_hcr(vcpu, hcr | HCR_TVM); + *vcpu_hcr(vcpu) = hcr | HCR_TVM; } } @@ -2068,7 +2068,7 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) /* Caches are now on, stop trapping VM ops (until a S/W op) */ if (now_enabled) - vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); + *vcpu_hcr(vcpu) &= ~HCR_TVM; trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); } -- cgit v1.2.3 From bc192ceec37108bf6c04a5c5795fcea5f940b0de Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 10 Oct 2017 10:21:18 +0200 Subject: KVM: arm/arm64: Add kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs As we are about to move a bunch 
of save/restore logic for VHE kernels to the load and put functions, we need some infrastructure to do this. Reviewed-by: Andrew Jones Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/hyp/sysreg-sr.c | 30 ++++++++++++++++++++++++++++++ virt/kvm/arm/arm.c | 2 ++ 4 files changed, 38 insertions(+) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6137195ab815..c6a749568dd6 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -312,4 +312,7 @@ static inline bool kvm_arm_harden_branch_predictor(void) return false; } +static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {} +static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b027a7f025d4..c30fc96992df 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -435,4 +435,7 @@ static inline bool kvm_arm_harden_branch_predictor(void) return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); } +void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 43b7dd65e3e6..434f0fc9cfb3 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -190,6 +190,36 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); } +/** + * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Load system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_load() function + * and loading system register state early avoids having to load them on + * every entry to the VM. + */ +void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) +{ +} + +/** + * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Save guest system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_put() function + * and deferring saving system register state until we're no longer running the + * VCPU avoids having to save them on every exit from the VM. 
+ */ +void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) +{ +} + void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2) { asm("msr tpidr_el2, %0": : "r" (tpidr_el2)); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 49d13510e9c2..2062d9357971 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -362,10 +362,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_arm_set_running_vcpu(vcpu); kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); + kvm_vcpu_load_sysregs(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_vcpu_put_sysregs(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); -- cgit v1.2.3 From 3f5c90b890acfa7ad0b817a67cfc5eaaf0e21f7d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 3 Oct 2017 14:02:12 +0200 Subject: KVM: arm64: Introduce VHE-specific kvm_vcpu_run So far this is mostly (see below) a copy of the legacy non-VHE switch function, but we will start reworking these functions in separate directions to work on VHE and non-VHE in the most optimal way in later patches. The only difference after this patch between the VHE and non-VHE run functions is that we omit the branch-predictor variant-2 hardening for QC Falkor CPUs, because this workaround is specific to a series of non-VHE ARMv8.0 CPUs. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_asm.h | 5 ++- arch/arm/kvm/hyp/switch.c | 2 +- arch/arm64/include/asm/kvm_asm.h | 4 ++- arch/arm64/include/asm/kvm_host.h | 7 +++++ arch/arm64/kvm/hyp/switch.c | 66 ++++++++++++++++++++++++++++++++++++++- virt/kvm/arm/arm.c | 12 ++++--- 6 files changed, 87 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 36dd2962a42d..5a953ecb0d78 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -70,7 +70,10 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); -extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +/* no VHE on 32-bit :( */ +static inline int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { BUG(); return 0; } + +extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); extern void __init_stage2_translation(void); diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index e86679daddff..aac025783ee8 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -154,7 +154,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) return true; } -int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) +int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 7149f1520382..d53d40704416 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -58,7 +58,9 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); -extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); + +extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c30fc96992df..87abc94fb591 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h 
@@ -428,6 +428,13 @@ static inline void kvm_arm_vhe_guest_enter(void) static inline void kvm_arm_vhe_guest_exit(void) { local_daif_restore(DAIF_PROCCTX_NOIRQ); + + /* + * When we exit from the guest we change a number of CPU configuration + * parameters, such as traps. Make sure these changes take effect + * before running the host or additional guests. + */ + isb(); } static inline bool kvm_arm_harden_branch_predictor(void) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b055111df1a1..1b94ac6a85e6 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -362,7 +362,71 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) +/* Switch to the guest for VHE systems running in EL2 */ +int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool fp_enabled; + u64 exit_code; + + vcpu = kern_hyp_va(vcpu); + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + __sysreg_save_host_state(host_ctxt); + + __activate_traps(vcpu); + __activate_vm(vcpu); + + __vgic_restore_state(vcpu); + __timer_enable_traps(vcpu); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_guest_state(guest_ctxt); + __debug_switch_to_guest(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + fp_enabled = __fpsimd_enabled(); + + __sysreg_save_guest_state(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_disable_traps(vcpu); + __vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_host_state(host_ctxt); + + if (fp_enabled) { + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + } + + /* + * This must come after restoring the host sysregs, since a non-VHE + * system may enable SPE here and make use of the TTBRs. + */ + __debug_switch_to_host(vcpu); + + return exit_code; +} + +/* Switch to the guest for legacy non-VHE systems */ +int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 2062d9357971..09dbee56ed8f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -733,13 +733,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); - if (has_vhe()) - kvm_arm_vhe_guest_enter(); - - ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); - if (has_vhe()) + if (has_vhe()) { + kvm_arm_vhe_guest_enter(); + ret = kvm_vcpu_run_vhe(vcpu); kvm_arm_vhe_guest_exit(); + } else { + ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); + } + vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; /* -- cgit v1.2.3 From 04fef057003c207ff4d9f22d2127aee2f9abecd0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 5 Aug 2017 22:51:35 +0200 Subject: KVM: arm64: Remove noop calls to timer save/restore from VHE switch The VHE switch function calls __timer_enable_traps and __timer_disable_traps which don't do anything on VHE systems. 
Therefore, simply remove these calls from the VHE switch function and make the functions non-conditional as they are now only called from the non-VHE switch path. Acked-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/switch.c | 2 -- virt/kvm/arm/hyp/timer-sr.c | 44 ++++++++++++++++++++++---------------------- 2 files changed, 22 insertions(+), 24 deletions(-) (limited to 'virt') diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 9ffd802e775d..307f8c1fcc2f 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -379,7 +379,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __activate_vm(vcpu->kvm); __vgic_restore_state(vcpu); - __timer_enable_traps(vcpu); /* * We must restore the 32-bit state before the sysregs, thanks @@ -400,7 +399,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __sysreg_save_guest_state(guest_ctxt); __sysreg32_save_state(vcpu); - __timer_disable_traps(vcpu); __vgic_save_state(vcpu); __deactivate_traps(vcpu); diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index f24404b3c8df..77754a62eb0c 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high) write_sysreg(cntvoff, cntvoff_el2); } +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) { - /* - * We don't need to do this for VHE since the host kernel runs in EL2 - * with HCR_EL2.TGE ==1, which makes those bits have no impact. - */ - if (!has_vhe()) { - u64 val; + u64 val; - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); - } + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); } +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) { - if (!has_vhe()) { - u64 val; + u64 val; - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); - } + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); } -- cgit v1.2.3 From 8d404c4c246137531f94dfee352f350d59d0e5a7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 16 Mar 2016 15:38:53 +0100 Subject: KVM: arm64: Rewrite system register accessors to read/write functions Currently we access the system registers array via the vcpu_sys_reg() macro. However, we are about to change the behavior to some times modify the register file directly, so let's change this to two primitives: * Accessor macros vcpu_write_sys_reg() and vcpu_read_sys_reg() * Direct array access macro __vcpu_sys_reg() The accessor macros should be used in places where the code needs to access the currently loaded VCPU's state as observed by the guest. 
For example, when trapping on cache related registers, a write to a system register should go directly to the VCPU version of the register. The direct array access macro can be used in places where the VCPU is known to never be running (for example userspace access) or for registers which are never context switched (for example all the PMU system registers). This rewrites all users of vcpu_sys_regs to one of the macros described above. No functional change. Acked-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 13 ++++--- arch/arm64/include/asm/kvm_host.h | 13 ++++++- arch/arm64/include/asm/kvm_mmu.h | 2 +- arch/arm64/kvm/debug.c | 28 ++++++++++----- arch/arm64/kvm/inject_fault.c | 8 ++--- arch/arm64/kvm/sys_regs.c | 69 ++++++++++++++++++------------------ arch/arm64/kvm/sys_regs.h | 4 +-- arch/arm64/kvm/sys_regs_generic_v8.c | 4 +-- virt/kvm/arm/pmu.c | 36 +++++++++---------- 9 files changed, 101 insertions(+), 76 deletions(-) (limited to 'virt') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3cc535591bdf..d313aaae5c38 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -290,15 +290,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { - return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; + return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { - if (vcpu_mode_is_32bit(vcpu)) + if (vcpu_mode_is_32bit(vcpu)) { *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; - else - vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25); + } else { + u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + sctlr |= (1 << 25); + vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr); + } } static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) @@ -306,7 +309,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); - return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); + return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); } static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9001fd0890c9..179bb9d5760b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -287,7 +287,18 @@ struct kvm_vcpu_arch { }; #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) -#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) + +/* + * Only use __vcpu_sys_reg if you know you want the memory backed version of a + * register, and not the one most recently accessed by a running VCPU. For + * example, for userspace access or for system registers that are never context + * switched, but only emulated. + */ +#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) + +#define vcpu_read_sys_reg(v,r) __vcpu_sys_reg(v,r) +#define vcpu_write_sys_reg(v,n,r) do { __vcpu_sys_reg(v,r) = n; } while (0) + /* * CP14 and CP15 live in the same array, as they are backed by the * same system registers. 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7faed6e48b46..cffa34e23718 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -249,7 +249,7 @@ struct kvm; static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { - return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; + return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index feedb877cff8..a1f4ebdfe6d3 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -46,7 +46,9 @@ static DEFINE_PER_CPU(u32, mdcr_el2); */ static void save_guest_debug_regs(struct kvm_vcpu *vcpu) { - vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1); + u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + + vcpu->arch.guest_debug_preserved.mdscr_el1 = val; trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", vcpu->arch.guest_debug_preserved.mdscr_el1); @@ -54,10 +56,12 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu) static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) { - vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1; + u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1; + + vcpu_write_sys_reg(vcpu, val, MDSCR_EL1); trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", - vcpu_sys_reg(vcpu, MDSCR_EL1)); + vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } /** @@ -108,6 +112,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY); + unsigned long mdscr; trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); @@ -152,9 +157,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) */ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; - vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS; + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + mdscr |= DBG_MDSCR_SS; + vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); } else { - vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS; + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + mdscr &= ~DBG_MDSCR_SS; + vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); } trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu)); @@ -170,7 +179,9 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { /* Enable breakpoints/watchpoints */ - vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE; + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + mdscr |= DBG_MDSCR_MDE; + vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; @@ -194,12 +205,11 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; /* If KDE or MDE are set, perform a full save/restore cycle. 
*/ - if ((vcpu_sys_reg(vcpu, MDSCR_EL1) & DBG_MDSCR_KDE) || - (vcpu_sys_reg(vcpu, MDSCR_EL1) & DBG_MDSCR_MDE)) + if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); - trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1)); + trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 30a3f58cdb7b..63dba401fc7d 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -58,7 +58,7 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) exc_offset = LOWER_EL_AArch32_VECTOR; } - return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; + return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) @@ -73,7 +73,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; *vcpu_spsr(vcpu) = cpsr; - vcpu_sys_reg(vcpu, FAR_EL1) = addr; + vcpu_write_sys_reg(vcpu, addr, FAR_EL1); /* * Build an {i,d}abort, depending on the level and the @@ -94,7 +94,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr if (!is_iabt) esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; - vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT; + vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1); } static void inject_undef64(struct kvm_vcpu *vcpu) @@ -115,7 +115,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) if (kvm_vcpu_trap_il_is32bit(vcpu)) esr |= ESR_ELx_IL; - vcpu_sys_reg(vcpu, ESR_EL1) = esr; + vcpu_write_sys_reg(vcpu, esr, ESR_EL1); } /** diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 691f81c31018..7514db002430 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -133,14 +133,14 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, if (!p->is_aarch32 || !p->is_32bit) { val = p->regval; } else { - val = vcpu_sys_reg(vcpu, reg); + val = vcpu_read_sys_reg(vcpu, reg); if (r->reg % 2) val = (p->regval << 32) | (u64)lower_32_bits(val); else val = ((u64)upper_32_bits(val) << 32) | lower_32_bits(p->regval); } - vcpu_sys_reg(vcpu, reg) = val; + vcpu_write_sys_reg(vcpu, val, reg); kvm_toggle_cache(vcpu, was_enabled); return true; @@ -249,10 +249,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) { - vcpu_sys_reg(vcpu, r->reg) = p->regval; + vcpu_write_sys_reg(vcpu, p->regval, r->reg); vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } else { - p->regval = vcpu_sys_reg(vcpu, r->reg); + p->regval = vcpu_read_sys_reg(vcpu, r->reg); } trace_trap_reg(__func__, r->reg, p->is_write, p->regval); @@ -465,7 +465,8 @@ static void reset_wcr(struct kvm_vcpu *vcpu, static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1); + u64 amair = read_sysreg(amair_el1); + vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1); } static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -482,7 +483,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0); mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1); mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) 
<< MPIDR_LEVEL_SHIFT(2); - vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr; + vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1); } static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -496,12 +497,12 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); - vcpu_sys_reg(vcpu, PMCR_EL0) = val; + __vcpu_sys_reg(vcpu, PMCR_EL0) = val; } static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { - u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0); bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); if (!enabled) @@ -543,14 +544,14 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { /* Only update writeable bits of PMCR */ - val = vcpu_sys_reg(vcpu, PMCR_EL0); + val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; - vcpu_sys_reg(vcpu, PMCR_EL0) = val; + __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); } else { /* PMCR.P & PMCR.C are RAZ */ - val = vcpu_sys_reg(vcpu, PMCR_EL0) + val = __vcpu_sys_reg(vcpu, PMCR_EL0) & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C); p->regval = val; } @@ -568,10 +569,10 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) - vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; + __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; else /* return PMSELR.SEL field */ - p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0) + p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; return true; @@ -604,7 +605,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) { u64 pmcr, val; - pmcr = vcpu_sys_reg(vcpu, PMCR_EL0); + pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0); val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) { kvm_inject_undefined(vcpu); @@ -629,7 +630,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, if (pmu_access_event_counter_el0_disabled(vcpu)) return false; - idx = vcpu_sys_reg(vcpu, PMSELR_EL0) + idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; } else if (r->Op2 == 0) { /* PMCCNTR_EL0 */ @@ -684,7 +685,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) { /* PMXEVTYPER_EL0 */ - idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; + idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; reg = PMEVTYPER0_EL0 + idx; } else if (r->CRn == 14 && (r->CRm & 12) == 12) { idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); @@ -702,9 +703,9 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); - vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; + __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; } else { - p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; + p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; } return true; @@ -726,15 +727,15 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = p->regval & mask; if (r->Op2 & 0x1) { /* accessing PMCNTENSET_EL0 */ - vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; kvm_pmu_enable_counter(vcpu, val); } else { /* accessing PMCNTENCLR_EL0 */ - 
vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; kvm_pmu_disable_counter(vcpu, val); } } else { - p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; } return true; @@ -758,12 +759,12 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->Op2 & 0x1) /* accessing PMINTENSET_EL1 */ - vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; + __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; else /* accessing PMINTENCLR_EL1 */ - vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; + __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; } else { - p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; } return true; @@ -783,12 +784,12 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); else /* accessing PMOVSCLR_EL0 */ - vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); } else { - p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; } return true; @@ -825,10 +826,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; } - vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval - & ARMV8_PMU_USERENR_MASK; + __vcpu_sys_reg(vcpu, PMUSERENR_EL0) = + p->regval & ARMV8_PMU_USERENR_MASK; } else { - p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0) + p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0) & ARMV8_PMU_USERENR_MASK; } @@ -2230,7 +2231,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (r->get_user) return (r->get_user)(vcpu, r, reg, uaddr); - return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); + return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id); } int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) @@ -2251,7 +2252,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (r->set_user) return (r->set_user)(vcpu, r, reg, uaddr); - return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); + return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); } static unsigned int num_demux_regs(void) @@ -2457,6 +2458,6 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) reset_sys_reg_descs(vcpu, table, num); for (num = 1; num < NR_SYS_REGS; num++) - if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242) - panic("Didn't reset vcpu_sys_reg(%zi)", num); + if (__vcpu_sys_reg(vcpu, num) == 0x4242424242424242) + panic("Didn't reset __vcpu_sys_reg(%zi)", num); } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 060f5348ef25..cd710f8b63e0 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -89,14 +89,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu, { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; + __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; } static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - vcpu_sys_reg(vcpu, r->reg) = r->val; + __vcpu_sys_reg(vcpu, r->reg) = r->val; } static inline int cmp_sys_reg(const struct sys_reg_desc *i1, diff --git 
a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 969ade1d333d..ddb8497d18d6 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -38,13 +38,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu, if (p->is_write) return ignore_write(vcpu, p); - p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1); + p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); return true; } static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); + __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); } /* diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 8a9c42366db7..1c5b76c46e26 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -37,7 +37,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - counter = vcpu_sys_reg(vcpu, reg); + counter = __vcpu_sys_reg(vcpu, reg); /* The real counter value is equal to the value of counter register plus * the value perf event counts. @@ -61,7 +61,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); + __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); } /** @@ -78,7 +78,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - vcpu_sys_reg(vcpu, reg) = counter; + __vcpu_sys_reg(vcpu, reg) = counter; perf_event_disable(pmc->perf_event); perf_event_release_kernel(pmc->perf_event); pmc->perf_event = NULL; @@ -125,7 +125,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) { - u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; + u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; val &= ARMV8_PMU_PMCR_N_MASK; if (val == 0) @@ -147,7 +147,7 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; - if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { @@ -193,10 +193,10 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; - if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { - reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); - reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); + if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { + reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); reg &= kvm_pmu_valid_counter_mask(vcpu); } @@ -295,7 +295,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); int idx = pmc->idx; - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); @@ -316,19 +316,19 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) if (val == 0) return; - enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + 
enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { if (!(val & BIT(i))) continue; - type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) & ARMV8_PMU_EVTYPE_EVENT; if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) && (enable & BIT(i))) { - reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; reg = lower_32_bits(reg); - vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; if (!reg) - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } @@ -348,7 +348,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) mask = kvm_pmu_valid_counter_mask(vcpu); if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter(vcpu, - vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); } else { kvm_pmu_disable_counter(vcpu, mask); } @@ -369,8 +369,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) { - return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && - (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); + return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); } /** -- cgit v1.2.3 From 00536ec476601a60d976eebf6aeb9633d4fb37d9 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 27 Dec 2017 20:01:52 +0100 Subject: KVM: arm/arm64: Prepare to handle deferred save/restore of SPSR_EL1 SPSR_EL1 is not used by a VHE host kernel and can be deferred, but we need to rework the accesses to this register to access the latest value depending on whether or not guest system registers are loaded on the CPU or only reside in memory. The handling of accessing the various banked SPSRs for 32-bit VMs is a bit clunky, but this will be improved in following patches which will first prepare and subsequently implement deferred save/restore of the 32-bit registers, including the 32-bit SPSRs. Reviewed-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_emulate.h | 12 ++++++++++- arch/arm/kvm/emulate.c | 2 +- arch/arm64/include/asm/kvm_emulate.h | 41 +++++++++++++++++++++++++++++++----- arch/arm64/kvm/inject_fault.c | 4 ++-- virt/kvm/arm/aarch32.c | 2 +- 5 files changed, 51 insertions(+), 10 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index e27caa4b47a1..6493bd479ddc 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -41,7 +41,17 @@ static inline unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num) return vcpu_reg(vcpu, reg_num); } -unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); +unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu); + +static inline unsigned long vcpu_read_spsr(struct kvm_vcpu *vcpu) +{ + return *__vcpu_spsr(vcpu); +} + +static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) +{ + *__vcpu_spsr(vcpu) = v; +} static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, u8 reg_num) diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index fa501bf437f3..9046b53d87c1 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -142,7 +142,7 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) /* * Return the SPSR for the current mode of the virtual CPU. 
*/ -unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) +unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu) { unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; switch (mode) { diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d313aaae5c38..f32640132e26 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -143,13 +144,43 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; } -/* Get vcpu SPSR for current mode */ -static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) +static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) { - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_spsr32(vcpu); + unsigned long *p = (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; + + if (vcpu_mode_is_32bit(vcpu)) { + unsigned long *p_32bit = vcpu_spsr32(vcpu); + + /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */ + if (p_32bit != p) + return *p_32bit; + } + + if (vcpu->arch.sysregs_loaded_on_cpu) + return read_sysreg_el1(spsr); + else + return *p; +} - return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; +static inline void vcpu_write_spsr(const struct kvm_vcpu *vcpu, unsigned long v) +{ + unsigned long *p = (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; + + /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */ + if (vcpu_mode_is_32bit(vcpu)) { + unsigned long *p_32bit = vcpu_spsr32(vcpu); + + /* KVM_SPSR_SVC aliases KVM_SPSR_EL1 */ + if (p_32bit != p) { + *p_32bit = v; + return; + } + } + + if (vcpu->arch.sysregs_loaded_on_cpu) + write_sysreg_el1(v, spsr); + else + *p = v; } static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 63dba401fc7d..7f8d2a4e420f 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -71,7 +71,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; - *vcpu_spsr(vcpu) = cpsr; + vcpu_write_spsr(vcpu, cpsr); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -106,7 +106,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; - *vcpu_spsr(vcpu) = cpsr; + vcpu_write_spsr(vcpu, cpsr); /* * Build an unknown exception, depending on the instruction diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 8bc479fa37e6..efc84cbe8277 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -178,7 +178,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) *vcpu_cpsr(vcpu) = cpsr; /* Note: These now point to the banked copies */ - *vcpu_spsr(vcpu) = new_spsr_value; + vcpu_write_spsr(vcpu, new_spsr_value); *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ -- cgit v1.2.3 From bb5ed7035918d265189e2623d71c8f458713d3e9 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 5 Oct 2017 00:02:41 +0200 Subject: KVM: arm/arm64: Get rid of vgic_elrsr There is really no need to store the vgic_elrsr on the VGIC data structures as the only need we have for the elrsr is to figure out if an LR is inactive when we save the VGIC state upon returning from the guest. We might as well store this in a temporary local variable. 
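For illustration, a minimal sketch of the resulting GICv2 save path, with the ELRSR read into a local at its single point of use (this mirrors the hunks below; the GICv3 path does the same with a read of ICH_ELSR_EL2):

	static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
	{
		struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
		u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
		u64 elrsr;
		int i;

		/* Read the empty-LR status once, right where it is consumed */
		elrsr = readl_relaxed(base + GICH_ELRSR0);
		if (unlikely(used_lrs > 32))
			elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32;

		for (i = 0; i < used_lrs; i++) {
			if (elrsr & (1UL << i))
				/* Inactive LR: nothing to read back */
				cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
			else
				cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));

			writel_relaxed(0, base + GICH_LR0 + (i * 4));
		}
	}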
Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 2 -- virt/kvm/arm/hyp/vgic-v2-sr.c | 28 +++++++--------------------- virt/kvm/arm/hyp/vgic-v3-sr.c | 6 +++--- virt/kvm/arm/vgic/vgic-v2.c | 1 - virt/kvm/arm/vgic/vgic-v3.c | 1 - 5 files changed, 10 insertions(+), 28 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index cdbd142ca7f2..ac98ae46bfb7 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -263,7 +263,6 @@ struct vgic_dist { struct vgic_v2_cpu_if { u32 vgic_hcr; u32 vgic_vmcr; - u64 vgic_elrsr; /* Saved only */ u32 vgic_apr; u32 vgic_lr[VGIC_V2_MAX_LRS]; }; @@ -272,7 +271,6 @@ struct vgic_v3_cpu_if { u32 vgic_hcr; u32 vgic_vmcr; u32 vgic_sre; /* Restored only, change ignored */ - u32 vgic_elrsr; /* Saved only */ u32 vgic_ap0r[4]; u32 vgic_ap1r[4]; u64 vgic_lr[VGIC_V3_MAX_LRS]; diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index 4fe6e797e8b3..a91b0d2b9249 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -23,29 +23,19 @@ #include #include -static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; - u32 elrsr0, elrsr1; - - elrsr0 = readl_relaxed(base + GICH_ELRSR0); - if (unlikely(nr_lr > 32)) - elrsr1 = readl_relaxed(base + GICH_ELRSR1); - else - elrsr1 = 0; - - cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; -} - static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int i; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 elrsr; + int i; + + elrsr = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(used_lrs > 32)) + elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; for (i = 0; i < used_lrs; i++) { - if (cpu_if->vgic_elrsr & (1UL << i)) + if (elrsr & (1UL << i)) cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; else cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); @@ -68,13 +58,9 @@ void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) if (used_lrs) { cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); - - save_elrsr(vcpu, base); save_lrs(vcpu, base); - writel_relaxed(0, base + GICH_HCR); } else { - cpu_if->vgic_elrsr = ~0UL; cpu_if->vgic_apr = 0; } } diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index f5c3d6d7019e..9abf2f3c12b5 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -222,15 +222,16 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) if (used_lrs) { int i; u32 nr_pre_bits; + u32 elrsr; - cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); + elrsr = read_gicreg(ICH_ELSR_EL2); write_gicreg(0, ICH_HCR_EL2); val = read_gicreg(ICH_VTR_EL2); nr_pre_bits = vtr_to_nr_pre_bits(val); for (i = 0; i < used_lrs; i++) { - if (cpu_if->vgic_elrsr & (1 << i)) + if (elrsr & (1 << i)) cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; else cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); @@ -262,7 +263,6 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) cpu_if->its_vpe.its_vm) write_gicreg(0, ICH_HCR_EL2); - cpu_if->vgic_elrsr = 0xffff; cpu_if->vgic_ap0r[0] = 0; cpu_if->vgic_ap0r[1] = 0; cpu_if->vgic_ap0r[2] = 0; diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index c32d7b93ffd1..bb305d49cfdd 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -265,7 
+265,6 @@ void vgic_v2_enable(struct kvm_vcpu *vcpu) * anyway. */ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; /* Get the show on the road... */ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 6b329414e57a..b76e21f3e6bd 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -267,7 +267,6 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) * anyway. */ vgic_v3->vgic_vmcr = 0; - vgic_v3->vgic_elrsr = ~0; /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible -- cgit v1.2.3 From 75174ba6ca9dcfddda88aa420da4d7aa2b279fdf Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 22 Dec 2016 20:39:10 +0100 Subject: KVM: arm/arm64: Handle VGICv2 save/restore from the main VGIC code We can program the GICv2 hypervisor control interface logic directly from the core vgic code and can instead do the save/restore directly from the flush/sync functions, which can lead to a number of future optimizations. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/kvm/hyp/switch.c | 4 --- arch/arm64/include/asm/kvm_hyp.h | 2 -- arch/arm64/kvm/hyp/switch.c | 4 --- virt/kvm/arm/hyp/vgic-v2-sr.c | 65 ---------------------------------------- virt/kvm/arm/vgic/vgic-v2.c | 63 ++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic/vgic.c | 19 +++++++++++- virt/kvm/arm/vgic/vgic.h | 3 ++ 7 files changed, 84 insertions(+), 76 deletions(-) (limited to 'virt') diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index aac025783ee8..882b9b9e0077 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -92,16 +92,12 @@ static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_save_state(vcpu); - else - __vgic_v2_save_state(vcpu); } static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_restore_state(vcpu); - else - __vgic_v2_restore_state(vcpu); } static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 949f2e77ae58..febe417b8b4e 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -120,8 +120,6 @@ typeof(orig) * __hyp_text fname(void) \ return val; \ } -void __vgic_v2_save_state(struct kvm_vcpu *vcpu); -void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 67c66b4e237e..31badf6e91e8 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -196,16 +196,12 @@ static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_save_state(vcpu); - else - __vgic_v2_save_state(vcpu); } static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_restore_state(vcpu); - else - __vgic_v2_restore_state(vcpu); } static bool __hyp_text __true_value(void) diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index a91b0d2b9249..0bbafdfd4adb 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -23,71 +23,6 @@ #include #include 
-static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - u64 elrsr; - int i; - - elrsr = readl_relaxed(base + GICH_ELRSR0); - if (unlikely(used_lrs > 32)) - elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; - - for (i = 0; i < used_lrs; i++) { - if (elrsr & (1UL << i)) - cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; - else - cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); - - writel_relaxed(0, base + GICH_LR0 + (i * 4)); - } -} - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - - if (!base) - return; - - if (used_lrs) { - cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); - save_lrs(vcpu, base); - writel_relaxed(0, base + GICH_HCR); - } else { - cpu_if->vgic_apr = 0; - } -} - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - int i; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - - if (!base) - return; - - if (used_lrs) { - writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); - writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); - for (i = 0; i < used_lrs; i++) { - writel_relaxed(cpu_if->vgic_lr[i], - base + GICH_LR0 + (i * 4)); - } - } -} - #ifdef CONFIG_ARM64 /* * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index bb305d49cfdd..1e5f3eb6973d 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -421,6 +421,69 @@ out: return ret; } +static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 elrsr; + int i; + + elrsr = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(used_lrs > 32)) + elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; + + for (i = 0; i < used_lrs; i++) { + if (elrsr & (1UL << i)) + cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; + else + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); + + writel_relaxed(0, base + GICH_LR0 + (i * 4)); + } +} + +void vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + void __iomem *base = vgic->vctrl_base; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + + if (!base) + return; + + if (used_lrs) { + cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); + save_lrs(vcpu, base); + writel_relaxed(0, base + GICH_HCR); + } else { + cpu_if->vgic_apr = 0; + } +} + +void vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + void __iomem *base = vgic->vctrl_base; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + int i; + + if (!base) + return; + + if (used_lrs) { + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + 
writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); + for (i = 0; i < used_lrs; i++) { + writel_relaxed(cpu_if->vgic_lr[i], + base + GICH_LR0 + (i * 4)); + } + } +} + void vgic_v2_load(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index c7c5ef190afa..12e2a28f437e 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -749,11 +749,19 @@ next: vgic_clear_lr(vcpu, count); } +static inline void vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_save_state(vcpu); +} + /* Sync back the hardware VGIC state into our emulation after a guest's run. */ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + vgic_save_state(vcpu); + WARN_ON(vgic_v4_sync_hwstate(vcpu)); /* An empty ap_list_head implies used_lrs == 0 */ @@ -765,6 +773,12 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) vgic_prune_ap_list(vcpu); } +static inline void vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_restore_state(vcpu); +} + /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { @@ -780,13 +794,16 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * this. */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) - return; + goto out; DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); vgic_flush_lr_state(vcpu); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + +out: + vgic_restore_state(vcpu); } void kvm_vgic_load(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 12c37b89f7a3..89b9547fba27 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -176,6 +176,9 @@ void vgic_v2_init_lrs(void); void vgic_v2_load(struct kvm_vcpu *vcpu); void vgic_v2_put(struct kvm_vcpu *vcpu); +void vgic_v2_save_state(struct kvm_vcpu *vcpu); +void vgic_v2_restore_state(struct kvm_vcpu *vcpu); + static inline void vgic_get_irq_kref(struct vgic_irq *irq) { if (irq->intid < VGIC_MIN_LPI) -- cgit v1.2.3 From 8a43a2b34b7dc59dc6df5fa0a3b8540918bc4c58 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 4 Oct 2017 23:25:24 +0200 Subject: KVM: arm/arm64: Move arm64-only vgic-v2-sr.c file to arm64 The vgic-v2-sr.c file now only contains the logic to replay unaligned accesses to the virtual CPU interface on 16K and 64K page systems, which is only relevant on 64-bit platforms. Therefore move this file to the arm64 KVM tree, remove the compile directive from the 32-bit side makefile, and remove the ifdef in the C file. Since this file also no longer saves/restores anything, rename the file to vgic-v2-cpuif-proxy.c to more accurately describe the logic in this file. 
Reviewed-by: Andre Przywara Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/kvm/hyp/Makefile | 1 - arch/arm64/kvm/hyp/Makefile | 2 +- arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 78 +++++++++++++++++++++++++++++++ virt/kvm/arm/hyp/vgic-v2-sr.c | 80 -------------------------------- 4 files changed, 79 insertions(+), 82 deletions(-) create mode 100644 arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c delete mode 100644 virt/kvm/arm/hyp/vgic-v2-sr.c (limited to 'virt') diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 5638ce0c9524..1964111c984a 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -7,7 +7,6 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING KVM=../../../../virt/kvm -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index f04400d494b7..4313f7475333 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -7,10 +7,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING KVM=../../../../virt/kvm -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c new file mode 100644 index 000000000000..97f357ea9c72 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include +#include +#include + +/* + * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the + * guest. + * + * @vcpu: the offending vcpu + * + * Returns: + * 1: GICV access successfully performed + * 0: Not a GICV access + * -1: Illegal GICV access + */ +int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_dist *vgic = &kvm->arch.vgic; + phys_addr_t fault_ipa; + void __iomem *addr; + int rd; + + /* Build the full address */ + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + + /* If not for GICV, move on */ + if (fault_ipa < vgic->vgic_cpu_base || + fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE)) + return 0; + + /* Reject anything but a 32bit access */ + if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) + return -1; + + /* Not aligned? 
Don't bother */ + if (fault_ipa & 3) + return -1; + + rd = kvm_vcpu_dabt_get_rd(vcpu); + addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va); + addr += fault_ipa - vgic->vgic_cpu_base; + + if (kvm_vcpu_dabt_iswrite(vcpu)) { + u32 data = vcpu_data_guest_to_host(vcpu, + vcpu_get_reg(vcpu, rd), + sizeof(u32)); + writel_relaxed(data, addr); + } else { + u32 data = readl_relaxed(addr); + vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, + sizeof(u32))); + } + + return 1; +} diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c deleted file mode 100644 index 0bbafdfd4adb..000000000000 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_ARM64 -/* - * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the - * guest. - * - * @vcpu: the offending vcpu - * - * Returns: - * 1: GICV access successfully performed - * 0: Not a GICV access - * -1: Illegal GICV access - */ -int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_dist *vgic = &kvm->arch.vgic; - phys_addr_t fault_ipa; - void __iomem *addr; - int rd; - - /* Build the full address */ - fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); - - /* If not for GICV, move on */ - if (fault_ipa < vgic->vgic_cpu_base || - fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE)) - return 0; - - /* Reject anything but a 32bit access */ - if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) - return -1; - - /* Not aligned? Don't bother */ - if (fault_ipa & 3) - return -1; - - rd = kvm_vcpu_dabt_get_rd(vcpu); - addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va); - addr += fault_ipa - vgic->vgic_cpu_base; - - if (kvm_vcpu_dabt_iswrite(vcpu)) { - u32 data = vcpu_data_guest_to_host(vcpu, - vcpu_get_reg(vcpu, rd), - sizeof(u32)); - writel_relaxed(data, addr); - } else { - u32 data = readl_relaxed(addr); - vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, - sizeof(u32))); - } - - return 1; -} -#endif -- cgit v1.2.3 From 771621b0e2f80629948e0dc627d18b6730778c52 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 4 Oct 2017 23:42:32 +0200 Subject: KVM: arm/arm64: Handle VGICv3 save/restore from the main VGIC code on VHE Just like we can program the GICv2 hypervisor control interface directly from the core vgic code, we can do the same for the GICv3 hypervisor control interface on VHE systems. We do this by simply calling the save/restore functions when we have VHE and we can then get rid of the save/restore function calls from the VHE world switch function. 
One caveat is that we now write GICv3 system register state before the potential early exit path in the run loop, and because we sync back state in the early exit path, we have to ensure that we read a consistent GIC state from the sync path, even though we have never actually run the guest with the newly written GIC state. We solve this by inserting an ISB in the early exit path. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/switch.c | 13 ++++++------- virt/kvm/arm/arm.c | 1 + virt/kvm/arm/vgic/vgic.c | 21 +++++++++++++++++++-- 3 files changed, 26 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 31badf6e91e8..86abbee40d3f 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -192,13 +192,15 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) write_sysreg(0, vttbr_el2); } -static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) +/* Save VGICv3 state on non-VHE systems */ +static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_save_state(vcpu); } -static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) +/* Restore VGICv3 state on non-VHE systems */ +static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_restore_state(vcpu); @@ -400,8 +402,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __activate_traps(vcpu); __activate_vm(vcpu->kvm); - __vgic_restore_state(vcpu); - sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); @@ -415,7 +415,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) fp_enabled = fpsimd_enabled_vhe(); sysreg_save_guest_state_vhe(guest_ctxt); - __vgic_save_state(vcpu); __deactivate_traps(vcpu); @@ -451,7 +450,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) __activate_traps(vcpu); __activate_vm(kern_hyp_va(vcpu->kvm)); - __vgic_restore_state(vcpu); + __hyp_vgic_restore_state(vcpu); __timer_enable_traps(vcpu); /* @@ -484,7 +483,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(guest_ctxt); __sysreg32_save_state(vcpu); __timer_disable_traps(vcpu); - __vgic_save_state(vcpu); + __hyp_vgic_save_state(vcpu); __deactivate_traps(vcpu); __deactivate_vm(vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 09dbee56ed8f..dba629c5f8ac 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -717,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; + isb(); /* Ensure work in x_flush_hwstate is committed */ kvm_pmu_sync_hwstate(vcpu); if (static_branch_unlikely(&userspace_irqchip_in_use)) kvm_timer_sync_hwstate(vcpu); diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 12e2a28f437e..eaab4a616ecf 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "vgic.h" @@ -749,10 +750,22 @@ next: vgic_clear_lr(vcpu, count); } +static inline bool can_access_vgic_from_kernel(void) +{ + /* + * GICv2 can always be accessed from the kernel because it is + * memory-mapped, and VHE systems can access GICv3 EL2 system + * registers. 
+ */ + return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); +} + static inline void vgic_save_state(struct kvm_vcpu *vcpu) { if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_save_state(vcpu); + else + __vgic_v3_save_state(vcpu); } /* Sync back the hardware VGIC state into our emulation after a guest's run. */ @@ -760,7 +773,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - vgic_save_state(vcpu); + if (can_access_vgic_from_kernel()) + vgic_save_state(vcpu); WARN_ON(vgic_v4_sync_hwstate(vcpu)); @@ -777,6 +791,8 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu) { if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_restore_state(vcpu); + else + __vgic_v3_restore_state(vcpu); } /* Flush our emulation state into the GIC hardware before entering the guest. */ @@ -803,7 +819,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); out: - vgic_restore_state(vcpu); + if (can_access_vgic_from_kernel()) + vgic_restore_state(vcpu); } void kvm_vgic_load(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 923a2e30e5745a8f94f953f7aacaafd3d551e12d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 5 Oct 2017 00:18:07 +0200 Subject: KVM: arm/arm64: Move VGIC APR save/restore to vgic put/load The APRs can only have bits set when the guest acknowledges an interrupt in the LR and can only have a bit cleared when the guest EOIs an interrupt in the LR. Therefore, if we have no LRs with any pending/active interrupts, the APR cannot change value and there is no need to clear it on every exit from the VM (hint: it will have already been cleared when we exited the guest the last time with the LRs all EOIed). The only case we need to take care of is when we migrate the VCPU away from a CPU or migrate a new VCPU onto a CPU, or when we return to userspace to capture the state of the VCPU for migration. To make sure this works, factor out the APR save/restore functionality into separate functions called from the VCPU (and by extension VGIC) put/load hooks. 
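For the GICv2 case, the end result looks roughly like this (a sketch matching the hunks below): the GICH_APR access simply moves out of the world-switch path and into the put/load hooks, next to the existing VMCR handling:

	void vgic_v2_load(struct kvm_vcpu *vcpu)
	{
		struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
		struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;

		writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR);
		writel_relaxed(cpu_if->vgic_apr, vgic->vctrl_base + GICH_APR);
	}

	void vgic_v2_put(struct kvm_vcpu *vcpu)
	{
		struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
		struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;

		cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR);
		cpu_if->vgic_apr = readl_relaxed(vgic->vctrl_base + GICH_APR);
	}

The GICv3 case is analogous, except that the APRs live behind the system register interface, so vgic_v3_load()/vgic_v3_put() invoke the new __vgic_v3_restore_aprs()/__vgic_v3_save_aprs() hyp helpers via kvm_call_hyp().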
Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_hyp.h | 2 + arch/arm64/include/asm/kvm_hyp.h | 2 + virt/kvm/arm/hyp/vgic-v3-sr.c | 124 +++++++++++++++++++++------------------ virt/kvm/arm/vgic/vgic-v2.c | 7 +-- virt/kvm/arm/vgic/vgic-v3.c | 5 ++ 5 files changed, 78 insertions(+), 62 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 1ab8329e9ff7..530a3c1cfe6f 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -110,6 +110,8 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp); asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index febe417b8b4e..6f3929b2fcf7 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -124,6 +124,8 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_enable_traps(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 9abf2f3c12b5..437d7af08683 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -21,6 +21,7 @@ #include #include +#include #define vtr_to_max_lr_idx(v) ((v) & 0xf) #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) @@ -221,14 +222,11 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) if (used_lrs) { int i; - u32 nr_pre_bits; u32 elrsr; elrsr = read_gicreg(ICH_ELSR_EL2); write_gicreg(0, ICH_HCR_EL2); - val = read_gicreg(ICH_VTR_EL2); - nr_pre_bits = vtr_to_nr_pre_bits(val); for (i = 0; i < used_lrs; i++) { if (elrsr & (1 << i)) @@ -238,39 +236,10 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) __gic_v3_set_lr(0, i); } - - switch (nr_pre_bits) { - case 7: - cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); - cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); - case 6: - cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); - default: - cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); - } - - switch (nr_pre_bits) { - case 7: - cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); - cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); - case 6: - cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); - default: - cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); - } } else { if (static_branch_unlikely(&vgic_v3_cpuif_trap) || cpu_if->its_vpe.its_vm) write_gicreg(0, ICH_HCR_EL2); - - cpu_if->vgic_ap0r[0] = 0; - cpu_if->vgic_ap0r[1] = 0; - cpu_if->vgic_ap0r[2] = 0; - cpu_if->vgic_ap0r[3] = 0; - cpu_if->vgic_ap1r[0] = 0; - cpu_if->vgic_ap1r[1] = 0; - cpu_if->vgic_ap1r[2] = 0; - cpu_if->vgic_ap1r[3] = 0; } val = read_gicreg(ICC_SRE_EL2); @@ -287,8 +256,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - u64 val; - u32 nr_pre_bits; int i; /* @@ -306,32 +273,9 @@ void __hyp_text __vgic_v3_restore_state(struct 
kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); } - val = read_gicreg(ICH_VTR_EL2); - nr_pre_bits = vtr_to_nr_pre_bits(val); - if (used_lrs) { write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); - switch (nr_pre_bits) { - case 7: - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); - case 6: - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); - default: - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); - } - - switch (nr_pre_bits) { - case 7: - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); - case 6: - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); - default: - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); - } - for (i = 0; i < used_lrs; i++) __gic_v3_set_lr(cpu_if->vgic_lr[i], i); } else { @@ -364,6 +308,72 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) ICC_SRE_EL2); } +void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if; + u64 val; + u32 nr_pre_bits; + + vcpu = kern_hyp_va(vcpu); + cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); + cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); + case 6: + cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); + default: + cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); + } + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); + cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); + case 6: + cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); + default: + cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); + } +} + +void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if; + u64 val; + u32 nr_pre_bits; + + vcpu = kern_hyp_va(vcpu); + cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); + case 6: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); + default: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); + } + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); + case 6: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); + default: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); + } +} + void __hyp_text __vgic_v3_init_lrs(void) { int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 1e5f3eb6973d..ca7cfee9f353 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -446,7 +446,6 @@ void vgic_v2_save_state(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; struct vgic_dist *vgic = &kvm->arch.vgic; - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; void __iomem *base = vgic->vctrl_base; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; @@ -454,11 +453,8 @@ void vgic_v2_save_state(struct kvm_vcpu *vcpu) return; if (used_lrs) { - cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); save_lrs(vcpu, base); writel_relaxed(0, base + GICH_HCR); - } else { - cpu_if->vgic_apr = 0; } } @@ -476,7 +472,6 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu) if (used_lrs) { writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); - writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); for (i = 0; i < used_lrs; i++) { 
writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); @@ -490,6 +485,7 @@ void vgic_v2_load(struct kvm_vcpu *vcpu) struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, vgic->vctrl_base + GICH_APR); } void vgic_v2_put(struct kvm_vcpu *vcpu) @@ -498,4 +494,5 @@ void vgic_v2_put(struct kvm_vcpu *vcpu) struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR); + cpu_if->vgic_apr = readl_relaxed(vgic->vctrl_base + GICH_APR); } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index b76e21f3e6bd..4bafcd1e6bb8 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -587,6 +588,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) */ if (likely(cpu_if->vgic_sre)) kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); + + kvm_call_hyp(__vgic_v3_restore_aprs, vcpu); } void vgic_v3_put(struct kvm_vcpu *vcpu) @@ -595,4 +598,6 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) if (likely(cpu_if->vgic_sre)) cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); + + kvm_call_hyp(__vgic_v3_save_aprs, vcpu); } -- cgit v1.2.3 From 2d0e63e030babe19c94b4453ef4b272c0aacd75a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 5 Oct 2017 17:19:19 +0200 Subject: KVM: arm/arm64: Avoid VGICv3 save/restore on VHE with no IRQs We can finally completely get rid of any calls to the VGICv3 save/restore functions when the AP lists are empty on VHE systems. This requires carefully factoring out trap configuration from saving and restoring state, and carefully choosing what to do on the VHE and non-VHE path. One of the challenges is that we cannot save/restore the VMCR lazily because we can only write the VMCR when ICC_SRE_EL1.SRE is cleared when emulating a GICv2-on-GICv3, since otherwise all Group-0 interrupts end up being delivered as FIQ. To solve this problem, and still provide fast performance in the fast path of exiting a VM when no interrupts are pending (which also optimizes the latency for actually delivering virtual interrupts coming from physical interrupts), we orchestrate a dance of only doing the activate/deactivate traps in vgic load/put for VHE systems (which can have ICC_SRE_EL1.SRE cleared when running in the host), and doing the configuration on every round-trip on non-VHE systems. 
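Sketching the VHE side of that dance (matching the vgic-v3.c hunks below), the trap configuration is done exactly once per load/put instead of on every world switch:

	void vgic_v3_load(struct kvm_vcpu *vcpu)
	{
		struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;

		/*
		 * Write the VMCR directly when the guest uses the sysreg
		 * interface; for GICv2-on-GICv3 emulation (!vgic_sre) the
		 * VMCR write happens in __vgic_v3_activate_traps() instead,
		 * after ICC_SRE_EL1 has been cleared.
		 */
		if (likely(cpu_if->vgic_sre))
			kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);

		kvm_call_hyp(__vgic_v3_restore_aprs, vcpu);

		if (has_vhe())
			__vgic_v3_activate_traps(vcpu);
	}

	void vgic_v3_put(struct kvm_vcpu *vcpu)
	{
		struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;

		if (likely(cpu_if->vgic_sre))
			cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);

		kvm_call_hyp(__vgic_v3_save_aprs, vcpu);

		if (has_vhe())
			__vgic_v3_deactivate_traps(vcpu);
	}

Non-VHE systems keep doing the activate/deactivate from the hyp world-switch wrappers, as the switch.c hunks below show.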
Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_hyp.h | 2 + arch/arm/kvm/hyp/switch.c | 8 ++- arch/arm64/include/asm/kvm_hyp.h | 2 + arch/arm64/kvm/hyp/switch.c | 8 ++- virt/kvm/arm/hyp/vgic-v3-sr.c | 120 +++++++++++++++++++++++++-------------- virt/kvm/arm/vgic/vgic-v3.c | 6 ++ virt/kvm/arm/vgic/vgic.c | 9 ++- 7 files changed, 103 insertions(+), 52 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 530a3c1cfe6f..e93a0cac9add 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -110,6 +110,8 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); +void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index 882b9b9e0077..acf1c37fa49c 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -90,14 +90,18 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) { - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { __vgic_v3_save_state(vcpu); + __vgic_v3_deactivate_traps(vcpu); + } } static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) { - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_activate_traps(vcpu); __vgic_v3_restore_state(vcpu); + } } static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 6f3929b2fcf7..384c34397619 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -124,6 +124,8 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); +void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 86abbee40d3f..07b572173265 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -195,15 +195,19 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) /* Save VGICv3 state on non-VHE systems */ static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { __vgic_v3_save_state(vcpu); + __vgic_v3_deactivate_traps(vcpu); + } } /* Restore VGICv3 state on non-VHE systems */ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_activate_traps(vcpu); __vgic_v3_restore_state(vcpu); + } } static bool __hyp_text __true_value(void) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c 
index 437d7af08683..b13cbd41dbc3 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -209,15 +209,15 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - u64 val; /* * Make sure stores to the GIC via the memory mapped interface - * are now visible to the system register interface. + * are now visible to the system register interface when reading the + * LRs, and when reading back the VMCR on non-VHE systems. */ - if (!cpu_if->vgic_sre) { - dsb(st); - cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) + dsb(st); } if (used_lrs) { @@ -226,7 +226,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) elrsr = read_gicreg(ICH_ELSR_EL2); - write_gicreg(0, ICH_HCR_EL2); + write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); for (i = 0; i < used_lrs; i++) { if (elrsr & (1 << i)) @@ -236,19 +236,6 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) __gic_v3_set_lr(0, i); } - } else { - if (static_branch_unlikely(&vgic_v3_cpuif_trap) || - cpu_if->its_vpe.its_vm) - write_gicreg(0, ICH_HCR_EL2); - } - - val = read_gicreg(ICC_SRE_EL2); - write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); - - if (!cpu_if->vgic_sre) { - /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ - isb(); - write_gicreg(1, ICC_SRE_EL1); } } @@ -258,6 +245,31 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; int i; + if (used_lrs) { + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + + for (i = 0; i < used_lrs; i++) + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); + } + + /* + * Ensure that writes to the LRs, and on non-VHE systems ensure that + * the write to the VMCR in __vgic_v3_activate_traps(), will have + * reached the (re)distributors. This ensures the guest will read the + * correct values from the memory-mapped interface. + */ + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) { + isb(); + dsb(sy); + } + } +} + +void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + /* * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a * Group0 interrupt (as generated in GICv2 mode) to be @@ -265,47 +277,69 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) * consequences. So we must make sure that ICC_SRE_EL1 has * been actually programmed with the value we want before * starting to mess with the rest of the GIC, and VMCR_EL2 in - * particular. + * particular. This logic must be called before + * __vgic_v3_restore_state(). */ if (!cpu_if->vgic_sre) { write_gicreg(0, ICC_SRE_EL1); isb(); write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); - } - if (used_lrs) { - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); - for (i = 0; i < used_lrs; i++) - __gic_v3_set_lr(cpu_if->vgic_lr[i], i); - } else { - /* - * If we need to trap system registers, we must write - * ICH_HCR_EL2 anyway, even if no interrupts are being - * injected. Same thing if GICv4 is used, as VLPI - * delivery is gated by ICH_HCR_EL2.En. - */ - if (static_branch_unlikely(&vgic_v3_cpuif_trap) || - cpu_if->its_vpe.its_vm) - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + if (has_vhe()) { + /* + * Ensure that the write to the VMCR will have reached + * the (re)distributors. This ensures the guest will + * read the correct values from the memory-mapped + * interface. 
+ */ + isb(); + dsb(sy); + } } /* - * Ensures that the above will have reached the - * (re)distributors. This ensure the guest will read the - * correct values from the memory-mapped interface. + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. */ + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + + /* + * If we need to trap system registers, we must write + * ICH_HCR_EL2 anyway, even if no interrupts are being + * injected. + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); +} + +void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + if (!cpu_if->vgic_sre) { + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + } + + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + + if (!cpu_if->vgic_sre) { + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ isb(); - dsb(sy); + write_gicreg(1, ICC_SRE_EL1); } /* - * Prevent the guest from touching the GIC system registers if - * SRE isn't enabled for GICv3 emulation. + * If we were trapping system registers, we enabled the VGIC even if + * no interrupts were being injected, and we disable it again here. */ - write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, - ICC_SRE_EL2); + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(0, ICH_HCR_EL2); } void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 4bafcd1e6bb8..4200657694f0 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -590,6 +590,9 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); kvm_call_hyp(__vgic_v3_restore_aprs, vcpu); + + if (has_vhe()) + __vgic_v3_activate_traps(vcpu); } void vgic_v3_put(struct kvm_vcpu *vcpu) @@ -600,4 +603,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); kvm_call_hyp(__vgic_v3_save_aprs, vcpu); + + if (has_vhe()) + __vgic_v3_deactivate_traps(vcpu); } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index eaab4a616ecf..4c4b011685b4 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -773,15 +773,15 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - if (can_access_vgic_from_kernel()) - vgic_save_state(vcpu); - WARN_ON(vgic_v4_sync_hwstate(vcpu)); /* An empty ap_list_head implies used_lrs == 0 */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; + if (can_access_vgic_from_kernel()) + vgic_save_state(vcpu); + if (vgic_cpu->used_lrs) vgic_fold_lr_state(vcpu); vgic_prune_ap_list(vcpu); @@ -810,7 +810,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * this.
*/ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) - goto out; + return; DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); @@ -818,7 +818,6 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) vgic_flush_lr_state(vcpu); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); -out: if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); } -- cgit v1.2.3 From b4ef04995d33a1ecfec3bdfb7776c1ac36dbe87d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 3 Dec 2017 20:04:51 +0000 Subject: KVM: arm/arm64: Demote HYP VA range display to being a debug feature Displaying the HYP VA information is slightly counterproductive when using VA randomization. Turn it into a debug feature only, and adjust the last displayed value to reflect the top of RAM instead of ~0. Acked-by: Christoffer Dall Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 9ebff8e530f9..3eaf46828caa 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1810,9 +1810,10 @@ int kvm_mmu_init(void) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - kvm_info("IDMAP page: %lx\n", hyp_idmap_start); - kvm_info("HYP VA range: %lx:%lx\n", - kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); + kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); + kvm_debug("HYP VA range: %lx:%lx\n", + kern_hyp_va(PAGE_OFFSET), + kern_hyp_va((unsigned long)high_memory - 1)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && hyp_idmap_start < kern_hyp_va(~0UL) && -- cgit v1.2.3 From 807a378425d27d377fdf181d1dba91539bac9294 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Dec 2017 16:26:09 +0000 Subject: KVM: arm/arm64: Move ioremap calls to create_hyp_io_mappings Both HYP io mappings call ioremap, followed by create_hyp_io_mappings. Let's move the ioremap call into create_hyp_io_mappings itself, which simplifies the code a bit and allows for further refactoring. 
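To make the reshaped interface concrete, here is a minimal userspace sketch of the new calling convention; ioremap_stub(), the _sketch suffix and the main() driver are invented for illustration and are not kernel code:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef uint64_t phys_addr_t;

    /* Stand-in for the kernel's ioremap(): pretend to map an MMIO region. */
    static void *ioremap_stub(phys_addr_t phys, size_t size)
    {
        (void)phys;
        return calloc(1, size);
    }

    /*
     * Sketch of the reworked helper: it now owns the ioremap call, so a
     * caller goes from "ioremap, then map into HYP" to one call that
     * hands back the kernel VA.
     */
    static int create_hyp_io_mappings_sketch(phys_addr_t phys_addr, size_t size,
                                             void **kaddr)
    {
        *kaddr = ioremap_stub(phys_addr, size);
        if (!*kaddr)
            return -1; /* -ENOMEM in the kernel */
        /* ... the HYP page-table mapping would be installed here ... */
        return 0;
    }

    int main(void)
    {
        void *gich;

        if (create_hyp_io_mappings_sketch(0x8010000ULL, 4096, &gich))
            return 1;
        printf("GICH mapped at %p\n", gich);
        free(gich);
        return 0;
    }
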
Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 3 ++- arch/arm64/include/asm/kvm_mmu.h | 3 ++- virt/kvm/arm/mmu.c | 24 ++++++++++++++---------- virt/kvm/arm/vgic/vgic-v2.c | 31 ++++++++----------------------- 4 files changed, 26 insertions(+), 35 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 93395b7c2322..bf17ad83d2f0 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -50,7 +50,8 @@ #include int create_hyp_mappings(void *from, void *to, pgprot_t prot); -int create_hyp_io_mappings(void *from, void *to, phys_addr_t); +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 021d3a8117a8..8d5f9934d819 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -140,7 +140,8 @@ static inline unsigned long __kern_hyp_va(unsigned long v) #include int create_hyp_mappings(void *from, void *to, pgprot_t prot); -int create_hyp_io_mappings(void *from, void *to, phys_addr_t); +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 3eaf46828caa..2bc32bdf932a 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -709,26 +709,30 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot) } /** - * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode - * @from: The kernel start VA of the range - * @to: The kernel end VA of the range (exclusive) + * create_hyp_io_mappings - Map IO into both kernel and HYP * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @kaddr: Kernel VA for this mapping * * The resulting HYP VA is the same as the kernel VA, modulo * HYP_PAGE_OFFSET. 
*/ -int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr) { - unsigned long start = kern_hyp_va((unsigned long)from); - unsigned long end = kern_hyp_va((unsigned long)to); + unsigned long start, end; - if (is_kernel_in_hyp_mode()) + *kaddr = ioremap(phys_addr, size); + if (!*kaddr) + return -ENOMEM; + + if (is_kernel_in_hyp_mode()) { return 0; + } - /* Check for a valid kernel IO mapping */ - if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) - return -EINVAL; + start = kern_hyp_va((unsigned long)*kaddr); + end = kern_hyp_va((unsigned long)*kaddr + size); return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index ca7cfee9f353..66532f2f0e40 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -360,16 +360,10 @@ int vgic_v2_probe(const struct gic_kvm_info *info) if (!PAGE_ALIGNED(info->vcpu.start) || !PAGE_ALIGNED(resource_size(&info->vcpu))) { kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); - kvm_vgic_global_state.vcpu_base_va = ioremap(info->vcpu.start, - resource_size(&info->vcpu)); - if (!kvm_vgic_global_state.vcpu_base_va) { - kvm_err("Cannot ioremap GICV\n"); - return -ENOMEM; - } - ret = create_hyp_io_mappings(kvm_vgic_global_state.vcpu_base_va, - kvm_vgic_global_state.vcpu_base_va + resource_size(&info->vcpu), - info->vcpu.start); + ret = create_hyp_io_mappings(info->vcpu.start, + resource_size(&info->vcpu), + &kvm_vgic_global_state.vcpu_base_va); if (ret) { kvm_err("Cannot map GICV into hyp\n"); goto out; @@ -378,26 +372,17 @@ int vgic_v2_probe(const struct gic_kvm_info *info) static_branch_enable(&vgic_v2_cpuif_trap); } - kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start, - resource_size(&info->vctrl)); - if (!kvm_vgic_global_state.vctrl_base) { - kvm_err("Cannot ioremap GICH\n"); - ret = -ENOMEM; + ret = create_hyp_io_mappings(info->vctrl.start, + resource_size(&info->vctrl), + &kvm_vgic_global_state.vctrl_base); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); goto out; } vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; - ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, - kvm_vgic_global_state.vctrl_base + - resource_size(&info->vctrl), - info->vctrl.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out; - } - ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); if (ret) { kvm_err("Cannot register GICv2 KVM device\n"); -- cgit v1.2.3 From 1bb32a44aea1fe73c6f84e466a45ae559ef74559 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Dec 2017 16:43:23 +0000 Subject: KVM: arm/arm64: Keep GICv2 HYP VAs in kvm_vgic_global_state As we're about to change the way we map devices at HYP, we need to move away from kern_hyp_va on an IO address. One way of achieving this is to store the VAs in kvm_vgic_global_state, and use that directly from the HYP code. This requires a small change to create_hyp_io_mappings so that it can also return a HYP VA. We take this opportunity to nuke the vctrl_base field in the emulated distributor, as it is not used anymore. 
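As a rough model of the idea, the sketch below caches both addresses at map time instead of re-deriving one from the other later; the struct and function names are illustrative stand-ins, not the kernel's definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the two cached mappings of the same GICV region. */
    struct vgic_global_sketch {
        void *vcpu_base_va; /* kernel VA (vmalloc space) */
        void *vcpu_hyp_va;  /* HYP VA, recorded when the mapping is made */
    };

    static struct vgic_global_sketch vgic_global;

    /*
     * The HYP-side GICV access proxy can now use the cached HYP VA
     * directly, instead of applying a linear-map-only kern_hyp_va
     * transform to a vmalloc address.
     */
    static void *gicv_fault_addr(uint64_t fault_ipa, uint64_t gicv_base_ipa)
    {
        return (char *)vgic_global.vcpu_hyp_va + (fault_ipa - gicv_base_ipa);
    }

    int main(void)
    {
        static char fake_gicv[4096];

        vgic_global.vcpu_base_va = fake_gicv; /* pretend ioremap result */
        vgic_global.vcpu_hyp_va = fake_gicv;  /* pretend HYP alias */
        printf("fault at +0x100 -> %p\n", gicv_fault_addr(0x8010100, 0x8010000));
        return 0;
    }
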
Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 3 ++- arch/arm64/include/asm/kvm_mmu.h | 3 ++- arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 +- include/kvm/arm_vgic.h | 12 ++++++------ virt/kvm/arm/mmu.c | 20 +++++++++++++++----- virt/kvm/arm/vgic/vgic-init.c | 6 ------ virt/kvm/arm/vgic/vgic-v2.c | 26 ++++++++++++-------------- 7 files changed, 38 insertions(+), 34 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index bf17ad83d2f0..4c5c8a386baf 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -51,7 +51,8 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, - void __iomem **kaddr); + void __iomem **kaddr, + void __iomem **haddr); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8d5f9934d819..3836641dd6f0 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -141,7 +141,8 @@ static inline unsigned long __kern_hyp_va(unsigned long v) int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, - void __iomem **kaddr); + void __iomem **kaddr, + void __iomem **haddr); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 10eb2e96b3e6..86801b6055d6 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -60,7 +60,7 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) return -1; rd = kvm_vcpu_dabt_get_rd(vcpu); - addr = kern_hyp_va(hyp_symbol_addr(kvm_vgic_global_state)->vcpu_base_va); + addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va; addr += fault_ipa - vgic->vgic_cpu_base; if (kvm_vcpu_dabt_iswrite(vcpu)) { diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ac98ae46bfb7..87d2ad0a4292 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -57,11 +57,15 @@ struct vgic_global { /* Physical address of vgic virtual cpu interface */ phys_addr_t vcpu_base; - /* GICV mapping */ + /* GICV mapping, kernel VA */ void __iomem *vcpu_base_va; + /* GICV mapping, HYP VA */ + void __iomem *vcpu_hyp_va; - /* virtual control interface mapping */ + /* virtual control interface mapping, kernel VA */ void __iomem *vctrl_base; + /* virtual control interface mapping, HYP VA */ + void __iomem *vctrl_hyp; /* Number of implemented list registers */ int nr_lr; @@ -209,10 +213,6 @@ struct vgic_dist { int nr_spis; - /* TODO: Consider moving to global state */ - /* Virtual control interface mapping */ - void __iomem *vctrl_base; - /* base addresses in guest physical address space: */ gpa_t vgic_dist_base; /* distributor */ union { diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 2bc32bdf932a..52b0ee31ebee 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -713,28 +713,38 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot) * @phys_addr: The physical start address which gets mapped * @size: Size of the region being mapped * @kaddr: Kernel VA for this mapping - * - * The resulting HYP VA is the same as the kernel VA, modulo - * HYP_PAGE_OFFSET. 
+ * @haddr: HYP VA for this mapping */ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, - void __iomem **kaddr) + void __iomem **kaddr, + void __iomem **haddr) { unsigned long start, end; + int ret; *kaddr = ioremap(phys_addr, size); if (!*kaddr) return -ENOMEM; if (is_kernel_in_hyp_mode()) { + *haddr = *kaddr; return 0; } start = kern_hyp_va((unsigned long)*kaddr); end = kern_hyp_va((unsigned long)*kaddr + size); - return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, + ret = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); + + if (ret) { + iounmap(*kaddr); + *kaddr = NULL; + return ret; + } + + *haddr = (void __iomem *)start; + return 0; } /** diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 3e8209a07585..68378fe17a0e 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -166,12 +166,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; - /* - * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init) - * it is stored in distributor struct for asm save/restore purpose - */ - kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 66532f2f0e40..96e144f9d93d 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -363,7 +363,8 @@ int vgic_v2_probe(const struct gic_kvm_info *info) ret = create_hyp_io_mappings(info->vcpu.start, resource_size(&info->vcpu), - &kvm_vgic_global_state.vcpu_base_va); + &kvm_vgic_global_state.vcpu_base_va, + &kvm_vgic_global_state.vcpu_hyp_va); if (ret) { kvm_err("Cannot map GICV into hyp\n"); goto out; @@ -374,7 +375,8 @@ int vgic_v2_probe(const struct gic_kvm_info *info) ret = create_hyp_io_mappings(info->vctrl.start, resource_size(&info->vctrl), - &kvm_vgic_global_state.vctrl_base); + &kvm_vgic_global_state.vctrl_base, + &kvm_vgic_global_state.vctrl_hyp); if (ret) { kvm_err("Cannot map VCTRL into hyp\n"); goto out; @@ -429,9 +431,7 @@ static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) void vgic_v2_save_state(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = vgic->vctrl_base; + void __iomem *base = kvm_vgic_global_state.vctrl_base; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; if (!base) @@ -445,10 +445,8 @@ void vgic_v2_save_state(struct kvm_vcpu *vcpu) void vgic_v2_restore_state(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *vgic = &kvm->arch.vgic; struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - void __iomem *base = vgic->vctrl_base; + void __iomem *base = kvm_vgic_global_state.vctrl_base; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; int i; @@ -467,17 +465,17 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu) void vgic_v2_load(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; - writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR); - writel_relaxed(cpu_if->vgic_apr, vgic->vctrl_base + GICH_APR); + writel_relaxed(cpu_if->vgic_vmcr, + kvm_vgic_global_state.vctrl_base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, + kvm_vgic_global_state.vctrl_base + GICH_APR); } void 
vgic_v2_put(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; - cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR); - cpu_if->vgic_apr = readl_relaxed(vgic->vctrl_base + GICH_APR); + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); + cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); } -- cgit v1.2.3 From 46fef158f10d255dfe47cf3a0ab5f0965b66b1da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 12 Mar 2018 14:25:10 +0000 Subject: KVM: arm/arm64: Fix idmap size and alignment Although the idmap section of KVM can only be at most 4kB and must be aligned on a 4kB boundary, the rest of the code expects it to be page aligned. Things get messy when tearing down the HYP page tables when PAGE_SIZE is 64K, and the idmap section isn't 64K aligned. Let's fix this by computing aligned boundaries that the HYP code will use. Acked-by: Catalin Marinas Reported-by: James Morse Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 52b0ee31ebee..c41d03a8af93 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1815,7 +1815,9 @@ int kvm_mmu_init(void) int err; hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE); hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); /* -- cgit v1.2.3 From 3ddd45565373604d4f49cb0496fc0168e3863c1f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Mar 2018 15:17:33 +0000 Subject: KVM: arm64: Fix HYP idmap unmap when using 52bit PA Unmapping the idmap range using 52bit PA is quite broken, as we don't take into account the right number of PGD entries, and rely on PTRS_PER_PGD. The result is that pgd_index() truncates the address, and we end up in the weeds. Let's introduce a new unmap_hyp_idmap_range() that knows about this, together with a kvm_pgd_index() helper, which hides a bit of the complexity of the issue. Fixes: 98732d1b189b ("KVM: arm/arm64: fix HYP ID map extension to 52 bits") Reported-by: James Morse Reviewed-by: Catalin Marinas Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index c41d03a8af93..f7fe724c14f4 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -479,7 +479,13 @@ static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) clear_hyp_pgd_entry(pgd); } -static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd) +{ + return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1); +} + +static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd, + phys_addr_t start, u64 size) { pgd_t *pgd; phys_addr_t addr = start, end = start + size; @@ -489,7 +495,7 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) * We don't unmap anything from HYP, except at the hyp tear down. * Hence, we don't have to invalidate the TLBs here.
*/ - pgd = pgdp + pgd_index(addr); + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); do { next = pgd_addr_end(addr, end); if (!pgd_none(*pgd)) @@ -497,6 +503,16 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) } while (pgd++, addr = next, addr != end); } +static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size); +} + +static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size); +} + /** * free_hyp_pgds - free Hyp-mode page tables * @@ -512,13 +528,13 @@ void free_hyp_pgds(void) mutex_lock(&kvm_hyp_pgd_mutex); if (boot_hyp_pgd) { - unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_hyp_idmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) { - unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_hyp_idmap_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), (uintptr_t)high_memory - PAGE_OFFSET); unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START), @@ -634,7 +650,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd, addr = start & PAGE_MASK; end = PAGE_ALIGN(end); do { - pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1)); + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); if (pgd_none(*pgd)) { pud = pud_alloc_one(NULL, addr); -- cgit v1.2.3 From e3f019b37b580c3b954419212da26ac5db412a08 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Dec 2017 17:04:38 +0000 Subject: KVM: arm/arm64: Move HYP IO VAs to the "idmap" range We so far mapped our HYP IO (which is essentially the GICv2 control registers) using the same method as for memory. It recently appeared that this is a bit unsafe: We compute the HYP VA using the kern_hyp_va helper, but that helper is only designed to deal with kernel VAs coming from the linear map, and not from the vmalloc region... This could in turn cause some bad aliasing between the two, amplified by the upcoming VA randomisation. A solution is to come up with our very own basic VA allocator for MMIO. Since half of the HYP address space only contains a single page (the idmap), we have plenty to borrow from. Let's use the idmap as a base, and allocate downwards from it. GICv2 now lives on the other side of the great VA barrier.
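The allocation scheme is small enough to model in full. Below is a userspace sketch of the downward allocator described above; VA_BITS, the pretend idmap address and the function name are assumptions made for the demo, while the subtraction and overflow check follow the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define VA_BITS 48
    #define BIT(n)  (1ULL << (n))

    /* Current allocation cursor; starts at the idmap page and grows down. */
    static uint64_t io_map_base;

    /*
     * Returns the allocated base, or 0 when the region is exhausted. The
     * overflow test mirrors the patch: if BIT(VA_BITS - 1) changes while
     * subtracting, the allocation fell out of the idmap/IO half of the
     * HYP address space.
     */
    static uint64_t alloc_hyp_io_va(uint64_t size)
    {
        uint64_t base = io_map_base - size;

        if ((base ^ io_map_base) & BIT(VA_BITS - 1))
            return 0;
        io_map_base = base;
        return base;
    }

    int main(void)
    {
        io_map_base = BIT(VA_BITS - 1) + BIT(30); /* pretend idmap VA */
        printf("GICV at %#llx\n", (unsigned long long)alloc_hyp_io_va(0x2000));
        printf("GICH at %#llx\n", (unsigned long long)alloc_hyp_io_va(0x1000));
        return 0;
    }
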
Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 3 ++ virt/kvm/arm/mmu.c | 73 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 62 insertions(+), 14 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 4c5c8a386baf..b0b0cde77ca6 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -49,6 +49,9 @@ #include #include +/* Ensure compatibility with arm64 */ +#define VA_BITS 32 + int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index f7fe724c14f4..d8ea68b78e9c 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -43,6 +43,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +static unsigned long io_map_base; + #define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) @@ -518,27 +520,35 @@ static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size) * * Assumes hyp_pgd is a page table used strictly in Hyp-mode and * therefore contains either mappings in the kernel memory area (above - * PAGE_OFFSET), or device mappings in the vmalloc range (from - * VMALLOC_START to VMALLOC_END). + * PAGE_OFFSET), or device mappings in the idmap range. * - * boot_hyp_pgd should only map two pages for the init code. + * boot_hyp_pgd should only map the idmap range, and is only used in + * the extended idmap case. */ void free_hyp_pgds(void) { + pgd_t *id_pgd; + mutex_lock(&kvm_hyp_pgd_mutex); + id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd; + + if (id_pgd) { + /* In case we never called hyp_mmu_init() */ + if (!io_map_base) + io_map_base = hyp_idmap_start; + unmap_hyp_idmap_range(id_pgd, io_map_base, + hyp_idmap_start + PAGE_SIZE - io_map_base); + } + if (boot_hyp_pgd) { - unmap_hyp_idmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) { - unmap_hyp_idmap_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), (uintptr_t)high_memory - PAGE_OFFSET); - unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START), - VMALLOC_END - VMALLOC_START); free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; @@ -735,8 +745,9 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, void __iomem **haddr) { - unsigned long start, end; - int ret; + pgd_t *pgd = hyp_pgd; + unsigned long base; + int ret = 0; *kaddr = ioremap(phys_addr, size); if (!*kaddr) @@ -747,19 +758,52 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, return 0; } + mutex_lock(&kvm_hyp_pgd_mutex); - start = kern_hyp_va((unsigned long)*kaddr); - end = kern_hyp_va((unsigned long)*kaddr + size); - ret = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, - __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); + /* + * This assumes that we have enough space below the idmap + * page to allocate our VAs. If not, the check below will + * kick in. A potential alternative would be to detect that + * overflow and switch to an allocation above the idmap. + * + * The allocated size is always a multiple of PAGE_SIZE.
+ */ + size = PAGE_ALIGN(size + offset_in_page(phys_addr)); + base = io_map_base - size; + /* + * Verify that BIT(VA_BITS - 1) hasn't been flipped by + * allocating the new area, as it would indicate we've + * overflowed the idmap/IO address range. + */ + if ((base ^ io_map_base) & BIT(VA_BITS - 1)) + ret = -ENOMEM; + else + io_map_base = base; + + mutex_unlock(&kvm_hyp_pgd_mutex); + + if (ret) + goto out; + + if (__kvm_cpu_uses_extended_idmap()) + pgd = boot_hyp_pgd; + + ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), + base, base + size, + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); + if (ret) + goto out; + + *haddr = (void __iomem *)base + offset_in_page(phys_addr); + +out: if (ret) { iounmap(*kaddr); *kaddr = NULL; return ret; } - *haddr = (void __iomem *)start; return 0; } @@ -1892,6 +1936,7 @@ int kvm_mmu_init(void) goto out; } + io_map_base = hyp_idmap_start; return 0; out: free_hyp_pgds(); -- cgit v1.2.3 From ed57cac83e05f2e93567e4b5c57ee58a1bf8a582 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 3 Dec 2017 18:22:49 +0000 Subject: arm64: KVM: Introduce EL2 VA randomisation The main idea behind randomising the EL2 VA is that we usually have a few spare bits between the most significant bit of the VA mask and the most significant bit of the linear mapping. Those bits could be a bunch of zeroes, and could be useful to move things around a bit. Of course, the more memory you have, the less randomisation you get... Alternatively, these bits could be the result of KASLR, in which case they are already random. But it would be nice to have a *different* randomization, just to make the job of a potential attacker a bit more difficult. Inserting these random bits is a bit involved. We don't have a spare register (short of rewriting all the kern_hyp_va call sites), and the immediate we want to insert is too random to be used with the ORR instruction. The best option I could come up with is the following sequence:

    and x0, x0, #va_mask
    ror x0, x0, #first_random_bit
    add x0, x0, #(random & 0xfff)
    add x0, x0, #(random >> 12), lsl #12
    ror x0, x0, #(64 - first_random_bit)

making it a fairly long sequence, but one that a decent CPU should be able to execute without breaking a sweat. It is of course NOPed out on VHE. The last 4 instructions can also be turned into NOPs if it appears that there are no free bits to use.
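Since the sequence is easier to check in C than in assembly, here is a userspace model of the transformation; the tag position, tag value and function names are made up for the demo, while the and/ror/add/add/ror steps follow the patch:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Example layout only: tag in bits [46:23] of a 48-bit VA space. The
     * kernel derives these values from the memory map and get_random_long().
     */
    #define TAG_LSB 23
    static const uint64_t va_mask = (1ULL << TAG_LSB) - 1;
    static const uint64_t tag_val = 0x9a5b3c; /* right-justified random tag */

    static uint64_t rotr64(uint64_t v, unsigned int n)
    {
        return (v >> n) | (v << (64 - n));
    }

    /* C model of the patched kern_hyp_va sequence: and/ror/add/add/ror. */
    static uint64_t kern_hyp_va_model(uint64_t va)
    {
        va &= va_mask;                 /* and x0, x0, #va_mask */
        va = rotr64(va, TAG_LSB);      /* ror x0, x0, #tag_lsb */
        va += tag_val & 0xfff;         /* add low 12 bits of the tag */
        va += tag_val & 0xfff000;      /* add next 12 bits (the lsl #12 form) */
        va = rotr64(va, 64 - TAG_LSB); /* ror back into place */
        return va;
    }

    int main(void)
    {
        uint64_t va = 0xffff800012345678ULL;

        printf("%016llx -> %016llx\n", (unsigned long long)va,
               (unsigned long long)kern_hyp_va_model(va));
        return 0;
    }
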
Acked-by: Catalin Marinas Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_mmu.h | 12 +++++-- arch/arm64/kvm/va_layout.c | 76 +++++++++++++++++++++++++++++++++++++--- virt/kvm/arm/mmu.c | 2 +- 3 files changed, 82 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 3836641dd6f0..a84e2dd14ebe 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -85,7 +85,11 @@ */ .macro kern_hyp_va reg alternative_cb kvm_update_va_mask - and \reg, \reg, #1 + and \reg, \reg, #1 /* mask with va_mask */ + ror \reg, \reg, #1 /* rotate to the first tag bit */ + add \reg, \reg, #0 /* insert the low 12 bits of the tag */ + add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */ + ror \reg, \reg, #63 /* rotate back */ alternative_cb_end .endm @@ -102,7 +106,11 @@ void kvm_update_va_mask(struct alt_instr *alt, static inline unsigned long __kern_hyp_va(unsigned long v) { - asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n", + asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" + "ror %0, %0, #1\n" + "add %0, %0, #0\n" + "add %0, %0, #0, lsl 12\n" + "ror %0, %0, #63\n", kvm_update_va_mask) : "+r" (v)); return v; diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 7998d1a60916..3d41a480b6a5 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -16,24 +16,60 @@ */ #include +#include +#include #include #include #include #include +/* + * The LSB of the random hyp VA tag or 0 if no randomization is used. + */ +static u8 tag_lsb; +/* + * The random hyp VA tag value with the region bit if hyp randomization is used + */ +static u64 tag_val; static u64 va_mask; static void compute_layout(void) { phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); u64 hyp_va_msb; + int kva_msb; /* Where is my RAM region? */ hyp_va_msb = idmap_addr & BIT(VA_BITS - 1); hyp_va_msb ^= BIT(VA_BITS - 1); - va_mask = GENMASK_ULL(VA_BITS - 2, 0); - va_mask |= hyp_va_msb; + kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ + (u64)(high_memory - 1)); + + if (kva_msb == (VA_BITS - 1)) { + /* + * No space in the address, let's compute the mask so + * that it covers (VA_BITS - 1) bits, and the region + * bit. The tag stays set to zero. + */ + va_mask = BIT(VA_BITS - 1) - 1; + va_mask |= hyp_va_msb; + } else { + /* + * We do have some free bits to insert a random tag. + * Hyp VAs are now created from kernel linear map VAs + * using the following formula (with V == VA_BITS): + * + * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 
0 + * --------------------------------------------------------- + * | 0000000 | hyp_va_msb | random tag | kern linear VA | + */ + tag_lsb = kva_msb; + va_mask = GENMASK_ULL(tag_lsb - 1, 0); + tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb); + tag_val |= hyp_va_msb; + tag_val >>= tag_lsb; + } } static u32 compute_instruction(int n, u32 rd, u32 rn) @@ -46,6 +82,33 @@ static u32 compute_instruction(int n, u32 rd, u32 rn) AARCH64_INSN_VARIANT_64BIT, rn, rd, va_mask); break; + + case 1: + /* ROR is a variant of EXTR with Rm = Rn */ + insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT, + rn, rn, rd, + tag_lsb); + break; + + case 2: + insn = aarch64_insn_gen_add_sub_imm(rd, rn, + tag_val & GENMASK(11, 0), + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_ADSB_ADD); + break; + + case 3: + insn = aarch64_insn_gen_add_sub_imm(rd, rn, + tag_val & GENMASK(23, 12), + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_ADSB_ADD); + break; + + case 4: + /* ROR is a variant of EXTR with Rm = Rn */ + insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT, + rn, rn, rd, 64 - tag_lsb); + break; } return insn; @@ -56,8 +119,7 @@ void __init kvm_update_va_mask(struct alt_instr *alt, { int i; - /* We only expect a single instruction in the alternative sequence */ - BUG_ON(nr_inst != 1); + BUG_ON(nr_inst != 5); if (!has_vhe() && !va_mask) compute_layout(); @@ -68,8 +130,12 @@ void __init kvm_update_va_mask(struct alt_instr *alt, /* * VHE doesn't need any address translation, let's NOP * everything. + * + * Alternatively, if we don't have any spare bits in + * the address, NOP everything after masking that + * kernel VA. */ - if (has_vhe()) { + if (has_vhe() || (!tag_lsb && i > 0)) { updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); continue; } diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index d8ea68b78e9c..394f41a65732 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1892,7 +1892,7 @@ int kvm_mmu_init(void) kern_hyp_va((unsigned long)high_memory - 1)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && - hyp_idmap_start < kern_hyp_va(~0UL) && + hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) && hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { /* * The idmap page is intersecting with the VA space, -- cgit v1.2.3 From dc2e4633ff39c9fb5ef3a5443721f0e3499b36ed Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Feb 2018 11:00:29 +0000 Subject: arm/arm64: KVM: Introduce EL2-specific executable mappings Until now, all EL2 executable mappings were derived from their EL1 VA. Since we want to decouple the vectors mapping from the rest of the hypervisor, we need to be able to map some text somewhere else. The "idmap" region (for lack of a better name) is ideally suited for this, as we have a huge range that hardly has anything in it. Let's extend the IO allocator to also deal with executable mappings, thus providing the required feature. 
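To illustrate the refactoring, here is a toy sketch of a shared private-mapping helper that takes a protection argument, which is the shape the patch below gives the code; all names, the printf tracing and the pretend VA pool are invented for the illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy flags standing in for PAGE_HYP_DEVICE / PAGE_HYP_EXEC. */
    enum hyp_prot { HYP_PROT_DEVICE, HYP_PROT_EXEC };

    /*
     * Both mapping flavours funnel through one private-VA helper that
     * carves space out of the idmap-side pool and applies the requested
     * protection; only the prot argument differs between the two users.
     */
    static int create_hyp_private_mapping(uint64_t phys, uint64_t size,
                                          uint64_t *haddr, enum hyp_prot prot)
    {
        static uint64_t pool_va = 1ULL << 47; /* pretend idmap-side pool */

        pool_va -= size;
        *haddr = pool_va;
        printf("map phys %#llx size %#llx as %s at %#llx\n",
               (unsigned long long)phys, (unsigned long long)size,
               prot == HYP_PROT_EXEC ? "exec" : "device",
               (unsigned long long)*haddr);
        return 0;
    }

    int main(void)
    {
        uint64_t io_va, vec_va;

        create_hyp_private_mapping(0x8010000, 0x2000, &io_va, HYP_PROT_DEVICE);
        create_hyp_private_mapping(0x40080000, 0x1000, &vec_va, HYP_PROT_EXEC);
        return 0;
    }
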
Acked-by: Catalin Marinas Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 2 + arch/arm64/include/asm/kvm_mmu.h | 2 + virt/kvm/arm/mmu.c | 80 +++++++++++++++++++++++++++++----------- 3 files changed, 63 insertions(+), 21 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index b0b0cde77ca6..707a1f06dc5d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -56,6 +56,8 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, void __iomem **haddr); +int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, + void **haddr); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 65dc225d019c..eb04437d50fa 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -151,6 +151,8 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, void __iomem **haddr); +int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, + void **haddr); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 394f41a65732..7f6a944db23d 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -734,30 +734,13 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot) return 0; } -/** - * create_hyp_io_mappings - Map IO into both kernel and HYP - * @phys_addr: The physical start address which gets mapped - * @size: Size of the region being mapped - * @kaddr: Kernel VA for this mapping - * @haddr: HYP VA for this mapping - */ -int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, - void __iomem **kaddr, - void __iomem **haddr) +static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, + unsigned long *haddr, pgprot_t prot) { pgd_t *pgd = hyp_pgd; unsigned long base; int ret = 0; - *kaddr = ioremap(phys_addr, size); - if (!*kaddr) - return -ENOMEM; - - if (is_kernel_in_hyp_mode()) { - *haddr = *kaddr; - return 0; - } - mutex_lock(&kvm_hyp_pgd_mutex); /* @@ -791,19 +774,74 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), base, base + size, - __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); + __phys_to_pfn(phys_addr), prot); if (ret) goto out; - *haddr = (void __iomem *)base + offset_in_page(phys_addr); + *haddr = base + offset_in_page(phys_addr); out: + return ret; +} + +/** + * create_hyp_io_mappings - Map IO into both kernel and HYP + * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @kaddr: Kernel VA for this mapping + * @haddr: HYP VA for this mapping + */ +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr, + void __iomem **haddr) +{ + unsigned long addr; + int ret; + + *kaddr = ioremap(phys_addr, size); + if (!*kaddr) + return -ENOMEM; + + if (is_kernel_in_hyp_mode()) { + *haddr = *kaddr; + return 0; + } + + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_DEVICE); if (ret) { iounmap(*kaddr); *kaddr = NULL; + *haddr = NULL; + return ret; + } + + *haddr = (void __iomem *)addr; + return 0; +} + +/** + * create_hyp_exec_mappings - Map an executable range into HYP + * @phys_addr: The 
physical start address which gets mapped + * @size: Size of the region being mapped + * @haddr: HYP VA for this mapping + */ +int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, + void **haddr) +{ + unsigned long addr; + int ret; + + BUG_ON(is_kernel_in_hyp_mode()); + + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_EXEC); + if (ret) { + *haddr = NULL; return ret; } + *haddr = (void *)addr; return 0; } -- cgit v1.2.3 From 67b5b673ad4d469186edddecfe59909a9fd4b643 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Mar 2018 14:59:40 +0000 Subject: KVM: arm/arm64: vgic: Disallow Active+Pending for level interrupts It was recently reported that VFIO mediated devices, and anything that VFIO exposes as level interrupts, do not strictly follow the expected logic of such interrupts as it only lowers the input line when the guest has EOId the interrupt at the GIC level, rather than when it Acked the interrupt at the device level. The GIC's Active+Pending state is fundamentally incompatible with this behaviour, as it prevents KVM from observing the EOI, and in turn results in VFIO never dropping the line. This results in an interrupt storm in the guest, which it really never expected. As we cannot really change VFIO to follow the strict rules of level signalling, let's forbid the A+P state altogether, as it is in the end only an optimization. It ensures that we will transition via an invalid state, which we can use to notify VFIO of the EOI. Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Shunyong Yang Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-v2.c | 54 +++++++++++++++++++++++++-------------------- virt/kvm/arm/vgic/vgic-v3.c | 54 +++++++++++++++++++++++++-------------------- 2 files changed, 60 insertions(+), 48 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 8327ea932aea..45aa433f018f 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -105,12 +105,9 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) /* * Clear soft pending state when level irqs have been acked. - * Always regenerate the pending state. */ - if (irq->config == VGIC_CONFIG_LEVEL) { - if (!(val & GICH_LR_PENDING_BIT)) - irq->pending_latch = false; - } + if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) + irq->pending_latch = false; /* * Level-triggered mapped IRQs are special because we only @@ -153,8 +150,35 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { u32 val = irq->intid; + bool allow_pending = true; + + if (irq->active) + val |= GICH_LR_ACTIVE_BIT; + + if (irq->hw) { + val |= GICH_LR_HW; + val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= GICH_LR_EOI; - if (irq_is_pending(irq)) { + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that.
+ */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { val |= GICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) @@ -171,24 +195,6 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->active) - val |= GICH_LR_ACTIVE_BIT; - - if (irq->hw) { - val |= GICH_LR_HW; - val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; - /* - * Never set pending+active on a HW interrupt, as the - * pending state is kept at the physical distributor - * level. - */ - if (irq->active && irq_is_pending(irq)) - val &= ~GICH_LR_PENDING_BIT; - } else { - if (irq->config == VGIC_CONFIG_LEVEL) - val |= GICH_LR_EOI; - } - /* * Level-triggered mapped IRQs are special because we only observe * rising edges as input to the VGIC. We therefore lower the line diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index e17f61a434fa..8195f52ae6f0 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -97,12 +97,9 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) /* * Clear soft pending state when level irqs have been acked. - * Always regenerate the pending state. */ - if (irq->config == VGIC_CONFIG_LEVEL) { - if (!(val & ICH_LR_PENDING_BIT)) - irq->pending_latch = false; - } + if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) + irq->pending_latch = false; /* * Level-triggered mapped IRQs are special because we only @@ -136,8 +133,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { u32 model = vcpu->kvm->arch.vgic.vgic_model; u64 val = irq->intid; + bool allow_pending = true; + + if (irq->active) + val |= ICH_LR_ACTIVE_BIT; + + if (irq->hw) { + val |= ICH_LR_HW; + val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= ICH_LR_EOI; - if (irq_is_pending(irq)) { + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { val |= ICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) @@ -155,24 +179,6 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->active) - val |= ICH_LR_ACTIVE_BIT; - - if (irq->hw) { - val |= ICH_LR_HW; - val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; - /* - * Never set pending+active on a HW interrupt, as the - * pending state is kept at the physical distributor - * level. - */ - if (irq->active && irq_is_pending(irq)) - val &= ~ICH_LR_PENDING_BIT; - } else { - if (irq->config == VGIC_CONFIG_LEVEL) - val |= ICH_LR_EOI; - } - /* * Level-triggered mapped IRQs are special because we only observe * rising edges as input to the VGIC. We therefore lower the line -- cgit v1.2.3 From 7d8b44c54e0c7c8f688e3a07f17e6083f849f01f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Mar 2018 14:57:09 +0000 Subject: KVM: arm/arm64: vgic-its: Fix potential overrun in vgic_copy_lpi_list vgic_copy_lpi_list() parses the LPI list and picks LPIs targeting a given vcpu. We allocate the array containing the intids before taking the lpi_list_lock, which means we can have an array size that is not equal to the number of LPIs. 
This is particularly obvious when looking at the path coming from vgic_enable_lpis, which is not a command, and thus can run in parallel with commands:

    vcpu 0:                                  vcpu 1:
    vgic_enable_lpis
      its_sync_lpi_pending_table
        vgic_copy_lpi_list
          intids = kmalloc_array(irq_count)
                                             MAPI(lpi targeting vcpu 0)
          list_for_each_entry(lpi_list_head)
            intids[i++] = irq->intid;

At that stage, we will happily overrun the intids array. Boo. An easy fix is to break once the array is full. The MAPI command will update the config anyway, and we won't miss a thing. We also make sure that lpi_list_count is read exactly once, so that further updates of that value will not affect the array bound check. Cc: stable@vger.kernel.org Fixes: ccb1d791ab9e ("KVM: arm64: vgic-its: Fix pending table sync") Reviewed-by: Andre Przywara Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-its.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 465095355666..a8f07243aa9f 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -316,21 +316,24 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_irq *irq; u32 *intids; - int irq_count = dist->lpi_list_count, i = 0; + int irq_count, i = 0; /* - * We use the current value of the list length, which may change - * after the kmalloc. We don't care, because the guest shouldn't - * change anything while the command handling is still running, - * and in the worst case we would miss a new IRQ, which one wouldn't - * expect to be covered by this command anyway. + * There is an obvious race between allocating the array and LPIs + * being mapped/unmapped. If we ended up here as a result of a + * command, we're safe (locks are held, preventing another + * command). If coming from another path (such as enabling LPIs), + * we must be careful not to overrun the array. */ + irq_count = READ_ONCE(dist->lpi_list_count); intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); if (!intids) return -ENOMEM; spin_lock(&dist->lpi_list_lock); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (i == irq_count) + break; /* We don't need to "get" the IRQ, as we hold the list lock. */ if (irq->target_vcpu != vcpu) continue; -- cgit v1.2.3
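The fixed pattern generalises beyond the vgic: snapshot the count once, allocate from the snapshot, and bound the copy by it. Here is a standalone toy version of that pattern, with types and values made up for the demo:

    #include <stdio.h>
    #include <stdlib.h>

    struct lpi {
        int intid;
        struct lpi *next;
    };

    /*
     * Size the array from a single snapshot of the count, then never write
     * past that snapshot, even if the list grew while we were allocating.
     */
    static int *copy_lpi_list(const struct lpi *head, int snapshot_count)
    {
        int *intids = malloc(sizeof(*intids) * (size_t)snapshot_count);
        int i = 0;

        if (!intids)
            return NULL;
        for (const struct lpi *p = head; p; p = p->next) {
            if (i == snapshot_count)
                break; /* array full: stop instead of overrunning */
            intids[i++] = p->intid;
        }
        return intids;
    }

    int main(void)
    {
        struct lpi c = { 8195, NULL }, b = { 8194, &c }, a = { 8193, &b };
        int *ids = copy_lpi_list(&a, 2); /* pretend a third LPI raced in */

        if (ids) {
            printf("%d %d\n", ids[0], ids[1]);
            free(ids);
        }
        return 0;
    }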