diff options
Diffstat (limited to 'virt/kvm')
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 2 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 60 | ||||
-rw-r--r-- | virt/kvm/arm/mmu.c | 20 | ||||
-rw-r--r-- | virt/kvm/arm/psci.c | 1 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-debug.c | 14 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c | 81 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c | 88 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 2 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v4.c | 107 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h | 1 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 663 |
11 files changed, 747 insertions, 292 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 0d9438e9de2a..93bd59b46848 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -788,7 +788,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) vcpu_ptimer(vcpu), TIMER_REG_CTL); case KVM_REG_ARM_PTIMER_CNT: return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CNT); + vcpu_ptimer(vcpu), TIMER_REG_CNT); case KVM_REG_ARM_PTIMER_CVAL: return kvm_arm_timer_read(vcpu, vcpu_ptimer(vcpu), TIMER_REG_CVAL); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index eda7b624eab8..48d0ec44ad77 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -64,12 +64,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } -int kvm_arch_hardware_setup(void) +int kvm_arch_hardware_setup(void *opaque) { return 0; } -int kvm_arch_check_processor_compat(void) +int kvm_arch_check_processor_compat(void *opaque) { return 0; } @@ -625,6 +625,14 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) kvm_update_stolen_time(vcpu); + + if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) { + /* The distributor enable bits were changed */ + preempt_disable(); + vgic_v4_put(vcpu, false); + vgic_v4_load(vcpu); + preempt_enable(); + } } } @@ -1181,55 +1189,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return r; } -/** - * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot - * @kvm: kvm instance - * @log: slot id and address to which we copy the log - * - * Steps 1-4 below provide general overview of dirty page logging. See - * kvm_get_dirty_log_protect() function description for additional details. - * - * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we - * always flush the TLB (step 4) even if previous step failed and the dirty - * bitmap may be corrupt. Regardless of previous outcome the KVM logging API - * does not preclude user space subsequent dirty log read. Flushing TLB ensures - * writes will be marked dirty for next log read. - * - * 1. Take a snapshot of the bit and clear it if needed. - * 2. Write protect the corresponding page. - * 3. Copy the snapshot to the userspace. - * 4. Flush TLB's if needed. - */ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - r = kvm_get_dirty_log_protect(kvm, log, &flush); - - if (flush) - kvm_flush_remote_tlbs(kvm); - - mutex_unlock(&kvm->slots_lock); - return r; } -int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log) +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) { - bool flush = false; - int r; - - mutex_lock(&kvm->slots_lock); - - r = kvm_clear_dirty_log_protect(kvm, log, &flush); - - if (flush) - kvm_flush_remote_tlbs(kvm); - - mutex_unlock(&kvm->slots_lock); - return r; + kvm_flush_remote_tlbs(kvm); } static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 19c961ac4e3c..e3b9ee268823 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1534,8 +1534,13 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) { struct kvm_memslots *slots = kvm_memslots(kvm); struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); - phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; - phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + phys_addr_t start, end; + + if (WARN_ON_ONCE(!memslot)) + return; + + start = memslot->base_gfn << PAGE_SHIFT; + end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); stage2_wp_range(kvm, start, end); @@ -2251,7 +2256,7 @@ out: void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change) { @@ -2349,17 +2354,10 @@ out: return ret; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { } -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages) -{ - return 0; -} - void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { } diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 17e2bdd4b76f..14a162e295a9 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -12,7 +12,6 @@ #include <asm/cputype.h> #include <asm/kvm_emulate.h> -#include <asm/kvm_host.h> #include <kvm/arm_psci.h> #include <kvm/arm_hypercalls.h> diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index cc12fe9b2df3..b13a9e3f99dd 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -178,6 +178,8 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, struct kvm_vcpu *vcpu) { char *type; + bool pending; + if (irq->intid < VGIC_NR_SGIS) type = "SGI"; else if (irq->intid < VGIC_NR_PRIVATE_IRQS) @@ -190,6 +192,16 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS) print_header(s, irq, vcpu); + pending = irq->pending_latch; + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &pending); + WARN_ON_ONCE(err); + } + seq_printf(s, " %s %4d " " %2d " "%d%d%d%d%d%d%d " @@ -201,7 +213,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, "\n", type, irq->intid, (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1, - irq->pending_latch, + pending, irq->line_level, irq->active, irq->enabled, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index ebc218840fc2..e72dcc454247 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -3,9 +3,11 @@ * VGICv3 MMIO handling functions */ +#include <linux/bitfield.h> #include <linux/irqchip/arm-gic-v3.h> #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <linux/interrupt.h> #include <kvm/iodev.h> #include <kvm/arm_vgic.h> @@ -69,6 +71,8 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, if (vgic->enabled) value |= GICD_CTLR_ENABLE_SS_G1; value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; + if (vgic->nassgireq) + value |= GICD_CTLR_nASSGIreq; break; case GICD_TYPER: value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS; @@ -80,6 +84,10 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; } break; + case GICD_TYPER2: + if (kvm_vgic_global_state.has_gicv4_1) + value = GICD_TYPER2_nASSGIcap; + break; case GICD_IIDR: value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) | @@ -97,17 +105,46 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, unsigned long val) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - bool was_enabled = dist->enabled; switch (addr & 0x0c) { - case GICD_CTLR: + case GICD_CTLR: { + bool was_enabled, is_hwsgi; + + mutex_lock(&vcpu->kvm->lock); + + was_enabled = dist->enabled; + is_hwsgi = dist->nassgireq; + dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; - if (!was_enabled && dist->enabled) + /* Not a GICv4.1? No HW SGIs */ + if (!kvm_vgic_global_state.has_gicv4_1) + val &= ~GICD_CTLR_nASSGIreq; + + /* Dist stays enabled? nASSGIreq is RO */ + if (was_enabled && dist->enabled) { + val &= ~GICD_CTLR_nASSGIreq; + val |= FIELD_PREP(GICD_CTLR_nASSGIreq, is_hwsgi); + } + + /* Switching HW SGIs? */ + dist->nassgireq = val & GICD_CTLR_nASSGIreq; + if (is_hwsgi != dist->nassgireq) + vgic_v4_configure_vsgis(vcpu->kvm); + + if (kvm_vgic_global_state.has_gicv4_1 && + was_enabled != dist->enabled) + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4); + else if (!was_enabled && dist->enabled) vgic_kick_vcpus(vcpu->kvm); + + mutex_unlock(&vcpu->kvm->lock); break; + } case GICD_TYPER: + case GICD_TYPER2: case GICD_IIDR: + /* This is at best for documentation purposes... */ return; } } @@ -116,10 +153,22 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + switch (addr & 0x0c) { + case GICD_TYPER2: case GICD_IIDR: if (val != vgic_mmio_read_v3_misc(vcpu, addr, len)) return -EINVAL; + return 0; + case GICD_CTLR: + /* Not a GICv4.1? No HW SGIs */ + if (!kvm_vgic_global_state.has_gicv4_1) + val &= ~GICD_CTLR_nASSGIreq; + + dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; + dist->nassgireq = val & GICD_CTLR_nASSGIreq; + return 0; } vgic_mmio_write_v3_misc(vcpu, addr, len, val); @@ -257,8 +306,18 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, */ for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + bool state = irq->pending_latch; - if (irq->pending_latch) + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &state); + WARN_ON(err); + } + + if (state) value |= (1U << i); vgic_put_irq(vcpu->kvm, irq); @@ -942,8 +1001,18 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) * generate interrupts of either group. */ if (!irq->group || allow_group1) { - irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + if (!irq->hw) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + /* HW SGI? Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } } else { raw_spin_unlock_irqrestore(&irq->irq_lock, flags); } diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 97fb2a40e6ba..2199302597fa 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -5,6 +5,8 @@ #include <linux/bitops.h> #include <linux/bsearch.h> +#include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/kvm.h> #include <linux/kvm_host.h> #include <kvm/iodev.h> @@ -59,6 +61,11 @@ unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, return value; } +static void vgic_update_vsgi(struct vgic_irq *irq) +{ + WARN_ON(its_prop_update_vsgi(irq->host_irq, irq->priority, irq->group)); +} + void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { @@ -71,7 +78,12 @@ void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->group = !!(val & BIT(i)); - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + vgic_update_vsgi(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } else { + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } vgic_put_irq(vcpu->kvm, irq); } @@ -113,7 +125,21 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (vgic_irq_is_mapped_level(irq)) { + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (!irq->enabled) { + struct irq_data *data; + + irq->enabled = true; + data = &irq_to_desc(irq->host_irq)->irq_data; + while (irqd_irq_disabled(data)) + enable_irq(irq->host_irq); + } + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } else if (vgic_irq_is_mapped_level(irq)) { bool was_high = irq->line_level; /* @@ -148,6 +174,8 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled) + disable_irq_nosync(irq->host_irq); irq->enabled = false; @@ -167,10 +195,22 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); unsigned long flags; + bool val; raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (irq_is_pending(irq)) - value |= (1U << i); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + val = false; + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &val); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + } else { + val = irq_is_pending(irq); + } + + value |= ((u32)val << i); raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); @@ -215,6 +255,21 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, } raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* HW SGI? Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } + if (irq->hw) vgic_hw_irq_spending(vcpu, irq, is_uaccess); else @@ -269,6 +324,20 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* HW SGI? Ask the GIC to clear its pending bit */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + false); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } + if (irq->hw) vgic_hw_irq_cpending(vcpu, irq, is_uaccess); else @@ -318,8 +387,15 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (irq->hw) { + if (irq->hw && !vgic_irq_is_sgi(irq->intid)) { vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu); + } else if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* + * GICv4.1 VSGI feature doesn't track an active state, + * so let's not kid ourselves, there is nothing we can + * do here. + */ + irq->active = false; } else { u32 model = vcpu->kvm->arch.vgic.vgic_model; u8 active_source; @@ -493,6 +569,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, raw_spin_lock_irqsave(&irq->irq_lock, flags); /* Narrow the priority range to what we actually support */ irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) + vgic_update_vsgi(irq); raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 1bc09b523486..2c9fc13e2c59 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -540,6 +540,8 @@ int vgic_v3_map_resources(struct kvm *kvm) goto out; } + if (kvm_vgic_global_state.has_gicv4_1) + vgic_v4_configure_vsgis(kvm); dist->ready = true; out: diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 1eb0f8c76219..27ac833e5ec7 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -97,6 +97,104 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) return IRQ_HANDLED; } +static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq) +{ + vpe->sgi_config[irq->intid].enabled = irq->enabled; + vpe->sgi_config[irq->intid].group = irq->group; + vpe->sgi_config[irq->intid].priority = irq->priority; +} + +static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + int i; + + /* + * With GICv4.1, every virtual SGI can be directly injected. So + * let's pretend that they are HW interrupts, tied to a host + * IRQ. The SGI code will do its magic. + */ + for (i = 0; i < VGIC_NR_SGIS; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct irq_desc *desc; + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw) + goto unlock; + + irq->hw = true; + irq->host_irq = irq_find_mapping(vpe->sgi_domain, i); + + /* Transfer the full irq state to the vPE */ + vgic_v4_sync_sgi_config(vpe, irq); + desc = irq_to_desc(irq->host_irq); + ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc), + false); + if (!WARN_ON(ret)) { + /* Transfer pending state */ + ret = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + irq->pending_latch); + WARN_ON(ret); + irq->pending_latch = false; + } + unlock: + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) +{ + int i; + + for (i = 0; i < VGIC_NR_SGIS; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct irq_desc *desc; + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!irq->hw) + goto unlock; + + irq->hw = false; + ret = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &irq->pending_latch); + WARN_ON(ret); + + desc = irq_to_desc(irq->host_irq); + irq_domain_deactivate_irq(irq_desc_get_irq_data(desc)); + unlock: + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +/* Must be called with the kvm lock held */ +void vgic_v4_configure_vsgis(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i; + + kvm_arm_halt_guest(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (dist->nassgireq) + vgic_v4_enable_vsgis(vcpu); + else + vgic_v4_disable_vsgis(vcpu); + } + + kvm_arm_resume_guest(kvm); +} + /** * vgic_v4_init - Initialize the GICv4 data structures * @kvm: Pointer to the VM being initialized @@ -141,6 +239,7 @@ int vgic_v4_init(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) { int irq = dist->its_vm.vpes[i]->irq; + unsigned long irq_flags = DB_IRQ_FLAGS; /* * Don't automatically enable the doorbell, as we're @@ -148,8 +247,14 @@ int vgic_v4_init(struct kvm *kvm) * blocked. Also disable the lazy disabling, as the * doorbell could kick us out of the guest too * early... + * + * On GICv4.1, the doorbell is managed in HW and must + * be left enabled. */ - irq_set_status_flags(irq, DB_IRQ_FLAGS); + if (kvm_vgic_global_state.has_gicv4_1) + irq_flags &= ~IRQ_NOAUTOEN; + irq_set_status_flags(irq, irq_flags); + ret = request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu); if (ret) { diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index c7fefd6b1c80..769e4802645e 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -316,5 +316,6 @@ void vgic_its_invalidate_cache(struct kvm *kvm); bool vgic_supports_direct_msis(struct kvm *kvm); int vgic_v4_init(struct kvm *kvm); void vgic_v4_teardown(struct kvm *kvm); +void vgic_v4_configure_vsgis(struct kvm *kvm); #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 70f03ce0e5c1..74bdb7bf3295 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -149,8 +149,6 @@ static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); __visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); -static bool largepages_enabled = true; - #define KVM_EVENT_CREATE_VM 0 #define KVM_EVENT_DESTROY_VM 1 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); @@ -566,7 +564,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) return NULL; for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) - slots->id_to_index[i] = slots->memslots[i].id = i; + slots->id_to_index[i] = -1; return slots; } @@ -580,18 +578,14 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) memslot->dirty_bitmap = NULL; } -/* - * Free any memory in @free but not in @dont. - */ -static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) +static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { - if (!dont || free->dirty_bitmap != dont->dirty_bitmap) - kvm_destroy_dirty_bitmap(free); + kvm_destroy_dirty_bitmap(slot); - kvm_arch_free_memslot(kvm, free, dont); + kvm_arch_free_memslot(kvm, slot); - free->npages = 0; + slot->flags = 0; + slot->npages = 0; } static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) @@ -602,7 +596,7 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) return; kvm_for_each_memslot(memslot, slots) - kvm_free_memslot(kvm, memslot, NULL); + kvm_free_memslot(kvm, memslot); kvfree(slots); } @@ -860,9 +854,9 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) /* * Allocation size is twice as large as the actual dirty bitmap size. - * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed. + * See kvm_vm_ioctl_get_dirty_log() why this is needed. */ -static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) +static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot) { unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); @@ -874,63 +868,165 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) } /* - * Insert memslot and re-sort memslots based on their GFN, - * so binary search could be used to lookup GFN. - * Sorting algorithm takes advantage of having initially - * sorted array and known changed memslot position. + * Delete a memslot by decrementing the number of used slots and shifting all + * other entries in the array forward one spot. */ -static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new, - enum kvm_mr_change change) +static inline void kvm_memslot_delete(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot) { - int id = new->id; - int i = slots->id_to_index[id]; struct kvm_memory_slot *mslots = slots->memslots; + int i; - WARN_ON(mslots[i].id != id); - switch (change) { - case KVM_MR_CREATE: - slots->used_slots++; - WARN_ON(mslots[i].npages || !new->npages); - break; - case KVM_MR_DELETE: - slots->used_slots--; - WARN_ON(new->npages || !mslots[i].npages); - break; - default: - break; - } + if (WARN_ON(slots->id_to_index[memslot->id] == -1)) + return; - while (i < KVM_MEM_SLOTS_NUM - 1 && - new->base_gfn <= mslots[i + 1].base_gfn) { - if (!mslots[i + 1].npages) - break; + slots->used_slots--; + + if (atomic_read(&slots->lru_slot) >= slots->used_slots) + atomic_set(&slots->lru_slot, 0); + + for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) { mslots[i] = mslots[i + 1]; slots->id_to_index[mslots[i].id] = i; - i++; } + mslots[i] = *memslot; + slots->id_to_index[memslot->id] = -1; +} + +/* + * "Insert" a new memslot by incrementing the number of used slots. Returns + * the new slot's initial index into the memslots array. + */ +static inline int kvm_memslot_insert_back(struct kvm_memslots *slots) +{ + return slots->used_slots++; +} + +/* + * Move a changed memslot backwards in the array by shifting existing slots + * with a higher GFN toward the front of the array. Note, the changed memslot + * itself is not preserved in the array, i.e. not swapped at this time, only + * its new index into the array is tracked. Returns the changed memslot's + * current index into the memslots array. + */ +static inline int kvm_memslot_move_backward(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot) +{ + struct kvm_memory_slot *mslots = slots->memslots; + int i; + + if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) || + WARN_ON_ONCE(!slots->used_slots)) + return -1; /* - * The ">=" is needed when creating a slot with base_gfn == 0, - * so that it moves before all those with base_gfn == npages == 0. - * - * On the other hand, if new->npages is zero, the above loop has - * already left i pointing to the beginning of the empty part of - * mslots, and the ">=" would move the hole backwards in this - * case---which is wrong. So skip the loop when deleting a slot. + * Move the target memslot backward in the array by shifting existing + * memslots with a higher GFN (than the target memslot) towards the + * front of the array. */ - if (new->npages) { - while (i > 0 && - new->base_gfn >= mslots[i - 1].base_gfn) { - mslots[i] = mslots[i - 1]; - slots->id_to_index[mslots[i].id] = i; - i--; - } - } else - WARN_ON_ONCE(i != slots->used_slots); + for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) { + if (memslot->base_gfn > mslots[i + 1].base_gfn) + break; + + WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn); + + /* Shift the next memslot forward one and update its index. */ + mslots[i] = mslots[i + 1]; + slots->id_to_index[mslots[i].id] = i; + } + return i; +} + +/* + * Move a changed memslot forwards in the array by shifting existing slots with + * a lower GFN toward the back of the array. Note, the changed memslot itself + * is not preserved in the array, i.e. not swapped at this time, only its new + * index into the array is tracked. Returns the changed memslot's final index + * into the memslots array. + */ +static inline int kvm_memslot_move_forward(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot, + int start) +{ + struct kvm_memory_slot *mslots = slots->memslots; + int i; + + for (i = start; i > 0; i--) { + if (memslot->base_gfn < mslots[i - 1].base_gfn) + break; + + WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn); + + /* Shift the next memslot back one and update its index. */ + mslots[i] = mslots[i - 1]; + slots->id_to_index[mslots[i].id] = i; + } + return i; +} + +/* + * Re-sort memslots based on their GFN to account for an added, deleted, or + * moved memslot. Sorting memslots by GFN allows using a binary search during + * memslot lookup. + * + * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! I.e. the entry + * at memslots[0] has the highest GFN. + * + * The sorting algorithm takes advantage of having initially sorted memslots + * and knowing the position of the changed memslot. Sorting is also optimized + * by not swapping the updated memslot and instead only shifting other memslots + * and tracking the new index for the update memslot. Only once its final + * index is known is the updated memslot copied into its position in the array. + * + * - When deleting a memslot, the deleted memslot simply needs to be moved to + * the end of the array. + * + * - When creating a memslot, the algorithm "inserts" the new memslot at the + * end of the array and then it forward to its correct location. + * + * - When moving a memslot, the algorithm first moves the updated memslot + * backward to handle the scenario where the memslot's GFN was changed to a + * lower value. update_memslots() then falls through and runs the same flow + * as creating a memslot to move the memslot forward to handle the scenario + * where its GFN was changed to a higher value. + * + * Note, slots are sorted from highest->lowest instead of lowest->highest for + * historical reasons. Originally, invalid memslots where denoted by having + * GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots + * to the end of the array. The current algorithm uses dedicated logic to + * delete a memslot and thus does not rely on invalid memslots having GFN=0. + * + * The other historical motiviation for highest->lowest was to improve the + * performance of memslot lookup. KVM originally used a linear search starting + * at memslots[0]. On x86, the largest memslot usually has one of the highest, + * if not *the* highest, GFN, as the bulk of the guest's RAM is located in a + * single memslot above the 4gb boundary. As the largest memslot is also the + * most likely to be referenced, sorting it to the front of the array was + * advantageous. The current binary search starts from the middle of the array + * and uses an LRU pointer to improve performance for all memslots and GFNs. + */ +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *memslot, + enum kvm_mr_change change) +{ + int i; + + if (change == KVM_MR_DELETE) { + kvm_memslot_delete(slots, memslot); + } else { + if (change == KVM_MR_CREATE) + i = kvm_memslot_insert_back(slots); + else + i = kvm_memslot_move_backward(slots, memslot); + i = kvm_memslot_move_forward(slots, memslot, i); - mslots[i] = *new; - slots->id_to_index[mslots[i].id] = i; + /* + * Copy the memslot to its new position in memslots and update + * its index accordingly. + */ + slots->memslots[i] = *memslot; + slots->id_to_index[memslot->id] = i; + } } static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) @@ -984,6 +1080,112 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, } /* + * Note, at a minimum, the current number of used slots must be allocated, even + * when deleting a memslot, as we need a complete duplicate of the memslots for + * use when invalidating a memslot prior to deleting/moving the memslot. + */ +static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old, + enum kvm_mr_change change) +{ + struct kvm_memslots *slots; + size_t old_size, new_size; + + old_size = sizeof(struct kvm_memslots) + + (sizeof(struct kvm_memory_slot) * old->used_slots); + + if (change == KVM_MR_CREATE) + new_size = old_size + sizeof(struct kvm_memory_slot); + else + new_size = old_size; + + slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT); + if (likely(slots)) + memcpy(slots, old, old_size); + + return slots; +} + +static int kvm_set_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot *old, + struct kvm_memory_slot *new, int as_id, + enum kvm_mr_change change) +{ + struct kvm_memory_slot *slot; + struct kvm_memslots *slots; + int r; + + slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change); + if (!slots) + return -ENOMEM; + + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { + /* + * Note, the INVALID flag needs to be in the appropriate entry + * in the freshly allocated memslots, not in @old or @new. + */ + slot = id_to_memslot(slots, old->id); + slot->flags |= KVM_MEMSLOT_INVALID; + + /* + * We can re-use the old memslots, the only difference from the + * newly installed memslots is the invalid flag, which will get + * dropped by update_memslots anyway. We'll also revert to the + * old memslots if preparing the new memory region fails. + */ + slots = install_new_memslots(kvm, as_id, slots); + + /* From this point no new shadow pages pointing to a deleted, + * or moved, memslot will be created. + * + * validation of sp->gfn happens in: + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) + * - kvm_is_visible_gfn (mmu_check_root) + */ + kvm_arch_flush_shadow_memslot(kvm, slot); + } + + r = kvm_arch_prepare_memory_region(kvm, new, mem, change); + if (r) + goto out_slots; + + update_memslots(slots, new, change); + slots = install_new_memslots(kvm, as_id, slots); + + kvm_arch_commit_memory_region(kvm, mem, old, new, change); + + kvfree(slots); + return 0; + +out_slots: + if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) + slots = install_new_memslots(kvm, as_id, slots); + kvfree(slots); + return r; +} + +static int kvm_delete_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot *old, int as_id) +{ + struct kvm_memory_slot new; + int r; + + if (!old->npages) + return -EINVAL; + + memset(&new, 0, sizeof(new)); + new.id = old->id; + + r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE); + if (r) + return r; + + kvm_free_memslot(kvm, old); + return 0; +} + +/* * Allocate some memory and give it an address in the guest physical address * space. * @@ -994,162 +1196,118 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, int __kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem) { - int r; - gfn_t base_gfn; - unsigned long npages; - struct kvm_memory_slot *slot; struct kvm_memory_slot old, new; - struct kvm_memslots *slots = NULL, *old_memslots; - int as_id, id; + struct kvm_memory_slot *tmp; enum kvm_mr_change change; + int as_id, id; + int r; r = check_memory_region_flags(mem); if (r) - goto out; + return r; - r = -EINVAL; as_id = mem->slot >> 16; id = (u16)mem->slot; /* General sanity checks */ if (mem->memory_size & (PAGE_SIZE - 1)) - goto out; + return -EINVAL; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) - goto out; + return -EINVAL; /* We can read the guest memory with __xxx_user() later on. */ if ((id < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok((void __user *)(unsigned long)mem->userspace_addr, mem->memory_size))) - goto out; + return -EINVAL; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM) - goto out; + return -EINVAL; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) - goto out; - - slot = id_to_memslot(__kvm_memslots(kvm, as_id), id); - base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; - npages = mem->memory_size >> PAGE_SHIFT; + return -EINVAL; - if (npages > KVM_MEM_MAX_NR_PAGES) - goto out; + /* + * Make a full copy of the old memslot, the pointer will become stale + * when the memslots are re-sorted by update_memslots(), and the old + * memslot needs to be referenced after calling update_memslots(), e.g. + * to free its resources and for arch specific behavior. + */ + tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id); + if (tmp) { + old = *tmp; + tmp = NULL; + } else { + memset(&old, 0, sizeof(old)); + old.id = id; + } - new = old = *slot; + if (!mem->memory_size) + return kvm_delete_memslot(kvm, mem, &old, as_id); new.id = id; - new.base_gfn = base_gfn; - new.npages = npages; + new.base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; + new.npages = mem->memory_size >> PAGE_SHIFT; new.flags = mem->flags; + new.userspace_addr = mem->userspace_addr; - if (npages) { - if (!old.npages) - change = KVM_MR_CREATE; - else { /* Modify an existing slot. */ - if ((mem->userspace_addr != old.userspace_addr) || - (npages != old.npages) || - ((new.flags ^ old.flags) & KVM_MEM_READONLY)) - goto out; + if (new.npages > KVM_MEM_MAX_NR_PAGES) + return -EINVAL; - if (base_gfn != old.base_gfn) - change = KVM_MR_MOVE; - else if (new.flags != old.flags) - change = KVM_MR_FLAGS_ONLY; - else { /* Nothing to change. */ - r = 0; - goto out; - } - } - } else { - if (!old.npages) - goto out; + if (!old.npages) { + change = KVM_MR_CREATE; + new.dirty_bitmap = NULL; + memset(&new.arch, 0, sizeof(new.arch)); + } else { /* Modify an existing slot. */ + if ((new.userspace_addr != old.userspace_addr) || + (new.npages != old.npages) || + ((new.flags ^ old.flags) & KVM_MEM_READONLY)) + return -EINVAL; - change = KVM_MR_DELETE; - new.base_gfn = 0; - new.flags = 0; + if (new.base_gfn != old.base_gfn) + change = KVM_MR_MOVE; + else if (new.flags != old.flags) + change = KVM_MR_FLAGS_ONLY; + else /* Nothing to change. */ + return 0; + + /* Copy dirty_bitmap and arch from the current memslot. */ + new.dirty_bitmap = old.dirty_bitmap; + memcpy(&new.arch, &old.arch, sizeof(new.arch)); } if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { /* Check for overlaps */ - r = -EEXIST; - kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { - if (slot->id == id) + kvm_for_each_memslot(tmp, __kvm_memslots(kvm, as_id)) { + if (tmp->id == id) continue; - if (!((base_gfn + npages <= slot->base_gfn) || - (base_gfn >= slot->base_gfn + slot->npages))) - goto out; + if (!((new.base_gfn + new.npages <= tmp->base_gfn) || + (new.base_gfn >= tmp->base_gfn + tmp->npages))) + return -EEXIST; } } - /* Free page dirty bitmap if unneeded */ + /* Allocate/free page dirty bitmap as needed */ if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) new.dirty_bitmap = NULL; + else if (!new.dirty_bitmap) { + r = kvm_alloc_dirty_bitmap(&new); + if (r) + return r; - r = -ENOMEM; - if (change == KVM_MR_CREATE) { - new.userspace_addr = mem->userspace_addr; - - if (kvm_arch_create_memslot(kvm, &new, npages)) - goto out_free; - } - - /* Allocate page dirty bitmap if needed */ - if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { - if (kvm_create_dirty_bitmap(&new) < 0) - goto out_free; - } - - slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT); - if (!slots) - goto out_free; - memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); - - if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { - slot = id_to_memslot(slots, id); - slot->flags |= KVM_MEMSLOT_INVALID; - - old_memslots = install_new_memslots(kvm, as_id, slots); - - /* From this point no new shadow pages pointing to a deleted, - * or moved, memslot will be created. - * - * validation of sp->gfn happens in: - * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) - * - kvm_is_visible_gfn (mmu_check_root) - */ - kvm_arch_flush_shadow_memslot(kvm, slot); - - /* - * We can re-use the old_memslots from above, the only difference - * from the currently installed memslots is the invalid flag. This - * will get overwritten by update_memslots anyway. - */ - slots = old_memslots; + if (kvm_dirty_log_manual_protect_and_init_set(kvm)) + bitmap_set(new.dirty_bitmap, 0, new.npages); } - r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); + r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change); if (r) - goto out_slots; - - /* actual memory is freed via old in kvm_free_memslot below */ - if (change == KVM_MR_DELETE) { - new.dirty_bitmap = NULL; - memset(&new.arch, 0, sizeof(new.arch)); - } - - update_memslots(slots, &new, change); - old_memslots = install_new_memslots(kvm, as_id, slots); - - kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); + goto out_bitmap; - kvm_free_memslot(kvm, &old, &new); - kvfree(old_memslots); + if (old.dirty_bitmap && !new.dirty_bitmap) + kvm_destroy_dirty_bitmap(&old); return 0; -out_slots: - kvfree(slots); -out_free: - kvm_free_memslot(kvm, &new, &old); -out: +out_bitmap: + if (new.dirty_bitmap && !old.dirty_bitmap) + kvm_destroy_dirty_bitmap(&new); return r; } EXPORT_SYMBOL_GPL(__kvm_set_memory_region); @@ -1175,31 +1333,43 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, return kvm_set_memory_region(kvm, mem); } -int kvm_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log, int *is_dirty) +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +/** + * kvm_get_dirty_log - get a snapshot of dirty pages + * @kvm: pointer to kvm instance + * @log: slot id and address to which we copy the log + * @is_dirty: set to '1' if any dirty pages were found + * @memslot: set to the associated memslot, always valid on success + */ +int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, + int *is_dirty, struct kvm_memory_slot **memslot) { struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; int i, as_id, id; unsigned long n; unsigned long any = 0; + *memslot = NULL; + *is_dirty = 0; + as_id = log->slot >> 16; id = (u16)log->slot; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) return -EINVAL; slots = __kvm_memslots(kvm, as_id); - memslot = id_to_memslot(slots, id); - if (!memslot->dirty_bitmap) + *memslot = id_to_memslot(slots, id); + if (!(*memslot) || !(*memslot)->dirty_bitmap) return -ENOENT; - n = kvm_dirty_bitmap_bytes(memslot); + kvm_arch_sync_dirty_log(kvm, *memslot); + + n = kvm_dirty_bitmap_bytes(*memslot); for (i = 0; !any && i < n/sizeof(long); ++i) - any = memslot->dirty_bitmap[i]; + any = (*memslot)->dirty_bitmap[i]; - if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) + if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n)) return -EFAULT; if (any) @@ -1208,13 +1378,12 @@ int kvm_get_dirty_log(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_get_dirty_log); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +#else /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ /** * kvm_get_dirty_log_protect - get a snapshot of dirty pages * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address to which we copy the log - * @flush: true if TLB flush is needed by caller * * We need to keep it in mind that VCPU threads can write to the bitmap * concurrently. So, to avoid losing track of dirty pages we keep the @@ -1231,8 +1400,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log); * exiting to userspace will be logged for the next call. * */ -int kvm_get_dirty_log_protect(struct kvm *kvm, - struct kvm_dirty_log *log, bool *flush) +static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -1240,6 +1408,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, unsigned long n; unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_buffer; + bool flush; as_id = log->slot >> 16; id = (u16)log->slot; @@ -1248,13 +1417,15 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, slots = __kvm_memslots(kvm, as_id); memslot = id_to_memslot(slots, id); + if (!memslot || !memslot->dirty_bitmap) + return -ENOENT; dirty_bitmap = memslot->dirty_bitmap; - if (!dirty_bitmap) - return -ENOENT; + + kvm_arch_sync_dirty_log(kvm, memslot); n = kvm_dirty_bitmap_bytes(memslot); - *flush = false; + flush = false; if (kvm->manual_dirty_log_protect) { /* * Unlike kvm_get_dirty_log, we always return false in *flush, @@ -1277,7 +1448,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, if (!dirty_bitmap[i]) continue; - *flush = true; + flush = true; mask = xchg(&dirty_bitmap[i], 0); dirty_bitmap_buffer[i] = mask; @@ -1288,21 +1459,55 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, spin_unlock(&kvm->mmu_lock); } + if (flush) + kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); + + +/** + * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot + * @kvm: kvm instance + * @log: slot id and address to which we copy the log + * + * Steps 1-4 below provide general overview of dirty page logging. See + * kvm_get_dirty_log_protect() function description for additional details. + * + * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we + * always flush the TLB (step 4) even if previous step failed and the dirty + * bitmap may be corrupt. Regardless of previous outcome the KVM logging API + * does not preclude user space subsequent dirty log read. Flushing TLB ensures + * writes will be marked dirty for next log read. + * + * 1. Take a snapshot of the bit and clear it if needed. + * 2. Write protect the corresponding page. + * 3. Copy the snapshot to the userspace. + * 4. Flush TLB's if needed. + */ +static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + + mutex_lock(&kvm->slots_lock); + + r = kvm_get_dirty_log_protect(kvm, log); + + mutex_unlock(&kvm->slots_lock); + return r; +} /** * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address from which to fetch the bitmap of dirty pages - * @flush: true if TLB flush is needed by caller */ -int kvm_clear_dirty_log_protect(struct kvm *kvm, - struct kvm_clear_dirty_log *log, bool *flush) +static int kvm_clear_dirty_log_protect(struct kvm *kvm, + struct kvm_clear_dirty_log *log) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -1311,6 +1516,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, unsigned long i, n; unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_buffer; + bool flush; as_id = log->slot >> 16; id = (u16)log->slot; @@ -1322,10 +1528,10 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, slots = __kvm_memslots(kvm, as_id); memslot = id_to_memslot(slots, id); + if (!memslot || !memslot->dirty_bitmap) + return -ENOENT; dirty_bitmap = memslot->dirty_bitmap; - if (!dirty_bitmap) - return -ENOENT; n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; @@ -1334,7 +1540,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) return -EINVAL; - *flush = false; + kvm_arch_sync_dirty_log(kvm, memslot); + + flush = false; dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) return -EFAULT; @@ -1357,28 +1565,32 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, * a problem if userspace sets them in log->dirty_bitmap. */ if (mask) { - *flush = true; + flush = true; kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask); } } spin_unlock(&kvm->mmu_lock); + if (flush) + kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + return 0; } -EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect); -#endif -bool kvm_largepages_enabled(void) +static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, + struct kvm_clear_dirty_log *log) { - return largepages_enabled; -} + int r; -void kvm_disable_largepages(void) -{ - largepages_enabled = false; + mutex_lock(&kvm->slots_lock); + + r = kvm_clear_dirty_log_protect(kvm, log); + + mutex_unlock(&kvm->slots_lock); + return r; } -EXPORT_SYMBOL_GPL(kvm_disable_largepages); +#endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { @@ -1754,12 +1966,6 @@ kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); -kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) -{ - return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn); -} -EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); - kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn) { return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn); @@ -3310,9 +3516,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: case KVM_CAP_ENABLE_CAP_VM: -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: -#endif return 1; #ifdef CONFIG_KVM_MMIO case KVM_CAP_COALESCED_MMIO: @@ -3320,6 +3523,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_COALESCED_PIO: return 1; #endif +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: + return KVM_DIRTY_LOG_MANUAL_CAPS; +#endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; @@ -3347,11 +3554,17 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, { switch (cap->cap) { #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: - if (cap->flags || (cap->args[0] & ~1)) + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: { + u64 allowed_options = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE; + + if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE) + allowed_options = KVM_DIRTY_LOG_MANUAL_CAPS; + + if (cap->flags || (cap->args[0] & ~allowed_options)) return -EINVAL; kvm->manual_dirty_log_protect = cap->args[0]; return 0; + } #endif default: return kvm_vm_ioctl_enable_cap(kvm, cap); @@ -4435,14 +4648,22 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) return &kvm_running_vcpu; } -static void check_processor_compat(void *rtn) +struct kvm_cpu_compat_check { + void *opaque; + int *ret; +}; + +static void check_processor_compat(void *data) { - *(int *)rtn = kvm_arch_check_processor_compat(); + struct kvm_cpu_compat_check *c = data; + + *c->ret = kvm_arch_check_processor_compat(c->opaque); } int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module) { + struct kvm_cpu_compat_check c; int r; int cpu; @@ -4466,12 +4687,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, goto out_free_0; } - r = kvm_arch_hardware_setup(); + r = kvm_arch_hardware_setup(opaque); if (r < 0) goto out_free_1; + c.ret = &r; + c.opaque = opaque; for_each_online_cpu(cpu) { - smp_call_function_single(cpu, check_processor_compat, &r, 1); + smp_call_function_single(cpu, check_processor_compat, &c, 1); if (r < 0) goto out_free_2; } |