summaryrefslogtreecommitdiffstats
path: root/virt
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-12 11:34:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-12 11:34:04 -0700
commitb357bf6023a948cf6a9472f07a1b0caac0e4f8e8 (patch)
tree1471a2691cd56e8640cf6ad51e255b54903a164b /virt
parent0725d4e1b8b08a60838db3a6e65c23ea8824a048 (diff)
parent766d3571d8e50d3a73b77043dc632226f9e6b389 (diff)
downloadlinux-b357bf6023a948cf6a9472f07a1b0caac0e4f8e8.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "Small update for KVM: ARM: - lazy context-switching of FPSIMD registers on arm64 - "split" regions for vGIC redistributor s390: - cleanups for nested - clock handling - crypto - storage keys - control register bits x86: - many bugfixes - implement more Hyper-V super powers - implement lapic_timer_advance_ns even when the LAPIC timer is emulated using the processor's VMX preemption timer. - two security-related bugfixes at the top of the branch" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (79 commits) kvm: fix typo in flag name kvm: x86: use correct privilege level for sgdt/sidt/fxsave/fxrstor access KVM: x86: pass kvm_vcpu to kvm_read_guest_virt and kvm_write_guest_virt_system KVM: x86: introduce linear_{read,write}_system kvm: nVMX: Enforce cpl=0 for VMX instructions kvm: nVMX: Add support for "VMWRITE to any supported field" kvm: nVMX: Restrict VMX capability MSR changes KVM: VMX: Optimize tscdeadline timer latency KVM: docs: nVMX: Remove known limitations as they do not exist now KVM: docs: mmu: KVM support exposing SLAT to guests kvm: no need to check return value of debugfs_create functions kvm: Make VM ioctl do valloc for some archs kvm: Change return type to vm_fault_t KVM: docs: mmu: Fix link to NPT presentation from KVM Forum 2008 kvm: x86: Amend the KVM_GET_SUPPORTED_CPUID API documentation KVM: x86: hyperv: declare KVM_CAP_HYPERV_TLBFLUSH capability KVM: x86: hyperv: simplistic HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}_EX implementation KVM: x86: hyperv: simplistic HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} implementation KVM: introduce kvm_make_vcpus_request_mask() API KVM: x86: hyperv: do rep check for each hypercall separately ...
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/arm/arm.c32
-rw-r--r--virt/kvm/arm/vgic/vgic-debug.c17
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c100
-rw-r--r--virt/kvm/arm/vgic/vgic-kvm-device.c53
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c112
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c99
-rw-r--r--virt/kvm/arm/vgic/vgic.h46
-rw-r--r--virt/kvm/kvm_main.c83
9 files changed, 404 insertions, 141 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index cca7e065a075..72143cfaf6ec 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -54,3 +54,6 @@ config HAVE_KVM_IRQ_BYPASS
config HAVE_KVM_VCPU_ASYNC_IOCTL
bool
+
+config HAVE_KVM_VCPU_RUN_PID_CHANGE
+ bool
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 2d9b4795edb2..04e554cae3a2 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -16,6 +16,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
+#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
@@ -41,6 +42,7 @@
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
@@ -163,7 +165,7 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
return 0;
}
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
return VM_FAULT_SIGBUS;
}
@@ -249,6 +251,21 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
+struct kvm *kvm_arch_alloc_vm(void)
+{
+ if (!has_vhe())
+ return kzalloc(sizeof(struct kvm), GFP_KERNEL);
+
+ return vzalloc(sizeof(struct kvm));
+}
+
+void kvm_arch_free_vm(struct kvm *kvm)
+{
+ if (!has_vhe())
+ kfree(kvm);
+ else
+ vfree(kvm);
+}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
@@ -290,7 +307,6 @@ out:
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
- kvm_vgic_vcpu_early_init(vcpu);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
@@ -363,10 +379,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
kvm_vcpu_load_sysregs(vcpu);
+ kvm_arch_vcpu_load_fp(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ kvm_arch_vcpu_put_fp(vcpu);
kvm_vcpu_put_sysregs(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
@@ -678,9 +696,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
preempt_disable();
- /* Flush FP/SIMD state that can't survive guest entry/exit */
- kvm_fpsimd_flush_cpu_state();
-
kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
@@ -778,6 +793,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (static_branch_unlikely(&userspace_irqchip_in_use))
kvm_timer_sync_hwstate(vcpu);
+ kvm_arch_vcpu_ctxsync_fp(vcpu);
+
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
@@ -1574,6 +1591,11 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
+ if (!kvm_arch_check_sve_has_vhe()) {
+ kvm_pr_unimpl("SVE system without VHE unsupported. Broken cpu?");
+ return -ENODEV;
+ }
+
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
if (ret < 0) {
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c
index 4ffc0b5e6105..c589d4c2b478 100644
--- a/virt/kvm/arm/vgic/vgic-debug.c
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -264,21 +264,12 @@ static const struct file_operations vgic_debug_fops = {
.release = seq_release
};
-int vgic_debug_init(struct kvm *kvm)
+void vgic_debug_init(struct kvm *kvm)
{
- if (!kvm->debugfs_dentry)
- return -ENOENT;
-
- if (!debugfs_create_file("vgic-state", 0444,
- kvm->debugfs_dentry,
- kvm,
- &vgic_debug_fops))
- return -ENOMEM;
-
- return 0;
+ debugfs_create_file("vgic-state", 0444, kvm->debugfs_dentry, kvm,
+ &vgic_debug_fops);
}
-int vgic_debug_destroy(struct kvm *kvm)
+void vgic_debug_destroy(struct kvm *kvm)
{
- return 0;
}
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index e07156c30323..2673efce65f3 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -44,7 +44,7 @@
*
* CPU Interface:
*
- * - kvm_vgic_vcpu_early_init(): initialization of static data that
+ * - kvm_vgic_vcpu_init(): initialization of static data that
* doesn't depend on any sizing information or emulation type. No
* allocation is allowed there.
*/
@@ -67,46 +67,6 @@ void kvm_vgic_early_init(struct kvm *kvm)
spin_lock_init(&dist->lpi_list_lock);
}
-/**
- * kvm_vgic_vcpu_early_init() - Initialize static VGIC VCPU data structures
- * @vcpu: The VCPU whose VGIC data structures whould be initialized
- *
- * Only do initialization, but do not actually enable the VGIC CPU interface
- * yet.
- */
-void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- int i;
-
- INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
- spin_lock_init(&vgic_cpu->ap_list_lock);
-
- /*
- * Enable and configure all SGIs to be edge-triggered and
- * configure all PPIs as level-triggered.
- */
- for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
- struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
-
- INIT_LIST_HEAD(&irq->ap_list);
- spin_lock_init(&irq->irq_lock);
- irq->intid = i;
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu;
- irq->targets = 1U << vcpu->vcpu_id;
- kref_init(&irq->refcount);
- if (vgic_irq_is_sgi(i)) {
- /* SGIs */
- irq->enabled = 1;
- irq->config = VGIC_CONFIG_EDGE;
- } else {
- /* PPIs */
- irq->config = VGIC_CONFIG_LEVEL;
- }
- }
-}
-
/* CREATION */
/**
@@ -167,8 +127,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm->arch.vgic.vgic_model = type;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
- kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
- kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
+
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
+ kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+ else
+ INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
out_unlock:
for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
@@ -221,13 +184,50 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
}
/**
- * kvm_vgic_vcpu_init() - Register VCPU-specific KVM iodevs
+ * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
+ * structures and register VCPU-specific KVM iodevs
+ *
* @vcpu: pointer to the VCPU being created and initialized
+ *
+ * Only do initialization, but do not actually enable the
+ * VGIC CPU interface
*/
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
{
- int ret = 0;
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int ret = 0;
+ int i;
+
+ vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF;
+
+ INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ spin_lock_init(&vgic_cpu->ap_list_lock);
+
+ /*
+ * Enable and configure all SGIs to be edge-triggered and
+ * configure all PPIs as level-triggered.
+ */
+ for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
+ struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
+
+ INIT_LIST_HEAD(&irq->ap_list);
+ spin_lock_init(&irq->irq_lock);
+ irq->intid = i;
+ irq->vcpu = NULL;
+ irq->target_vcpu = vcpu;
+ irq->targets = 1U << vcpu->vcpu_id;
+ kref_init(&irq->refcount);
+ if (vgic_irq_is_sgi(i)) {
+ /* SGIs */
+ irq->enabled = 1;
+ irq->config = VGIC_CONFIG_EDGE;
+ } else {
+ /* PPIs */
+ irq->config = VGIC_CONFIG_LEVEL;
+ }
+ }
if (!irqchip_in_kernel(vcpu->kvm))
return 0;
@@ -303,13 +303,23 @@ out:
static void kvm_vgic_dist_destroy(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_redist_region *rdreg, *next;
dist->ready = false;
dist->initialized = false;
kfree(dist->spis);
+ dist->spis = NULL;
dist->nr_spis = 0;
+ if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
+ list_del(&rdreg->list);
+ kfree(rdreg);
+ }
+ INIT_LIST_HEAD(&dist->rd_regions);
+ }
+
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
}
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index 10ae6f394b71..6ada2432e37c 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -66,6 +66,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
int r = 0;
struct vgic_dist *vgic = &kvm->arch.vgic;
phys_addr_t *addr_ptr, alignment;
+ u64 undef_value = VGIC_ADDR_UNDEF;
mutex_lock(&kvm->lock);
switch (type) {
@@ -84,16 +85,61 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
addr_ptr = &vgic->vgic_dist_base;
alignment = SZ_64K;
break;
- case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+ case KVM_VGIC_V3_ADDR_TYPE_REDIST: {
+ struct vgic_redist_region *rdreg;
+
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
if (r)
break;
if (write) {
- r = vgic_v3_set_redist_base(kvm, *addr);
+ r = vgic_v3_set_redist_base(kvm, 0, *addr, 0);
goto out;
}
- addr_ptr = &vgic->vgic_redist_base;
+ rdreg = list_first_entry(&vgic->rd_regions,
+ struct vgic_redist_region, list);
+ if (!rdreg)
+ addr_ptr = &undef_value;
+ else
+ addr_ptr = &rdreg->base;
break;
+ }
+ case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION:
+ {
+ struct vgic_redist_region *rdreg;
+ u8 index;
+
+ r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (r)
+ break;
+
+ index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK;
+
+ if (write) {
+ gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK;
+ u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK)
+ >> KVM_VGIC_V3_RDIST_COUNT_SHIFT;
+ u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK)
+ >> KVM_VGIC_V3_RDIST_FLAGS_SHIFT;
+
+ if (!count || flags)
+ r = -EINVAL;
+ else
+ r = vgic_v3_set_redist_base(kvm, index,
+ base, count);
+ goto out;
+ }
+
+ rdreg = vgic_v3_rdist_region_from_index(kvm, index);
+ if (!rdreg) {
+ r = -ENOENT;
+ goto out;
+ }
+
+ *addr = index;
+ *addr |= rdreg->base;
+ *addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT;
+ goto out;
+ }
default:
r = -ENODEV;
}
@@ -665,6 +711,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
switch (attr->attr) {
case KVM_VGIC_V3_ADDR_TYPE_DIST:
case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+ case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION:
return 0;
}
break;
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 671fe81f8e1d..287784095b5b 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -184,12 +184,17 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
int target_vcpu_id = vcpu->vcpu_id;
+ gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
+ (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
u64 value;
value = (u64)(mpidr & GENMASK(23, 0)) << 32;
value |= ((target_vcpu_id & 0xffff) << 8);
- if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
+
+ if (addr == last_rdist_typer)
value |= GICR_TYPER_LAST;
if (vgic_has_its(vcpu->kvm))
value |= GICR_TYPER_PLPIS;
@@ -580,24 +585,32 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
struct vgic_dist *vgic = &kvm->arch.vgic;
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
+ struct vgic_redist_region *rdreg;
gpa_t rd_base, sgi_base;
int ret;
+ if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
+ return 0;
+
/*
* We may be creating VCPUs before having set the base address for the
* redistributor region, in which case we will come back to this
* function for all VCPUs when the base address is set. Just return
* without doing any work for now.
*/
- if (IS_VGIC_ADDR_UNDEF(vgic->vgic_redist_base))
+ rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions);
+ if (!rdreg)
return 0;
if (!vgic_v3_check_base(kvm))
return -EINVAL;
- rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset;
+ vgic_cpu->rdreg = rdreg;
+
+ rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
sgi_base = rd_base + SZ_64K;
kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
@@ -631,7 +644,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
goto out;
}
- vgic->vgic_redist_free_offset += 2 * SZ_64K;
+ rdreg->free_index++;
out:
mutex_unlock(&kvm->slots_lock);
return ret;
@@ -670,23 +683,96 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
return ret;
}
-int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr)
+/**
+ * vgic_v3_insert_redist_region - Insert a new redistributor region
+ *
+ * Performs various checks before inserting the rdist region in the list.
+ * Those tests depend on whether the size of the rdist region is known
+ * (ie. count != 0). The list is sorted by rdist region index.
+ *
+ * @kvm: kvm handle
+ * @index: redist region index
+ * @base: base of the new rdist region
+ * @count: number of redistributors the region is made of (0 in the old style
+ * single region, whose size is induced from the number of vcpus)
+ *
+ * Return 0 on success, < 0 otherwise
+ */
+static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
+ gpa_t base, uint32_t count)
{
- struct vgic_dist *vgic = &kvm->arch.vgic;
+ struct vgic_dist *d = &kvm->arch.vgic;
+ struct vgic_redist_region *rdreg;
+ struct list_head *rd_regions = &d->rd_regions;
+ size_t size = count * KVM_VGIC_V3_REDIST_SIZE;
int ret;
- /* vgic_check_ioaddr makes sure we don't do this twice */
- ret = vgic_check_ioaddr(kvm, &vgic->vgic_redist_base, addr, SZ_64K);
- if (ret)
- return ret;
+ /* single rdist region already set ?*/
+ if (!count && !list_empty(rd_regions))
+ return -EINVAL;
- vgic->vgic_redist_base = addr;
- if (!vgic_v3_check_base(kvm)) {
- vgic->vgic_redist_base = VGIC_ADDR_UNDEF;
+ /* cross the end of memory ? */
+ if (base + size < base)
return -EINVAL;
+
+ if (list_empty(rd_regions)) {
+ if (index != 0)
+ return -EINVAL;
+ } else {
+ rdreg = list_last_entry(rd_regions,
+ struct vgic_redist_region, list);
+ if (index != rdreg->index + 1)
+ return -EINVAL;
+
+ /* Cannot add an explicitly sized regions after legacy region */
+ if (!rdreg->count)
+ return -EINVAL;
}
/*
+ * For legacy single-region redistributor regions (!count),
+ * check that the redistributor region does not overlap with the
+ * distributor's address space.
+ */
+ if (!count && !IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
+ vgic_dist_overlap(kvm, base, size))
+ return -EINVAL;
+
+ /* collision with any other rdist region? */
+ if (vgic_v3_rdist_overlap(kvm, base, size))
+ return -EINVAL;
+
+ rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL);
+ if (!rdreg)
+ return -ENOMEM;
+
+ rdreg->base = VGIC_ADDR_UNDEF;
+
+ ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K);
+ if (ret)
+ goto free;
+
+ rdreg->base = base;
+ rdreg->count = count;
+ rdreg->free_index = 0;
+ rdreg->index = index;
+
+ list_add_tail(&rdreg->list, rd_regions);
+ return 0;
+free:
+ kfree(rdreg);
+ return ret;
+}
+
+int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
+{
+ int ret;
+
+ ret = vgic_v3_insert_redist_region(kvm, index, addr, count);
+ if (ret)
+ return ret;
+
+ /*
* Register iodevs for each existing VCPU. Adding more VCPUs
* afterwards will register the iodevs when needed.
*/
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index bdcf8e7a6161..ff7dc890941a 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -419,6 +419,29 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
return 0;
}
+/**
+ * vgic_v3_rdist_overlap - check if a region overlaps with any
+ * existing redistributor region
+ *
+ * @kvm: kvm handle
+ * @base: base of the region
+ * @size: size of region
+ *
+ * Return: true if there is an overlap
+ */
+bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size)
+{
+ struct vgic_dist *d = &kvm->arch.vgic;
+ struct vgic_redist_region *rdreg;
+
+ list_for_each_entry(rdreg, &d->rd_regions, list) {
+ if ((base + size > rdreg->base) &&
+ (base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg)))
+ return true;
+ }
+ return false;
+}
+
/*
* Check for overlapping regions and for regions crossing the end of memory
* for base addresses which have already been set.
@@ -426,41 +449,83 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
bool vgic_v3_check_base(struct kvm *kvm)
{
struct vgic_dist *d = &kvm->arch.vgic;
- gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
-
- redist_size *= atomic_read(&kvm->online_vcpus);
+ struct vgic_redist_region *rdreg;
if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
return false;
- if (!IS_VGIC_ADDR_UNDEF(d->vgic_redist_base) &&
- d->vgic_redist_base + redist_size < d->vgic_redist_base)
- return false;
+ list_for_each_entry(rdreg, &d->rd_regions, list) {
+ if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) <
+ rdreg->base)
+ return false;
+ }
- /* Both base addresses must be set to check if they overlap */
- if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) ||
- IS_VGIC_ADDR_UNDEF(d->vgic_redist_base))
+ if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base))
return true;
- if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base)
- return true;
- if (d->vgic_redist_base + redist_size <= d->vgic_dist_base)
- return true;
+ return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base,
+ KVM_VGIC_V3_DIST_SIZE);
+}
- return false;
+/**
+ * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one
+ * which has free space to put a new rdist region.
+ *
+ * @rd_regions: redistributor region list head
+ *
+ * A redistributor regions maps n redistributors, n = region size / (2 x 64kB).
+ * Stride between redistributors is 0 and regions are filled in the index order.
+ *
+ * Return: the redist region handle, if any, that has space to map a new rdist
+ * region.
+ */
+struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions)
+{
+ struct vgic_redist_region *rdreg;
+
+ list_for_each_entry(rdreg, rd_regions, list) {
+ if (!vgic_v3_redist_region_full(rdreg))
+ return rdreg;
+ }
+ return NULL;
}
+struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
+ u32 index)
+{
+ struct list_head *rd_regions = &kvm->arch.vgic.rd_regions;
+ struct vgic_redist_region *rdreg;
+
+ list_for_each_entry(rdreg, rd_regions, list) {
+ if (rdreg->index == index)
+ return rdreg;
+ }
+ return NULL;
+}
+
+
int vgic_v3_map_resources(struct kvm *kvm)
{
- int ret = 0;
struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int ret = 0;
+ int c;
if (vgic_ready(kvm))
goto out;
- if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
- IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
+ kvm_debug("vcpu %d redistributor base not set\n", c);
+ ret = -ENXIO;
+ goto out;
+ }
+ }
+
+ if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
kvm_err("Need to set vgic distributor addresses first\n");
ret = -ENXIO;
goto out;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 32c25d42c93f..ead00b2072b2 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -96,6 +96,13 @@
/* we only support 64 kB translation table page size */
#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16)
+#define KVM_VGIC_V3_RDIST_INDEX_MASK GENMASK_ULL(11, 0)
+#define KVM_VGIC_V3_RDIST_FLAGS_MASK GENMASK_ULL(15, 12)
+#define KVM_VGIC_V3_RDIST_FLAGS_SHIFT 12
+#define KVM_VGIC_V3_RDIST_BASE_MASK GENMASK_ULL(51, 16)
+#define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52)
+#define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52
+
/* Requires the irq_lock to be held by the caller. */
static inline bool irq_is_pending(struct vgic_irq *irq)
{
@@ -215,7 +222,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
int vgic_v3_save_pending_tables(struct kvm *kvm);
-int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr);
+int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count);
int vgic_register_redist_iodev(struct kvm_vcpu *vcpu);
bool vgic_v3_check_base(struct kvm *kvm);
@@ -243,8 +250,8 @@ void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
int vgic_lazy_init(struct kvm *kvm);
int vgic_init(struct kvm *kvm);
-int vgic_debug_init(struct kvm *kvm);
-int vgic_debug_destroy(struct kvm *kvm);
+void vgic_debug_init(struct kvm *kvm);
+void vgic_debug_destroy(struct kvm *kvm);
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);
@@ -265,6 +272,39 @@ static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
}
}
+static inline bool
+vgic_v3_redist_region_full(struct vgic_redist_region *region)
+{
+ if (!region->count)
+ return false;
+
+ return (region->free_index >= region->count);
+}
+
+struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rdregs);
+
+static inline size_t
+vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
+{
+ if (!rdreg->count)
+ return atomic_read(&kvm->online_vcpus) * KVM_VGIC_V3_REDIST_SIZE;
+ else
+ return rdreg->count * KVM_VGIC_V3_REDIST_SIZE;
+}
+
+struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
+ u32 index);
+
+bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
+
+static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
+{
+ struct vgic_dist *d = &kvm->arch.vgic;
+
+ return (base + size > d->vgic_dist_base) &&
+ (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
+}
+
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c7b2e927f699..aa7da1d8ece2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -203,29 +203,47 @@ static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
return true;
}
-bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
+ unsigned long *vcpu_bitmap, cpumask_var_t tmp)
{
int i, cpu, me;
- cpumask_var_t cpus;
- bool called;
struct kvm_vcpu *vcpu;
-
- zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+ bool called;
me = get_cpu();
+
kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!test_bit(i, vcpu_bitmap))
+ continue;
+
kvm_make_request(req, vcpu);
cpu = vcpu->cpu;
if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
continue;
- if (cpus != NULL && cpu != -1 && cpu != me &&
+ if (tmp != NULL && cpu != -1 && cpu != me &&
kvm_request_needs_ipi(vcpu, req))
- __cpumask_set_cpu(cpu, cpus);
+ __cpumask_set_cpu(cpu, tmp);
}
- called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
+
+ called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
put_cpu();
+
+ return called;
+}
+
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
+{
+ cpumask_var_t cpus;
+ bool called;
+ static unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]
+ = {[0 ... BITS_TO_LONGS(KVM_MAX_VCPUS)-1] = ULONG_MAX};
+
+ zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+
+ called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap, cpus);
+
free_cpumask_var(cpus);
return called;
}
@@ -572,10 +590,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return 0;
snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
- kvm->debugfs_dentry = debugfs_create_dir(dir_name,
- kvm_debugfs_dir);
- if (!kvm->debugfs_dentry)
- return -ENOMEM;
+ kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
sizeof(*kvm->debugfs_stat_data),
@@ -591,11 +606,8 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
stat_data->kvm = kvm;
stat_data->offset = p->offset;
kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
- if (!debugfs_create_file(p->name, 0644,
- kvm->debugfs_dentry,
- stat_data,
- stat_fops_per_vm[p->kind]))
- return -ENOMEM;
+ debugfs_create_file(p->name, 0644, kvm->debugfs_dentry,
+ stat_data, stat_fops_per_vm[p->kind]);
}
return 0;
}
@@ -2340,7 +2352,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
-static int kvm_vcpu_fault(struct vm_fault *vmf)
+static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
{
struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
struct page *page;
@@ -2550,8 +2562,13 @@ static long kvm_vcpu_ioctl(struct file *filp,
oldpid = rcu_access_pointer(vcpu->pid);
if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
/* The thread running this VCPU changed. */
- struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+ struct pid *newpid;
+
+ r = kvm_arch_vcpu_run_pid_change(vcpu);
+ if (r)
+ break;
+ newpid = get_task_pid(current, PIDTYPE_PID);
rcu_assign_pointer(vcpu->pid, newpid);
if (oldpid)
synchronize_rcu();
@@ -3896,29 +3913,18 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
kfree(env);
}
-static int kvm_init_debug(void)
+static void kvm_init_debug(void)
{
- int r = -EEXIST;
struct kvm_stats_debugfs_item *p;
kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
- if (kvm_debugfs_dir == NULL)
- goto out;
kvm_debugfs_num_entries = 0;
for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
- if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
- (void *)(long)p->offset,
- stat_fops[p->kind]))
- goto out_dir;
+ debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
+ (void *)(long)p->offset,
+ stat_fops[p->kind]);
}
-
- return 0;
-
-out_dir:
- debugfs_remove_recursive(kvm_debugfs_dir);
-out:
- return r;
}
static int kvm_suspend(void)
@@ -4046,20 +4052,13 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
kvm_preempt_ops.sched_in = kvm_sched_in;
kvm_preempt_ops.sched_out = kvm_sched_out;
- r = kvm_init_debug();
- if (r) {
- pr_err("kvm: create debugfs files failed\n");
- goto out_undebugfs;
- }
+ kvm_init_debug();
r = kvm_vfio_ops_init();
WARN_ON(r);
return 0;
-out_undebugfs:
- unregister_syscore_ops(&kvm_syscore_ops);
- misc_deregister(&kvm_dev);
out_unreg:
kvm_async_pf_deinit();
out_free: