summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-08-06 09:18:21 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-08-06 09:18:21 -0400
commit80fac0f577a35c437219a2786c1804ab8ca1e998 (patch)
treedfe87e66f937b2b42ba6b87c4538b7e5bea06d07
parent4305f42401b29e2e024bd064618faf25aef5cb69 (diff)
parent45e11817d5703eceb65a673927a8bc74dc1286d6 (diff)
downloadlinux-80fac0f577a35c437219a2786c1804ab8ca1e998.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Paolo Bonzini: - ARM bugfix and MSI injection support - x86 nested virt tweak and OOPS fix - Simplify pvclock code (vdso bits acked by Andy Lutomirski). * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: nvmx: mark ept single context invalidation as supported nvmx: remove comment about missing nested vpid support KVM: lapic: fix access preemption timer stuff even if kernel_irqchip=off KVM: documentation: fix KVM_CAP_X2APIC_API information x86: vdso: use __pvclock_read_cycles pvclock: introduce seqcount-like API arm64: KVM: Set cpsr before spsr on fault injection KVM: arm: vgic-irqfd: Workaround changing kvm_set_routing_entry prototype KVM: arm/arm64: Enable MSI routing KVM: arm/arm64: Enable irqchip routing KVM: Move kvm_setup_default/empty_irq_routing declaration in arch specific header KVM: irqchip: Convey devid to kvm_set_msi KVM: Add devid in kvm_kernel_irq_routing_entry KVM: api: Pass the devid in the msi routing entry
-rw-r--r--Documentation/virtual/kvm/api.txt53
-rw-r--r--arch/arm/kvm/Kconfig2
-rw-r--r--arch/arm/kvm/Makefile1
-rw-r--r--arch/arm/kvm/irq.h19
-rw-r--r--arch/arm64/kvm/Kconfig2
-rw-r--r--arch/arm64/kvm/Makefile1
-rw-r--r--arch/arm64/kvm/inject_fault.c12
-rw-r--r--arch/arm64/kvm/irq.h19
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c25
-rw-r--r--arch/x86/include/asm/pvclock.h39
-rw-r--r--arch/x86/kernel/pvclock.c17
-rw-r--r--arch/x86/kvm/irq.h3
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/vmx.c15
-rw-r--r--include/kvm/arm_vgic.h7
-rw-r--r--include/linux/kvm_host.h12
-rw-r--r--include/uapi/linux/kvm.h5
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c4
-rw-r--r--virt/kvm/arm/vgic/vgic-irqfd.c116
-rw-r--r--virt/kvm/arm/vgic/vgic.c7
-rw-r--r--virt/kvm/irqchip.c28
21 files changed, 273 insertions, 117 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 5237e1b2fd66..739db9ab16b2 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1433,13 +1433,16 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
4.52 KVM_SET_GSI_ROUTING
Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 s390
+Architectures: x86 s390 arm arm64
Type: vm ioctl
Parameters: struct kvm_irq_routing (in)
Returns: 0 on success, -1 on error
Sets the GSI routing table entries, overwriting any previously set entries.
+On arm/arm64, GSI routing has the following limitation:
+- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD.
+
struct kvm_irq_routing {
__u32 nr;
__u32 flags;
@@ -1468,7 +1471,13 @@ struct kvm_irq_routing_entry {
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
-No flags are specified so far, the corresponding field must be set to zero.
+flags:
+- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI routing entry
+ type, specifies that the devid field contains a valid value. The per-VM
+ KVM_CAP_MSI_DEVID capability advertises the requirement to provide
+ the device ID. If this capability is not available, userspace should
+ never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
+- zero otherwise
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1479,9 +1488,16 @@ struct kvm_irq_routing_msi {
__u32 address_lo;
__u32 address_hi;
__u32 data;
- __u32 pad;
+ union {
+ __u32 pad;
+ __u32 devid;
+ };
};
+If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
+for the device that wrote the MSI message. For PCI, this is usually a
+BFD identifier in the lower 16 bits.
+
On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled,
address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of
@@ -2199,18 +2215,19 @@ struct kvm_msi {
__u8 pad[12];
};
-flags: KVM_MSI_VALID_DEVID: devid contains a valid value
-devid: If KVM_MSI_VALID_DEVID is set, contains a unique device identifier
- for the device that wrote the MSI message.
- For PCI, this is usually a BFD identifier in the lower 16 bits.
+flags: KVM_MSI_VALID_DEVID: devid contains a valid value. The per-VM
+ KVM_CAP_MSI_DEVID capability advertises the requirement to provide
+ the device ID. If this capability is not available, userspace
+ should never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
-The per-VM KVM_CAP_MSI_DEVID capability advertises the need to provide
-the device ID. If this capability is not set, userland cannot rely on
-the kernel to allow the KVM_MSI_VALID_DEVID flag being set.
+If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
+for the device that wrote the MSI message. For PCI, this is usually a
+BFD identifier in the lower 16 bits.
-On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is
-enabled. If it is enabled, address_hi bits 31-8 provide bits 31-8 of the
-destination id. Bits 7-0 of address_hi must be zero.
+On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
+feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled,
+address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of
+address_hi must be zero.
4.71 KVM_CREATE_PIT2
@@ -2383,9 +2400,13 @@ Note that closing the resamplefd is not sufficient to disable the
irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
-On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared
-Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is
-given by gsi + 32.
+On arm/arm64, gsi routing being supported, the following can happen:
+- in case no routing entry is associated to this gsi, injection fails
+- in case the gsi is associated to an irqchip routing entry,
+ irqchip.pin + 32 corresponds to the injected SPI ID.
+- in case the gsi is associated to an MSI routing entry, the MSI
+ message and device ID are translated into an LPI (support restricted
+ to GICv3 ITS in-kernel emulation).
4.76 KVM_PPC_ALLOCATE_HTAB
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 95a000515e43..3e1cd0452d67 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -32,6 +32,8 @@ config KVM
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQ_ROUTING
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
Support hosting virtualized guest machines.
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 5e28df80dca7..10d77a66cad5 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -29,4 +29,5 @@ obj-y += $(KVM)/arm/vgic/vgic-v2.o
obj-y += $(KVM)/arm/vgic/vgic-mmio.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
+obj-y += $(KVM)/irqchip.o
obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/irq.h b/arch/arm/kvm/irq.h
new file mode 100644
index 000000000000..b74099b905fd
--- /dev/null
+++ b/arch/arm/kvm/irq.h
@@ -0,0 +1,19 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This header is included by irqchip.c. However, on ARM, interrupt
+ * controller declarations are located in include/kvm/arm_vgic.h since
+ * they are mostly shared between arm and arm64.
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <kvm/arm_vgic.h>
+
+#endif
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 9d2eff0b3ad3..9c9edc98d271 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -37,6 +37,8 @@ config KVM
select KVM_ARM_VGIC_V3
select KVM_ARM_PMU if HW_PERF_EVENTS
select HAVE_KVM_MSI
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQ_ROUTING
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index a5b96642a9cb..695eb3c7ef41 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -30,5 +30,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index e9e0e6db73f6..898c0e6aedd4 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -132,16 +132,14 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
- bool is_aarch32;
+ bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u32 esr = 0;
- is_aarch32 = vcpu_mode_is_32bit(vcpu);
-
- *vcpu_spsr(vcpu) = cpsr;
*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
-
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_spsr(vcpu) = cpsr;
vcpu_sys_reg(vcpu, FAR_EL1) = addr;
@@ -172,11 +170,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
- *vcpu_spsr(vcpu) = cpsr;
*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
-
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_spsr(vcpu) = cpsr;
/*
* Build an unknown exception, depending on the instruction
diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h
new file mode 100644
index 000000000000..b74099b905fd
--- /dev/null
+++ b/arch/arm64/kvm/irq.h
@@ -0,0 +1,19 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This header is included by irqchip.c. However, on ARM, interrupt
+ * controller declarations are located in include/kvm/arm_vgic.h since
+ * they are mostly shared between arm and arm64.
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <kvm/arm_vgic.h>
+
+#endif
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 2f02d23a05ef..94d54d0defa7 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode)
{
const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
cycle_t ret;
- u64 tsc, pvti_tsc;
- u64 last, delta, pvti_system_time;
- u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+ u64 last;
+ u32 version;
/*
* Note: The kernel and hypervisor must guarantee that cpu ID
@@ -123,29 +122,15 @@ static notrace cycle_t vread_pvclock(int *mode)
*/
do {
- version = pvti->version;
-
- smp_rmb();
+ version = pvclock_read_begin(pvti);
if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
*mode = VCLOCK_NONE;
return 0;
}
- tsc = rdtsc_ordered();
- pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
- pvti_tsc_shift = pvti->tsc_shift;
- pvti_system_time = pvti->system_time;
- pvti_tsc = pvti->tsc_timestamp;
-
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while (unlikely((version & 1) || version != pvti->version));
-
- delta = tsc - pvti_tsc;
- ret = pvti_system_time +
- pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
- pvti_tsc_shift);
+ ret = __pvclock_read_cycles(pvti);
+ } while (pvclock_read_retry(pvti, version));
/* refer to vread_tsc() comment for rationale */
last = gtod->cycle_last;
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7c1c89598688..d019f0cc80ec 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -25,6 +25,24 @@ void pvclock_resume(void);
void pvclock_touch_watchdogs(void);
+static __always_inline
+unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
+{
+ unsigned version = src->version & ~1;
+ /* Make sure that the version is read before the data. */
+ virt_rmb();
+ return version;
+}
+
+static __always_inline
+bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
+ unsigned version)
+{
+ /* Make sure that the version is re-read after the data. */
+ virt_rmb();
+ return unlikely(version != src->version);
+}
+
/*
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
@@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
}
static __always_inline
-unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
- cycle_t *cycles, u8 *flags)
+cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
{
- unsigned version;
- cycle_t offset;
- u64 delta;
-
- version = src->version;
- /* Make the latest version visible */
- smp_rmb();
-
- delta = rdtsc_ordered() - src->tsc_timestamp;
- offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
- src->tsc_shift);
- *cycles = src->system_time + offset;
- *flags = src->flags;
- return version;
+ u64 delta = rdtsc_ordered() - src->tsc_timestamp;
+ cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+ src->tsc_shift);
+ return src->system_time + offset;
}
struct pvclock_vsyscall_time_info {
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 06c58ce46762..3599404e3089 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
u8 flags;
do {
- version = src->version;
- /* Make the latest version visible */
- smp_rmb();
-
+ version = pvclock_read_begin(src);
flags = src->flags;
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while ((src->version & 1) || version != src->version);
+ } while (pvclock_read_retry(src, version));
return flags & valid_flags;
}
@@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
u8 flags;
do {
- version = __pvclock_read_cycles(src, &ret, &flags);
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while ((src->version & 1) || version != src->version);
+ version = pvclock_read_begin(src);
+ ret = __pvclock_read_cycles(src);
+ flags = src->flags;
+ } while (pvclock_read_retry(src, version));
if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
src->flags &= ~PVCLOCK_GUEST_STOPPED;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 61ebdc13a29a..035731eb3897 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
+int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
+
#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 730cf174090a..b62c85229711 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1349,6 +1349,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
+ if (!lapic_in_kernel(vcpu))
+ return false;
+
return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
}
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bc354f003ce1..a45d8580f91e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2809,12 +2809,8 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_ept_caps |=
VMX_EPT_EXECUTE_ONLY_BIT;
vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
- /*
- * For nested guests, we don't do anything specific
- * for single context invalidation. Hence, only advertise
- * support for global context invalidation.
- */
- vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
+ vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+ VMX_EPT_EXTENT_CONTEXT_BIT;
} else
vmx->nested.nested_vmx_ept_caps = 0;
@@ -2945,7 +2941,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
vmx->nested.nested_vmx_secondary_ctls_high);
break;
case MSR_IA32_VMX_EPT_VPID_CAP:
- /* Currently, no nested vpid support */
*pdata = vmx->nested.nested_vmx_ept_caps |
((u64)vmx->nested.nested_vmx_vpid_caps << 32);
break;
@@ -7609,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
+ /*
+ * TODO: track mappings and invalidate
+ * single context requests appropriately
+ */
+ case VMX_EPT_EXTENT_CONTEXT:
kvm_mmu_sync_roots(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
nested_vmx_succeed(vcpu);
break;
default:
- /* Trap single context invalidation invept calls */
BUG_ON(1);
break;
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 540da5149ba7..19b698ef3336 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -34,6 +34,7 @@
#define VGIC_MAX_SPI 1019
#define VGIC_MAX_RESERVED 1023
#define VGIC_MIN_LPI 8192
+#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
enum vgic_type {
VGIC_V2, /* Good ol' GICv2 */
@@ -314,4 +315,10 @@ static inline int kvm_vgic_get_max_vcpus(void)
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+/**
+ * kvm_vgic_setup_default_irq_routing:
+ * Setup a default flat gsi routing table mapping all SPIs
+ */
+int kvm_vgic_setup_default_irq_routing(struct kvm *kvm);
+
#endif /* __KVM_ARM_VGIC_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index aafd702f3e21..01e908ac4a39 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -317,7 +317,13 @@ struct kvm_kernel_irq_routing_entry {
unsigned irqchip;
unsigned pin;
} irqchip;
- struct msi_msg msi;
+ struct {
+ u32 address_lo;
+ u32 address_hi;
+ u32 data;
+ u32 flags;
+ u32 devid;
+ } msi;
struct kvm_s390_adapter_int adapter;
struct kvm_hv_sint hv_sint;
};
@@ -1003,12 +1009,12 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
#ifdef CONFIG_S390
#define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
+#elif defined(CONFIG_ARM64)
+#define KVM_MAX_IRQ_ROUTES 4096
#else
#define KVM_MAX_IRQ_ROUTES 1024
#endif
-int kvm_setup_default_irq_routing(struct kvm *kvm);
-int kvm_setup_empty_irq_routing(struct kvm *kvm);
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e98bb4cce639..300ef255d1e0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -882,7 +882,10 @@ struct kvm_irq_routing_msi {
__u32 address_lo;
__u32 address_hi;
__u32 data;
- __u32 pad;
+ union {
+ __u32 pad;
+ __u32 devid;
+ };
};
struct kvm_irq_routing_s390_adapter {
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 1e30ce08700d..fb4b0a79a950 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -264,6 +264,10 @@ int vgic_init(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_init(vcpu);
+ ret = kvm_vgic_setup_default_irq_routing(kvm);
+ if (ret)
+ goto out;
+
dist->initialized = true;
out:
return ret;
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index c675513270bb..b31a51a14efb 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -17,36 +17,116 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <trace/events/kvm.h>
+#include <kvm/arm_vgic.h>
+#include "vgic.h"
-int kvm_irq_map_gsi(struct kvm *kvm,
- struct kvm_kernel_irq_routing_entry *entries,
- int gsi)
+/**
+ * vgic_irqfd_set_irq: inject the IRQ corresponding to the
+ * irqchip routing entry
+ *
+ * This is the entry point for irqfd IRQ injection
+ */
+static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
{
- return 0;
+ unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS;
+
+ if (!vgic_valid_spi(kvm, spi_id))
+ return -EINVAL;
+ return kvm_vgic_inject_irq(kvm, 0, spi_id, level);
}
-int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip,
- unsigned int pin)
+/**
+ * kvm_set_routing_entry: populate a kvm routing entry
+ * from a user routing entry
+ *
+ * @kvm: the VM this entry is applied to
+ * @e: kvm kernel routing entry handle
+ * @ue: user api routing entry handle
+ * return 0 on success, -EINVAL on errors.
+ */
+#ifdef KVM_CAP_X2APIC_API
+int kvm_set_routing_entry(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+#else
+/* Remove this version and the ifdefery once merged into 4.8 */
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+#endif
{
- return pin;
+ int r = -EINVAL;
+
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ e->set = vgic_irqfd_set_irq;
+ e->irqchip.irqchip = ue->u.irqchip.irqchip;
+ e->irqchip.pin = ue->u.irqchip.pin;
+ if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) ||
+ (e->irqchip.irqchip >= KVM_NR_IRQCHIPS))
+ goto out;
+ break;
+ case KVM_IRQ_ROUTING_MSI:
+ e->set = kvm_set_msi;
+ e->msi.address_lo = ue->u.msi.address_lo;
+ e->msi.address_hi = ue->u.msi.address_hi;
+ e->msi.data = ue->u.msi.data;
+ e->msi.flags = ue->flags;
+ e->msi.devid = ue->u.msi.devid;
+ break;
+ default:
+ goto out;
+ }
+ r = 0;
+out:
+ return r;
}
-int kvm_set_irq(struct kvm *kvm, int irq_source_id,
- u32 irq, int level, bool line_status)
+/**
+ * kvm_set_msi: inject the MSI corresponding to the
+ * MSI routing entry
+ *
+ * This is the entry point for irqfd MSI injection
+ * and userspace MSI injection.
+ */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
{
- unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
+ struct kvm_msi msi;
- trace_kvm_set_irq(irq, level, irq_source_id);
+ msi.address_lo = e->msi.address_lo;
+ msi.address_hi = e->msi.address_hi;
+ msi.data = e->msi.data;
+ msi.flags = e->msi.flags;
+ msi.devid = e->msi.devid;
- BUG_ON(!vgic_initialized(kvm));
+ if (!vgic_has_its(kvm))
+ return -ENODEV;
- return kvm_vgic_inject_irq(kvm, 0, spi, level);
+ return vgic_its_inject_msi(kvm, &msi);
}
-/* MSI not implemented yet */
-int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
- struct kvm *kvm, int irq_source_id,
- int level, bool line_status)
+int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
{
- return 0;
+ struct kvm_irq_routing_entry *entries;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ u32 nr = dist->nr_spis;
+ int i, ret;
+
+ entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry),
+ GFP_KERNEL);
+ if (!entries)
+ return -ENOMEM;
+
+ for (i = 0; i < nr; i++) {
+ entries[i].gsi = i;
+ entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ entries[i].u.irqchip.irqchip = 0;
+ entries[i].u.irqchip.pin = i;
+ }
+ ret = kvm_set_irq_routing(kvm, entries, nr, 0);
+ kfree(entries);
+ return ret;
}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 39f3358c6d91..e7aeac719e09 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -711,10 +711,3 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
return map_is_active;
}
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
-{
- if (vgic_has_its(kvm))
- return vgic_its_inject_msi(kvm, msi);
- else
- return -ENODEV;
-}
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index df99e9c3b64d..3bcc9990adf7 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -62,12 +62,14 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
{
struct kvm_kernel_irq_routing_entry route;
- if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+ if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
return -EINVAL;
route.msi.address_lo = msi->address_lo;
route.msi.address_hi = msi->address_hi;
route.msi.data = msi->data;
+ route.msi.flags = msi->flags;
+ route.msi.devid = msi->devid;
return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
}
@@ -177,6 +179,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
unsigned flags)
{
struct kvm_irq_routing_table *new, *old;
+ struct kvm_kernel_irq_routing_entry *e;
u32 i, j, nr_rt_entries = 0;
int r;
@@ -200,23 +203,25 @@ int kvm_set_irq_routing(struct kvm *kvm,
new->chip[i][j] = -1;
for (i = 0; i < nr; ++i) {
- struct kvm_kernel_irq_routing_entry *e;
-
r = -ENOMEM;
e = kzalloc(sizeof(*e), GFP_KERNEL);
if (!e)
goto out;
r = -EINVAL;
- if (ue->flags) {
- kfree(e);
- goto out;
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_MSI:
+ if (ue->flags & ~KVM_MSI_VALID_DEVID)
+ goto free_entry;
+ break;
+ default:
+ if (ue->flags)
+ goto free_entry;
+ break;
}
r = setup_routing_entry(kvm, new, e, ue);
- if (r) {
- kfree(e);
- goto out;
- }
+ if (r)
+ goto free_entry;
++ue;
}
@@ -233,7 +238,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
new = old;
r = 0;
+ goto out;
+free_entry:
+ kfree(e);
out:
free_irq_routing_table(new);