summaryrefslogtreecommitdiffstats
path: root/virt
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-06 18:38:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-06 18:38:31 -0700
commitc136b84393d4e340e1b53fc7f737dd5827b19ee5 (patch)
tree985a1bdfafe7ec5ce2d3c738f601cad3998d8ce9 /virt
parente0f25a3f2d052e36ff67a9b4db835c3e27e950d8 (diff)
parent1372324b328cd5dabaef5e345e37ad48c63df2a9 (diff)
downloadlinux-c136b84393d4e340e1b53fc7f737dd5827b19ee5.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "PPC: - Better machine check handling for HV KVM - Ability to support guests with threads=2, 4 or 8 on POWER9 - Fix for a race that could cause delayed recognition of signals - Fix for a bug where POWER9 guests could sleep with interrupts pending. ARM: - VCPU request overhaul - allow timer and PMU to have their interrupt number selected from userspace - workaround for Cavium erratum 30115 - handling of memory poisonning - the usual crop of fixes and cleanups s390: - initial machine check forwarding - migration support for the CMMA page hinting information - cleanups and fixes x86: - nested VMX bugfixes and improvements - more reliable NMI window detection on AMD - APIC timer optimizations Generic: - VCPU request overhaul + documentation of common code patterns - kvm_stat improvements" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits) Update my email address kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12 kvm: x86: mmu: allow A/D bits to be disabled in an mmu x86: kvm: mmu: make spte mmio mask more explicit x86: kvm: mmu: dead code thanks to access tracking KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code KVM: PPC: Book3S HV: Close race with testing for signals on guest entry KVM: PPC: Book3S HV: Simplify dynamic micro-threading code KVM: x86: remove ignored type attribute KVM: LAPIC: Fix lapic timer injection delay KVM: lapic: reorganize restart_apic_timer KVM: lapic: reorganize start_hv_timer kvm: nVMX: Check memory operand to INVVPID KVM: s390: Inject machine check into the nested guest KVM: s390: Inject machine check into the guest tools/kvm_stat: add new interactive command 'b' tools/kvm_stat: add new command line switch '-i' tools/kvm_stat: fix error on interactive command 'g' KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit ...
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/arm/aarch32.c2
-rw-r--r--virt/kvm/arm/arch_timer.c139
-rw-r--r--virt/kvm/arm/arm.c82
-rw-r--r--virt/kvm/arm/hyp/vgic-v3-sr.c823
-rw-r--r--virt/kvm/arm/mmu.c23
-rw-r--r--virt/kvm/arm/pmu.c117
-rw-r--r--virt/kvm/arm/psci.c8
-rw-r--r--virt/kvm/arm/vgic/vgic-irqfd.c2
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v2.c24
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c22
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c68
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.h12
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c45
-rw-r--r--virt/kvm/arm/vgic/vgic.c68
-rw-r--r--virt/kvm/kvm_main.c12
15 files changed, 1297 insertions, 150 deletions
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
index 528af4b2d09e..79c7c357804b 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/virt/kvm/arm/aarch32.c
@@ -60,7 +60,7 @@ static const unsigned short cc_map[16] = {
/*
* Check if a trapped instruction should have been executed or not.
*/
-bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu)
{
unsigned long cpsr;
u32 cpsr_cond;
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 5976609ef27c..8e89d63005c7 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -21,6 +21,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/uaccess.h>
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
@@ -35,6 +36,16 @@ static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;
+static const struct kvm_irq_level default_ptimer_irq = {
+ .irq = 30,
+ .level = 1,
+};
+
+static const struct kvm_irq_level default_vtimer_irq = {
+ .irq = 27,
+ .level = 1,
+};
+
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
vcpu_vtimer(vcpu)->active_cleared_last = false;
@@ -95,7 +106,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
* If the vcpu is blocked we want to wake it up so that it will see
* the timer has expired when entering the guest.
*/
- kvm_vcpu_kick(vcpu);
+ kvm_vcpu_wake_up(vcpu);
}
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
@@ -215,7 +226,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
if (likely(irqchip_in_kernel(vcpu->kvm))) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
- timer_ctx->irq.level);
+ timer_ctx->irq.level,
+ timer_ctx);
WARN_ON(ret);
}
}
@@ -445,23 +457,12 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
kvm_timer_update_state(vcpu);
}
-int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *virt_irq,
- const struct kvm_irq_level *phys_irq)
+int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/*
- * The vcpu timer irq number cannot be determined in
- * kvm_timer_vcpu_init() because it is called much before
- * kvm_vcpu_set_target(). To handle this, we determine
- * vcpu timer irq number when the vcpu is reset.
- */
- vtimer->irq.irq = virt_irq->irq;
- ptimer->irq.irq = phys_irq->irq;
-
- /*
* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
* and to 0 for ARMv7. We provide an implementation that always
* resets the timer to be disabled and unmasked and is compliant with
@@ -496,6 +497,8 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/* Synchronize cntvoff across all vtimers of a VM. */
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
@@ -504,6 +507,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
timer->timer.function = kvm_timer_expire;
+
+ vtimer->irq.irq = default_vtimer_irq.irq;
+ ptimer->irq.irq = default_ptimer_irq.irq;
}
static void kvm_timer_init_interrupt(void *info)
@@ -613,6 +619,30 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
}
+static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
+{
+ int vtimer_irq, ptimer_irq;
+ int i, ret;
+
+ vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
+ ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
+ if (ret)
+ return false;
+
+ ptimer_irq = vcpu_ptimer(vcpu)->irq.irq;
+ ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu));
+ if (ret)
+ return false;
+
+ kvm_for_each_vcpu(i, vcpu, vcpu->kvm) {
+ if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq ||
+ vcpu_ptimer(vcpu)->irq.irq != ptimer_irq)
+ return false;
+ }
+
+ return true;
+}
+
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -632,6 +662,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (!vgic_initialized(vcpu->kvm))
return -ENODEV;
+ if (!timer_irqs_are_valid(vcpu)) {
+ kvm_debug("incorrectly configured timer irqs\n");
+ return -EINVAL;
+ }
+
/*
* Find the physical IRQ number corresponding to the host_vtimer_irq
*/
@@ -681,3 +716,79 @@ void kvm_timer_init_vhe(void)
val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
write_sysreg(val, cnthctl_el2);
}
+
+static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
+ vcpu_ptimer(vcpu)->irq.irq = ptimer_irq;
+ }
+}
+
+int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ int __user *uaddr = (int __user *)(long)attr->addr;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ int irq;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return -EINVAL;
+
+ if (get_user(irq, uaddr))
+ return -EFAULT;
+
+ if (!(irq_is_ppi(irq)))
+ return -EINVAL;
+
+ if (vcpu->arch.timer_cpu.enabled)
+ return -EBUSY;
+
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+ set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq);
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq);
+ break;
+ default:
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ int __user *uaddr = (int __user *)(long)attr->addr;
+ struct arch_timer_context *timer;
+ int irq;
+
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+ timer = vcpu_vtimer(vcpu);
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ timer = vcpu_ptimer(vcpu);
+ break;
+ default:
+ return -ENXIO;
+ }
+
+ irq = timer->irq.irq;
+ return put_user(irq, uaddr);
+}
+
+int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+ case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ return 0;
+ }
+
+ return -ENXIO;
+}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 3417e184c8e1..a39a1e161e63 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -368,6 +368,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_put(vcpu);
}
+static void vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.power_off = true;
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
@@ -387,7 +394,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
vcpu->arch.power_off = false;
break;
case KVM_MP_STATE_STOPPED:
- vcpu->arch.power_off = true;
+ vcpu_power_off(vcpu);
break;
default:
return -EINVAL;
@@ -520,6 +527,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
}
ret = kvm_timer_enable(vcpu);
+ if (ret)
+ return ret;
+
+ ret = kvm_arm_pmu_v3_enable(vcpu);
return ret;
}
@@ -536,21 +547,7 @@ void kvm_arm_halt_guest(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm)
vcpu->arch.pause = true;
- kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT);
-}
-
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pause = true;
- kvm_vcpu_kick(vcpu);
-}
-
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu)
-{
- struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
-
- vcpu->arch.pause = false;
- swake_up(wq);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
}
void kvm_arm_resume_guest(struct kvm *kvm)
@@ -558,16 +555,23 @@ void kvm_arm_resume_guest(struct kvm *kvm)
int i;
struct kvm_vcpu *vcpu;
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arm_resume_vcpu(vcpu);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu->arch.pause = false;
+ swake_up(kvm_arch_vcpu_wq(vcpu));
+ }
}
-static void vcpu_sleep(struct kvm_vcpu *vcpu)
+static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
(!vcpu->arch.pause)));
+
+ if (vcpu->arch.power_off || vcpu->arch.pause) {
+ /* Awaken to handle a signal, request we sleep again later. */
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ }
}
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
@@ -575,6 +579,20 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
return vcpu->arch.target >= 0;
}
+static void check_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+ if (kvm_request_pending(vcpu)) {
+ if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
+ vcpu_req_sleep(vcpu);
+
+ /*
+ * Clear IRQ_PENDING requests that were made to guarantee
+ * that a VCPU sees new virtual interrupts.
+ */
+ kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
+ }
+}
+
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -620,8 +638,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
update_vttbr(vcpu->kvm);
- if (vcpu->arch.power_off || vcpu->arch.pause)
- vcpu_sleep(vcpu);
+ check_vcpu_requests(vcpu);
/*
* Preparing the interrupts to be injected also
@@ -650,8 +667,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->exit_reason = KVM_EXIT_INTR;
}
+ /*
+ * Ensure we set mode to IN_GUEST_MODE after we disable
+ * interrupts and before the final VCPU requests check.
+ * See the comment in kvm_vcpu_exiting_guest_mode() and
+ * Documentation/virtual/kvm/vcpu-requests.rst
+ */
+ smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
- vcpu->arch.power_off || vcpu->arch.pause) {
+ kvm_request_pending(vcpu)) {
+ vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable();
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
@@ -667,7 +693,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
- vcpu->mode = IN_GUEST_MODE;
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
@@ -756,6 +781,7 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
* trigger a world-switch round on the running physical CPU to set the
* virtual IRQ/FIQ fields in the HCR appropriately.
*/
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
return 0;
@@ -806,7 +832,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level);
+ return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
case KVM_ARM_IRQ_TYPE_SPI:
if (!irqchip_in_kernel(kvm))
return -ENXIO;
@@ -814,7 +840,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
+ return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
}
return -EINVAL;
@@ -884,7 +910,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* Handle the "start in power-off" case.
*/
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
- vcpu->arch.power_off = true;
+ vcpu_power_off(vcpu);
else
vcpu->arch.power_off = false;
@@ -1115,9 +1141,6 @@ static void cpu_init_hyp_mode(void *dummy)
__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
- if (is_kernel_in_hyp_mode())
- kvm_timer_init_vhe();
-
kvm_arm_init_debug();
}
@@ -1137,6 +1160,7 @@ static void cpu_hyp_reinit(void)
* event was cancelled before the CPU was reset.
*/
__cpu_init_stage2();
+ kvm_timer_init_vhe();
} else {
cpu_init_hyp_mode(NULL);
}
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 87940364570b..91728faa13fd 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -19,10 +19,12 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
+#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5))
static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
{
@@ -118,6 +120,90 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
}
}
+static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n)
+{
+ switch (n) {
+ case 0:
+ write_gicreg(val, ICH_AP0R0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_AP0R1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_AP0R2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_AP0R3_EL2);
+ break;
+ }
+}
+
+static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n)
+{
+ switch (n) {
+ case 0:
+ write_gicreg(val, ICH_AP1R0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_AP1R1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_AP1R2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_AP1R3_EL2);
+ break;
+ }
+}
+
+static u32 __hyp_text __vgic_v3_read_ap0rn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ val = read_gicreg(ICH_AP0R0_EL2);
+ break;
+ case 1:
+ val = read_gicreg(ICH_AP0R1_EL2);
+ break;
+ case 2:
+ val = read_gicreg(ICH_AP0R2_EL2);
+ break;
+ case 3:
+ val = read_gicreg(ICH_AP0R3_EL2);
+ break;
+ default:
+ unreachable();
+ }
+
+ return val;
+}
+
+static u32 __hyp_text __vgic_v3_read_ap1rn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ val = read_gicreg(ICH_AP1R0_EL2);
+ break;
+ case 1:
+ val = read_gicreg(ICH_AP1R1_EL2);
+ break;
+ case 2:
+ val = read_gicreg(ICH_AP1R2_EL2);
+ break;
+ case 3:
+ val = read_gicreg(ICH_AP1R3_EL2);
+ break;
+ default:
+ unreachable();
+ }
+
+ return val;
+}
+
void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -154,24 +240,27 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
switch (nr_pre_bits) {
case 7:
- cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
- cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
+ cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
+ cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
case 6:
- cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
+ cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
default:
- cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
+ cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
}
switch (nr_pre_bits) {
case 7:
- cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
- cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
+ cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
+ cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
case 6:
- cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
+ cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
default:
- cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
+ cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
}
} else {
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+ write_gicreg(0, ICH_HCR_EL2);
+
cpu_if->vgic_elrsr = 0xffff;
cpu_if->vgic_ap0r[0] = 0;
cpu_if->vgic_ap0r[1] = 0;
@@ -224,26 +313,34 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
switch (nr_pre_bits) {
case 7:
- write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
- write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);
case 6:
- write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);
default:
- write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
}
switch (nr_pre_bits) {
case 7:
- write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
- write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);
case 6:
- write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);
default:
- write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
}
for (i = 0; i < used_lrs; i++)
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+ } else {
+ /*
+ * If we need to trap system registers, we must write
+ * ICH_HCR_EL2 anyway, even if no interrupts are being
+ * injected,
+ */
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
/*
@@ -287,3 +384,697 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+
+#ifdef CONFIG_ARM64
+
+static int __hyp_text __vgic_v3_bpr_min(void)
+{
+ /* See Pseudocode for VPriorityGroup */
+ return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2));
+}
+
+static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu)
+{
+ u32 esr = kvm_vcpu_get_hsr(vcpu);
+ u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
+
+ return crm != 8;
+}
+
+#define GICv3_IDLE_PRIORITY 0xff
+
+static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
+ u32 vmcr,
+ u64 *lr_val)
+{
+ unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ u8 priority = GICv3_IDLE_PRIORITY;
+ int i, lr = -1;
+
+ for (i = 0; i < used_lrs; i++) {
+ u64 val = __gic_v3_get_lr(i);
+ u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+
+ /* Not pending in the state? */
+ if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT)
+ continue;
+
+ /* Group-0 interrupt, but Group-0 disabled? */
+ if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK))
+ continue;
+
+ /* Group-1 interrupt, but Group-1 disabled? */
+ if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK))
+ continue;
+
+ /* Not the highest priority? */
+ if (lr_prio >= priority)
+ continue;
+
+ /* This is a candidate */
+ priority = lr_prio;
+ *lr_val = val;
+ lr = i;
+ }
+
+ if (lr == -1)
+ *lr_val = ICC_IAR1_EL1_SPURIOUS;
+
+ return lr;
+}
+
+static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
+ int intid, u64 *lr_val)
+{
+ unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ int i;
+
+ for (i = 0; i < used_lrs; i++) {
+ u64 val = __gic_v3_get_lr(i);
+
+ if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid &&
+ (val & ICH_LR_ACTIVE_BIT)) {
+ *lr_val = val;
+ return i;
+ }
+ }
+
+ *lr_val = ICC_IAR1_EL1_SPURIOUS;
+ return -1;
+}
+
+static int __hyp_text __vgic_v3_get_highest_active_priority(void)
+{
+ u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
+ u32 hap = 0;
+ int i;
+
+ for (i = 0; i < nr_apr_regs; i++) {
+ u32 val;
+
+ /*
+ * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers
+ * contain the active priority levels for this VCPU
+ * for the maximum number of supported priority
+ * levels, and we return the full priority level only
+ * if the BPR is programmed to its minimum, otherwise
+ * we return a combination of the priority level and
+ * subpriority, as determined by the setting of the
+ * BPR, but without the full subpriority.
+ */
+ val = __vgic_v3_read_ap0rn(i);
+ val |= __vgic_v3_read_ap1rn(i);
+ if (!val) {
+ hap += 32;
+ continue;
+ }
+
+ return (hap + __ffs(val)) << __vgic_v3_bpr_min();
+ }
+
+ return GICv3_IDLE_PRIORITY;
+}
+
+static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr)
+{
+ return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
+}
+
+static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr)
+{
+ unsigned int bpr;
+
+ if (vmcr & ICH_VMCR_CBPR_MASK) {
+ bpr = __vgic_v3_get_bpr0(vmcr);
+ if (bpr < 7)
+ bpr++;
+ } else {
+ bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
+ }
+
+ return bpr;
+}
+
+/*
+ * Convert a priority to a preemption level, taking the relevant BPR
+ * into account by zeroing the sub-priority bits.
+ */
+static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
+{
+ unsigned int bpr;
+
+ if (!grp)
+ bpr = __vgic_v3_get_bpr0(vmcr) + 1;
+ else
+ bpr = __vgic_v3_get_bpr1(vmcr);
+
+ return pri & (GENMASK(7, 0) << bpr);
+}
+
+/*
+ * The priority value is independent of any of the BPR values, so we
+ * normalize it using the minumal BPR value. This guarantees that no
+ * matter what the guest does with its BPR, we can always set/get the
+ * same value of a priority.
+ */
+static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
+{
+ u8 pre, ap;
+ u32 val;
+ int apr;
+
+ pre = __vgic_v3_pri_to_pre(pri, vmcr, grp);
+ ap = pre >> __vgic_v3_bpr_min();
+ apr = ap / 32;
+
+ if (!grp) {
+ val = __vgic_v3_read_ap0rn(apr);
+ __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr);
+ } else {
+ val = __vgic_v3_read_ap1rn(apr);
+ __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr);
+ }
+}
+
+static int __hyp_text __vgic_v3_clear_highest_active_priority(void)
+{
+ u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
+ u32 hap = 0;
+ int i;
+
+ for (i = 0; i < nr_apr_regs; i++) {
+ u32 ap0, ap1;
+ int c0, c1;
+
+ ap0 = __vgic_v3_read_ap0rn(i);
+ ap1 = __vgic_v3_read_ap1rn(i);
+ if (!ap0 && !ap1) {
+ hap += 32;
+ continue;
+ }
+
+ c0 = ap0 ? __ffs(ap0) : 32;
+ c1 = ap1 ? __ffs(ap1) : 32;
+
+ /* Always clear the LSB, which is the highest priority */
+ if (c0 < c1) {
+ ap0 &= ~BIT(c0);
+ __vgic_v3_write_ap0rn(ap0, i);
+ hap += c0;
+ } else {
+ ap1 &= ~BIT(c1);
+ __vgic_v3_write_ap1rn(ap1, i);
+ hap += c1;
+ }
+
+ /* Rescale to 8 bits of priority */
+ return hap << __vgic_v3_bpr_min();
+ }
+
+ return GICv3_IDLE_PRIORITY;
+}
+
+static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 lr_val;
+ u8 lr_prio, pmr;
+ int lr, grp;
+
+ grp = __vgic_v3_get_group(vcpu);
+
+ lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val);
+ if (lr < 0)
+ goto spurious;
+
+ if (grp != !!(lr_val & ICH_LR_GROUP))
+ goto spurious;
+
+ pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+ lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+ if (pmr <= lr_prio)
+ goto spurious;
+
+ if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp))
+ goto spurious;
+
+ lr_val &= ~ICH_LR_STATE;
+ /* No active state for LPIs */
+ if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI)
+ lr_val |= ICH_LR_ACTIVE_BIT;
+ __gic_v3_set_lr(lr_val, lr);
+ __vgic_v3_set_active_priority(lr_prio, vmcr, grp);
+ vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
+ return;
+
+spurious:
+ vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS);
+}
+
+static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val)
+{
+ lr_val &= ~ICH_LR_ACTIVE_BIT;
+ if (lr_val & ICH_LR_HW) {
+ u32 pid;
+
+ pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT;
+ gic_write_dir(pid);
+ }
+
+ __gic_v3_set_lr(lr_val, lr);
+}
+
+static void __hyp_text __vgic_v3_bump_eoicount(void)
+{
+ u32 hcr;
+
+ hcr = read_gicreg(ICH_HCR_EL2);
+ hcr += 1 << ICH_HCR_EOIcount_SHIFT;
+ write_gicreg(hcr, ICH_HCR_EL2);
+}
+
+static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ u32 vid = vcpu_get_reg(vcpu, rt);
+ u64 lr_val;
+ int lr;
+
+ /* EOImode == 0, nothing to be done here */
+ if (!(vmcr & ICH_VMCR_EOIM_MASK))
+ return;
+
+ /* No deactivate to be performed on an LPI */
+ if (vid >= VGIC_MIN_LPI)
+ return;
+
+ lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
+ if (lr == -1) {
+ __vgic_v3_bump_eoicount();
+ return;
+ }
+
+ __vgic_v3_clear_active_lr(lr, lr_val);
+}
+
+static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u32 vid = vcpu_get_reg(vcpu, rt);
+ u64 lr_val;
+ u8 lr_prio, act_prio;
+ int lr, grp;
+
+ grp = __vgic_v3_get_group(vcpu);
+
+ /* Drop priority in any case */
+ act_prio = __vgic_v3_clear_highest_active_priority();
+
+ /* If EOIing an LPI, no deactivate to be performed */
+ if (vid >= VGIC_MIN_LPI)
+ return;
+
+ /* EOImode == 1, nothing to be done here */
+ if (vmcr & ICH_VMCR_EOIM_MASK)
+ return;
+
+ lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
+ if (lr == -1) {
+ __vgic_v3_bump_eoicount();
+ return;
+ }
+
+ lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+
+ /* If priorities or group do not match, the guest has fscked-up. */
+ if (grp != !!(lr_val & ICH_LR_GROUP) ||
+ __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio)
+ return;
+
+ /* Let's now perform the deactivation */
+ __vgic_v3_clear_active_lr(lr, lr_val);
+}
+
+static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK));
+}
+
+static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK));
+}
+
+static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ if (val & 1)
+ vmcr |= ICH_VMCR_ENG0_MASK;
+ else
+ vmcr &= ~ICH_VMCR_ENG0_MASK;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ if (val & 1)
+ vmcr |= ICH_VMCR_ENG1_MASK;
+ else
+ vmcr &= ~ICH_VMCR_ENG1_MASK;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr));
+}
+
+static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr));
+}
+
+static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+ u8 bpr_min = __vgic_v3_bpr_min() - 1;
+
+ /* Enforce BPR limiting */
+ if (val < bpr_min)
+ val = bpr_min;
+
+ val <<= ICH_VMCR_BPR0_SHIFT;
+ val &= ICH_VMCR_BPR0_MASK;
+ vmcr &= ~ICH_VMCR_BPR0_MASK;
+ vmcr |= val;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+ u8 bpr_min = __vgic_v3_bpr_min();
+
+ if (vmcr & ICH_VMCR_CBPR_MASK)
+ return;
+
+ /* Enforce BPR limiting */
+ if (val < bpr_min)
+ val = bpr_min;
+
+ val <<= ICH_VMCR_BPR1_SHIFT;
+ val &= ICH_VMCR_BPR1_MASK;
+ vmcr &= ~ICH_VMCR_BPR1_MASK;
+ vmcr |= val;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+{
+ u32 val;
+
+ if (!__vgic_v3_get_group(vcpu))
+ val = __vgic_v3_read_ap0rn(n);
+ else
+ val = __vgic_v3_read_ap1rn(n);
+
+ vcpu_set_reg(vcpu, rt, val);
+}
+
+static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n)
+{
+ u32 val = vcpu_get_reg(vcpu, rt);
+
+ if (!__vgic_v3_get_group(vcpu))
+ __vgic_v3_write_ap0rn(val, n);
+ else
+ __vgic_v3_write_ap1rn(val, n);
+}
+
+static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_read_apxrn(vcpu, rt, 0);
+}
+
+static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_read_apxrn(vcpu, rt, 1);
+}
+
+static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_read_apxrn(vcpu, rt, 2);
+}
+
+static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_read_apxrn(vcpu, rt, 3);
+}
+
+static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_write_apxrn(vcpu, rt, 0);
+}
+
+static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_write_apxrn(vcpu, rt, 1);
+}
+
+static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_write_apxrn(vcpu, rt, 2);
+}
+
+static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ __vgic_v3_write_apxrn(vcpu, rt, 3);
+}
+
+static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ u64 lr_val;
+ int lr, lr_grp, grp;
+
+ grp = __vgic_v3_get_group(vcpu);
+
+ lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val);
+ if (lr == -1)
+ goto spurious;
+
+ lr_grp = !!(lr_val & ICH_LR_GROUP);
+ if (lr_grp != grp)
+ lr_val = ICC_IAR1_EL1_SPURIOUS;
+
+spurious:
+ vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
+}
+
+static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ vmcr &= ICH_VMCR_PMR_MASK;
+ vmcr >>= ICH_VMCR_PMR_SHIFT;
+ vcpu_set_reg(vcpu, rt, vmcr);
+}
+
+static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ u32 val = vcpu_get_reg(vcpu, rt);
+
+ val <<= ICH_VMCR_PMR_SHIFT;
+ val &= ICH_VMCR_PMR_MASK;
+ vmcr &= ~ICH_VMCR_PMR_MASK;
+ vmcr |= val;
+
+ write_gicreg(vmcr, ICH_VMCR_EL2);
+}
+
+static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ u32 val = __vgic_v3_get_highest_active_priority();
+ vcpu_set_reg(vcpu, rt, val);
+}
+
+static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ u32 vtr, val;
+
+ vtr = read_gicreg(ICH_VTR_EL2);
+ /* PRIbits */
+ val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
+ /* IDbits */
+ val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
+ /* SEIS */
+ val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT;
+ /* A3V */
+ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
+ /* EOImode */
+ val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT;
+ /* CBPR */
+ val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
+
+ vcpu_set_reg(vcpu, rt, val);
+}
+
+static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ u32 val = vcpu_get_reg(vcpu, rt);
+
+ if (val & ICC_CTLR_EL1_CBPR_MASK)
+ vmcr |= ICH_VMCR_CBPR_MASK;
+ else
+ vmcr &= ~ICH_VMCR_CBPR_MASK;
+
+ if (val & ICC_CTLR_EL1_EOImode_MASK)
+ vmcr |= ICH_VMCR_EOIM_MASK;
+ else
+ vmcr &= ~ICH_VMCR_EOIM_MASK;
+
+ write_gicreg(vmcr, ICH_VMCR_EL2);
+}
+
+int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
+{
+ int rt;
+ u32 esr;
+ u32 vmcr;
+ void (*fn)(struct kvm_vcpu *, u32, int);
+ bool is_read;
+ u32 sysreg;
+
+ esr = kvm_vcpu_get_hsr(vcpu);
+ if (vcpu_mode_is_32bit(vcpu)) {
+ if (!kvm_condition_valid(vcpu))
+ return 1;
+
+ sysreg = esr_cp15_to_sysreg(esr);
+ } else {
+ sysreg = esr_sys64_to_sysreg(esr);
+ }
+
+ is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+
+ switch (sysreg) {
+ case SYS_ICC_IAR0_EL1:
+ case SYS_ICC_IAR1_EL1:
+ if (unlikely(!is_read))
+ return 0;
+ fn = __vgic_v3_read_iar;
+ break;
+ case SYS_ICC_EOIR0_EL1:
+ case SYS_ICC_EOIR1_EL1:
+ if (unlikely(is_read))
+ return 0;
+ fn = __vgic_v3_write_eoir;
+ break;
+ case SYS_ICC_IGRPEN1_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_igrpen1;
+ else
+ fn = __vgic_v3_write_igrpen1;
+ break;
+ case SYS_ICC_BPR1_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_bpr1;
+ else
+ fn = __vgic_v3_write_bpr1;
+ break;
+ case SYS_ICC_AP0Rn_EL1(0):
+ case SYS_ICC_AP1Rn_EL1(0):
+ if (is_read)
+ fn = __vgic_v3_read_apxr0;
+ else
+ fn = __vgic_v3_write_apxr0;
+ break;
+ case SYS_ICC_AP0Rn_EL1(1):
+ case SYS_ICC_AP1Rn_EL1(1):
+ if (is_read)
+ fn = __vgic_v3_read_apxr1;
+ else
+ fn = __vgic_v3_write_apxr1;
+ break;
+ case SYS_ICC_AP0Rn_EL1(2):
+ case SYS_ICC_AP1Rn_EL1(2):
+ if (is_read)
+ fn = __vgic_v3_read_apxr2;
+ else
+ fn = __vgic_v3_write_apxr2;
+ break;
+ case SYS_ICC_AP0Rn_EL1(3):
+ case SYS_ICC_AP1Rn_EL1(3):
+ if (is_read)
+ fn = __vgic_v3_read_apxr3;
+ else
+ fn = __vgic_v3_write_apxr3;
+ break;
+ case SYS_ICC_HPPIR0_EL1:
+ case SYS_ICC_HPPIR1_EL1:
+ if (unlikely(!is_read))
+ return 0;
+ fn = __vgic_v3_read_hppir;
+ break;
+ case SYS_ICC_IGRPEN0_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_igrpen0;
+ else
+ fn = __vgic_v3_write_igrpen0;
+ break;
+ case SYS_ICC_BPR0_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_bpr0;
+ else
+ fn = __vgic_v3_write_bpr0;
+ break;
+ case SYS_ICC_DIR_EL1:
+ if (unlikely(is_read))
+ return 0;
+ fn = __vgic_v3_write_dir;
+ break;
+ case SYS_ICC_RPR_EL1:
+ if (unlikely(!is_read))
+ return 0;
+ fn = __vgic_v3_read_rpr;
+ break;
+ case SYS_ICC_CTLR_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_ctlr;
+ else
+ fn = __vgic_v3_write_ctlr;
+ break;
+ case SYS_ICC_PMR_EL1:
+ if (is_read)
+ fn = __vgic_v3_read_pmr;
+ else
+ fn = __vgic_v3_write_pmr;
+ break;
+ default:
+ return 0;
+ }
+
+ vmcr = __vgic_v3_read_vmcr();
+ rt = kvm_vcpu_sys_get_rt(vcpu);
+ fn(vcpu, vmcr, rt);
+
+ return 1;
+}
+
+#endif
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 1c44aa35f909..0e1fc75f3585 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -20,6 +20,7 @@
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
+#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
@@ -1262,6 +1263,24 @@ static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
__coherent_cache_guest_page(vcpu, pfn, size);
}
+static void kvm_send_hwpoison_signal(unsigned long address,
+ struct vm_area_struct *vma)
+{
+ siginfo_t info;
+
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_MCEERR_AR;
+ info.si_addr = (void __user *)address;
+
+ if (is_vm_hugetlb_page(vma))
+ info.si_addr_lsb = huge_page_shift(hstate_vma(vma));
+ else
+ info.si_addr_lsb = PAGE_SHIFT;
+
+ send_sig_info(SIGBUS, &info, current);
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -1331,6 +1350,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
smp_rmb();
pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
+ if (pfn == KVM_PFN_ERR_HWPOISON) {
+ kvm_send_hwpoison_signal(hva, vma);
+ return 0;
+ }
if (is_error_noslot_pfn(pfn))
return -EFAULT;
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 4b43e7f3b158..fc8a723ff387 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -203,6 +203,24 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
return reg;
}
+static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ bool overflow = !!kvm_pmu_overflow_status(vcpu);
+
+ if (pmu->irq_level == overflow)
+ return;
+
+ pmu->irq_level = overflow;
+
+ if (likely(irqchip_in_kernel(vcpu->kvm))) {
+ int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+ pmu->irq_num, overflow,
+ &vcpu->arch.pmu);
+ WARN_ON(ret);
+ }
+}
+
/**
* kvm_pmu_overflow_set - set PMU overflow interrupt
* @vcpu: The vcpu pointer
@@ -210,37 +228,18 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
*/
void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val)
{
- u64 reg;
-
if (val == 0)
return;
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val;
- reg = kvm_pmu_overflow_status(vcpu);
- if (reg != 0)
- kvm_vcpu_kick(vcpu);
+ kvm_pmu_check_overflow(vcpu);
}
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
- bool overflow;
-
if (!kvm_arm_pmu_v3_ready(vcpu))
return;
-
- overflow = !!kvm_pmu_overflow_status(vcpu);
- if (pmu->irq_level == overflow)
- return;
-
- pmu->irq_level = overflow;
-
- if (likely(irqchip_in_kernel(vcpu->kvm))) {
- int ret;
- ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- pmu->irq_num, overflow);
- WARN_ON(ret);
- }
+ kvm_pmu_check_overflow(vcpu);
}
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
@@ -451,34 +450,74 @@ bool kvm_arm_support_pmu_v3(void)
return (perf_num_counters() > 0);
}
-static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
+int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
- if (!kvm_arm_support_pmu_v3())
- return -ENODEV;
+ if (!vcpu->arch.pmu.created)
+ return 0;
/*
- * We currently require an in-kernel VGIC to use the PMU emulation,
- * because we do not support forwarding PMU overflow interrupts to
- * userspace yet.
+ * A valid interrupt configuration for the PMU is either to have a
+ * properly configured interrupt number and using an in-kernel
+ * irqchip, or to not have an in-kernel GIC and not set an IRQ.
*/
- if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))
+ if (irqchip_in_kernel(vcpu->kvm)) {
+ int irq = vcpu->arch.pmu.irq_num;
+ if (!kvm_arm_pmu_irq_initialized(vcpu))
+ return -EINVAL;
+
+ /*
+ * If we are using an in-kernel vgic, at this point we know
+ * the vgic will be initialized, so we can check the PMU irq
+ * number against the dimensions of the vgic and make sure
+ * it's valid.
+ */
+ if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
+ return -EINVAL;
+ } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
+ return -EINVAL;
+ }
+
+ kvm_pmu_vcpu_reset(vcpu);
+ vcpu->arch.pmu.ready = true;
+
+ return 0;
+}
+
+static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_arm_support_pmu_v3())
return -ENODEV;
- if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) ||
- !kvm_arm_pmu_irq_initialized(vcpu))
+ if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENXIO;
- if (kvm_arm_pmu_v3_ready(vcpu))
+ if (vcpu->arch.pmu.created)
return -EBUSY;
- kvm_pmu_vcpu_reset(vcpu);
- vcpu->arch.pmu.ready = true;
+ if (irqchip_in_kernel(vcpu->kvm)) {
+ int ret;
+
+ /*
+ * If using the PMU with an in-kernel virtual GIC
+ * implementation, we require the GIC to be already
+ * initialized when initializing the PMU.
+ */
+ if (!vgic_initialized(vcpu->kvm))
+ return -ENODEV;
+
+ if (!kvm_arm_pmu_irq_initialized(vcpu))
+ return -ENXIO;
+ ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
+ &vcpu->arch.pmu);
+ if (ret)
+ return ret;
+ }
+
+ vcpu->arch.pmu.created = true;
return 0;
}
-#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
-
/*
* For one VM the interrupt type must be same for each vcpu.
* As a PPI, the interrupt number is the same for all vcpus,
@@ -512,6 +551,9 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
int __user *uaddr = (int __user *)(long)attr->addr;
int irq;
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return -EINVAL;
+
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENODEV;
@@ -519,7 +561,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return -EFAULT;
/* The PMU overflow interrupt can be a PPI or a valid SPI. */
- if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq)))
+ if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
return -EINVAL;
if (!pmu_irq_is_valid(vcpu->kvm, irq))
@@ -546,6 +588,9 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
int __user *uaddr = (int __user *)(long)attr->addr;
int irq;
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return -EINVAL;
+
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENODEV;
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index a08d7a93aebb..f1e363bab5e8 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -57,6 +57,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
* for KVM will preserve the register state.
*/
kvm_vcpu_block(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
return PSCI_RET_SUCCESS;
}
@@ -64,6 +65,8 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
{
vcpu->arch.power_off = true;
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ kvm_vcpu_kick(vcpu);
}
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
@@ -178,10 +181,9 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
* after this call is handled and before the VCPUs have been
* re-initialized.
*/
- kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm)
tmp->arch.power_off = true;
- kvm_vcpu_kick(tmp);
- }
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
vcpu->run->system_event.type = type;
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index f138ed2e9c63..b7baf581611a 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -34,7 +34,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
if (!vgic_valid_spi(kvm, spi_id))
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, spi_id, level);
+ return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL);
}
/**
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index 63e0bbdcddcc..37522e65eb53 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -308,34 +308,36 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
- vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
+ vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
- vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
+ vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR,
- vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
+ vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
- vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
+ vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
- vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
- vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
+ vgic_mmio_read_active, vgic_mmio_write_sactive,
+ NULL, vgic_mmio_uaccess_write_sactive, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR,
- vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
+ vgic_mmio_read_active, vgic_mmio_write_cactive,
+ NULL, vgic_mmio_uaccess_write_cactive, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,
- vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
- VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
+ 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET,
- vgic_mmio_read_target, vgic_mmio_write_target, 8,
+ vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG,
- vgic_mmio_read_config, vgic_mmio_write_config, 2,
+ vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT,
vgic_mmio_read_raz, vgic_mmio_write_sgir, 4,
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 201d5e2e973d..714fa3933546 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -456,11 +456,13 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1,
+ vgic_mmio_read_active, vgic_mmio_write_sactive,
+ NULL, vgic_mmio_uaccess_write_sactive, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
- vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1,
- VGIC_ACCESS_32bit),
+ vgic_mmio_read_active, vgic_mmio_write_cactive,
+ NULL, vgic_mmio_uaccess_write_cactive,
+ 1, VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
@@ -526,12 +528,14 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
vgic_mmio_read_pending, vgic_mmio_write_cpending,
vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0,
- vgic_mmio_read_active, vgic_mmio_write_sactive, 4,
- VGIC_ACCESS_32bit),
- REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0,
- vgic_mmio_read_active, vgic_mmio_write_cactive, 4,
- VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
+ vgic_mmio_read_active, vgic_mmio_write_sactive,
+ NULL, vgic_mmio_uaccess_write_sactive,
+ 4, VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0,
+ vgic_mmio_read_active, vgic_mmio_write_cactive,
+ NULL, vgic_mmio_uaccess_write_cactive,
+ 4, VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 1c17b2a2f105..c1e4bdd66131 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -231,56 +231,94 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
* be migrated while we don't hold the IRQ locks and we don't want to be
* chasing moving targets.
*
- * For private interrupts, we only have to make sure the single and only VCPU
- * that can potentially queue the IRQ is stopped.
+ * For private interrupts we don't have to do anything because userspace
+ * accesses to the VGIC state already require all VCPUs to be stopped, and
+ * only the VCPU itself can modify its private interrupts active state, which
+ * guarantees that the VCPU is not running.
*/
static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
{
- if (intid < VGIC_NR_PRIVATE_IRQS)
- kvm_arm_halt_vcpu(vcpu);
- else
+ if (intid > VGIC_NR_PRIVATE_IRQS)
kvm_arm_halt_guest(vcpu->kvm);
}
/* See vgic_change_active_prepare */
static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
{
- if (intid < VGIC_NR_PRIVATE_IRQS)
- kvm_arm_resume_vcpu(vcpu);
- else
+ if (intid > VGIC_NR_PRIVATE_IRQS)
kvm_arm_resume_guest(vcpu->kvm);
}
-void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
+static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
- vgic_change_active_prepare(vcpu, intid);
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
vgic_mmio_change_active(vcpu, irq, false);
vgic_put_irq(vcpu->kvm, irq);
}
- vgic_change_active_finish(vcpu, intid);
}
-void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
+void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
+ mutex_lock(&vcpu->kvm->lock);
vgic_change_active_prepare(vcpu, intid);
+
+ __vgic_mmio_write_cactive(vcpu, addr, len, val);
+
+ vgic_change_active_finish(vcpu, intid);
+ mutex_unlock(&vcpu->kvm->lock);
+}
+
+void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ __vgic_mmio_write_cactive(vcpu, addr, len, val);
+}
+
+static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
vgic_mmio_change_active(vcpu, irq, true);
vgic_put_irq(vcpu->kvm, irq);
}
+}
+
+void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+
+ mutex_lock(&vcpu->kvm->lock);
+ vgic_change_active_prepare(vcpu, intid);
+
+ __vgic_mmio_write_sactive(vcpu, addr, len, val);
+
vgic_change_active_finish(vcpu, intid);
+ mutex_unlock(&vcpu->kvm->lock);
+}
+
+void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ __vgic_mmio_write_sactive(vcpu, addr, len, val);
}
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index ea4171acdef3..5693f6df45ec 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -75,7 +75,7 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
* The _WITH_LENGTH version instantiates registers with a fixed length
* and is mutually exclusive with the _PER_IRQ version.
*/
-#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \
+#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \
{ \
.reg_offset = off, \
.bits_per_irq = bpi, \
@@ -83,6 +83,8 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
.access_flags = acc, \
.read = rd, \
.write = wr, \
+ .uaccess_read = ur, \
+ .uaccess_write = uw, \
}
#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \
@@ -165,6 +167,14 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
+void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 030248e669f6..96ea597db0e7 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -21,6 +21,10 @@
#include "vgic.h"
+static bool group0_trap;
+static bool group1_trap;
+static bool common_trap;
+
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -258,6 +262,12 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/* Get the show on the road... */
vgic_v3->vgic_hcr = ICH_HCR_EN;
+ if (group0_trap)
+ vgic_v3->vgic_hcr |= ICH_HCR_TALL0;
+ if (group1_trap)
+ vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
+ if (common_trap)
+ vgic_v3->vgic_hcr |= ICH_HCR_TC;
}
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
@@ -429,6 +439,26 @@ out:
return ret;
}
+DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap);
+
+static int __init early_group0_trap_cfg(char *buf)
+{
+ return strtobool(buf, &group0_trap);
+}
+early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg);
+
+static int __init early_group1_trap_cfg(char *buf)
+{
+ return strtobool(buf, &group1_trap);
+}
+early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg);
+
+static int __init early_common_trap_cfg(char *buf)
+{
+ return strtobool(buf, &common_trap);
+}
+early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);
+
/**
* vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
* @node: pointer to the DT node
@@ -480,6 +510,21 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (kvm_vgic_global_state.vcpu_base == 0)
kvm_info("disabling GICv2 emulation\n");
+#ifdef CONFIG_ARM64
+ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
+ group0_trap = true;
+ group1_trap = true;
+ }
+#endif
+
+ if (group0_trap || group1_trap || common_trap) {
+ kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n",
+ group0_trap ? "G0" : "",
+ group1_trap ? "G1" : "",
+ common_trap ? "C" : "");
+ static_branch_enable(&vgic_v3_cpuif_trap);
+ }
+
kvm_vgic_global_state.vctrl_base = NULL;
kvm_vgic_global_state.type = VGIC_V3;
kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 83b24d20ff8f..fed717e07938 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -35,11 +35,12 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
/*
* Locking order is always:
- * its->cmd_lock (mutex)
- * its->its_lock (mutex)
- * vgic_cpu->ap_list_lock
- * kvm->lpi_list_lock
- * vgic_irq->irq_lock
+ * kvm->lock (mutex)
+ * its->cmd_lock (mutex)
+ * its->its_lock (mutex)
+ * vgic_cpu->ap_list_lock
+ * kvm->lpi_list_lock
+ * vgic_irq->irq_lock
*
* If you need to take multiple locks, always take the upper lock first,
* then the lower ones, e.g. first take the its_lock, then the irq_lock.
@@ -234,10 +235,14 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
/*
* Only valid injection if changing level for level-triggered IRQs or for a
- * rising edge.
+ * rising edge, and in-kernel connected IRQ lines can only be controlled by
+ * their owner.
*/
-static bool vgic_validate_injection(struct vgic_irq *irq, bool level)
+static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
{
+ if (irq->owner != owner)
+ return false;
+
switch (irq->config) {
case VGIC_CONFIG_LEVEL:
return irq->line_level != level;
@@ -285,8 +290,10 @@ retry:
* won't see this one until it exits for some other
* reason.
*/
- if (vcpu)
+ if (vcpu) {
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
+ }
return false;
}
@@ -332,6 +339,7 @@ retry:
spin_unlock(&irq->irq_lock);
spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
return true;
@@ -346,13 +354,16 @@ retry:
* false: to ignore the call
* Level-sensitive true: raise the input signal
* false: lower the input signal
+ * @owner: The opaque pointer to the owner of the IRQ being raised to verify
+ * that the caller is allowed to inject this IRQ. Userspace
+ * injections will have owner == NULL.
*
* The VGIC is not concerned with devices being active-LOW or active-HIGH for
* level-sensitive interrupts. You can think of the level parameter as 1
* being HIGH and 0 being LOW and all devices being active-HIGH.
*/
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level)
+ bool level, void *owner)
{
struct kvm_vcpu *vcpu;
struct vgic_irq *irq;
@@ -374,7 +385,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
spin_lock(&irq->irq_lock);
- if (!vgic_validate_injection(irq, level)) {
+ if (!vgic_validate_injection(irq, level, owner)) {
/* Nothing to see here, move along... */
spin_unlock(&irq->irq_lock);
vgic_put_irq(kvm, irq);
@@ -431,6 +442,39 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
}
/**
+ * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
+ *
+ * @vcpu: Pointer to the VCPU (used for PPIs)
+ * @intid: The virtual INTID identifying the interrupt (PPI or SPI)
+ * @owner: Opaque pointer to the owner
+ *
+ * Returns 0 if intid is not already used by another in-kernel device and the
+ * owner is set, otherwise returns an error code.
+ */
+int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
+{
+ struct vgic_irq *irq;
+ int ret = 0;
+
+ if (!vgic_initialized(vcpu->kvm))
+ return -EAGAIN;
+
+ /* SGIs and LPIs cannot be wired up to any device */
+ if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
+ return -EINVAL;
+
+ irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
+ spin_lock(&irq->irq_lock);
+ if (irq->owner && irq->owner != owner)
+ ret = -EEXIST;
+ else
+ irq->owner = owner;
+ spin_unlock(&irq->irq_lock);
+
+ return ret;
+}
+
+/**
* vgic_prune_ap_list - Remove non-relevant interrupts from the list
*
* @vcpu: The VCPU pointer
@@ -721,8 +765,10 @@ void vgic_kick_vcpus(struct kvm *kvm)
* a good kick...
*/
kvm_for_each_vcpu(c, vcpu, kvm) {
- if (kvm_vgic_vcpu_pending_irq(vcpu))
+ if (kvm_vgic_vcpu_pending_irq(vcpu)) {
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
+ }
}
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f0fe9d02f6bb..19f0ecb9b93e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -73,17 +73,17 @@ MODULE_LICENSE("GPL");
/* Architectures should define their poll value according to the halt latency */
unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
-module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns);
/* Default doubles per-vcpu halt_poll_ns. */
unsigned int halt_poll_ns_grow = 2;
-module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns_grow, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
/* Default resets per-vcpu halt_poll_ns . */
unsigned int halt_poll_ns_shrink;
-module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
+module_param(halt_poll_ns_shrink, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
/*
@@ -3191,6 +3191,12 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
return PTR_ERR(file);
}
+ /*
+ * Don't call kvm_put_kvm anymore at this point; file->f_op is
+ * already set, with ->release() being kvm_vm_release(). In error
+ * cases it will be called by the final fput(file) and will take
+ * care of doing kvm_put_kvm(kvm).
+ */
if (kvm_create_vm_debugfs(kvm, r) < 0) {
put_unused_fd(r);
fput(file);