summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 14:20:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 14:20:14 -0700
commitbf9095424d027e942e1d1ee74977e17b7df8e455 (patch)
tree57659cf68b7df09005bc5ada4d315d66472cebf3 /arch/arm64/kvm
parent98931dd95fd489fcbfa97da563505a6f071d7c77 (diff)
parentffd1925a596ce68bed7d81c61cb64bc35f788a9d (diff)
downloadlinux-bf9095424d027e942e1d1ee74977e17b7df8e455.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "S390: - ultravisor communication device driver - fix TEID on terminating storage key ops RISC-V: - Added Sv57x4 support for G-stage page table - Added range based local HFENCE functions - Added remote HFENCE functions based on VCPU requests - Added ISA extension registers in ONE_REG interface - Updated KVM RISC-V maintainers entry to cover selftests support ARM: - Add support for the ARMv8.6 WFxT extension - Guard pages for the EL2 stacks - Trap and emulate AArch32 ID registers to hide unsupported features - Ability to select and save/restore the set of hypercalls exposed to the guest - Support for PSCI-initiated suspend in collaboration with userspace - GICv3 register-based LPI invalidation support - Move host PMU event merging into the vcpu data structure - GICv3 ITS save/restore fixes - The usual set of small-scale cleanups and fixes x86: - New ioctls to get/set TSC frequency for a whole VM - Allow userspace to opt out of hypercall patching - Only do MSR filtering for MSRs accessed by rdmsr/wrmsr AMD SEV improvements: - Add KVM_EXIT_SHUTDOWN metadata for SEV-ES - V_TSC_AUX support Nested virtualization improvements for AMD: - Support for "nested nested" optimizations (nested vVMLOAD/VMSAVE, nested vGIF) - Allow AVIC to co-exist with a nested guest running - Fixes for LBR virtualizations when a nested guest is running, and nested LBR virtualization support - PAUSE filtering for nested hypervisors Guest support: - Decoupling of vcpu_is_preempted from PV spinlocks" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (199 commits) KVM: x86: Fix the intel_pt PMI handling wrongly considered from guest KVM: selftests: x86: Sync the new name of the test case to .gitignore Documentation: kvm: reorder ARM-specific section about KVM_SYSTEM_EVENT_SUSPEND x86, kvm: use correct GFP flags for preemption disabled KVM: LAPIC: Drop pending LAPIC timer injection when canceling the timer x86/kvm: Alloc dummy async #PF token outside of raw spinlock KVM: x86: avoid calling x86 emulator without a decoded instruction KVM: SVM: Use kzalloc for sev ioctl interfaces to prevent kernel data leak x86/fpu: KVM: Set the base guest FPU uABI size to sizeof(struct kvm_xsave) s390/uv_uapi: depend on CONFIG_S390 KVM: selftests: x86: Fix test failure on arch lbr capable platforms KVM: LAPIC: Trace LAPIC timer expiration on every vmentry KVM: s390: selftest: Test suppression indication on key prot exception KVM: s390: Don't indicate suppression on dirtying, failing memop selftests: drivers/s390x: Add uvdevice tests drivers/s390/char: Add Ultravisor io device MAINTAINERS: Update KVM RISC-V entry to cover selftests support RISC-V: KVM: Introduce ISA extension register RISC-V: KVM: Cleanup stale TLB entries when host CPU changes RISC-V: KVM: Add remote HFENCE functions based on VCPU requests ...
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/Makefile4
-rw-r--r--arch/arm64/kvm/arch_timer.c47
-rw-r--r--arch/arm64/kvm/arm.c164
-rw-r--r--arch/arm64/kvm/guest.c10
-rw-r--r--arch/arm64/kvm/handle_exit.c49
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mm.h6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S32
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c18
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c78
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c31
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c57
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c3
-rw-r--r--arch/arm64/kvm/hypercalls.c327
-rw-r--r--arch/arm64/kvm/mmu.c68
-rw-r--r--arch/arm64/kvm/pmu-emul.c3
-rw-r--r--arch/arm64/kvm/pmu.c40
-rw-r--r--arch/arm64/kvm/psci.c248
-rw-r--r--arch/arm64/kvm/sys_regs.c294
-rw-r--r--arch/arm64/kvm/sys_regs.h9
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c13
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c160
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v2.c18
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c125
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c4
-rw-r--r--arch/arm64/kvm/vgic/vgic.h10
25 files changed, 1294 insertions, 524 deletions
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 261644b1a6bb..aa127ae9f675 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
- vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \
+ vgic-sys-reg-v3.o fpsimd.o pkvm.o \
arch_timer.o trng.o vmid.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
@@ -22,7 +22,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
vgic/vgic-its.o vgic/vgic-debug.o
-kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o
+kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
always-y := hyp_constants.h hyp-constants.s
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 6e542e2eae32..4e39ace073af 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -208,18 +208,16 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
+static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
+ u64 val)
{
- u64 cval, now;
-
- cval = timer_get_cval(timer_ctx);
- now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
+ u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
- if (now < cval) {
+ if (now < val) {
u64 ns;
ns = cyclecounter_cyc2ns(timecounter->cc,
- cval - now,
+ val - now,
timecounter->mask,
&timecounter->frac);
return ns;
@@ -228,6 +226,11 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
return 0;
}
+static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
+{
+ return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
+}
+
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
WARN_ON(timer_ctx && timer_ctx->loaded);
@@ -236,6 +239,20 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
(ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}
+static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
+{
+ return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
+ (vcpu->arch.flags & KVM_ARM64_WFIT));
+}
+
+static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_context *ctx = vcpu_vtimer(vcpu);
+ u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+
+ return kvm_counter_compute_delta(ctx, val);
+}
+
/*
* Returns the earliest expiration time in ns among guest timers.
* Note that it will return 0 if none of timers can fire.
@@ -253,6 +270,9 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
}
+ if (vcpu_has_wfit_active(vcpu))
+ min_delta = min(min_delta, wfit_delay_ns(vcpu));
+
/* If none of timers can fire, then return 0 */
if (min_delta == ULLONG_MAX)
return 0;
@@ -350,15 +370,9 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
return cval <= now;
}
-bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- struct timer_map map;
-
- get_timer_map(vcpu, &map);
-
- return kvm_timer_should_fire(map.direct_vtimer) ||
- kvm_timer_should_fire(map.direct_ptimer) ||
- kvm_timer_should_fire(map.emul_ptimer);
+ return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}
/*
@@ -484,7 +498,8 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
*/
if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
!kvm_timer_irq_can_fire(map.direct_ptimer) &&
- !kvm_timer_irq_can_fire(map.emul_ptimer))
+ !kvm_timer_irq_can_fire(map.emul_ptimer) &&
+ !vcpu_has_wfit_active(vcpu))
return;
/*
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index cedc3ba2c098..400bb0fe2745 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -97,6 +97,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
}
mutex_unlock(&kvm->lock);
break;
+ case KVM_CAP_ARM_SYSTEM_SUSPEND:
+ r = 0;
+ set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
+ break;
default:
r = -EINVAL;
break;
@@ -153,9 +157,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_vgic_early_init(kvm);
/* The maximum number of VCPUs is limited by the host's GIC model */
- kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
+ kvm->max_vcpus = kvm_arm_default_max_vcpus();
set_default_spectre(kvm);
+ kvm_arm_init_hypercalls(kvm);
return ret;
out_free_stage2_pgd:
@@ -210,6 +215,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_PTP_KVM:
+ case KVM_CAP_ARM_SYSTEM_SUSPEND:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -230,7 +236,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
if (kvm)
- r = kvm->arch.max_vcpus;
+ r = kvm->max_vcpus;
else
r = kvm_arm_default_max_vcpus();
break;
@@ -306,7 +312,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
return -EBUSY;
- if (id >= kvm->arch.max_vcpus)
+ if (id >= kvm->max_vcpus)
return -EINVAL;
return 0;
@@ -356,11 +362,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_arm_vcpu_destroy(vcpu);
}
-int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
-{
- return kvm_timer_is_pending(vcpu);
-}
-
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
@@ -432,20 +433,34 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
}
-static void vcpu_power_off(struct kvm_vcpu *vcpu)
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.power_off = true;
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
+bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
+}
+
+static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
+ kvm_make_request(KVM_REQ_SUSPEND, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- if (vcpu->arch.power_off)
- mp_state->mp_state = KVM_MP_STATE_STOPPED;
- else
- mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+ *mp_state = vcpu->arch.mp_state;
return 0;
}
@@ -457,10 +472,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.power_off = false;
+ vcpu->arch.mp_state = *mp_state;
break;
case KVM_MP_STATE_STOPPED:
- vcpu_power_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
+ break;
+ case KVM_MP_STATE_SUSPENDED:
+ kvm_arm_vcpu_suspend(vcpu);
break;
default:
ret = -EINVAL;
@@ -480,7 +498,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
- && !v->arch.power_off && !v->arch.pause);
+ && !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
}
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
@@ -592,15 +610,15 @@ void kvm_arm_resume_guest(struct kvm *kvm)
}
}
-static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_sleep(struct kvm_vcpu *vcpu)
{
struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
rcuwait_wait_event(wait,
- (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+ (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
- if (vcpu->arch.power_off || vcpu->arch.pause) {
+ if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) {
/* Awaken to handle a signal, request we sleep again later. */
kvm_make_request(KVM_REQ_SLEEP, vcpu);
}
@@ -639,6 +657,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
preempt_enable();
kvm_vcpu_halt(vcpu);
+ vcpu->arch.flags &= ~KVM_ARM64_WFIT;
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
preempt_disable();
@@ -646,11 +665,53 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
preempt_enable();
}
-static void check_vcpu_requests(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_arm_vcpu_suspended(vcpu))
+ return 1;
+
+ kvm_vcpu_wfi(vcpu);
+
+ /*
+ * The suspend state is sticky; we do not leave it until userspace
+ * explicitly marks the vCPU as runnable. Request that we suspend again
+ * later.
+ */
+ kvm_make_request(KVM_REQ_SUSPEND, vcpu);
+
+ /*
+ * Check to make sure the vCPU is actually runnable. If so, exit to
+ * userspace informing it of the wakeup condition.
+ */
+ if (kvm_arch_vcpu_runnable(vcpu)) {
+ memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP;
+ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+ return 0;
+ }
+
+ /*
+ * Otherwise, we were unblocked to process a different event, such as a
+ * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to
+ * process the event.
+ */
+ return 1;
+}
+
+/**
+ * check_vcpu_requests - check and handle pending vCPU requests
+ * @vcpu: the VCPU pointer
+ *
+ * Return: 1 if we should enter the guest
+ * 0 if we should exit to userspace
+ * < 0 if we should exit to userspace, where the return value indicates
+ * an error
+ */
+static int check_vcpu_requests(struct kvm_vcpu *vcpu)
{
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
- vcpu_req_sleep(vcpu);
+ kvm_vcpu_sleep(vcpu);
if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
kvm_reset_vcpu(vcpu);
@@ -675,7 +736,12 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
kvm_pmu_handle_pmcr(vcpu,
__vcpu_sys_reg(vcpu, PMCR_EL0));
+
+ if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
+ return kvm_vcpu_suspend(vcpu);
}
+
+ return 1;
}
static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
@@ -792,7 +858,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (!ret)
ret = 1;
- check_vcpu_requests(vcpu);
+ if (ret > 0)
+ ret = check_vcpu_requests(vcpu);
/*
* Preparing the interrupts to be injected also
@@ -816,6 +883,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_vgic_flush_hwstate(vcpu);
+ kvm_pmu_update_vcpu_events(vcpu);
+
/*
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
@@ -1125,9 +1194,9 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* Handle the "start in power-off" case.
*/
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
- vcpu_power_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
else
- vcpu->arch.power_off = false;
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
return 0;
}
@@ -1485,7 +1554,6 @@ static void cpu_prepare_hyp_mode(int cpu)
tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
params->tcr_el2 = tcr;
- params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
params->pgd_pa = kvm_mmu_get_httbr();
if (is_protected_kvm_enabled())
params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
@@ -1763,8 +1831,6 @@ static int init_subsystems(void)
kvm_register_perf_callbacks(NULL);
- kvm_sys_reg_table_init();
-
out:
if (err || !is_protected_kvm_enabled())
on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
@@ -1935,14 +2001,46 @@ static int init_hyp_mode(void)
* Map the Hyp stack pages
*/
for_each_possible_cpu(cpu) {
+ struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
- err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
- PAGE_HYP);
+ unsigned long hyp_addr;
+
+ /*
+ * Allocate a contiguous HYP private VA range for the stack
+ * and guard page. The allocation is also aligned based on
+ * the order of its size.
+ */
+ err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ if (err) {
+ kvm_err("Cannot allocate hyp stack guard page\n");
+ goto out_err;
+ }
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
+ __pa(stack_page), PAGE_HYP);
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_err;
}
+
+ /*
+ * Save the stack PA in nvhe_init_params. This will be needed
+ * to recreate the stack mapping in protected nVHE mode.
+ * __hyp_pa() won't do the right thing there, since the stack
+ * has been mapped in the flexible private VA space.
+ */
+ params->stack_pa = __pa(stack_page);
+
+ params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
for_each_possible_cpu(cpu) {
@@ -2091,6 +2189,12 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
+ err = kvm_sys_reg_table_init();
+ if (err) {
+ kvm_info("Error initializing system register tables");
+ return err;
+ }
+
in_hyp_mode = is_kernel_in_hyp_mode();
if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 7e15b03fbdf8..8c607199cad1 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -18,7 +18,7 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
-#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
#include <asm/cputype.h>
#include <linux/uaccess.h>
#include <asm/fpsimd.h>
@@ -756,7 +756,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE: return get_core_reg(vcpu, reg);
- case KVM_REG_ARM_FW: return kvm_arm_get_fw_reg(vcpu, reg);
+ case KVM_REG_ARM_FW:
+ case KVM_REG_ARM_FW_FEAT_BMAP:
+ return kvm_arm_get_fw_reg(vcpu, reg);
case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg);
}
@@ -774,7 +776,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg);
- case KVM_REG_ARM_FW: return kvm_arm_set_fw_reg(vcpu, reg);
+ case KVM_REG_ARM_FW:
+ case KVM_REG_ARM_FW_FEAT_BMAP:
+ return kvm_arm_set_fw_reg(vcpu, reg);
case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg);
}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 0b829292dc54..f66c0142b335 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -80,24 +80,51 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
*
* @vcpu: the vcpu pointer
*
- * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * WFE[T]: Yield the CPU and come back to this vcpu when the scheduler
* decides to.
* WFI: Simply call kvm_vcpu_halt(), which will halt execution of
* world-switches and schedule other host processes until there is an
* incoming IRQ or FIQ to the VM.
+ * WFIT: Same as WFI, with a timed wakeup implemented as a background timer
+ *
+ * WF{I,E}T can immediately return if the deadline has already expired.
*/
static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+
+ if (esr & ESR_ELx_WFx_ISS_WFE) {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
vcpu->stat.wfe_exit_stat++;
- kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
} else {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
- kvm_vcpu_wfi(vcpu);
}
+ if (esr & ESR_ELx_WFx_ISS_WFxT) {
+ if (esr & ESR_ELx_WFx_ISS_RV) {
+ u64 val, now;
+
+ now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT);
+ val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+
+ if (now >= val)
+ goto out;
+ } else {
+ /* Treat WFxT as WFx if RN is invalid */
+ esr &= ~ESR_ELx_WFx_ISS_WFxT;
+ }
+ }
+
+ if (esr & ESR_ELx_WFx_ISS_WFE) {
+ kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
+ } else {
+ if (esr & ESR_ELx_WFx_ISS_WFxT)
+ vcpu->arch.flags |= KVM_ARM64_WFIT;
+
+ kvm_vcpu_wfi(vcpu);
+ }
+out:
kvm_incr_pc(vcpu);
return 1;
@@ -169,6 +196,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64,
[ESR_ELx_EC_CP14_MR] = kvm_handle_cp14_32,
[ESR_ELx_EC_CP14_LS] = kvm_handle_cp14_load_store,
+ [ESR_ELx_EC_CP10_ID] = kvm_handle_cp10_id,
[ESR_ELx_EC_CP14_64] = kvm_handle_cp14_64,
[ESR_ELx_EC_HVC32] = handle_hvc,
[ESR_ELx_EC_SMC32] = handle_smc,
@@ -297,13 +325,8 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
u64 elr_in_kimg = __phys_to_kimg(elr_phys);
u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
u64 mode = spsr & PSR_MODE_MASK;
+ u64 panic_addr = elr_virt + hyp_offset;
- /*
- * The nVHE hyp symbols are not included by kallsyms to avoid issues
- * with aliasing. That means that the symbols cannot be printed with the
- * "%pS" format specifier, so fall back to the vmlinux address if
- * there's no better option.
- */
if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) {
kvm_err("Invalid host exception to nVHE hyp!\n");
} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
@@ -323,9 +346,11 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
if (file)
kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
else
- kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset);
+ kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
+ (void *)panic_addr);
} else {
- kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset);
+ kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
+ (void *)panic_addr);
}
/*
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 2d08510c6cc1..42d8eb9bfe72 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -19,8 +19,10 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
-unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
- enum kvm_pgtable_prot prot);
+int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
+ enum kvm_pgtable_prot prot,
+ unsigned long *haddr);
+int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
unsigned long *start, unsigned long *end)
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 727c979b2b69..ea6a397b64a6 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -80,7 +80,7 @@ SYM_FUNC_START(__hyp_do_panic)
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
- ldr lr, =nvhe_hyp_panic_handler
+ adr_l lr, nvhe_hyp_panic_handler
hyp_kimg_va lr, x6
msr elr_el2, lr
@@ -125,13 +125,11 @@ alternative_else_nop_endif
add sp, sp, #16
/*
* Compute the idmap address of __kvm_handle_stub_hvc and
- * jump there. Since we use kimage_voffset, do not use the
- * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead
- * (by loading it from the constant pool).
+ * jump there.
*
* Preserve x0-x4, which may contain stub parameters.
*/
- ldr x5, =__kvm_handle_stub_hvc
+ adr_l x5, __kvm_handle_stub_hvc
hyp_pa x5, x6
br x5
SYM_FUNC_END(__host_hvc)
@@ -153,6 +151,18 @@ SYM_FUNC_END(__host_hvc)
.macro invalid_host_el2_vect
.align 7
+
+ /*
+ * Test whether the SP has overflowed, without corrupting a GPR.
+ * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit
+ * of SP should always be 1.
+ */
+ add sp, sp, x0 // sp' = sp + x0
+ sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
+ tbz x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@
+ sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
+ sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
+
/* If a guest is loaded, panic out of it. */
stp x0, x1, [sp, #-16]!
get_loaded_vcpu x0, x1
@@ -165,6 +175,18 @@ SYM_FUNC_END(__host_hvc)
* been partially clobbered by __host_enter.
*/
b hyp_panic
+
+.L__hyp_sp_overflow\@:
+ /*
+ * Reset SP to the top of the stack, to allow handling the hyp_panic.
+ * This corrupts the stack but is ok, since we won't be attempting
+ * any unwinding here.
+ */
+ ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
+ mov sp, x0
+
+ b hyp_panic_bad_stack
+ ASM_BUG()
.endm
.macro invalid_host_el1_vect
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 5e2197db0d32..3cea4b6ac23e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -160,7 +160,23 @@ static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ct
DECLARE_REG(size_t, size, host_ctxt, 2);
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
- cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot);
+ /*
+ * __pkvm_create_private_mapping() populates a pointer with the
+ * hypervisor start address of the allocation.
+ *
+ * However, handle___pkvm_create_private_mapping() hypercall crosses the
+ * EL1/EL2 boundary so the pointer would not be valid in this context.
+ *
+ * Instead pass the allocation address as the return value (or return
+ * ERR_PTR() on failure).
+ */
+ unsigned long haddr;
+ int err = __pkvm_create_private_mapping(phys, size, prot, &haddr);
+
+ if (err)
+ haddr = (unsigned long)ERR_PTR(err);
+
+ cpu_reg(host_ctxt, 1) = haddr;
}
static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index cdbe8e246418..96193cb31a39 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -37,36 +37,60 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
-unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
- enum kvm_pgtable_prot prot)
+/**
+ * pkvm_alloc_private_va_range - Allocates a private VA range.
+ * @size: The size of the VA range to reserve.
+ * @haddr: The hypervisor virtual start address of the allocation.
+ *
+ * The private virtual address (VA) range is allocated above __io_map_base
+ * and aligned based on the order of @size.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
- unsigned long addr;
- int err;
+ unsigned long base, addr;
+ int ret = 0;
hyp_spin_lock(&pkvm_pgd_lock);
- size = PAGE_ALIGN(size + offset_in_page(phys));
- addr = __io_map_base;
- __io_map_base += size;
+ /* Align the allocation based on the order of its size */
+ addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
- /* Are we overflowing on the vmemmap ? */
- if (__io_map_base > __hyp_vmemmap) {
- __io_map_base -= size;
- addr = (unsigned long)ERR_PTR(-ENOMEM);
- goto out;
- }
+ /* The allocated size is always a multiple of PAGE_SIZE */
+ base = addr + PAGE_ALIGN(size);
- err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot);
- if (err) {
- addr = (unsigned long)ERR_PTR(err);
- goto out;
+ /* Are we overflowing on the vmemmap ? */
+ if (!addr || base > __hyp_vmemmap)
+ ret = -ENOMEM;
+ else {
+ __io_map_base = base;
+ *haddr = addr;
}
- addr = addr + offset_in_page(phys);
-out:
hyp_spin_unlock(&pkvm_pgd_lock);
- return addr;
+ return ret;
+}
+
+int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
+ enum kvm_pgtable_prot prot,
+ unsigned long *haddr)
+{
+ unsigned long addr;
+ int err;
+
+ size = PAGE_ALIGN(size + offset_in_page(phys));
+ err = pkvm_alloc_private_va_range(size, &addr);
+ if (err)
+ return err;
+
+ err = __pkvm_create_mappings(addr, size, phys, prot);
+ if (err)
+ return err;
+
+ *haddr = addr + offset_in_page(phys);
+ return err;
}
int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot)
@@ -146,7 +170,8 @@ int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot)
int hyp_map_vectors(void)
{
phys_addr_t phys;
- void *bp_base;
+ unsigned long bp_base;
+ int ret;
if (!kvm_system_needs_idmapped_vectors()) {
__hyp_bp_vect_base = __bp_harden_hyp_vecs;
@@ -154,13 +179,12 @@ int hyp_map_vectors(void)
}
phys = __hyp_pa(__bp_harden_hyp_vecs);
- bp_base = (void *)__pkvm_create_private_mapping(phys,
- __BP_HARDEN_HYP_VECS_SZ,
- PAGE_HYP_EXEC);
- if (IS_ERR_OR_NULL(bp_base))
- return PTR_ERR(bp_base);
+ ret = __pkvm_create_private_mapping(phys, __BP_HARDEN_HYP_VECS_SZ,
+ PAGE_HYP_EXEC, &bp_base);
+ if (ret)
+ return ret;
- __hyp_bp_vect_base = bp_base;
+ __hyp_bp_vect_base = (void *)bp_base;
return 0;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 27af337f9fea..e8d4ea2fcfa0 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -99,17 +99,42 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
+ unsigned long hyp_addr;
+
start = (void *)kern_hyp_va(per_cpu_base[i]);
end = start + PAGE_ALIGN(hyp_percpu_size);
ret = pkvm_create_mappings(start, end, PAGE_HYP);
if (ret)
return ret;
- end = (void *)per_cpu_ptr(&kvm_init_params, i)->stack_hyp_va;
- start = end - PAGE_SIZE;
- ret = pkvm_create_mappings(start, end, PAGE_HYP);
+ /*
+ * Allocate a contiguous HYP private VA range for the stack
+ * and guard page. The allocation is also aligned based on
+ * the order of its size.
+ */
+ ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ if (ret)
+ return ret;
+
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ hyp_spin_lock(&pkvm_pgd_lock);
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
+ PAGE_SIZE, params->stack_pa, PAGE_HYP);
+ hyp_spin_unlock(&pkvm_pgd_lock);
if (ret)
return ret;
+
+ /* Update stack_hyp_va to end of the stack's private VA range */
+ params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index caace61ea459..6db801db8f27 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -150,16 +150,13 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
}
}
-/**
+/*
* Disable host events, enable guest events
*/
-static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
+#ifdef CONFIG_HW_PERF_EVENTS
+static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
+ struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
if (pmu->events_host)
write_sysreg(pmu->events_host, pmcntenclr_el0);
@@ -170,16 +167,12 @@ static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
return (pmu->events_host || pmu->events_guest);
}
-/**
+/*
* Disable guest events, enable host events
*/
-static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
+static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
- struct kvm_pmu_events *pmu;
-
- host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
- pmu = &host->pmu_events;
+ struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
if (pmu->events_guest)
write_sysreg(pmu->events_guest, pmcntenclr_el0);
@@ -187,8 +180,12 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
if (pmu->events_host)
write_sysreg(pmu->events_host, pmcntenset_el0);
}
+#else
+#define __pmu_switch_to_guest(v) ({ false; })
+#define __pmu_switch_to_host(v) do {} while (0)
+#endif
-/**
+/*
* Handler for protected VM MSR, MRS or System instruction execution in AArch64.
*
* Returns true if the hypervisor has handled the exit, and control should go
@@ -205,23 +202,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
kvm_handle_pvm_sysreg(vcpu, exit_code));
}
-/**
- * Handler for protected floating-point and Advanced SIMD accesses.
- *
- * Returns true if the hypervisor has handled the exit, and control should go
- * back to the guest, or false if it hasn't.
- */
-static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- /* Linux guests assume support for floating-point and Advanced SIMD. */
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
- PVM_ID_AA64PFR0_ALLOW));
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
- PVM_ID_AA64PFR0_ALLOW));
-
- return kvm_hyp_handle_fpsimd(vcpu, exit_code);
-}
-
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -237,7 +217,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
[ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
- [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
@@ -304,7 +284,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
- pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
+ pmu_switch_needed = __pmu_switch_to_guest(vcpu);
__sysreg_save_state_nvhe(host_ctxt);
/*
@@ -366,7 +346,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__debug_restore_host_buffers_nvhe(vcpu);
if (pmu_switch_needed)
- __pmu_switch_to_host(host_ctxt);
+ __pmu_switch_to_host(vcpu);
/* Returning to host will clear PSR.I, remask PMR if needed */
if (system_uses_irq_prio_masking())
@@ -377,7 +357,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
return exit_code;
}
-void __noreturn hyp_panic(void)
+asmlinkage void __noreturn hyp_panic(void)
{
u64 spsr = read_sysreg_el2(SYS_SPSR);
u64 elr = read_sysreg_el2(SYS_ELR);
@@ -399,6 +379,11 @@ void __noreturn hyp_panic(void)
unreachable();
}
+asmlinkage void __noreturn hyp_panic_bad_stack(void)
+{
+ hyp_panic();
+}
+
asmlinkage void kvm_unexpected_el2_exception(void)
{
return __kvm_unexpected_el2_exception();
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 619f94fc95fa..b6d86e423319 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -90,9 +90,6 @@ static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
u64 set_mask = 0;
u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
- if (!vcpu_has_sve(vcpu))
- allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
-
set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 202b8c455724..c9f401fa01a9 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -9,6 +9,13 @@
#include <kvm/arm_hypercalls.h>
#include <kvm/arm_psci.h>
+#define KVM_ARM_SMCCC_STD_FEATURES \
+ GENMASK(KVM_REG_ARM_STD_BMAP_BIT_COUNT - 1, 0)
+#define KVM_ARM_SMCCC_STD_HYP_FEATURES \
+ GENMASK(KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT - 1, 0)
+#define KVM_ARM_SMCCC_VENDOR_HYP_FEATURES \
+ GENMASK(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT - 1, 0)
+
static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
{
struct system_time_snapshot systime_snapshot;
@@ -58,13 +65,73 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
val[3] = lower_32_bits(cycles);
}
+static bool kvm_hvc_call_default_allowed(u32 func_id)
+{
+ switch (func_id) {
+ /*
+ * List of function-ids that are not gated with the bitmapped
+ * feature firmware registers, and are to be allowed for
+ * servicing the call by default.
+ */
+ case ARM_SMCCC_VERSION_FUNC_ID:
+ case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
+ return true;
+ default:
+ /* PSCI 0.2 and up is in the 0:0x1f range */
+ if (ARM_SMCCC_OWNER_NUM(func_id) == ARM_SMCCC_OWNER_STANDARD &&
+ ARM_SMCCC_FUNC_NUM(func_id) <= 0x1f)
+ return true;
+
+ /*
+ * KVM's PSCI 0.1 doesn't comply with SMCCC, and has
+ * its own function-id base and range
+ */
+ if (func_id >= KVM_PSCI_FN(0) && func_id <= KVM_PSCI_FN(3))
+ return true;
+
+ return false;
+ }
+}
+
+static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
+{
+ struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
+
+ switch (func_id) {
+ case ARM_SMCCC_TRNG_VERSION:
+ case ARM_SMCCC_TRNG_FEATURES:
+ case ARM_SMCCC_TRNG_GET_UUID:
+ case ARM_SMCCC_TRNG_RND32:
+ case ARM_SMCCC_TRNG_RND64:
+ return test_bit(KVM_REG_ARM_STD_BIT_TRNG_V1_0,
+ &smccc_feat->std_bmap);
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ case ARM_SMCCC_HV_PV_TIME_ST:
+ return test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME,
+ &smccc_feat->std_hyp_bmap);
+ case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
+ case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
+ return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT,
+ &smccc_feat->vendor_hyp_bmap);
+ case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
+ return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP,
+ &smccc_feat->vendor_hyp_bmap);
+ default:
+ return kvm_hvc_call_default_allowed(func_id);
+ }
+}
+
int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
{
+ struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
u32 func_id = smccc_get_function(vcpu);
u64 val[4] = {SMCCC_RET_NOT_SUPPORTED};
u32 feature;
gpa_t gpa;
+ if (!kvm_hvc_call_allowed(vcpu, func_id))
+ goto out;
+
switch (func_id) {
case ARM_SMCCC_VERSION_FUNC_ID:
val[0] = ARM_SMCCC_VERSION_1_1;
@@ -120,7 +187,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
}
break;
case ARM_SMCCC_HV_PV_TIME_FEATURES:
- val[0] = SMCCC_RET_SUCCESS;
+ if (test_bit(KVM_REG_ARM_STD_HYP_BIT_PV_TIME,
+ &smccc_feat->std_hyp_bmap))
+ val[0] = SMCCC_RET_SUCCESS;
break;
}
break;
@@ -139,8 +208,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3;
break;
case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
- val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES);
- val[0] |= BIT(ARM_SMCCC_KVM_FUNC_PTP);
+ val[0] = smccc_feat->vendor_hyp_bmap;
break;
case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
kvm_ptp_get_time(vcpu, val);
@@ -155,6 +223,259 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
return kvm_psci_call(vcpu);
}
+out:
smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]);
return 1;
}
+
+static const u64 kvm_arm_fw_reg_ids[] = {
+ KVM_REG_ARM_PSCI_VERSION,
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1,
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2,
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3,
+ KVM_REG_ARM_STD_BMAP,
+ KVM_REG_ARM_STD_HYP_BMAP,
+ KVM_REG_ARM_VENDOR_HYP_BMAP,
+};
+
+void kvm_arm_init_hypercalls(struct kvm *kvm)
+{
+ struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat;
+
+ smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES;
+ smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES;
+ smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
+}
+
+int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
+{
+ return ARRAY_SIZE(kvm_arm_fw_reg_ids);
+}
+
+int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kvm_arm_fw_reg_ids); i++) {
+ if (put_user(kvm_arm_fw_reg_ids[i], uindices++))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+#define KVM_REG_FEATURE_LEVEL_MASK GENMASK(3, 0)
+
+/*
+ * Convert the workaround level into an easy-to-compare number, where higher
+ * values mean better protection.
+ */
+static int get_kernel_wa_level(u64 regid)
+{
+ switch (regid) {
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ switch (arm64_get_spectre_v2_state()) {
+ case SPECTRE_VULNERABLE:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
+ case SPECTRE_MITIGATED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
+ case SPECTRE_UNAFFECTED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
+ }
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ switch (arm64_get_spectre_v4_state()) {
+ case SPECTRE_MITIGATED:
+ /*
+ * As for the hypercall discovery, we pretend we
+ * don't have any FW mitigation if SSBS is there at
+ * all times.
+ */
+ if (cpus_have_final_cap(ARM64_SSBS))
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+ fallthrough;
+ case SPECTRE_UNAFFECTED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
+ case SPECTRE_VULNERABLE:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+ }
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+ switch (arm64_get_spectre_bhb_state()) {
+ case SPECTRE_VULNERABLE:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
+ case SPECTRE_MITIGATED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL;
+ case SPECTRE_UNAFFECTED:
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED;
+ }
+ return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
+ }
+
+ return -EINVAL;
+}
+
+int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+
+ switch (reg->id) {
+ case KVM_REG_ARM_PSCI_VERSION:
+ val = kvm_psci_version(vcpu);
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+ val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
+ break;
+ case KVM_REG_ARM_STD_BMAP:
+ val = READ_ONCE(smccc_feat->std_bmap);
+ break;
+ case KVM_REG_ARM_STD_HYP_BMAP:
+ val = READ_ONCE(smccc_feat->std_hyp_bmap);
+ break;
+ case KVM_REG_ARM_VENDOR_HYP_BMAP:
+ val = READ_ONCE(smccc_feat->vendor_hyp_bmap);
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
+{
+ int ret = 0;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_smccc_features *smccc_feat = &kvm->arch.smccc_feat;
+ unsigned long *fw_reg_bmap, fw_reg_features;
+
+ switch (reg_id) {
+ case KVM_REG_ARM_STD_BMAP:
+ fw_reg_bmap = &smccc_feat->std_bmap;
+ fw_reg_features = KVM_ARM_SMCCC_STD_FEATURES;
+ break;
+ case KVM_REG_ARM_STD_HYP_BMAP:
+ fw_reg_bmap = &smccc_feat->std_hyp_bmap;
+ fw_reg_features = KVM_ARM_SMCCC_STD_HYP_FEATURES;
+ break;
+ case KVM_REG_ARM_VENDOR_HYP_BMAP:
+ fw_reg_bmap = &smccc_feat->vendor_hyp_bmap;
+ fw_reg_features = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ /* Check for unsupported bit */
+ if (val & ~fw_reg_features)
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+
+ if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) &&
+ val != *fw_reg_bmap) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ WRITE_ONCE(*fw_reg_bmap, val);
+out:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int wa_level;
+
+ if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ switch (reg->id) {
+ case KVM_REG_ARM_PSCI_VERSION:
+ {
+ bool wants_02;
+
+ wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
+
+ switch (val) {
+ case KVM_ARM_PSCI_0_1:
+ if (wants_02)
+ return -EINVAL;
+ vcpu->kvm->arch.psci_version = val;
+ return 0;
+ case KVM_ARM_PSCI_0_2:
+ case KVM_ARM_PSCI_1_0:
+ case KVM_ARM_PSCI_1_1:
+ if (!wants_02)
+ return -EINVAL;
+ vcpu->kvm->arch.psci_version = val;
+ return 0;
+ }
+ break;
+ }
+
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
+ if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
+ return -EINVAL;
+
+ if (get_kernel_wa_level(reg->id) < val)
+ return -EINVAL;
+
+ return 0;
+
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
+ return -EINVAL;
+
+ /* The enabled bit must not be set unless the level is AVAIL. */
+ if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) &&
+ (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL)
+ return -EINVAL;
+
+ /*
+ * Map all the possible incoming states to the only two we
+ * really want to deal with.
+ */
+ switch (val & KVM_REG_FEATURE_LEVEL_MASK) {
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
+ wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+ break;
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
+ case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
+ wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /*
+ * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
+ * other way around.
+ */
+ if (get_kernel_wa_level(reg->id) < wa_level)
+ return -EINVAL;
+
+ return 0;
+ case KVM_REG_ARM_STD_BMAP:
+ case KVM_REG_ARM_STD_HYP_BMAP:
+ case KVM_REG_ARM_VENDOR_HYP_BMAP:
+ return kvm_arm_set_fw_reg_bmap(vcpu, reg->id, val);
+ default:
+ return -ENOENT;
+ }
+
+ return -EINVAL;
+}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 5400fc020164..f5651a05b6a8 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -258,8 +258,8 @@ static bool kvm_host_owns_hyp_mappings(void)
return true;
}
-static int __create_hyp_mappings(unsigned long start, unsigned long size,
- unsigned long phys, enum kvm_pgtable_prot prot)
+int __create_hyp_mappings(unsigned long start, unsigned long size,
+ unsigned long phys, enum kvm_pgtable_prot prot)
{
int err;
@@ -457,23 +457,22 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
return 0;
}
-static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
- unsigned long *haddr,
- enum kvm_pgtable_prot prot)
+
+/**
+ * hyp_alloc_private_va_range - Allocates a private VA range.
+ * @size: The size of the VA range to reserve.
+ * @haddr: The hypervisor virtual start address of the allocation.
+ *
+ * The private virtual address (VA) range is allocated below io_map_base
+ * and aligned based on the order of @size.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
{
unsigned long base;
int ret = 0;
- if (!kvm_host_owns_hyp_mappings()) {
- base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
- phys_addr, size, prot);
- if (IS_ERR_OR_NULL((void *)base))
- return PTR_ERR((void *)base);
- *haddr = base;
-
- return 0;
- }
-
mutex_lock(&kvm_hyp_pgd_mutex);
/*
@@ -484,8 +483,10 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
*
* The allocated size is always a multiple of PAGE_SIZE.
*/
- size = PAGE_ALIGN(size + offset_in_page(phys_addr));
- base = io_map_base - size;
+ base = io_map_base - PAGE_ALIGN(size);
+
+ /* Align the allocation based on the order of its size */
+ base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
/*
* Verify that BIT(VA_BITS - 1) hasn't been flipped by
@@ -495,19 +496,40 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
if ((base ^ io_map_base) & BIT(VA_BITS - 1))
ret = -ENOMEM;
else
- io_map_base = base;
+ *haddr = io_map_base = base;
mutex_unlock(&kvm_hyp_pgd_mutex);
+ return ret;
+}
+
+static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
+ unsigned long *haddr,
+ enum kvm_pgtable_prot prot)
+{
+ unsigned long addr;
+ int ret = 0;
+
+ if (!kvm_host_owns_hyp_mappings()) {
+ addr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
+ phys_addr, size, prot);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+ *haddr = addr;
+
+ return 0;
+ }
+
+ size = PAGE_ALIGN(size + offset_in_page(phys_addr));
+ ret = hyp_alloc_private_va_range(size, &addr);
if (ret)
- goto out;
+ return ret;
- ret = __create_hyp_mappings(base, size, phys_addr, prot);
+ ret = __create_hyp_mappings(addr, size, phys_addr, prot);
if (ret)
- goto out;
+ return ret;
- *haddr = base + offset_in_page(phys_addr);
-out:
+ *haddr = addr + offset_in_page(phys_addr);
return ret;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 3dc990ac4f44..11c43bed5f97 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -774,8 +774,7 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
{
struct arm_pmu_entry *entry;
- if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF ||
- is_protected_kvm_enabled())
+ if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
return;
mutex_lock(&arm_pmus_lock);
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 03a6c1f4a09a..7887133d15f0 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -5,7 +5,8 @@
*/
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
-#include <asm/kvm_hyp.h>
+
+static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events);
/*
* Given the perf event attributes and system type, determine
@@ -25,21 +26,26 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
return (attr->exclude_host != attr->exclude_guest);
}
+struct kvm_pmu_events *kvm_get_pmu_events(void)
+{
+ return this_cpu_ptr(&kvm_pmu_events);
+}
+
/*
* Add events to track that we may want to switch at guest entry/exit
* time.
*/
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
{
- struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
+ struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !ctx || !kvm_pmu_switch_needed(attr))
+ if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr))
return;
if (!attr->exclude_host)
- ctx->pmu_events.events_host |= set;
+ pmu->events_host |= set;
if (!attr->exclude_guest)
- ctx->pmu_events.events_guest |= set;
+ pmu->events_guest |= set;
}
/*
@@ -47,13 +53,13 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
*/
void kvm_clr_pmu_events(u32 clr)
{
- struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
+ struct kvm_pmu_events *pmu = kvm_get_pmu_events();
- if (!kvm_arm_support_pmu_v3() || !ctx)
+ if (!kvm_arm_support_pmu_v3() || !pmu)
return;
- ctx->pmu_events.events_host &= ~clr;
- ctx->pmu_events.events_guest &= ~clr;
+ pmu->events_host &= ~clr;
+ pmu->events_guest &= ~clr;
}
#define PMEVTYPER_READ_CASE(idx) \
@@ -169,16 +175,16 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
*/
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
u32 events_guest, events_host;
if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
preempt_disable();
- host = this_cpu_ptr_hyp_sym(kvm_host_data);
- events_guest = host->pmu_events.events_guest;
- events_host = host->pmu_events.events_host;
+ pmu = kvm_get_pmu_events();
+ events_guest = pmu->events_guest;
+ events_host = pmu->events_host;
kvm_vcpu_pmu_enable_el0(events_guest);
kvm_vcpu_pmu_disable_el0(events_host);
@@ -190,15 +196,15 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
*/
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
{
- struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
u32 events_guest, events_host;
if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
- host = this_cpu_ptr_hyp_sym(kvm_host_data);
- events_guest = host->pmu_events.events_guest;
- events_host = host->pmu_events.events_host;
+ pmu = kvm_get_pmu_events();
+ events_guest = pmu->events_guest;
+ events_host = pmu->events_host;
kvm_vcpu_pmu_enable_el0(events_host);
kvm_vcpu_pmu_disable_el0(events_guest);
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 708d80e8e60d..7fbc4c1b9df0 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -51,13 +51,6 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
return PSCI_RET_SUCCESS;
}
-static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.power_off = true;
- kvm_make_request(KVM_REQ_SLEEP, vcpu);
- kvm_vcpu_kick(vcpu);
-}
-
static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu,
unsigned long affinity)
{
@@ -83,7 +76,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
*/
if (!vcpu)
return PSCI_RET_INVALID_PARAMS;
- if (!vcpu->arch.power_off) {
+ if (!kvm_arm_vcpu_stopped(vcpu)) {
if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
return PSCI_RET_ALREADY_ON;
else
@@ -107,12 +100,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
/*
- * Make sure the reset request is observed if the change to
- * power_off is observed.
+ * Make sure the reset request is observed if the RUNNABLE mp_state is
+ * observed.
*/
smp_wmb();
- vcpu->arch.power_off = false;
+ vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_vcpu_wake_up(vcpu);
return PSCI_RET_SUCCESS;
@@ -150,7 +143,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
mpidr = kvm_vcpu_get_mpidr_aff(tmp);
if ((mpidr & target_affinity_mask) == target_affinity) {
matching_cpus++;
- if (!tmp->arch.power_off)
+ if (!kvm_arm_vcpu_stopped(tmp))
return PSCI_0_2_AFFINITY_LEVEL_ON;
}
}
@@ -176,7 +169,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
* re-initialized.
*/
kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.power_off = true;
+ tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
@@ -202,6 +195,15 @@ static void kvm_psci_system_reset2(struct kvm_vcpu *vcpu)
KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2);
}
+static void kvm_psci_system_suspend(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ memset(&run->system_event, 0, sizeof(vcpu->run->system_event));
+ run->system_event.type = KVM_SYSTEM_EVENT_SUSPEND;
+ run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
{
int i;
@@ -245,7 +247,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
val = kvm_psci_vcpu_suspend(vcpu);
break;
case PSCI_0_2_FN_CPU_OFF:
- kvm_psci_vcpu_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
val = PSCI_RET_SUCCESS;
break;
case PSCI_0_2_FN_CPU_ON:
@@ -305,9 +307,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
{
+ unsigned long val = PSCI_RET_NOT_SUPPORTED;
u32 psci_fn = smccc_get_function(vcpu);
+ struct kvm *kvm = vcpu->kvm;
u32 arg;
- unsigned long val;
int ret = 1;
switch(psci_fn) {
@@ -320,6 +323,8 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
if (val)
break;
+ val = PSCI_RET_NOT_SUPPORTED;
+
switch(arg) {
case PSCI_0_2_FN_PSCI_VERSION:
case PSCI_0_2_FN_CPU_SUSPEND:
@@ -336,18 +341,32 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
case ARM_SMCCC_VERSION_FUNC_ID:
val = 0;
break;
+ case PSCI_1_0_FN_SYSTEM_SUSPEND:
+ case PSCI_1_0_FN64_SYSTEM_SUSPEND:
+ if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags))
+ val = 0;
+ break;
case PSCI_1_1_FN_SYSTEM_RESET2:
case PSCI_1_1_FN64_SYSTEM_RESET2:
- if (minor >= 1) {
+ if (minor >= 1)
val = 0;
- break;
- }
- fallthrough;
- default:
- val = PSCI_RET_NOT_SUPPORTED;
break;
}
break;
+ case PSCI_1_0_FN_SYSTEM_SUSPEND:
+ kvm_psci_narrow_to_32bit(vcpu);
+ fallthrough;
+ case PSCI_1_0_FN64_SYSTEM_SUSPEND:
+ /*
+ * Return directly to userspace without changing the vCPU's
+ * registers. Userspace depends on reading the SMCCC parameters
+ * to implement SYSTEM_SUSPEND.
+ */
+ if (test_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags)) {
+ kvm_psci_system_suspend(vcpu);
+ return 0;
+ }
+ break;
case PSCI_1_1_FN_SYSTEM_RESET2:
kvm_psci_narrow_to_32bit(vcpu);
fallthrough;
@@ -365,7 +384,7 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
val = PSCI_RET_INVALID_PARAMS;
break;
}
- fallthrough;
+ break;
default:
return kvm_psci_0_2_call(vcpu);
}
@@ -382,7 +401,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
switch (psci_fn) {
case KVM_PSCI_FN_CPU_OFF:
- kvm_psci_vcpu_off(vcpu);
+ kvm_arm_vcpu_power_off(vcpu);
val = PSCI_RET_SUCCESS;
break;
case KVM_PSCI_FN_CPU_ON:
@@ -437,186 +456,3 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
return -EINVAL;
}
}
-
-int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
-{
- return 4; /* PSCI version and three workaround registers */
-}
-
-int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
- if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
- return -EFAULT;
-
- if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
- return -EFAULT;
-
- if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
- return -EFAULT;
-
- if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++))
- return -EFAULT;
-
- return 0;
-}
-
-#define KVM_REG_FEATURE_LEVEL_WIDTH 4
-#define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
-
-/*
- * Convert the workaround level into an easy-to-compare number, where higher
- * values mean better protection.
- */
-static int get_kernel_wa_level(u64 regid)
-{
- switch (regid) {
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
- switch (arm64_get_spectre_v2_state()) {
- case SPECTRE_VULNERABLE:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
- case SPECTRE_MITIGATED:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
- case SPECTRE_UNAFFECTED:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
- }
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
- switch (arm64_get_spectre_v4_state()) {
- case SPECTRE_MITIGATED:
- /*
- * As for the hypercall discovery, we pretend we
- * don't have any FW mitigation if SSBS is there at
- * all times.
- */
- if (cpus_have_final_cap(ARM64_SSBS))
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
- fallthrough;
- case SPECTRE_UNAFFECTED:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
- case SPECTRE_VULNERABLE:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
- }
- break;
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
- switch (arm64_get_spectre_bhb_state()) {
- case SPECTRE_VULNERABLE:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
- case SPECTRE_MITIGATED:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL;
- case SPECTRE_UNAFFECTED:
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED;
- }
- return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
- }
-
- return -EINVAL;
-}
-
-int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- void __user *uaddr = (void __user *)(long)reg->addr;
- u64 val;
-
- switch (reg->id) {
- case KVM_REG_ARM_PSCI_VERSION:
- val = kvm_psci_version(vcpu);
- break;
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
- val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
- break;
- default:
- return -ENOENT;
- }
-
- if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- return 0;
-}
-
-int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
- void __user *uaddr = (void __user *)(long)reg->addr;
- u64 val;
- int wa_level;
-
- if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- switch (reg->id) {
- case KVM_REG_ARM_PSCI_VERSION:
- {
- bool wants_02;
-
- wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
-
- switch (val) {
- case KVM_ARM_PSCI_0_1:
- if (wants_02)
- return -EINVAL;
- vcpu->kvm->arch.psci_version = val;
- return 0;
- case KVM_ARM_PSCI_0_2:
- case KVM_ARM_PSCI_1_0:
- case KVM_ARM_PSCI_1_1:
- if (!wants_02)
- return -EINVAL;
- vcpu->kvm->arch.psci_version = val;
- return 0;
- }
- break;
- }
-
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
- if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
- return -EINVAL;
-
- if (get_kernel_wa_level(reg->id) < val)
- return -EINVAL;
-
- return 0;
-
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
- if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
- KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
- return -EINVAL;
-
- /* The enabled bit must not be set unless the level is AVAIL. */
- if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) &&
- (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL)
- return -EINVAL;
-
- /*
- * Map all the possible incoming states to the only two we
- * really want to deal with.
- */
- switch (val & KVM_REG_FEATURE_LEVEL_MASK) {
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
- wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
- break;
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
- case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
- wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
- break;
- default:
- return -EINVAL;
- }
-
- /*
- * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
- * other way around.
- */
- if (get_kernel_wa_level(reg->id) < wa_level)
- return -EINVAL;
-
- return 0;
- default:
- return -ENOENT;
- }
-
- return -EINVAL;
-}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 18b403b58b53..c06c0477fab5 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1145,6 +1145,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
if (!vcpu_has_ptrauth(vcpu))
val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+ if (!cpus_have_final_cap(ARM64_HAS_WFXT))
+ val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_WFXT);
break;
case SYS_ID_AA64DFR0_EL1:
/* Limit debug to ARMv8.0 */
@@ -2020,20 +2022,22 @@ static const struct sys_reg_desc cp14_64_regs[] = {
{ Op1( 0), CRm( 2), .access = trap_raz_wi },
};
+#define CP15_PMU_SYS_REG(_map, _Op1, _CRn, _CRm, _Op2) \
+ AA32(_map), \
+ Op1(_Op1), CRn(_CRn), CRm(_CRm), Op2(_Op2), \
+ .visibility = pmu_visibility
+
/* Macro to expand the PMEVCNTRn register */
#define PMU_PMEVCNTR(n) \
- /* PMEVCNTRn */ \
- { Op1(0), CRn(0b1110), \
- CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
- access_pmu_evcntr }
+ { CP15_PMU_SYS_REG(DIRECT, 0, 0b1110, \
+ (0b1000 | (((n) >> 3) & 0x3)), ((n) & 0x7)), \
+ .access = access_pmu_evcntr }
/* Macro to expand the PMEVTYPERn register */
#define PMU_PMEVTYPER(n) \
- /* PMEVTYPERn */ \
- { Op1(0), CRn(0b1110), \
- CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
- access_pmu_evtyper }
-
+ { CP15_PMU_SYS_REG(DIRECT, 0, 0b1110, \
+ (0b1100 | (((n) >> 3) & 0x3)), ((n) & 0x7)), \
+ .access = access_pmu_evtyper }
/*
* Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
* depending on the way they are accessed (as a 32bit or a 64bit
@@ -2073,25 +2077,25 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
/* PMU */
- { Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr },
- { Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten },
- { Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten },
- { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
- { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
- { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
- { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
- { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
- { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
- { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
- { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
- { Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr },
- { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
- { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
- { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
- { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 4), access_pmceid },
- { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 5), access_pmceid },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 0), .access = access_pmcr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 1), .access = access_pmcnten },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 2), .access = access_pmcnten },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 3), .access = access_pmovs },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 4), .access = access_pmswinc },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 12, 5), .access = access_pmselr },
+ { CP15_PMU_SYS_REG(LO, 0, 9, 12, 6), .access = access_pmceid },
+ { CP15_PMU_SYS_REG(LO, 0, 9, 12, 7), .access = access_pmceid },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 0), .access = access_pmu_evcntr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 1), .access = access_pmu_evtyper },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 13, 2), .access = access_pmu_evcntr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 0), .access = access_pmuserenr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 1), .access = access_pminten },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 2), .access = access_pminten },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 3), .access = access_pmovs },
+ { CP15_PMU_SYS_REG(HI, 0, 9, 14, 4), .access = access_pmceid },
+ { CP15_PMU_SYS_REG(HI, 0, 9, 14, 5), .access = access_pmceid },
/* PMMIR */
- { Op1( 0), CRn( 9), CRm(14), Op2( 6), trap_raz_wi },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 9, 14, 6), .access = trap_raz_wi },
/* PRRR/MAIR0 */
{ AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 },
@@ -2176,7 +2180,7 @@ static const struct sys_reg_desc cp15_regs[] = {
PMU_PMEVTYPER(29),
PMU_PMEVTYPER(30),
/* PMCCFILTR */
- { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 14, 15, 7), .access = access_pmu_evtyper },
{ Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
{ Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
@@ -2185,7 +2189,7 @@ static const struct sys_reg_desc cp15_regs[] = {
static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
- { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
+ { CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
@@ -2193,25 +2197,24 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
};
-static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
- bool is_32)
+static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
+ bool is_32)
{
unsigned int i;
for (i = 0; i < n; i++) {
if (!is_32 && table[i].reg && !table[i].reset) {
- kvm_err("sys_reg table %p entry %d has lacks reset\n",
- table, i);
- return 1;
+ kvm_err("sys_reg table %pS entry %d lacks reset\n", &table[i], i);
+ return false;
}
if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
- kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
- return 1;
+ kvm_err("sys_reg table %pS entry %d out of order\n", &table[i - 1], i - 1);
+ return false;
}
}
- return 0;
+ return true;
}
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
@@ -2252,27 +2255,27 @@ static void perform_access(struct kvm_vcpu *vcpu,
* @table: array of trap descriptors
* @num: size of the trap descriptor array
*
- * Return 0 if the access has been handled, and -1 if not.
+ * Return true if the access has been handled, false if not.
*/
-static int emulate_cp(struct kvm_vcpu *vcpu,
- struct sys_reg_params *params,
- const struct sys_reg_desc *table,
- size_t num)
+static bool emulate_cp(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *table,
+ size_t num)
{
const struct sys_reg_desc *r;
if (!table)
- return -1; /* Not handled */
+ return false; /* Not handled */
r = find_reg(params, table, num);
if (r) {
perform_access(vcpu, params, r);
- return 0;
+ return true;
}
/* Not handled */
- return -1;
+ return false;
}
static void unhandled_cp_access(struct kvm_vcpu *vcpu,
@@ -2336,7 +2339,7 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
* potential register operation in the case of a read and return
* with success.
*/
- if (!emulate_cp(vcpu, &params, global, nr_global)) {
+ if (emulate_cp(vcpu, &params, global, nr_global)) {
/* Split up the value between registers for the read side */
if (!params.is_write) {
vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
@@ -2350,34 +2353,144 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
return 1;
}
+static bool emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params);
+
+/*
+ * The CP10 ID registers are architecturally mapped to AArch64 feature
+ * registers. Abuse that fact so we can rely on the AArch64 handler for accesses
+ * from AArch32.
+ */
+static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params)
+{
+ u8 reg_id = (esr >> 10) & 0xf;
+ bool valid;
+
+ params->is_write = ((esr & 1) == 0);
+ params->Op0 = 3;
+ params->Op1 = 0;
+ params->CRn = 0;
+ params->CRm = 3;
+
+ /* CP10 ID registers are read-only */
+ valid = !params->is_write;
+
+ switch (reg_id) {
+ /* MVFR0 */
+ case 0b0111:
+ params->Op2 = 0;
+ break;
+ /* MVFR1 */
+ case 0b0110:
+ params->Op2 = 1;
+ break;
+ /* MVFR2 */
+ case 0b0101:
+ params->Op2 = 2;
+ break;
+ default:
+ valid = false;
+ }
+
+ if (valid)
+ return true;
+
+ kvm_pr_unimpl("Unhandled cp10 register %s: %u\n",
+ params->is_write ? "write" : "read", reg_id);
+ return false;
+}
+
+/**
+ * kvm_handle_cp10_id() - Handles a VMRS trap on guest access to a 'Media and
+ * VFP Register' from AArch32.
+ * @vcpu: The vCPU pointer
+ *
+ * MVFR{0-2} are architecturally mapped to the AArch64 MVFR{0-2}_EL1 registers.
+ * Work out the correct AArch64 system register encoding and reroute to the
+ * AArch64 system register emulation.
+ */
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu)
+{
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ struct sys_reg_params params;
+
+ /* UNDEF on any unhandled register access */
+ if (!kvm_esr_cp10_id_to_sys64(esr, &params)) {
+ kvm_inject_undefined(vcpu);
+ return 1;
+ }
+
+ if (emulate_sys_reg(vcpu, &params))
+ vcpu_set_reg(vcpu, Rt, params.regval);
+
+ return 1;
+}
+
+/**
+ * kvm_emulate_cp15_id_reg() - Handles an MRC trap on a guest CP15 access where
+ * CRn=0, which corresponds to the AArch32 feature
+ * registers.
+ * @vcpu: the vCPU pointer
+ * @params: the system register access parameters.
+ *
+ * Our cp15 system register tables do not enumerate the AArch32 feature
+ * registers. Conveniently, our AArch64 table does, and the AArch32 system
+ * register encoding can be trivially remapped into the AArch64 for the feature
+ * registers: Append op0=3, leaving op1, CRn, CRm, and op2 the same.
+ *
+ * According to DDI0487G.b G7.3.1, paragraph "Behavior of VMSAv8-32 32-bit
+ * System registers with (coproc=0b1111, CRn==c0)", read accesses from this
+ * range are either UNKNOWN or RES0. Rerouting remains architectural as we
+ * treat undefined registers in this range as RAZ.
+ */
+static int kvm_emulate_cp15_id_reg(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params)
+{
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+
+ /* Treat impossible writes to RO registers as UNDEFINED */
+ if (params->is_write) {
+ unhandled_cp_access(vcpu, params);
+ return 1;
+ }
+
+ params->Op0 = 3;
+
+ /*
+ * All registers where CRm > 3 are known to be UNKNOWN/RAZ from AArch32.
+ * Avoid conflicting with future expansion of AArch64 feature registers
+ * and simply treat them as RAZ here.
+ */
+ if (params->CRm > 3)
+ params->regval = 0;
+ else if (!emulate_sys_reg(vcpu, params))
+ return 1;
+
+ vcpu_set_reg(vcpu, Rt, params->regval);
+ return 1;
+}
+
/**
* kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
const struct sys_reg_desc *global,
size_t nr_global)
{
- struct sys_reg_params params;
- u64 esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
- params.CRm = (esr >> 1) & 0xf;
- params.regval = vcpu_get_reg(vcpu, Rt);
- params.is_write = ((esr & 1) == 0);
- params.CRn = (esr >> 10) & 0xf;
- params.Op0 = 0;
- params.Op1 = (esr >> 14) & 0x7;
- params.Op2 = (esr >> 17) & 0x7;
+ params->regval = vcpu_get_reg(vcpu, Rt);
- if (!emulate_cp(vcpu, &params, global, nr_global)) {
- if (!params.is_write)
- vcpu_set_reg(vcpu, Rt, params.regval);
+ if (emulate_cp(vcpu, params, global, nr_global)) {
+ if (!params->is_write)
+ vcpu_set_reg(vcpu, Rt, params->regval);
return 1;
}
- unhandled_cp_access(vcpu, &params);
+ unhandled_cp_access(vcpu, params);
return 1;
}
@@ -2388,7 +2501,20 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
{
- return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
+ struct sys_reg_params params;
+
+ params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu));
+
+ /*
+ * Certain AArch32 ID registers are handled by rerouting to the AArch64
+ * system register table. Registers in the ID range where CRm=0 are
+ * excluded from this scheme as they do not trivially map into AArch64
+ * system register encodings.
+ */
+ if (params.Op1 == 0 && params.CRn == 0 && params.CRm)
+ return kvm_emulate_cp15_id_reg(vcpu, &params);
+
+ return kvm_handle_cp_32(vcpu, &params, cp15_regs, ARRAY_SIZE(cp15_regs));
}
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
@@ -2398,7 +2524,11 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
{
- return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs));
+ struct sys_reg_params params;
+
+ params = esr_cp1x_32_to_params(kvm_vcpu_get_esr(vcpu));
+
+ return kvm_handle_cp_32(vcpu, &params, cp14_regs, ARRAY_SIZE(cp14_regs));
}
static bool is_imp_def_sys_reg(struct sys_reg_params *params)
@@ -2407,7 +2537,14 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params)
return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011;
}
-static int emulate_sys_reg(struct kvm_vcpu *vcpu,
+/**
+ * emulate_sys_reg - Emulate a guest access to an AArch64 system register
+ * @vcpu: The VCPU pointer
+ * @params: Decoded system register parameters
+ *
+ * Return: true if the system register access was successful, false otherwise.
+ */
+static bool emulate_sys_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *params)
{
const struct sys_reg_desc *r;
@@ -2416,7 +2553,10 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
if (likely(r)) {
perform_access(vcpu, params, r);
- } else if (is_imp_def_sys_reg(params)) {
+ return true;
+ }
+
+ if (is_imp_def_sys_reg(params)) {
kvm_inject_undefined(vcpu);
} else {
print_sys_reg_msg(params,
@@ -2424,7 +2564,7 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
*vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
kvm_inject_undefined(vcpu);
}
- return 1;
+ return false;
}
/**
@@ -2452,18 +2592,18 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
struct sys_reg_params params;
unsigned long esr = kvm_vcpu_get_esr(vcpu);
int Rt = kvm_vcpu_sys_get_rt(vcpu);
- int ret;
trace_kvm_handle_sys_reg(esr);
params = esr_sys64_to_params(esr);
params.regval = vcpu_get_reg(vcpu, Rt);
- ret = emulate_sys_reg(vcpu, &params);
+ if (!emulate_sys_reg(vcpu, &params))
+ return 1;
if (!params.is_write)
vcpu_set_reg(vcpu, Rt, params.regval);
- return ret;
+ return 1;
}
/******************************************************************************
@@ -2866,18 +3006,22 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return write_demux_regids(uindices);
}
-void kvm_sys_reg_table_init(void)
+int kvm_sys_reg_table_init(void)
{
+ bool valid = true;
unsigned int i;
struct sys_reg_desc clidr;
/* Make sure tables are unique and in order. */
- BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false));
- BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true));
- BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true));
- BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true));
- BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true));
- BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false));
+ valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
+ valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true);
+ valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true);
+ valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
+ valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
+ valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false);
+
+ if (!valid)
+ return -EINVAL;
/* We abuse the reset function to overwrite the table itself. */
for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
@@ -2900,4 +3044,6 @@ void kvm_sys_reg_table_init(void)
break;
/* Clear all higher bits. */
cache_levels &= (1 << (i*3))-1;
+
+ return 0;
}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index cc0cc95a0280..aee8ea054f0d 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -35,12 +35,19 @@ struct sys_reg_params {
.Op2 = ((esr) >> 17) & 0x7, \
.is_write = !((esr) & 1) })
+#define esr_cp1x_32_to_params(esr) \
+ ((struct sys_reg_params){ .Op1 = ((esr) >> 14) & 0x7, \
+ .CRn = ((esr) >> 10) & 0xf, \
+ .CRm = ((esr) >> 1) & 0xf, \
+ .Op2 = ((esr) >> 17) & 0x7, \
+ .is_write = !((esr) & 1) })
+
struct sys_reg_desc {
/* Sysreg string for debug */
const char *name;
enum {
- AA32_ZEROHIGH,
+ AA32_DIRECT,
AA32_LO,
AA32_HI,
} aarch32_map;
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index fc00304fe7d8..f6d4f4052555 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -98,11 +98,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
ret = 0;
if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
- kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
+ kvm->max_vcpus = VGIC_V2_MAX_CPUS;
else
- kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS;
+ kvm->max_vcpus = VGIC_V3_MAX_CPUS;
- if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) {
+ if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) {
ret = -E2BIG;
goto out_unlock;
}
@@ -319,7 +319,12 @@ int vgic_init(struct kvm *kvm)
vgic_debug_init(kvm);
- dist->implementation_rev = 2;
+ /*
+ * If userspace didn't set the GIC implementation revision,
+ * default to the latest and greatest. You know want it.
+ */
+ if (!dist->implementation_rev)
+ dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST;
dist->initialized = true;
out:
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 2e13402be3bd..9d3299a70242 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -683,7 +683,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
if (!vcpu)
return E_ITS_INT_UNMAPPED_INTERRUPT;
- if (!vcpu->arch.vgic_cpu.lpis_enabled)
+ if (!vgic_lpis_enabled(vcpu))
return -EBUSY;
vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq);
@@ -894,6 +894,18 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
return update_affinity(ite->irq, vcpu);
}
+static bool __is_visible_gfn_locked(struct vgic_its *its, gpa_t gpa)
+{
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ int idx;
+ bool ret;
+
+ idx = srcu_read_lock(&its->dev->kvm->srcu);
+ ret = kvm_is_visible_gfn(its->dev->kvm, gfn);
+ srcu_read_unlock(&its->dev->kvm->srcu, idx);
+ return ret;
+}
+
/*
* Check whether an ID can be stored into the corresponding guest table.
* For a direct table this is pretty easy, but gets a bit nasty for
@@ -908,9 +920,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser);
int esz = GITS_BASER_ENTRY_SIZE(baser);
- int index, idx;
- gfn_t gfn;
- bool ret;
+ int index;
switch (type) {
case GITS_BASER_TYPE_DEVICE:
@@ -933,12 +943,11 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
return false;
addr = base + id * esz;
- gfn = addr >> PAGE_SHIFT;
if (eaddr)
*eaddr = addr;
- goto out;
+ return __is_visible_gfn_locked(its, addr);
}
/* calculate and check the index into the 1st level */
@@ -964,27 +973,42 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
/* Find the address of the actual entry */
index = id % (SZ_64K / esz);
indirect_ptr += index * esz;
- gfn = indirect_ptr >> PAGE_SHIFT;
if (eaddr)
*eaddr = indirect_ptr;
-out:
- idx = srcu_read_lock(&its->dev->kvm->srcu);
- ret = kvm_is_visible_gfn(its->dev->kvm, gfn);
- srcu_read_unlock(&its->dev->kvm->srcu, idx);
- return ret;
+ return __is_visible_gfn_locked(its, indirect_ptr);
}
+/*
+ * Check whether an event ID can be stored in the corresponding Interrupt
+ * Translation Table, which starts at device->itt_addr.
+ */
+static bool vgic_its_check_event_id(struct vgic_its *its, struct its_device *device,
+ u32 event_id)
+{
+ const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+ int ite_esz = abi->ite_esz;
+ gpa_t gpa;
+
+ /* max table size is: BIT_ULL(device->num_eventid_bits) * ite_esz */
+ if (event_id >= BIT_ULL(device->num_eventid_bits))
+ return false;
+
+ gpa = device->itt_addr + event_id * ite_esz;
+ return __is_visible_gfn_locked(its, gpa);
+}
+
+/*
+ * Add a new collection into the ITS collection table.
+ * Returns 0 on success, and a negative error value for generic errors.
+ */
static int vgic_its_alloc_collection(struct vgic_its *its,
struct its_collection **colp,
u32 coll_id)
{
struct its_collection *collection;
- if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
- return E_ITS_MAPC_COLLECTION_OOR;
-
collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT);
if (!collection)
return -ENOMEM;
@@ -1061,7 +1085,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
if (!device)
return E_ITS_MAPTI_UNMAPPED_DEVICE;
- if (event_id >= BIT_ULL(device->num_eventid_bits))
+ if (!vgic_its_check_event_id(its, device, event_id))
return E_ITS_MAPTI_ID_OOR;
if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI)
@@ -1078,7 +1102,12 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
collection = find_collection(its, coll_id);
if (!collection) {
- int ret = vgic_its_alloc_collection(its, &collection, coll_id);
+ int ret;
+
+ if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
+ return E_ITS_MAPC_COLLECTION_OOR;
+
+ ret = vgic_its_alloc_collection(its, &collection, coll_id);
if (ret)
return ret;
new_coll = collection;
@@ -1233,6 +1262,10 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
if (!collection) {
int ret;
+ if (!vgic_its_check_id(its, its->baser_coll_table,
+ coll_id, NULL))
+ return E_ITS_MAPC_COLLECTION_OOR;
+
ret = vgic_its_alloc_collection(its, &collection,
coll_id);
if (ret)
@@ -1272,6 +1305,11 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
return 0;
}
+int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq)
+{
+ return update_lpi_config(kvm, irq, NULL, true);
+}
+
/*
* The INV command syncs the configuration bits from the memory table.
* Must be called with the its_lock mutex held.
@@ -1288,7 +1326,41 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
if (!ite)
return E_ITS_INV_UNMAPPED_INTERRUPT;
- return update_lpi_config(kvm, ite->irq, NULL, true);
+ return vgic_its_inv_lpi(kvm, ite->irq);
+}
+
+/**
+ * vgic_its_invall - invalidate all LPIs targetting a given vcpu
+ * @vcpu: the vcpu for which the RD is targetted by an invalidation
+ *
+ * Contrary to the INVALL command, this targets a RD instead of a
+ * collection, and we don't need to hold the its_lock, since no ITS is
+ * involved here.
+ */
+int vgic_its_invall(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ int irq_count, i = 0;
+ u32 *intids;
+
+ irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
+ if (irq_count < 0)
+ return irq_count;
+
+ for (i = 0; i < irq_count; i++) {
+ struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]);
+ if (!irq)
+ continue;
+ update_lpi_config(kvm, irq, vcpu, false);
+ vgic_put_irq(kvm, irq);
+ }
+
+ kfree(intids);
+
+ if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
+ its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
+
+ return 0;
}
/*
@@ -1305,32 +1377,13 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
u32 coll_id = its_cmd_get_collection(its_cmd);
struct its_collection *collection;
struct kvm_vcpu *vcpu;
- struct vgic_irq *irq;
- u32 *intids;
- int irq_count, i;
collection = find_collection(its, coll_id);
if (!its_is_collection_mapped(collection))
return E_ITS_INVALL_UNMAPPED_COLLECTION;
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
-
- irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
- if (irq_count < 0)
- return irq_count;
-
- for (i = 0; i < irq_count; i++) {
- irq = vgic_get_irq(kvm, NULL, intids[i]);
- if (!irq)
- continue;
- update_lpi_config(kvm, irq, vcpu, false);
- vgic_put_irq(kvm, irq);
- }
-
- kfree(intids);
-
- if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
- its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
+ vgic_its_invall(vcpu);
return 0;
}
@@ -2175,6 +2228,9 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
if (!collection)
return -EINVAL;
+ if (!vgic_its_check_event_id(its, dev, event_id))
+ return -EINVAL;
+
ite = vgic_its_alloc_ite(dev, collection, event_id);
if (IS_ERR(ite))
return PTR_ERR(ite);
@@ -2183,8 +2239,10 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
vcpu = kvm_get_vcpu(kvm, collection->target_addr);
irq = vgic_add_lpi(kvm, lpi_id, vcpu);
- if (IS_ERR(irq))
+ if (IS_ERR(irq)) {
+ its_free_ite(kvm, ite);
return PTR_ERR(irq);
+ }
ite->irq = irq;
return offset;
@@ -2296,6 +2354,7 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
void *ptr, void *opaque)
{
struct its_device *dev;
+ u64 baser = its->baser_device_table;
gpa_t itt_addr;
u8 num_eventid_bits;
u64 entry = *(u64 *)ptr;
@@ -2316,6 +2375,9 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
/* dte entry is valid */
offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
+ if (!vgic_its_check_id(its, baser, id, NULL))
+ return -EINVAL;
+
dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits);
if (IS_ERR(dev))
return PTR_ERR(dev);
@@ -2445,6 +2507,9 @@ static int vgic_its_restore_device_tables(struct vgic_its *its)
if (ret > 0)
ret = 0;
+ if (ret < 0)
+ vgic_its_free_device_list(its->dev->kvm, its);
+
return ret;
}
@@ -2461,6 +2526,11 @@ static int vgic_its_save_cte(struct vgic_its *its,
return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz);
}
+/*
+ * Restore a collection entry into the ITS collection table.
+ * Return +1 on success, 0 if the entry was invalid (which should be
+ * interpreted as end-of-table), and a negative error value for generic errors.
+ */
static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
{
struct its_collection *collection;
@@ -2487,6 +2557,10 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
collection = find_collection(its, coll_id);
if (collection)
return -EEXIST;
+
+ if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
+ return -EINVAL;
+
ret = vgic_its_alloc_collection(its, &collection, coll_id);
if (ret)
return ret;
@@ -2566,6 +2640,9 @@ static int vgic_its_restore_collection_table(struct vgic_its *its)
if (ret > 0)
return 0;
+ if (ret < 0)
+ vgic_its_free_collection_list(its->dev->kvm, its);
+
return ret;
}
@@ -2597,7 +2674,10 @@ static int vgic_its_restore_tables_v0(struct vgic_its *its)
if (ret)
return ret;
- return vgic_its_restore_device_tables(its);
+ ret = vgic_its_restore_device_tables(its);
+ if (ret)
+ vgic_its_free_collection_list(its->dev->kvm, its);
+ return ret;
}
static int vgic_its_commit_v0(struct vgic_its *its)
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index 12e4c223e6b8..77a67e9d3d14 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
@@ -73,9 +73,13 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ u32 reg;
+
switch (addr & 0x0c) {
case GIC_DIST_IIDR:
- if (val != vgic_mmio_read_v2_misc(vcpu, addr, len))
+ reg = vgic_mmio_read_v2_misc(vcpu, addr, len);
+ if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK)
return -EINVAL;
/*
@@ -87,8 +91,16 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
* migration from old kernels to new kernels with legacy
* userspace.
*/
- vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
- return 0;
+ reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg);
+ switch (reg) {
+ case KVM_VGIC_IMP_REV_2:
+ case KVM_VGIC_IMP_REV_3:
+ vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
+ dist->implementation_rev = reg;
+ return 0;
+ default:
+ return -EINVAL;
+ }
}
vgic_mmio_write_v2_misc(vcpu, addr, len, val);
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 58e40b4874f8..f7aa7bcd6fb8 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -155,13 +155,27 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
unsigned long val)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ u32 reg;
switch (addr & 0x0c) {
case GICD_TYPER2:
- case GICD_IIDR:
if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
return -EINVAL;
return 0;
+ case GICD_IIDR:
+ reg = vgic_mmio_read_v3_misc(vcpu, addr, len);
+ if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK)
+ return -EINVAL;
+
+ reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg);
+ switch (reg) {
+ case KVM_VGIC_IMP_REV_2:
+ case KVM_VGIC_IMP_REV_3:
+ dist->implementation_rev = reg;
+ return 0;
+ default:
+ return -EINVAL;
+ }
case GICD_CTLR:
/* Not a GICv4.1? No HW SGIs */
if (!kvm_vgic_global_state.has_gicv4_1)
@@ -221,34 +235,58 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
vgic_put_irq(vcpu->kvm, irq);
}
+bool vgic_lpis_enabled(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ return atomic_read(&vgic_cpu->ctlr) == GICR_CTLR_ENABLE_LPIS;
+}
+
static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ unsigned long val;
- return vgic_cpu->lpis_enabled ? GICR_CTLR_ENABLE_LPIS : 0;
-}
+ val = atomic_read(&vgic_cpu->ctlr);
+ if (vgic_get_implementation_rev(vcpu) >= KVM_VGIC_IMP_REV_3)
+ val |= GICR_CTLR_IR | GICR_CTLR_CES;
+ return val;
+}
static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- bool was_enabled = vgic_cpu->lpis_enabled;
+ u32 ctlr;
if (!vgic_has_its(vcpu->kvm))
return;
- vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS;
+ if (!(val & GICR_CTLR_ENABLE_LPIS)) {
+ /*
+ * Don't disable if RWP is set, as there already an
+ * ongoing disable. Funky guest...
+ */
+ ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr,
+ GICR_CTLR_ENABLE_LPIS,
+ GICR_CTLR_RWP);
+ if (ctlr != GICR_CTLR_ENABLE_LPIS)
+ return;
- if (was_enabled && !vgic_cpu->lpis_enabled) {
vgic_flush_pending_lpis(vcpu);
vgic_its_invalidate_cache(vcpu->kvm);
- }
+ atomic_set_release(&vgic_cpu->ctlr, 0);
+ } else {
+ ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, 0,
+ GICR_CTLR_ENABLE_LPIS);
+ if (ctlr != 0)
+ return;
- if (!was_enabled && vgic_cpu->lpis_enabled)
vgic_enable_lpis(vcpu);
+ }
}
static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
@@ -478,11 +516,10 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
unsigned long val)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
u64 old_propbaser, propbaser;
/* Storing a value with LPIs already enabled is undefined */
- if (vgic_cpu->lpis_enabled)
+ if (vgic_lpis_enabled(vcpu))
return;
do {
@@ -513,7 +550,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
u64 old_pendbaser, pendbaser;
/* Storing a value with LPIs already enabled is undefined */
- if (vgic_cpu->lpis_enabled)
+ if (vgic_lpis_enabled(vcpu))
return;
do {
@@ -525,6 +562,63 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
pendbaser) != old_pendbaser);
}
+static unsigned long vgic_mmio_read_sync(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return !!atomic_read(&vcpu->arch.vgic_cpu.syncr_busy);
+}
+
+static void vgic_set_rdist_busy(struct kvm_vcpu *vcpu, bool busy)
+{
+ if (busy) {
+ atomic_inc(&vcpu->arch.vgic_cpu.syncr_busy);
+ smp_mb__after_atomic();
+ } else {
+ smp_mb__before_atomic();
+ atomic_dec(&vcpu->arch.vgic_cpu.syncr_busy);
+ }
+}
+
+static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ struct vgic_irq *irq;
+
+ /*
+ * If the guest wrote only to the upper 32bit part of the
+ * register, drop the write on the floor, as it is only for
+ * vPEs (which we don't support for obvious reasons).
+ *
+ * Also discard the access if LPIs are not enabled.
+ */
+ if ((addr & 4) || !vgic_lpis_enabled(vcpu))
+ return;
+
+ vgic_set_rdist_busy(vcpu, true);
+
+ irq = vgic_get_irq(vcpu->kvm, NULL, lower_32_bits(val));
+ if (irq) {
+ vgic_its_inv_lpi(vcpu->kvm, irq);
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ vgic_set_rdist_busy(vcpu, false);
+}
+
+static void vgic_mmio_write_invall(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ /* See vgic_mmio_write_invlpi() for the early return rationale */
+ if ((addr & 4) || !vgic_lpis_enabled(vcpu))
+ return;
+
+ vgic_set_rdist_busy(vcpu, true);
+ vgic_its_invall(vcpu);
+ vgic_set_rdist_busy(vcpu, false);
+}
+
/*
* The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
* redistributors, while SPIs are covered by registers in the distributor
@@ -630,6 +724,15 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER,
vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_INVLPIR,
+ vgic_mmio_read_raz, vgic_mmio_write_invlpi, 8,
+ VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_INVALLR,
+ vgic_mmio_read_raz, vgic_mmio_write_invall, 8,
+ VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_SYNCR,
+ vgic_mmio_read_sync, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
VGIC_ACCESS_32bit),
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index b549af8b1dc2..826ff6f2a4e7 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -612,6 +612,10 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
static const struct midr_range broken_seis[] = {
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
{},
};
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 3fd6c86a7ef3..4c6bdd321faa 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -98,6 +98,11 @@
#define DEBUG_SPINLOCK_BUG_ON(p)
#endif
+static inline u32 vgic_get_implementation_rev(struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.vgic.implementation_rev;
+}
+
/* Requires the irq_lock to be held by the caller. */
static inline bool irq_is_pending(struct vgic_irq *irq)
{
@@ -308,6 +313,7 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
(base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
}
+bool vgic_lpis_enabled(struct kvm_vcpu *vcpu);
int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
@@ -317,6 +323,10 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm);
void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
void vgic_its_invalidate_cache(struct kvm *kvm);
+/* GICv4.1 MMIO interface */
+int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
+int vgic_its_invall(struct kvm_vcpu *vcpu);
+
bool vgic_supports_direct_msis(struct kvm *kvm);
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);