From d42e26716d038d9689a23c193b934cdf0e2a2117 Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Thu, 3 Feb 2022 17:41:55 +0000
Subject: KVM: arm64: Stash OSLSR_EL1 in the cpu context

An upcoming change to KVM will emulate the OS Lock from the PoV of the
guest. Add OSLSR_EL1 to the cpu context and handle reads using the
stored value. Define some mnemonics for handling the OSLM field and use
them to make the reset value of OSLSR_EL1 more readable.

Wire up a custom handler for writes from userspace and prevent any of
the invariant bits from changing. Note that the OSLK bit is not
invariant and will be made writable by the aforementioned change.

Reviewed-by: Reiji Watanabe
Signed-off-by: Oliver Upton
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20220203174159.2887882-3-oupton@google.com
---
 arch/arm64/include/asm/kvm_host.h | 1 +
 arch/arm64/include/asm/sysreg.h   | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'arch/arm64/include')

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5bc01e62c08a..cc1cc40d89f0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -171,6 +171,7 @@ enum vcpu_sysreg {
 	PAR_EL1,	/* Physical Address Register */
 	MDSCR_EL1,	/* Monitor Debug System Control Register */
 	MDCCINT_EL1,	/* Monitor Debug Comms Channel Interrupt Enable Reg */
+	OSLSR_EL1,	/* OS Lock Status Register */
 	DISR_EL1,	/* Deferred Interrupt Status Register */
 
 	/* Performance Monitors Registers */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 898bee0004ae..abc85eaa453d 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -129,7 +129,12 @@
 #define SYS_DBGWCRn_EL1(n)	sys_reg(2, 0, 0, n, 7)
 #define SYS_MDRAR_EL1	sys_reg(2, 0, 1, 0, 0)
 #define SYS_OSLAR_EL1	sys_reg(2, 0, 1, 0, 4)
+
 #define SYS_OSLSR_EL1	sys_reg(2, 0, 1, 1, 4)
+#define SYS_OSLSR_OSLM_MASK	(BIT(3) | BIT(0))
+#define SYS_OSLSR_OSLM_NI	0
+#define SYS_OSLSR_OSLM_IMPLEMENTED	BIT(3)
+
 #define SYS_OSDLR_EL1	sys_reg(2, 0, 1, 3, 4)
 #define SYS_DBGPRCR_EL1	sys_reg(2, 0, 1, 4, 4)
 #define SYS_DBGCLAIMSET_EL1	sys_reg(2, 0, 7, 8, 6)
-- 
cgit v1.2.3

From f24adc65c5568a8d94e2693f5441de80f1ffe0d3 Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Thu, 3 Feb 2022 17:41:56 +0000
Subject: KVM: arm64: Allow guest to set the OSLK bit

Allow writes to OSLAR and forward the OSLK bit to OSLSR. Do nothing
with the value for now.
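For illustration, the OSLAR->OSLSR forwarding this patch adds can be modelled as a
stand-alone C sketch. Only the bit positions (OSLAR_EL1.OSLK, OSLSR_EL1.OSLK and the
OSLM field) are taken from the sysreg.h hunks; the function and values below are
purely illustrative, not KVM code:

#include <stdint.h>
#include <stdio.h>

#define OSLAR_OSLK             (1u << 0)   /* write 1 to request the OS Lock */
#define OSLSR_OSLK             (1u << 1)   /* status: OS Lock currently set */
#define OSLSR_OSLM_IMPLEMENTED (1u << 3)   /* OSLM: OS Lock is implemented */

/* A write to OSLAR_EL1 only moves its OSLK bit into the otherwise
 * read-only OSLSR_EL1 status value, mirroring the trap handler below. */
static uint64_t emulate_oslar_write(uint64_t oslsr, uint64_t oslar_val)
{
    oslsr &= ~(uint64_t)OSLSR_OSLK;
    if (oslar_val & OSLAR_OSLK)
        oslsr |= OSLSR_OSLK;
    return oslsr;
}

int main(void)
{
    uint64_t oslsr = OSLSR_OSLM_IMPLEMENTED;   /* reset value used by the previous patch */

    oslsr = emulate_oslar_write(oslsr, 1);     /* guest sets the OS Lock */
    printf("OSLSR after lock:   %#llx\n", (unsigned long long)oslsr);

    oslsr = emulate_oslar_write(oslsr, 0);     /* guest clears it again */
    printf("OSLSR after unlock: %#llx\n", (unsigned long long)oslsr);
    return 0;
}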
Reviewed-by: Reiji Watanabe Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220203174159.2887882-4-oupton@google.com --- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kvm/sys_regs.c | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index abc85eaa453d..906a3550fc50 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -128,12 +128,15 @@ #define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) #define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) #define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) + #define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) +#define SYS_OSLAR_OSLK BIT(0) #define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) #define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0)) #define SYS_OSLSR_OSLM_NI 0 #define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3) +#define SYS_OSLSR_OSLK BIT(1) #define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) #define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b8286c31e01c..b0d7240ef49f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -44,6 +44,10 @@ * 64bit interface. */ +static int reg_from_user(u64 *val, const void __user *uaddr, u64 id); +static int reg_to_user(void __user *uaddr, const u64 *val, u64 id); +static u64 sys_reg_to_index(const struct sys_reg_desc *reg); + static bool read_from_write_only(struct kvm_vcpu *vcpu, struct sys_reg_params *params, const struct sys_reg_desc *r) @@ -287,6 +291,24 @@ static bool trap_loregion(struct kvm_vcpu *vcpu, return trap_raz_wi(vcpu, p, r); } +static bool trap_oslar_el1(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 oslsr; + + if (!p->is_write) + return read_from_write_only(vcpu, p, r); + + /* Forward the OSLK bit to OSLSR */ + oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~SYS_OSLSR_OSLK; + if (p->regval & SYS_OSLAR_OSLK) + oslsr |= SYS_OSLSR_OSLK; + + __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr; + return true; +} + static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -309,9 +331,14 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if (err) return err; - if (val != rd->val) + /* + * The only modifiable bit is the OSLK bit. Refuse the write if + * userspace attempts to change any other bit in the register. 
+ */ + if ((val ^ rd->val) & ~SYS_OSLSR_OSLK) return -EINVAL; + __vcpu_sys_reg(vcpu, rd->reg) = val; return 0; } @@ -1180,10 +1207,6 @@ static bool access_raz_id_reg(struct kvm_vcpu *vcpu, return __access_id_reg(vcpu, p, r, true); } -static int reg_from_user(u64 *val, const void __user *uaddr, u64 id); -static int reg_to_user(void __user *uaddr, const u64 *val, u64 id); -static u64 sys_reg_to_index(const struct sys_reg_desc *reg); - /* Visibility overrides for SVE-specific control registers */ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) @@ -1463,7 +1486,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { DBG_BCR_BVR_WCR_WVR_EL1(15), { SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_OSLAR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 }, { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1, SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, }, { SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi }, @@ -1937,7 +1960,7 @@ static const struct sys_reg_desc cp14_regs[] = { DBGBXVR(0), /* DBGOSLAR */ - { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_oslar_el1 }, DBGBXVR(1), /* DBGOSLSR */ { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1, NULL, OSLSR_EL1 }, -- cgit v1.2.3 From 7dabf02f43a1670d13282463fc0106f01dfd6f9c Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 3 Feb 2022 17:41:57 +0000 Subject: KVM: arm64: Emulate the OS Lock The OS lock blocks all debug exceptions at every EL. To date, KVM has not implemented the OS lock for its guests, despite the fact that it is mandatory per the architecture. Simple context switching between the guest and host is not appropriate, as its effects are not constrained to the guest context. Emulate the OS Lock by clearing MDE and SS in MDSCR_EL1, thereby blocking all but software breakpoint instructions. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220203174159.2887882-5-oupton@google.com --- arch/arm64/include/asm/kvm_host.h | 4 ++++ arch/arm64/kvm/debug.c | 26 ++++++++++++++++++++++---- arch/arm64/kvm/sys_regs.c | 6 +++--- 3 files changed, 29 insertions(+), 7 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index cc1cc40d89f0..3c73e4de4229 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -726,6 +726,10 @@ void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); + +#define kvm_vcpu_os_lock_enabled(vcpu) \ + (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK)) + int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index db9361338b2a..4fd5c216c4bb 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -105,9 +105,11 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu) * - Userspace is using the hardware to debug the guest * (KVM_GUESTDBG_USE_HW is set). * - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear). + * - The guest has enabled the OS Lock (debug exceptions are blocked). 
*/ if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) || - !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) + !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) || + kvm_vcpu_os_lock_enabled(vcpu)) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); @@ -160,8 +162,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) kvm_arm_setup_mdcr_el2(vcpu); - /* Is Guest debugging in effect? */ - if (vcpu->guest_debug) { + /* Check if we need to use the debug registers. */ + if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { /* Save guest debug state */ save_guest_debug_regs(vcpu); @@ -223,6 +225,19 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) trace_kvm_arm_set_regset("WAPTS", get_num_wrps(), &vcpu->arch.debug_ptr->dbg_wcr[0], &vcpu->arch.debug_ptr->dbg_wvr[0]); + + /* + * The OS Lock blocks debug exceptions in all ELs when it is + * enabled. If the guest has enabled the OS Lock, constrain its + * effects to the guest. Emulate the behavior by clearing + * MDSCR_EL1.MDE. In so doing, we ensure that host debug + * exceptions are unaffected by guest configuration of the OS + * Lock. + */ + } else if (kvm_vcpu_os_lock_enabled(vcpu)) { + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + mdscr &= ~DBG_MDSCR_MDE; + vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); } } @@ -244,7 +259,10 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) { trace_kvm_arm_clear_debug(vcpu->guest_debug); - if (vcpu->guest_debug) { + /* + * Restore the guest's debug registers if we were using them. + */ + if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { restore_guest_debug_regs(vcpu); /* diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b0d7240ef49f..dd34b5ab51d4 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1457,9 +1457,9 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, * Debug handling: We do trap most, if not all debug related system * registers. The implementation is good enough to ensure that a guest * can use these with minimal performance degradation. The drawback is - * that we don't implement any of the external debug, none of the - * OSlock protocol. This should be revisited if we ever encounter a - * more demanding guest... + * that we don't implement any of the external debug architecture. + * This should be revisited if we ever encounter a more demanding + * guest... */ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DC_ISW), access_dcsw }, -- cgit v1.2.3 From fcc5bf89635a05e627cdd2e9ec52c989c8dfe2ab Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 18 Jan 2022 01:57:01 +0000 Subject: KVM: arm64: Use read/write spin lock for MMU protection Replace MMU spinlock with rwlock and update all instances of the lock being acquired with a write lock acquisition. Future commit will add a fast path for permission relaxation during dirty logging under a read lock. 
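As a rough analogy for what the conversion buys (not KVM code; a user-space sketch
using POSIX rwlocks, with hypothetical function names): structural page-table updates
keep exclusive access, while the planned dirty-logging fast path can take the lock
shared and run concurrently on many vCPUs.

#include <pthread.h>

static pthread_rwlock_t mmu_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Exclusive section: tears down or rebuilds stage-2 mappings, the
 * analogue of the write_lock(&kvm->mmu_lock) sites below. */
static void unmap_range(void)
{
    pthread_rwlock_wrlock(&mmu_lock);
    /* ... modify the page tables ... */
    pthread_rwlock_unlock(&mmu_lock);
}

/* Shared section: the follow-up patch relaxes write permissions for
 * dirty logging under a read lock, so many vCPUs can fault in parallel. */
static void relax_permissions_fast_path(void)
{
    pthread_rwlock_rdlock(&mmu_lock);
    /* ... mark an existing mapping writable ... */
    pthread_rwlock_unlock(&mmu_lock);
}

int main(void)
{
    unmap_range();
    relax_permissions_fast_path();
    return 0;
}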
Signed-off-by: Jing Zhang Tested-by: Fuad Tabba Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220118015703.3630552-2-jingzhangos@google.com --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/mmu.c | 36 ++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5bc01e62c08a..4866842c31e1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -50,6 +50,8 @@ #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) +#define KVM_HAVE_MMU_RWLOCK + /* * Mode of operation configurable with kvm-arm.mode early param. * See Documentation/admin-guide/kernel-parameters.txt for more information. diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index bc2aba953299..cafd5813c949 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -58,7 +58,7 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr, break; if (resched && next != end) - cond_resched_lock(&kvm->mmu_lock); + cond_resched_rwlock_write(&kvm->mmu_lock); } while (addr = next, addr != end); return ret; @@ -179,7 +179,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); phys_addr_t end = start + size; - assert_spin_locked(&kvm->mmu_lock); + lockdep_assert_held_write(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap, may_block)); @@ -213,13 +213,13 @@ static void stage2_flush_vm(struct kvm *kvm) int idx, bkt; idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, bkt, slots) stage2_flush_memslot(kvm, memslot); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); } @@ -720,13 +720,13 @@ void stage2_unmap_vm(struct kvm *kvm) idx = srcu_read_lock(&kvm->srcu); mmap_read_lock(current->mm); - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, bkt, slots) stage2_unmap_memslot(kvm, memslot); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); mmap_read_unlock(current->mm); srcu_read_unlock(&kvm->srcu, idx); } @@ -736,14 +736,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); struct kvm_pgtable *pgt = NULL; - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); pgt = mmu->pgt; if (pgt) { mmu->pgd_phys = 0; mmu->pgt = NULL; free_percpu(mmu->last_vcpu_ran); } - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); if (pgt) { kvm_pgtable_stage2_destroy(pgt); @@ -783,10 +783,10 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (ret) break; - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, &cache); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); if (ret) break; @@ -834,9 +834,9 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) start = memslot->base_gfn << PAGE_SHIFT; end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); stage2_wp_range(&kvm->arch.mmu, start, end); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); } @@ -1212,7 
+1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && device) return -ENOEXEC; - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; @@ -1271,7 +1271,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } out_unlock: - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret != -EAGAIN ? ret : 0; @@ -1286,10 +1286,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) trace_kvm_access_fault(fault_ipa); - spin_lock(&vcpu->kvm->mmu_lock); + write_lock(&vcpu->kvm->mmu_lock); mmu = vcpu->arch.hw_mmu; kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa); - spin_unlock(&vcpu->kvm->mmu_lock); + write_unlock(&vcpu->kvm->mmu_lock); pte = __pte(kpte); if (pte_valid(pte)) @@ -1692,9 +1692,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, gpa_t gpa = slot->base_gfn << PAGE_SHIFT; phys_addr_t size = slot->npages << PAGE_SHIFT; - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); unmap_stage2_range(&kvm->arch.mmu, gpa, size); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); } /* -- cgit v1.2.3 From 417838392f2e657ee25cc30e373ff4c35a0faa90 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Mon, 22 Nov 2021 12:18:41 +0000 Subject: KVM: arm64: Introduce a new VMID allocator for KVM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new VMID allocator for arm64 KVM use. This is based on arm64 ASID allocator algorithm. One major deviation from the ASID allocator is the way we flush the context. Unlike ASID allocator, we expect less frequent rollover in the case of VMIDs. Hence, instead of marking the CPU as flush_pending and issuing a local context invalidation on the next context switch, we  broadcast TLB flush + I-cache invalidation over the inner shareable domain on rollover. Signed-off-by: Shameer Kolothum Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211122121844.867-2-shameerali.kolothum.thodi@huawei.com --- arch/arm64/include/asm/kvm_host.h | 4 + arch/arm64/kvm/vmid.c | 177 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 arch/arm64/kvm/vmid.c (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5bc01e62c08a..a79b7c1b353f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -692,6 +692,10 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_vmid_alloc_init(void); +void kvm_arm_vmid_alloc_free(void); +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); + static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) { vcpu_arch->steal.base = GPA_INVALID; diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c new file mode 100644 index 000000000000..aa01c97f7df0 --- /dev/null +++ b/arch/arm64/kvm/vmid.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * VMID allocator. + * + * Based on Arm64 ASID allocator algorithm. + * Please refer arch/arm64/mm/context.c for detailed + * comments on algorithm. + * + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Ltd. 
+ */ + +#include +#include + +#include +#include + +static unsigned int kvm_arm_vmid_bits; +static DEFINE_RAW_SPINLOCK(cpu_vmid_lock); + +static atomic64_t vmid_generation; +static unsigned long *vmid_map; + +static DEFINE_PER_CPU(atomic64_t, active_vmids); +static DEFINE_PER_CPU(u64, reserved_vmids); + +#define VMID_MASK (~GENMASK(kvm_arm_vmid_bits - 1, 0)) +#define VMID_FIRST_VERSION (1UL << kvm_arm_vmid_bits) + +#define NUM_USER_VMIDS VMID_FIRST_VERSION +#define vmid2idx(vmid) ((vmid) & ~VMID_MASK) +#define idx2vmid(idx) vmid2idx(idx) + +#define vmid_gen_match(vmid) \ + (!(((vmid) ^ atomic64_read(&vmid_generation)) >> kvm_arm_vmid_bits)) + +static void flush_context(void) +{ + int cpu; + u64 vmid; + + bitmap_clear(vmid_map, 0, NUM_USER_VMIDS); + + for_each_possible_cpu(cpu) { + vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0); + + /* Preserve reserved VMID */ + if (vmid == 0) + vmid = per_cpu(reserved_vmids, cpu); + __set_bit(vmid2idx(vmid), vmid_map); + per_cpu(reserved_vmids, cpu) = vmid; + } + + /* + * Unlike ASID allocator, we expect less frequent rollover in + * case of VMIDs. Hence, instead of marking the CPU as + * flush_pending and issuing a local context invalidation on + * the next context-switch, we broadcast TLB flush + I-cache + * invalidation over the inner shareable domain on rollover. + */ + kvm_call_hyp(__kvm_flush_vm_context); +} + +static bool check_update_reserved_vmid(u64 vmid, u64 newvmid) +{ + int cpu; + bool hit = false; + + /* + * Iterate over the set of reserved VMIDs looking for a match + * and update to use newvmid (i.e. the same VMID in the current + * generation). + */ + for_each_possible_cpu(cpu) { + if (per_cpu(reserved_vmids, cpu) == vmid) { + hit = true; + per_cpu(reserved_vmids, cpu) = newvmid; + } + } + + return hit; +} + +static u64 new_vmid(struct kvm_vmid *kvm_vmid) +{ + static u32 cur_idx = 1; + u64 vmid = atomic64_read(&kvm_vmid->id); + u64 generation = atomic64_read(&vmid_generation); + + if (vmid != 0) { + u64 newvmid = generation | (vmid & ~VMID_MASK); + + if (check_update_reserved_vmid(vmid, newvmid)) { + atomic64_set(&kvm_vmid->id, newvmid); + return newvmid; + } + + if (!__test_and_set_bit(vmid2idx(vmid), vmid_map)) { + atomic64_set(&kvm_vmid->id, newvmid); + return newvmid; + } + } + + vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, cur_idx); + if (vmid != NUM_USER_VMIDS) + goto set_vmid; + + /* We're out of VMIDs, so increment the global generation count */ + generation = atomic64_add_return_relaxed(VMID_FIRST_VERSION, + &vmid_generation); + flush_context(); + + /* We have more VMIDs than CPUs, so this will always succeed */ + vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, 1); + +set_vmid: + __set_bit(vmid, vmid_map); + cur_idx = vmid; + vmid = idx2vmid(vmid) | generation; + atomic64_set(&kvm_vmid->id, vmid); + return vmid; +} + +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) +{ + unsigned long flags; + u64 vmid, old_active_vmid; + + vmid = atomic64_read(&kvm_vmid->id); + + /* + * Please refer comments in check_and_switch_context() in + * arch/arm64/mm/context.c. + */ + old_active_vmid = atomic64_read(this_cpu_ptr(&active_vmids)); + if (old_active_vmid && vmid_gen_match(vmid) && + atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids), + old_active_vmid, vmid)) + return; + + raw_spin_lock_irqsave(&cpu_vmid_lock, flags); + + /* Check that our VMID belongs to the current generation. 
*/ + vmid = atomic64_read(&kvm_vmid->id); + if (!vmid_gen_match(vmid)) + vmid = new_vmid(kvm_vmid); + + atomic64_set(this_cpu_ptr(&active_vmids), vmid); + raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags); +} + +/* + * Initialize the VMID allocator + */ +int kvm_arm_vmid_alloc_init(void) +{ + kvm_arm_vmid_bits = kvm_get_vmid_bits(); + + /* + * Expect allocation after rollover to fail if we don't have + * at least one more VMID than CPUs. VMID #0 is always reserved. + */ + WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus()); + atomic64_set(&vmid_generation, VMID_FIRST_VERSION); + vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS), + sizeof(*vmid_map), GFP_KERNEL); + if (!vmid_map) + return -ENOMEM; + + return 0; +} + +void kvm_arm_vmid_alloc_free(void) +{ + kfree(vmid_map); +} -- cgit v1.2.3 From f8051e960922a9de8e42159103d5d9c697ef17ec Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Mon, 22 Nov 2021 12:18:42 +0000 Subject: KVM: arm64: Make VMID bits accessible outside of allocator Since we already set the kvm_arm_vmid_bits in the VMID allocator init function, make it accessible outside as well so that it can be used in the subsequent patch. Suggested-by: Will Deacon Signed-off-by: Shameer Kolothum Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211122121844.867-3-shameerali.kolothum.thodi@huawei.com --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kernel/image-vars.h | 3 +++ arch/arm64/kvm/vmid.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a79b7c1b353f..1fea13a0857e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -692,6 +692,7 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +extern unsigned int kvm_arm_vmid_bits; int kvm_arm_vmid_alloc_init(void); void kvm_arm_vmid_alloc_free(void); void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 7eaf1f7c4168..884844849c70 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -79,6 +79,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); /* Kernel symbol used by icache_is_vpipt(). */ KVM_NVHE_ALIAS(__icache_flags); +/* VMID bits set by the KVM VMID allocator */ +KVM_NVHE_ALIAS(kvm_arm_vmid_bits); + /* Kernel symbols needed for cpus_have_final/const_caps checks. */ KVM_NVHE_ALIAS(arm64_const_caps_ready); KVM_NVHE_ALIAS(cpu_hwcap_keys); diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index aa01c97f7df0..9aff692b6b7d 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -16,7 +16,7 @@ #include #include -static unsigned int kvm_arm_vmid_bits; +unsigned int kvm_arm_vmid_bits; static DEFINE_RAW_SPINLOCK(cpu_vmid_lock); static atomic64_t vmid_generation; -- cgit v1.2.3 From 3248136b3637e1671e4fa46e32e2122f9ec4bc3d Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Mon, 22 Nov 2021 12:18:43 +0000 Subject: KVM: arm64: Align the VMID allocation with the arm64 ASID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment, the VMID algorithm will send an SGI to all the CPUs to force an exit and then broadcast a full TLB flush and I-Cache invalidation. This patch uses the new VMID allocator. The benefits are:    - Aligns with arm64 ASID algorithm.    - CPUs are not forced to exit at roll-over. 
Instead, the VMID will be marked reserved and context invalidation is broadcasted. This will reduce the IPIs traffic.   - More flexible to add support for pinned KVM VMIDs in the future.     With the new algo, the code is now adapted:     - The call to update_vmid() will be done with preemption disabled as the new algo requires to store information per-CPU. Signed-off-by: Julien Grall Signed-off-by: Shameer Kolothum Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211122121844.867-4-shameerali.kolothum.thodi@huawei.com --- arch/arm64/include/asm/kvm_host.h | 4 +- arch/arm64/include/asm/kvm_mmu.h | 4 +- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/arm.c | 105 ++++++---------------------------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 3 +- arch/arm64/kvm/mmu.c | 1 - 6 files changed, 22 insertions(+), 97 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1fea13a0857e..47aeed22e2ad 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -71,9 +71,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); struct kvm_vmid { - /* The VMID generation used for the virt. memory system */ - u64 vmid_gen; - u32 vmid; + atomic64_t id; }; struct kvm_s2_mmu { diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 81839e9a8a24..74735a864eee 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -115,6 +115,7 @@ alternative_cb_end #include #include #include +#include void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); @@ -266,7 +267,8 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) u64 cnp = system_supports_cnp() ? 
VTTBR_CNP_BIT : 0; baddr = mmu->pgd_phys; - vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT; + vmid_field = atomic64_read(&vmid->id) << VTTBR_VMID_SHIFT; + vmid_field &= VTTBR_VMID_MASK(kvm_arm_vmid_bits); return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 91861fd8b897..261644b1a6bb 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -14,7 +14,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \ - arch_timer.o trng.o\ + arch_timer.o trng.o vmid.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ecc5958e27fe..be2fd84d526b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -53,11 +53,6 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); -/* The VMID used in the VTTBR */ -static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); -static u32 kvm_next_vmid; -static DEFINE_SPINLOCK(kvm_vmid_lock); - static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); @@ -489,87 +484,6 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) } #endif -/* Just ensure a guest exit from a particular CPU */ -static void exit_vm_noop(void *info) -{ -} - -void force_vm_exit(const cpumask_t *mask) -{ - preempt_disable(); - smp_call_function_many(mask, exit_vm_noop, NULL, true); - preempt_enable(); -} - -/** - * need_new_vmid_gen - check that the VMID is still valid - * @vmid: The VMID to check - * - * return true if there is a new generation of VMIDs being used - * - * The hardware supports a limited set of values with the value zero reserved - * for the host, so we check if an assigned value belongs to a previous - * generation, which requires us to assign a new value. If we're the first to - * use a VMID for the new generation, we must flush necessary caches and TLBs - * on all CPUs. - */ -static bool need_new_vmid_gen(struct kvm_vmid *vmid) -{ - u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); - smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ - return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen); -} - -/** - * update_vmid - Update the vmid with a valid VMID for the current generation - * @vmid: The stage-2 VMID information struct - */ -static void update_vmid(struct kvm_vmid *vmid) -{ - if (!need_new_vmid_gen(vmid)) - return; - - spin_lock(&kvm_vmid_lock); - - /* - * We need to re-check the vmid_gen here to ensure that if another vcpu - * already allocated a valid vmid for this vm, then this vcpu should - * use the same vmid. - */ - if (!need_new_vmid_gen(vmid)) { - spin_unlock(&kvm_vmid_lock); - return; - } - - /* First user of a new VMID generation? */ - if (unlikely(kvm_next_vmid == 0)) { - atomic64_inc(&kvm_vmid_gen); - kvm_next_vmid = 1; - - /* - * On SMP we know no other CPUs can use this CPU's or each - * other's VMID after force_vm_exit returns since the - * kvm_vmid_lock blocks them from reentry to the guest. - */ - force_vm_exit(cpu_all_mask); - /* - * Now broadcast TLB + ICACHE invalidation over the inner - * shareable domain to make sure all data structures are - * clean. 
- */ - kvm_call_hyp(__kvm_flush_vm_context); - } - - WRITE_ONCE(vmid->vmid, kvm_next_vmid); - kvm_next_vmid++; - kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; - - smp_wmb(); - WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen)); - - spin_unlock(&kvm_vmid_lock); -} - static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) { return vcpu->arch.target >= 0; @@ -793,7 +707,6 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) } return kvm_request_pending(vcpu) || - need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || xfer_to_guest_mode_work_pending(); } @@ -855,8 +768,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (!ret) ret = 1; - update_vmid(&vcpu->arch.hw_mmu->vmid); - check_vcpu_requests(vcpu); /* @@ -866,6 +777,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ preempt_disable(); + /* + * The VMID allocator only tracks active VMIDs per + * physical CPU, and therefore the VMID allocated may not be + * preserved on VMID roll-over if the task was preempted, + * making a thread's VMID inactive. So we need to call + * kvm_arm_vmid_update() in non-premptible context. + */ + kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid); + kvm_pmu_flush_hwstate(vcpu); local_irq_disable(); @@ -2161,6 +2081,12 @@ int kvm_arch_init(void *opaque) if (err) return err; + err = kvm_arm_vmid_alloc_init(); + if (err) { + kvm_err("Failed to initialize VMID allocator.\n"); + return err; + } + if (!in_hyp_mode) { err = init_hyp_mode(); if (err) @@ -2200,6 +2126,7 @@ out_hyp: if (!in_hyp_mode) teardown_hyp_mode(); out_err: + kvm_arm_vmid_alloc_free(); return err; } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 674f10564373..78edf077fa3b 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -138,8 +138,7 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); mmu->pgt = &host_kvm.pgt; - WRITE_ONCE(mmu->vmid.vmid_gen, 0); - WRITE_ONCE(mmu->vmid.vmid, 0); + atomic64_set(&mmu->vmid.id, 0); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index bc2aba953299..cde9166edbef 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -653,7 +653,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); - WRITE_ONCE(mmu->vmid.vmid_gen, 0); return 0; out_destroy_pgtable: -- cgit v1.2.3 From 100b4f092f878dc379f1fcef9ce567c25dee3473 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Mon, 22 Nov 2021 12:18:44 +0000 Subject: KVM: arm64: Make active_vmids invalid on vCPU schedule out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like ASID allocator, we copy the active_vmids into the reserved_vmids on a rollover. But it's unlikely that every CPU will have a vCPU as current task and we may end up unnecessarily reserving the VMID space. Hence, set active_vmids to an invalid one when scheduling out a vCPU. 
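The scheme in this series hinges on splitting the 64-bit VMID value into a generation
(the high bits) and a hardware VMID index (the low kvm_arm_vmid_bits). A self-contained
sketch of the staleness check, mirroring vmid_gen_match() in vmid.c; the constants are
made up for the example:

#include <stdint.h>
#include <stdio.h>

#define VMID_BITS          16                /* stands in for kvm_arm_vmid_bits */
#define VMID_FIRST_VERSION (1ULL << VMID_BITS)
#define VMID_MASK          (~(VMID_FIRST_VERSION - 1))

/* Same test as the patch: the VMID is stale iff its generation bits
 * (everything above VMID_BITS) differ from the current generation. */
static int vmid_gen_match(uint64_t vmid, uint64_t generation)
{
    return !((vmid ^ generation) >> VMID_BITS);
}

int main(void)
{
    uint64_t generation = VMID_FIRST_VERSION;     /* initial generation */
    uint64_t vmid = generation | 0x2a;            /* hardware VMID 42, generation 1 */

    printf("hw vmid %llu, current? %d\n",
           (unsigned long long)(vmid & ~VMID_MASK), vmid_gen_match(vmid, generation));

    generation += VMID_FIRST_VERSION;             /* a rollover happened */
    printf("after rollover, current? %d\n", vmid_gen_match(vmid, generation));
    return 0;
}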
Signed-off-by: Shameer Kolothum Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211122121844.867-5-shameerali.kolothum.thodi@huawei.com --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/vmid.c | 25 ++++++++++++++++++++++--- 3 files changed, 24 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 47aeed22e2ad..305ea13c4fd0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -694,6 +694,7 @@ extern unsigned int kvm_arm_vmid_bits; int kvm_arm_vmid_alloc_init(void); void kvm_arm_vmid_alloc_free(void); void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); +void kvm_arm_vmid_clear_active(void); static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index be2fd84d526b..418014998f18 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -417,6 +417,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); + kvm_arm_vmid_clear_active(); vcpu->cpu = -1; } diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index 9aff692b6b7d..8d5f0506fd87 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -32,6 +32,13 @@ static DEFINE_PER_CPU(u64, reserved_vmids); #define vmid2idx(vmid) ((vmid) & ~VMID_MASK) #define idx2vmid(idx) vmid2idx(idx) +/* + * As vmid #0 is always reserved, we will never allocate one + * as below and can be treated as invalid. This is used to + * set the active_vmids on vCPU schedule out. + */ +#define VMID_ACTIVE_INVALID VMID_FIRST_VERSION + #define vmid_gen_match(vmid) \ (!(((vmid) ^ atomic64_read(&vmid_generation)) >> kvm_arm_vmid_bits)) @@ -122,6 +129,12 @@ set_vmid: return vmid; } +/* Called from vCPU sched out with preemption disabled */ +void kvm_arm_vmid_clear_active(void) +{ + atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID); +} + void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) { unsigned long flags; @@ -132,11 +145,17 @@ void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) /* * Please refer comments in check_and_switch_context() in * arch/arm64/mm/context.c. + * + * Unlike ASID allocator, we set the active_vmids to + * VMID_ACTIVE_INVALID on vCPU schedule out to avoid + * reserving the VMID space needlessly on rollover. + * Hence explicitly check here for a "!= 0" to + * handle the sync with a concurrent rollover. */ old_active_vmid = atomic64_read(this_cpu_ptr(&active_vmids)); - if (old_active_vmid && vmid_gen_match(vmid) && - atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids), - old_active_vmid, vmid)) + if (old_active_vmid != 0 && vmid_gen_match(vmid) && + 0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids), + old_active_vmid, vmid)) return; raw_spin_lock_irqsave(&cpu_vmid_lock, flags); -- cgit v1.2.3 From 5177fe91e4cf78a659aada2c9cf712db4d788481 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Jan 2022 16:17:54 +0000 Subject: KVM: arm64: Do not change the PMU event filter after a VCPU has run Userspace can specify which events a guest is allowed to use with the KVM_ARM_VCPU_PMU_V3_FILTER attribute. The list of allowed events can be identified by a guest from reading the PMCEID{0,1}_EL0 registers. 
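For reference, a filter is installed from userspace with the KVM_SET_DEVICE_ATTR
ioctl on the vCPU file descriptor. A minimal sketch, assuming arm64 uapi headers;
error handling and the chosen event range are illustrative:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Deny the guest a range of PMU events. Must run before the first
 * KVM_RUN: after this patch, KVM rejects late changes with -EBUSY. */
static int deny_guest_pmu_events(int vcpu_fd, uint16_t base, uint16_t nevents)
{
    struct kvm_pmu_event_filter filter = {
        .base_event = base,
        .nevents    = nevents,
        .action     = KVM_PMU_EVENT_DENY,
    };
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .attr  = KVM_ARM_VCPU_PMU_V3_FILTER,
        .addr  = (uint64_t)(unsigned long)&filter,
    };

    return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}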
Changing the PMU event filter after a VCPU has run can cause reads of the registers performed before the filter is changed to return different values than reads performed with the new event filter in place. The architecture defines the two registers as read-only, and this behaviour contradicts that. Keep track when the first VCPU has run and deny changes to the PMU event filter to prevent this from happening. Signed-off-by: Marc Zyngier [ Alexandru E: Added commit message, updated ioctl documentation ] Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-2-alexandru.elisei@arm.com --- Documentation/virt/kvm/devices/vcpu.rst | 2 +- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 4 ++++ arch/arm64/kvm/pmu-emul.c | 33 ++++++++++++++++++++------------- 4 files changed, 26 insertions(+), 14 deletions(-) (limited to 'arch/arm64/include') diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 60a29972d3f1..d063aaee5bb7 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -70,7 +70,7 @@ irqchip. -ENODEV PMUv3 not supported or GIC not initialized -ENXIO PMUv3 not properly configured or in-kernel irqchip not configured as required prior to calling this attribute - -EBUSY PMUv3 already initialized + -EBUSY PMUv3 already initialized or a VCPU has already run -EINVAL Invalid filter range ======= ====================================================== diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5bc01e62c08a..2869259e10c0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -136,6 +136,7 @@ struct kvm_arch { /* Memory Tagging Extension enabled for the guest */ bool mte_enabled; + bool ran_once; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ecc5958e27fe..4783dbf66df2 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -634,6 +634,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (kvm_vm_is_protected(kvm)) kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); + mutex_lock(&kvm->lock); + kvm->arch.ran_once = true; + mutex_unlock(&kvm->lock); + return ret; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fbcfd4ec6f92..bc771bc1a041 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -924,6 +924,8 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { + struct kvm *kvm = vcpu->kvm; + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; @@ -941,7 +943,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!irqchip_in_kernel(kvm)) return -EINVAL; if (get_user(irq, uaddr)) @@ -951,7 +953,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!(irq_is_ppi(irq) || irq_is_spi(irq))) return -EINVAL; - if (!pmu_irq_is_valid(vcpu->kvm, irq)) + if (!pmu_irq_is_valid(kvm, irq)) return -EINVAL; if (kvm_arm_pmu_irq_initialized(vcpu)) @@ -966,7 +968,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) struct kvm_pmu_event_filter filter; int nr_events; - nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1; + nr_events = kvm_pmu_event_mask(kvm) + 1; uaddr = (struct kvm_pmu_event_filter __user 
*)(long)attr->addr; @@ -978,12 +980,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) filter.action != KVM_PMU_EVENT_DENY)) return -EINVAL; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&kvm->lock); + + if (kvm->arch.ran_once) { + mutex_unlock(&kvm->lock); + return -EBUSY; + } - if (!vcpu->kvm->arch.pmu_filter) { - vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); - if (!vcpu->kvm->arch.pmu_filter) { - mutex_unlock(&vcpu->kvm->lock); + if (!kvm->arch.pmu_filter) { + kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); + if (!kvm->arch.pmu_filter) { + mutex_unlock(&kvm->lock); return -ENOMEM; } @@ -994,17 +1001,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) * events, the default is to allow. */ if (filter.action == KVM_PMU_EVENT_ALLOW) - bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events); + bitmap_zero(kvm->arch.pmu_filter, nr_events); else - bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events); + bitmap_fill(kvm->arch.pmu_filter, nr_events); } if (filter.action == KVM_PMU_EVENT_ALLOW) - bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents); + bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents); else - bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents); + bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&kvm->lock); return 0; } -- cgit v1.2.3 From 46b18782147248b62f00e98a7f87abaf934951e8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Jan 2022 16:17:56 +0000 Subject: KVM: arm64: Keep a per-VM pointer to the default PMU As we are about to allow selection of the PMU exposed to a guest, start by keeping track of the default one instead of only the PMU version. Signed-off-by: Marc Zyngier Signed-off-by: Alexandru Elisei Link: https://lore.kernel.org/r/20220127161759.53553-4-alexandru.elisei@arm.com --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/pmu-emul.c | 42 ++++++++++++++++++++++++--------------- 2 files changed, 27 insertions(+), 17 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2869259e10c0..57141a3a3740 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -129,7 +129,7 @@ struct kvm_arch { * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+). 
*/ unsigned long *pmu_filter; - unsigned int pmuver; + struct arm_pmu *arm_pmu; u8 pfr0_csv2; u8 pfr0_csv3; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index bc771bc1a041..b238b3d5515c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -24,7 +24,11 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); static u32 kvm_pmu_event_mask(struct kvm *kvm) { - switch (kvm->arch.pmuver) { + unsigned int pmuver; + + pmuver = kvm->arch.arm_pmu->pmuver; + + switch (pmuver) { case ID_AA64DFR0_PMUVER_8_0: return GENMASK(9, 0); case ID_AA64DFR0_PMUVER_8_1: @@ -33,7 +37,7 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) case ID_AA64DFR0_PMUVER_8_7: return GENMASK(15, 0); default: /* Shouldn't be here, just for sanity */ - WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver); + WARN_ONCE(1, "Unknown PMU version %d\n", pmuver); return 0; } } @@ -600,6 +604,7 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) */ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) { + struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; struct perf_event *event; @@ -636,7 +641,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) return; memset(&attr, 0, sizeof(struct perf_event_attr)); - attr.type = PERF_TYPE_RAW; + attr.type = arm_pmu->pmu.type; attr.size = sizeof(attr); attr.pinned = 1; attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); @@ -750,12 +755,11 @@ void kvm_host_pmu_init(struct arm_pmu *pmu) static_branch_enable(&kvm_arm_pmu_available); } -static int kvm_pmu_probe_pmuver(void) +static struct arm_pmu *kvm_pmu_probe_armpmu(void) { struct perf_event_attr attr = { }; struct perf_event *event; - struct arm_pmu *pmu; - int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF; + struct arm_pmu *pmu = NULL; /* * Create a dummy event that only counts user cycles. 
As we'll never @@ -780,19 +784,20 @@ static int kvm_pmu_probe_pmuver(void) if (IS_ERR(event)) { pr_err_once("kvm: pmu event creation failed %ld\n", PTR_ERR(event)); - return ID_AA64DFR0_PMUVER_IMP_DEF; + return NULL; } if (event->pmu) { pmu = to_arm_pmu(event->pmu); - if (pmu->pmuver) - pmuver = pmu->pmuver; + if (pmu->pmuver == 0 || + pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF) + pmu = NULL; } perf_event_disable(event); perf_event_release_kernel(event); - return pmuver; + return pmu; } u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) @@ -810,7 +815,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled * as RAZ */ - if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4) + if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4) val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32); base = 32; } @@ -932,11 +937,16 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (vcpu->arch.pmu.created) return -EBUSY; - if (!vcpu->kvm->arch.pmuver) - vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver(); - - if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF) - return -ENODEV; + mutex_lock(&kvm->lock); + if (!kvm->arch.arm_pmu) { + /* No PMU set, get the default one */ + kvm->arch.arm_pmu = kvm_pmu_probe_armpmu(); + if (!kvm->arch.arm_pmu) { + mutex_unlock(&kvm->lock); + return -ENODEV; + } + } + mutex_unlock(&kvm->lock); switch (attr->attr) { case KVM_ARM_VCPU_PMU_V3_IRQ: { -- cgit v1.2.3 From 6ee7fca2a4a023b14aa1f1f3c4f6c833116116ef Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 27 Jan 2022 16:17:58 +0000 Subject: KVM: arm64: Add KVM_ARM_VCPU_PMU_V3_SET_PMU attribute When KVM creates an event and there are more than one PMUs present on the system, perf_init_event() will go through the list of available PMUs and will choose the first one that can create the event. The order of the PMUs in this list depends on the probe order, which can change under various circumstances, for example if the order of the PMU nodes change in the DTB or if asynchronous driver probing is enabled on the kernel command line (with the driver_async_probe=armv8-pmu option). Another consequence of this approach is that on heteregeneous systems all virtual machines that KVM creates will use the same PMU. This might cause unexpected behaviour for userspace: when a VCPU is executing on the physical CPU that uses this default PMU, PMU events in the guest work correctly; but when the same VCPU executes on another CPU, PMU events in the guest will suddenly stop counting. Fortunately, perf core allows user to specify on which PMU to create an event by using the perf_event_attr->type field, which is used by perf_init_event() as an index in the radix tree of available PMUs. Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU attribute to allow userspace to specify the arm_pmu that KVM will use when creating events for that VCPU. KVM will make no attempt to run the VCPU on the physical CPUs that share the PMU, leaving it up to userspace to manage the VCPU threads' affinity accordingly. To ensure that KVM doesn't expose an asymmetric system to the guest, the PMU set for one VCPU will be used by all other VCPUs. Once a VCPU has run, the PMU cannot be changed in order to avoid changing the list of available events for a VCPU, or to change the semantics of existing events. 
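A sketch of how a VMM might use the new attribute. The sysfs path and the PMU
instance name "armv8_pmuv3_0" are hypothetical examples; on a real system the
instance names come from the arm_pmu driver:

#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int vcpu_set_pmu(int vcpu_fd)
{
    int pmu_type = -1;
    /* Read the perf "type" id of the desired PMU instance from sysfs */
    FILE *f = fopen("/sys/bus/event_source/devices/armv8_pmuv3_0/type", "r");

    if (!f)
        return -1;
    if (fscanf(f, "%d", &pmu_type) != 1) {
        fclose(f);
        return -1;
    }
    fclose(f);

    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .attr  = KVM_ARM_VCPU_PMU_V3_SET_PMU,
        .addr  = (uint64_t)(unsigned long)&pmu_type,
    };

    /* Fails with -EBUSY once a vCPU has run or a filter was already set */
    return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}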
Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-6-alexandru.elisei@arm.com --- Documentation/virt/kvm/devices/vcpu.rst | 28 +++++++++++++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 1 + arch/arm64/kvm/pmu-emul.c | 40 +++++++++++++++++++++++++++++++++ tools/arch/arm64/include/uapi/asm/kvm.h | 1 + 4 files changed, 70 insertions(+) (limited to 'arch/arm64/include') diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index d063aaee5bb7..e8c5770590a2 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -104,6 +104,34 @@ hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it isn't strictly speaking an event. Filtering the cycle counter is possible using event 0x11 (CPU_CYCLES). +1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU +------------------------------------------ + +:Parameters: in kvm_device_attr.addr the address to an int representing the PMU + identifier. + +:Returns: + + ======= ==================================================== + -EBUSY PMUv3 already initialized, a VCPU has already run or + an event filter has already been set + -EFAULT Error accessing the PMU identifier + -ENXIO PMU not found + -ENODEV PMUv3 not supported or GIC not initialized + -ENOMEM Could not allocate memory + ======= ==================================================== + +Request that the VCPU uses the specified hardware PMU when creating guest events +for the purpose of PMU emulation. The PMU identifier can be read from the "type" +file for the desired PMU instance under /sys/devices (or, equivalent, +/sys/bus/even_source). This attribute is particularly useful on heterogeneous +systems where there are at least two CPU PMUs on the system. The PMU that is set +for one VCPU will be used by all the other VCPUs. It isn't possible to set a PMU +if a PMU event filter is already present. + +Note that KVM will not make any attempts to run the VCPU on the physical CPUs +associated with the PMU specified by this attribute. This is entirely left to +userspace. 2. 
GROUP: KVM_ARM_VCPU_TIMER_CTRL ================================= diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e..1d0a0a2a9711 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags { #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 #define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 7bab73f85b58..18361f367495 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -948,6 +948,36 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) return true; } +static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) +{ + struct kvm *kvm = vcpu->kvm; + struct arm_pmu_entry *entry; + struct arm_pmu *arm_pmu; + int ret = -ENXIO; + + mutex_lock(&kvm->lock); + mutex_lock(&arm_pmus_lock); + + list_for_each_entry(entry, &arm_pmus, entry) { + arm_pmu = entry->arm_pmu; + if (arm_pmu->pmu.type == pmu_id) { + if (kvm->arch.ran_once || + (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) { + ret = -EBUSY; + break; + } + + kvm->arch.arm_pmu = arm_pmu; + ret = 0; + break; + } + } + + mutex_unlock(&arm_pmus_lock); + mutex_unlock(&kvm->lock); + return ret; +} + int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { struct kvm *kvm = vcpu->kvm; @@ -1046,6 +1076,15 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return 0; } + case KVM_ARM_VCPU_PMU_V3_SET_PMU: { + int __user *uaddr = (int __user *)(long)attr->addr; + int pmu_id; + + if (get_user(pmu_id, uaddr)) + return -EFAULT; + + return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id); + } case KVM_ARM_VCPU_PMU_V3_INIT: return kvm_arm_pmu_v3_init(vcpu); } @@ -1083,6 +1122,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_PMU_V3_IRQ: case KVM_ARM_VCPU_PMU_V3_INIT: case KVM_ARM_VCPU_PMU_V3_FILTER: + case KVM_ARM_VCPU_PMU_V3_SET_PMU: if (kvm_vcpu_has_pmu(vcpu)) return 0; } diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e..1d0a0a2a9711 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags { #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 #define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 -- cgit v1.2.3 From 583cda1b0e7d5d49db5fc15db623166310e36bf6 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 27 Jan 2022 16:17:59 +0000 Subject: KVM: arm64: Refuse to run VCPU if the PMU doesn't match the physical CPU Userspace can assign a PMU to a VCPU with the KVM_ARM_VCPU_PMU_V3_SET_PMU device ioctl. If the VCPU is scheduled on a physical CPU which has a different PMU, the perf events needed to emulate a guest PMU won't be scheduled in and the guest performance counters will stop counting. Treat it as an userspace error and refuse to run the VCPU in this situation. 
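From the VMM side, the new failure mode shows up as a normal KVM_RUN exit; a minimal
sketch of detecting it (the re-pinning policy is left to the caller and purely
illustrative):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* 'run' is the mmap()ed struct kvm_run for this vCPU. Returns 0 if the
 * entry failure was the PMU/CPU mismatch introduced by this patch. */
static int ran_on_unsupported_cpu(int vcpu_fd, struct kvm_run *run)
{
    if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
        return -1;

    if (run->exit_reason == KVM_EXIT_FAIL_ENTRY &&
        run->fail_entry.hardware_entry_failure_reason ==
            KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED) {
        /* run->fail_entry.cpu is the offending physical CPU; the VMM
         * should re-pin the vCPU thread onto a CPU backed by the
         * selected PMU and retry. */
        return 0;
    }
    return 1;
}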
Suggested-by: Marc Zyngier Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-7-alexandru.elisei@arm.com --- Documentation/virt/kvm/devices/vcpu.rst | 6 +++++- arch/arm64/include/asm/kvm_host.h | 12 ++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 3 +++ arch/arm64/kvm/arm.c | 17 +++++++++++++++++ arch/arm64/kvm/pmu-emul.c | 1 + 5 files changed, 38 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index e8c5770590a2..260db203a1e2 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -131,7 +131,11 @@ if a PMU event filter is already present. Note that KVM will not make any attempts to run the VCPU on the physical CPUs associated with the PMU specified by this attribute. This is entirely left to -userspace. +userspace. However, attempting to run the VCPU on a physical CPU not supported +by the PMU will fail and KVM_RUN will return with +exit_reason = KVM_EXIT_FAIL_ENTRY and populate the fail_entry struct by setting +hardare_entry_failure_reason field to KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED and +the cpu field to the processor id. 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL ================================= diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 57141a3a3740..0bed0e33e1ae 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -131,6 +131,8 @@ struct kvm_arch { unsigned long *pmu_filter; struct arm_pmu *arm_pmu; + cpumask_var_t supported_cpus; + u8 pfr0_csv2; u8 pfr0_csv3; @@ -436,6 +438,7 @@ struct kvm_vcpu_arch { #define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */ #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ #define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) +#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | \ @@ -454,6 +457,15 @@ struct kvm_vcpu_arch { #define vcpu_has_ptrauth(vcpu) false #endif +#define vcpu_on_unsupported_cpu(vcpu) \ + ((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU) + +#define vcpu_set_on_unsupported_cpu(vcpu) \ + ((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU) + +#define vcpu_clear_on_unsupported_cpu(vcpu) \ + ((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU) + #define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) /* diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 1d0a0a2a9711..d49f714f48e6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -414,6 +414,9 @@ struct kvm_arm_copy_mte_tags { #define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS #define KVM_PSCI_RET_DENIED PSCI_RET_DENIED +/* run->fail_entry.hardware_entry_failure_reason codes. 
*/ +#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4783dbf66df2..13c1318d8b9a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -150,6 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) + goto out_free_stage2_pgd; + cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); + kvm_vgic_early_init(kvm); /* The maximum number of VCPUs is limited by the host's GIC model */ @@ -176,6 +180,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) void kvm_arch_destroy_vm(struct kvm *kvm) { bitmap_free(kvm->arch.pmu_filter); + free_cpumask_var(kvm->arch.supported_cpus); kvm_vgic_destroy(kvm); @@ -411,6 +416,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_disable(vcpu); kvm_arch_vcpu_load_debug_state_flags(vcpu); + + if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus)) + vcpu_set_on_unsupported_cpu(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -423,6 +431,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); + vcpu_clear_on_unsupported_cpu(vcpu); vcpu->cpu = -1; } @@ -796,6 +805,14 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) } } + if (unlikely(vcpu_on_unsupported_cpu(vcpu))) { + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry.hardware_entry_failure_reason = KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED; + run->fail_entry.cpu = smp_processor_id(); + *ret = 0; + return true; + } + return kvm_request_pending(vcpu) || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || xfer_to_guest_mode_work_pending(); diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 18361f367495..4526a5824dac 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -968,6 +968,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) } kvm->arch.arm_pmu = arm_pmu; + cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus); ret = 0; break; } -- cgit v1.2.3 From 34739fd95fab3a5efb0422e4f012b685e33598dc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Feb 2022 15:35:24 +0000 Subject: KVM: arm64: Indicate SYSTEM_RESET2 in kvm_run::system_event flags field When handling reset and power-off PSCI calls from the guest, we initialise X0 to PSCI_RET_INTERNAL_FAILURE in case the VMM tries to re-run the vCPU after issuing the call. Unfortunately, this also means that the VMM cannot see which PSCI call was issued and therefore cannot distinguish between PSCI SYSTEM_RESET and SYSTEM_RESET2 calls, which is necessary in order to determine the validity of the "reset_type" in X1. Allocate bit 0 of the previously unused 'flags' field of the system_event structure so that we can indicate the PSCI call used to initiate the reset. 
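On the VMM side this surfaces as extra information on an existing exit; a brief
sketch of how userspace might tell the two reset flavours apart (helper name
illustrative):

#include <stdbool.h>
#include <linux/kvm.h>

/* Returns true if the guest requested the reset via a PSCI v1.1
 * SYSTEM_RESET2 call rather than the original SYSTEM_RESET. */
static bool is_psci_reset2(const struct kvm_run *run)
{
    return run->exit_reason == KVM_EXIT_SYSTEM_EVENT &&
           run->system_event.type == KVM_SYSTEM_EVENT_RESET &&
           (run->system_event.flags & KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2);
}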
Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220221153524.15397-4-will@kernel.org --- Documentation/virt/kvm/api.rst | 5 +++++ arch/arm64/include/uapi/asm/kvm.h | 7 +++++++ arch/arm64/kvm/psci.c | 15 +++++++++++---- 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'arch/arm64/include') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a4267104db50..8305ac9747b6 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5939,6 +5939,11 @@ Valid values for 'type' are: to ignore the request, or to gather VM memory core dump and/or reset/shutdown of the VM. +Valid flags are: + + - KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued + a SYSTEM_RESET2 call according to v1.1 of the PSCI specification. + :: /* KVM_EXIT_IOAPIC_EOI */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e..06bc08fdf8ea 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -413,6 +413,13 @@ struct kvm_arm_copy_mte_tags { #define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS #define KVM_PSCI_RET_DENIED PSCI_RET_DENIED +/* arm64-specific kvm_run::system_event flags */ +/* + * Reset caused by a PSCI v1.1 SYSTEM_RESET2 call. + * Valid only when the system event has a type of KVM_SYSTEM_EVENT_RESET. + */ +#define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 30fcc5a99483..ecb4b74cb12a 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -162,7 +162,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) return PSCI_0_2_AFFINITY_LEVEL_OFF; } -static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) +static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) { unsigned long i; struct kvm_vcpu *tmp; @@ -182,17 +182,24 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; + vcpu->run->system_event.flags = flags; vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; } static void kvm_psci_system_off(struct kvm_vcpu *vcpu) { - kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN, 0); } static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) { - kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET, 0); +} + +static void kvm_psci_system_reset2(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET, + KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2); } static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) @@ -364,7 +371,7 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) arg < PSCI_1_1_RESET_TYPE_VENDOR_START) { val = PSCI_RET_INVALID_PARAMS; } else { - kvm_psci_system_reset(vcpu); + kvm_psci_system_reset2(vcpu); val = PSCI_RET_INTERNAL_FAILURE; ret = 0; } -- cgit v1.2.3 From 06394531b425794dc56f3d525b7994d25b8072f7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 11 Mar 2022 17:39:47 +0000 Subject: KVM: arm64: Generalise VM features into a set of flags We currently deal with a set of booleans for VM features, while they could be better represented as a set of flags contained in an 
unsigned long, similarly to what we are doing on the CPU side. Signed-off-by: Marc Zyngier [Oliver: Flag-ify the 'ran_once' boolean] Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220311174001.605719-2-oupton@google.com --- arch/arm64/include/asm/kvm_host.h | 15 +++++++++------ arch/arm64/kvm/arm.c | 7 ++++--- arch/arm64/kvm/mmio.c | 3 ++- arch/arm64/kvm/pmu-emul.c | 4 ++-- 4 files changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 76f795b628f1..0e96087885fe 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -122,7 +122,12 @@ struct kvm_arch { * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is * supported. */ - bool return_nisv_io_abort_to_user; +#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0 + /* Memory Tagging Extension enabled for the guest */ +#define KVM_ARCH_FLAG_MTE_ENABLED 1 + /* At least one vCPU has run in the VM */ +#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2 + unsigned long flags; /* * VM-wide PMU filter, implemented as a bitmap and big enough for @@ -135,10 +140,6 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; - - /* Memory Tagging Extension enabled for the guest */ - bool mte_enabled; - bool ran_once; }; struct kvm_vcpu_fault_info { @@ -810,7 +811,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) -#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled) +#define kvm_has_mte(kvm) \ + (system_supports_mte() && \ + test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) #define kvm_vcpu_has_pmu(vcpu) \ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f49ebdd9c990..17021bc8ee2c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -84,7 +84,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, switch (cap->cap) { case KVM_CAP_ARM_NISV_TO_USER: r = 0; - kvm->arch.return_nisv_io_abort_to_user = true; + set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, + &kvm->arch.flags); break; case KVM_CAP_ARM_MTE: mutex_lock(&kvm->lock); @@ -92,7 +93,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = -EINVAL; } else { r = 0; - kvm->arch.mte_enabled = true; + set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); } mutex_unlock(&kvm->lock); break; @@ -559,7 +560,7 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); mutex_lock(&kvm->lock); - kvm->arch.ran_once = true; + set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags); mutex_unlock(&kvm->lock); return ret; diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 3e2d8ba11a02..3dd38a151d2a 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -135,7 +135,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) * volunteered to do so, and bail out otherwise. 
*/ if (!kvm_vcpu_dabt_isvalid(vcpu)) { - if (vcpu->kvm->arch.return_nisv_io_abort_to_user) { + if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, + &vcpu->kvm->arch.flags)) { run->exit_reason = KVM_EXIT_ARM_NISV; run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu); run->arm_nisv.fault_ipa = fault_ipa; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 4526a5824dac..78fdc443adc7 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -961,7 +961,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) list_for_each_entry(entry, &arm_pmus, entry) { arm_pmu = entry->arm_pmu; if (arm_pmu->pmu.type == pmu_id) { - if (kvm->arch.ran_once || + if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) || (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) { ret = -EBUSY; break; @@ -1044,7 +1044,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) mutex_lock(&kvm->lock); - if (kvm->arch.ran_once) { + if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) { mutex_unlock(&kvm->lock); return -EBUSY; } -- cgit v1.2.3
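To round off the series from a userspace angle, here is how a VMM might react to the KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED exit added by the first patch above: report the offending CPU and re-pin the vCPU thread onto CPUs that the chosen PMU supports before retrying KVM_RUN. This is a hedged sketch only; the pmu_cpus mask and the retry policy are assumptions of the example, not part of the patches.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <linux/kvm.h>	/* pulls in asm/kvm.h with KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED */

/* Returns 0 if the calling vCPU thread was re-pinned and KVM_RUN may be retried. */
static int handle_fail_entry(struct kvm_run *run, const cpu_set_t *pmu_cpus)
{
	if (run->exit_reason != KVM_EXIT_FAIL_ENTRY ||
	    run->fail_entry.hardware_entry_failure_reason !=
	    KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED)
		return -1;

	fprintf(stderr, "vCPU ran on CPU %u, which the selected PMU does not support\n",
		run->fail_entry.cpu);

	/* Move this thread onto the PMU's supported CPUs; the caller retries KVM_RUN. */
	return sched_setaffinity(0, sizeof(cpu_set_t), pmu_cpus);
}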