diff options
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 4 | ||||
-rw-r--r-- | arch/arm64/include/uapi/asm/kvm.h | 6 | ||||
-rw-r--r-- | arch/arm64/kvm/arm.c | 3 | ||||
-rw-r--r-- | arch/arm64/kvm/guest.c | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/mmu.c | 8 | ||||
-rw-r--r-- | arch/arm64/kvm/sys_regs.c | 4 | ||||
-rw-r--r-- | arch/mips/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/mips/kvm/mmu.c | 12 | ||||
-rw-r--r-- | arch/powerpc/include/asm/kvm_book3s_64.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_host.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_nested.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 8 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500_mmu_host.c | 4 | ||||
-rw-r--r-- | arch/riscv/kvm/mmu.c | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/ibt.h | 11 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/emulate.c | 26 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 14 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/paging_tmpl.h | 4 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 66 | ||||
-rw-r--r-- | tools/objtool/check.c | 3 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 95 | ||||
-rw-r--r-- | virt/kvm/pfncache.c | 17 |
25 files changed, 157 insertions, 156 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f38ef299f13b..e9c9388ccc02 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -929,6 +929,10 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); (system_supports_mte() && \ test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) +#define kvm_supports_32bit_el0() \ + (system_supports_32bit_el0() && \ + !static_branch_unlikely(&arm64_mismatched_32bit_el0)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 3bb134355874..316917b98707 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -75,9 +75,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 986cee6fbc7f..2ff0ef62abad 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -757,8 +757,7 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) if (likely(!vcpu_mode_is_32bit(vcpu))) return false; - return !system_supports_32bit_el0() || - static_branch_unlikely(&arm64_mismatched_32bit_el0); + return !kvm_supports_32bit_el0(); } /** diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 8c607199cad1..f802a3b3f8db 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -242,7 +242,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) return -EINVAL; break; case PSR_AA32_MODE_FIQ: diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 87f1cd0df36e..c9a13e487187 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -993,7 +993,7 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, * THP doesn't start to split while we are adjusting the * refcounts. * - * We are sure this doesn't happen, because mmu_notifier_retry + * We are sure this doesn't happen, because mmu_invalidate_retry * was successful and we are holding the mmu_lock, so if this * THP is trying to split, it will be blocked in the mmu * notifier before touching any of the pages, specifically @@ -1188,9 +1188,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return ret; } - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; /* - * Ensure the read of mmu_notifier_seq happens before we call + * Ensure the read of mmu_invalidate_seq happens before we call * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk * the page we just got a reference to gets unmapped before we have a * chance to grab the mmu_lock, which ensure that if the page gets @@ -1246,7 +1246,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, else write_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; /* diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c059b259aea6..3234f50b8c4b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -652,7 +652,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -701,7 +701,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 717716cc51c5..5cedb28e8a40 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -84,8 +84,6 @@ #define KVM_MAX_VCPUS 16 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index db17e870bdff..74cd64a24d05 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -615,17 +615,17 @@ retry: * Used to check for invalidations in progress, of the pfn that is * returned by pfn_to_pfn_prot below. */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; /* - * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in - * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't + * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads + * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't * risk the page we get a reference to getting unmapped before we have a - * chance to grab the mmu_lock without mmu_notifier_retry() noticing. + * chance to grab the mmu_lock without mmu_invalidate_retry() noticing. * * This smp_rmb() pairs with the effective smp_wmb() of the combination * of the pte_unmap_unlock() after the PTE is zapped, and the * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before - * mmu_notifier_seq is incremented. + * mmu_invalidate_seq is incremented. */ smp_rmb(); @@ -638,7 +638,7 @@ retry: spin_lock(&kvm->mmu_lock); /* Check if an invalidation has taken place since we got pfn */ - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { /* * This can happen when mappings are changed asynchronously, but * also synchronously if a COW is triggered by diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 4def2bd17b9b..d49065af08e9 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -666,7 +666,7 @@ static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq, VM_WARN(!spin_is_locked(&kvm->mmu_lock), "%s called with kvm mmu_lock not held \n", __func__); - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) return NULL; pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift); diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 1ae09992c9ea..bc6a381b5346 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -90,7 +90,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, unsigned long pfn; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* Get host physical address for gpa */ @@ -151,7 +151,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, cpte = kvmppc_mmu_hpte_cache_next(vcpu); spin_lock(&kvm->mmu_lock); - if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) { + if (!cpte || mmu_invalidate_retry(kvm, mmu_seq)) { r = -EAGAIN; goto out_unlock; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 514fd45c1994..e9744b41a226 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -578,7 +578,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, return -EFAULT; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); ret = -EFAULT; @@ -693,7 +693,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, /* Check if we might have been invalidated; let the guest retry if so */ ret = RESUME_GUEST; - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { + if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) { unlock_rmap(rmap); goto out_unlock; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 9d4b3feda3b6..5d5e12f3bf86 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -640,7 +640,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Check if we might have been invalidated; let the guest retry if so */ spin_lock(&kvm->mmu_lock); ret = -EAGAIN; - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; /* Now traverse again under the lock and change the tree */ @@ -830,7 +830,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, bool large_enable; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* @@ -1191,7 +1191,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, * Increase the mmu notifier sequence number to prevent any page * fault that read the memslot earlier from writing a PTE. */ - kvm->mmu_notifier_seq++; + kvm->mmu_invalidate_seq++; spin_unlock(&kvm->mmu_lock); } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index be8249cc6107..5a64a1341e6f 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1580,7 +1580,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, /* 2. Find the host pte for this L1 guest real address */ /* Used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* See if can find translation in our partition scoped tables for L1 */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 2257fb18cb72..5a05953ae13f 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -219,7 +219,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, g_ptel = ptel; /* used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* Find the memslot (if any) for this address */ @@ -366,7 +366,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; @@ -932,7 +932,7 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, int i; /* Used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); @@ -960,7 +960,7 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, long ret = H_SUCCESS; /* Used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 7f16afc331ef..05668e964140 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -339,7 +339,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, unsigned long flags; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* @@ -460,7 +460,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } spin_lock(&kvm->mmu_lock); - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { ret = -EAGAIN; goto out; } diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 3a35b2d95697..3620ecac2fa1 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -666,7 +666,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, return ret; } - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); if (hfn == KVM_PFN_ERR_HWPOISON) { @@ -686,7 +686,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, spin_lock(&kvm->mmu_lock); - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; if (writable) { diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index 689880eca9ba..9b08082a5d9f 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -31,6 +31,16 @@ #define __noendbr __attribute__((nocf_check)) +/* + * Create a dummy function pointer reference to prevent objtool from marking + * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by + * apply_ibt_endbr()). + */ +#define IBT_NOSEAL(fname) \ + ".pushsection .discard.ibt_endbr_noseal\n\t" \ + _ASM_PTR fname "\n\t" \ + ".popsection\n\t" + static inline __attribute_const__ u32 gen_endbr(void) { u32 endbr; @@ -84,6 +94,7 @@ extern __noendbr void ibt_restore(u64 save); #ifndef __ASSEMBLY__ #define ASM_ENDBR +#define IBT_NOSEAL(name) #define __noendbr diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5ffa578cafe1..2c96c43c313a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -53,7 +53,7 @@ #define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) /* memory slots that are not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 3 +#define KVM_INTERNAL_MEM_SLOTS 3 #define KVM_HALT_POLL_NS_DEFAULT 200000 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b4eeb7c75dfa..f092c54d1a2f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -326,7 +326,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ name ":\n\t" \ - ASM_ENDBR + ASM_ENDBR \ + IBT_NOSEAL(name) #define FOP_FUNC(name) \ __FOP_FUNC(#name) @@ -446,27 +447,12 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); FOP_END /* Special case for SETcc - 1 instruction per cc */ - -/* - * Depending on .config the SETcc functions look like: - * - * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] - * SETcc %al [3 bytes] - * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] - * INT3 [1 byte; CONFIG_SLS] - */ -#define SETCC_ALIGN 16 - #define FOP_SETCC(op) \ - ".align " __stringify(SETCC_ALIGN) " \n\t" \ - ".type " #op ", @function \n\t" \ - #op ": \n\t" \ - ASM_ENDBR \ + FOP_FUNC(op) \ #op " %al \n\t" \ - __FOP_RET(#op) \ - ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" + FOP_RET(op) -__FOP_START(setcc, SETCC_ALIGN) +FOP_START(setcc) FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) @@ -1079,7 +1065,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; - void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf); + void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; asm("push %[flags]; popf; " CALL_NOSPEC diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index eccddb136954..126fa9aec64c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2914,7 +2914,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; __direct_pte_prefetch(vcpu, sp, sptep); @@ -2928,7 +2928,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) * * There are several ways to safely use this helper: * - * - Check mmu_notifier_retry_hva() after grabbing the mapping level, before + * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before * consuming it. In this case, mmu_lock doesn't need to be held during the * lookup, but it does need to be held while checking the MMU notifier. * @@ -3056,7 +3056,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault return; /* - * mmu_notifier_retry() was successful and mmu_lock is held, so + * mmu_invalidate_retry() was successful and mmu_lock is held, so * the pmd can't be split from under us. */ fault->goal_level = fault->req_level; @@ -4203,7 +4203,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, return true; return fault->slot && - mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva); + mmu_invalidate_retry_hva(vcpu->kvm, mmu_seq, fault->hva); } static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) @@ -4227,7 +4227,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (r) return r; - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); r = kvm_faultin_pfn(vcpu, fault); @@ -6055,7 +6055,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) write_lock(&kvm->mmu_lock); - kvm_inc_notifier_count(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end); flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); @@ -6069,7 +6069,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end - gfn_start); - kvm_dec_notifier_count(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end); write_unlock(&kvm->mmu_lock); } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index f5958071220c..39e0205e7300 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -589,7 +589,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; if (sp->role.direct) @@ -838,7 +838,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault else fault->max_level = walker.level; - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); r = kvm_faultin_pfn(vcpu, fault); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c480b1821e1..f4519d3689e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -656,12 +656,12 @@ struct kvm_irq_routing_table { }; #endif -#ifndef KVM_PRIVATE_MEM_SLOTS -#define KVM_PRIVATE_MEM_SLOTS 0 +#ifndef KVM_INTERNAL_MEM_SLOTS +#define KVM_INTERNAL_MEM_SLOTS 0 #endif #define KVM_MEM_SLOTS_NUM SHRT_MAX -#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS) +#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) #ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) @@ -765,10 +765,10 @@ struct kvm { #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) struct mmu_notifier mmu_notifier; - unsigned long mmu_notifier_seq; - long mmu_notifier_count; - unsigned long mmu_notifier_range_start; - unsigned long mmu_notifier_range_end; + unsigned long mmu_invalidate_seq; + long mmu_invalidate_in_progress; + unsigned long mmu_invalidate_range_start; + unsigned long mmu_invalidate_range_end; #endif struct list_head devices; u64 manual_dirty_log_protect; @@ -1357,10 +1357,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif -void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end); -void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end); +void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, + unsigned long end); +void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, + unsigned long end); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -1907,42 +1907,44 @@ extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) -static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) +static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { - if (unlikely(kvm->mmu_notifier_count)) + if (unlikely(kvm->mmu_invalidate_in_progress)) return 1; /* - * Ensure the read of mmu_notifier_count happens before the read - * of mmu_notifier_seq. This interacts with the smp_wmb() in - * mmu_notifier_invalidate_range_end to make sure that the caller - * either sees the old (non-zero) value of mmu_notifier_count or - * the new (incremented) value of mmu_notifier_seq. - * PowerPC Book3s HV KVM calls this under a per-page lock - * rather than under kvm->mmu_lock, for scalability, so - * can't rely on kvm->mmu_lock to keep things ordered. + * Ensure the read of mmu_invalidate_in_progress happens before + * the read of mmu_invalidate_seq. This interacts with the + * smp_wmb() in mmu_notifier_invalidate_range_end to make sure + * that the caller either sees the old (non-zero) value of + * mmu_invalidate_in_progress or the new (incremented) value of + * mmu_invalidate_seq. + * + * PowerPC Book3s HV KVM calls this under a per-page lock rather + * than under kvm->mmu_lock, for scalability, so can't rely on + * kvm->mmu_lock to keep things ordered. */ smp_rmb(); - if (kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } -static inline int mmu_notifier_retry_hva(struct kvm *kvm, - unsigned long mmu_seq, - unsigned long hva) +static inline int mmu_invalidate_retry_hva(struct kvm *kvm, + unsigned long mmu_seq, + unsigned long hva) { lockdep_assert_held(&kvm->mmu_lock); /* - * If mmu_notifier_count is non-zero, then the range maintained by - * kvm_mmu_notifier_invalidate_range_start contains all addresses that - * might be being invalidated. Note that it may include some false + * If mmu_invalidate_in_progress is non-zero, then the range maintained + * by kvm_mmu_notifier_invalidate_range_start contains all addresses + * that might be being invalidated. Note that it may include some false * positives, due to shortcuts when handing concurrent invalidations. */ - if (unlikely(kvm->mmu_notifier_count) && - hva >= kvm->mmu_notifier_range_start && - hva < kvm->mmu_notifier_range_end) + if (unlikely(kvm->mmu_invalidate_in_progress) && + hva >= kvm->mmu_invalidate_range_start && + hva < kvm->mmu_invalidate_range_end) return 1; - if (kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0cec74da7ffe..91678252a9b6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4096,7 +4096,8 @@ static int validate_ibt(struct objtool_file *file) * These sections can reference text addresses, but not with * the intent to indirect branch to them. */ - if (!strncmp(sec->name, ".discard", 8) || + if ((!strncmp(sec->name, ".discard", 8) && + strcmp(sec->name, ".discard.ibt_endbr_noseal")) || !strncmp(sec->name, ".debug", 6) || !strcmp(sec->name, ".altinstructions") || !strcmp(sec->name, ".ibt_endbr_seal") || diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 515dfe9d3bcf..584a5bab3af3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -702,30 +702,31 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, /* * .change_pte() must be surrounded by .invalidate_range_{start,end}(). - * If mmu_notifier_count is zero, then no in-progress invalidations, - * including this one, found a relevant memslot at start(); rechecking - * memslots here is unnecessary. Note, a false positive (count elevated - * by a different invalidation) is sub-optimal but functionally ok. + * If mmu_invalidate_in_progress is zero, then no in-progress + * invalidations, including this one, found a relevant memslot at + * start(); rechecking memslots here is unnecessary. Note, a false + * positive (count elevated by a different invalidation) is sub-optimal + * but functionally ok. */ WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); - if (!READ_ONCE(kvm->mmu_notifier_count)) + if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) return; kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn); } -void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end) +void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, + unsigned long end) { /* * The count increase must become visible at unlock time as no * spte can be established without taking the mmu_lock and * count is also read inside the mmu_lock critical section. */ - kvm->mmu_notifier_count++; - if (likely(kvm->mmu_notifier_count == 1)) { - kvm->mmu_notifier_range_start = start; - kvm->mmu_notifier_range_end = end; + kvm->mmu_invalidate_in_progress++; + if (likely(kvm->mmu_invalidate_in_progress == 1)) { + kvm->mmu_invalidate_range_start = start; + kvm->mmu_invalidate_range_end = end; } else { /* * Fully tracking multiple concurrent ranges has diminishing @@ -736,10 +737,10 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, * accumulate and persist until all outstanding invalidates * complete. */ - kvm->mmu_notifier_range_start = - min(kvm->mmu_notifier_range_start, start); - kvm->mmu_notifier_range_end = - max(kvm->mmu_notifier_range_end, end); + kvm->mmu_invalidate_range_start = + min(kvm->mmu_invalidate_range_start, start); + kvm->mmu_invalidate_range_end = + max(kvm->mmu_invalidate_range_end, end); } } @@ -752,7 +753,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, .end = range->end, .pte = __pte(0), .handler = kvm_unmap_gfn_range, - .on_lock = kvm_inc_notifier_count, + .on_lock = kvm_mmu_invalidate_begin, .on_unlock = kvm_arch_guest_memory_reclaimed, .flush_on_ret = true, .may_block = mmu_notifier_range_blockable(range), @@ -763,7 +764,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, /* * Prevent memslot modification between range_start() and range_end() * so that conditionally locking provides the same result in both - * functions. Without that guarantee, the mmu_notifier_count + * functions. Without that guarantee, the mmu_invalidate_in_progress * adjustments will be imbalanced. * * Pairs with the decrement in range_end(). @@ -779,7 +780,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * any given time, and the caches themselves can check for hva overlap, * i.e. don't need to rely on memslot overlap checks for performance. * Because this runs without holding mmu_lock, the pfn caches must use - * mn_active_invalidate_count (see above) instead of mmu_notifier_count. + * mn_active_invalidate_count (see above) instead of + * mmu_invalidate_in_progress. */ gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, hva_range.may_block); @@ -789,22 +791,22 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, return 0; } -void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end) +void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, + unsigned long end) { /* * This sequence increase will notify the kvm page fault that * the page that is going to be mapped in the spte could have * been freed. */ - kvm->mmu_notifier_seq++; + kvm->mmu_invalidate_seq++; smp_wmb(); /* * The above sequence increase must be visible before the * below count decrease, which is ensured by the smp_wmb above - * in conjunction with the smp_rmb in mmu_notifier_retry(). + * in conjunction with the smp_rmb in mmu_invalidate_retry(). */ - kvm->mmu_notifier_count--; + kvm->mmu_invalidate_in_progress--; } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -816,7 +818,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, .end = range->end, .pte = __pte(0), .handler = (void *)kvm_null_fn, - .on_lock = kvm_dec_notifier_count, + .on_lock = kvm_mmu_invalidate_end, .on_unlock = (void *)kvm_null_fn, .flush_on_ret = false, .may_block = mmu_notifier_range_blockable(range), @@ -837,7 +839,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, if (wake) rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); - BUG_ON(kvm->mmu_notifier_count < 0); + BUG_ON(kvm->mmu_invalidate_in_progress < 0); } static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, @@ -1134,6 +1136,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (!kvm) return ERR_PTR(-ENOMEM); + /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */ + __module_get(kvm_chardev_ops.owner); + KVM_MMU_LOCK_INIT(kvm); mmgrab(current->mm); kvm->mm = current->mm; @@ -1211,9 +1216,17 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (r) goto out_err_no_mmu_notifier; + r = kvm_coalesced_mmio_init(kvm); + if (r < 0) + goto out_no_coalesced_mmio; + + r = kvm_create_vm_debugfs(kvm, fdname); + if (r) + goto out_err_no_debugfs; + r = kvm_arch_post_init_vm(kvm); if (r) - goto out_err_mmu_notifier; + goto out_err; mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); @@ -1222,25 +1235,13 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) preempt_notifier_inc(); kvm_init_pm_notifier(kvm); - /* - * When the fd passed to this ioctl() is opened it pins the module, - * but try_module_get() also prevents getting a reference if the module - * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait"). - */ - if (!try_module_get(kvm_chardev_ops.owner)) { - r = -ENODEV; - goto out_err_mmu_notifier; - } - - r = kvm_create_vm_debugfs(kvm, fdname); - if (r) - goto out_err; - return kvm; out_err: - module_put(kvm_chardev_ops.owner); -out_err_mmu_notifier: + kvm_destroy_vm_debugfs(kvm); +out_err_no_debugfs: + kvm_coalesced_mmio_free(kvm); +out_no_coalesced_mmio: #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) if (kvm->mmu_notifier.ops) mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); @@ -1259,6 +1260,7 @@ out_err_no_irq_srcu: out_err_no_srcu: kvm_arch_free_vm(kvm); mmdrop(current->mm); + module_put(kvm_chardev_ops.owner); return ERR_PTR(r); } @@ -2516,7 +2518,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, { unsigned int flags = FOLL_HWPOISON; struct page *page; - int npages = 0; + int npages; might_sleep(); @@ -4378,7 +4380,7 @@ void kvm_unregister_device_ops(u32 type) static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_create_device *cd) { - const struct kvm_device_ops *ops = NULL; + const struct kvm_device_ops *ops; struct kvm_device *dev; bool test = cd->flags & KVM_CREATE_DEVICE_TEST; int type; @@ -4913,11 +4915,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) goto put_fd; } -#ifdef CONFIG_KVM_MMIO - r = kvm_coalesced_mmio_init(kvm); - if (r < 0) - goto put_kvm; -#endif file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (IS_ERR(file)) { r = PTR_ERR(file); diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index ab519f72f2cd..68ff41d39545 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -112,27 +112,28 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s { /* * mn_active_invalidate_count acts for all intents and purposes - * like mmu_notifier_count here; but the latter cannot be used - * here because the invalidation of caches in the mmu_notifier - * event occurs _before_ mmu_notifier_count is elevated. + * like mmu_invalidate_in_progress here; but the latter cannot + * be used here because the invalidation of caches in the + * mmu_notifier event occurs _before_ mmu_invalidate_in_progress + * is elevated. * * Note, it does not matter that mn_active_invalidate_count * is not protected by gpc->lock. It is guaranteed to * be elevated before the mmu_notifier acquires gpc->lock, and - * isn't dropped until after mmu_notifier_seq is updated. + * isn't dropped until after mmu_invalidate_seq is updated. */ if (kvm->mn_active_invalidate_count) return true; /* * Ensure mn_active_invalidate_count is read before - * mmu_notifier_seq. This pairs with the smp_wmb() in + * mmu_invalidate_seq. This pairs with the smp_wmb() in * mmu_notifier_invalidate_range_end() to guarantee either the * old (non-zero) value of mn_active_invalidate_count or the - * new (incremented) value of mmu_notifier_seq is observed. + * new (incremented) value of mmu_invalidate_seq is observed. */ smp_rmb(); - return kvm->mmu_notifier_seq != mmu_seq; + return kvm->mmu_invalidate_seq != mmu_seq; } static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) @@ -155,7 +156,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) gpc->valid = false; do { - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); write_unlock_irq(&gpc->lock); |