diff options
Diffstat (limited to 'arch/x86/kvm/mmu/mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 597 |
1 files changed, 304 insertions, 293 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 45e1573f8f1d..f4653688fa6d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -193,11 +193,12 @@ struct kvm_mmu_role_regs { /* * Yes, lot's of underscores. They're a hint that you probably shouldn't be - * reading from the role_regs. Once the mmu_role is constructed, it becomes + * reading from the role_regs. Once the root_role is constructed, it becomes * the single source of truth for the MMU's state. */ #define BUILD_MMU_ROLE_REGS_ACCESSOR(reg, name, flag) \ -static inline bool __maybe_unused ____is_##reg##_##name(struct kvm_mmu_role_regs *regs)\ +static inline bool __maybe_unused \ +____is_##reg##_##name(const struct kvm_mmu_role_regs *regs) \ { \ return !!(regs->reg & flag); \ } @@ -221,17 +222,26 @@ BUILD_MMU_ROLE_REGS_ACCESSOR(efer, lma, EFER_LMA); #define BUILD_MMU_ROLE_ACCESSOR(base_or_ext, reg, name) \ static inline bool __maybe_unused is_##reg##_##name(struct kvm_mmu *mmu) \ { \ - return !!(mmu->mmu_role. base_or_ext . reg##_##name); \ + return !!(mmu->cpu_role. base_or_ext . reg##_##name); \ } -BUILD_MMU_ROLE_ACCESSOR(ext, cr0, pg); BUILD_MMU_ROLE_ACCESSOR(base, cr0, wp); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pse); -BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pae); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smep); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, smap); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, pke); BUILD_MMU_ROLE_ACCESSOR(ext, cr4, la57); BUILD_MMU_ROLE_ACCESSOR(base, efer, nx); +BUILD_MMU_ROLE_ACCESSOR(ext, efer, lma); + +static inline bool is_cr0_pg(struct kvm_mmu *mmu) +{ + return mmu->cpu_role.base.level > 0; +} + +static inline bool is_cr4_pae(struct kvm_mmu *mmu) +{ + return !mmu->cpu_role.base.has_4_byte_gpte; +} static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu) { @@ -244,19 +254,6 @@ static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu) return regs; } -static int role_regs_to_root_level(struct kvm_mmu_role_regs *regs) -{ - if (!____is_cr0_pg(regs)) - return 0; - else if (____is_efer_lma(regs)) - return ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL : - PT64_ROOT_4LEVEL; - else if (____is_cr4_pae(regs)) - return PT32E_ROOT_LEVEL; - else - return PT32_ROOT_LEVEL; -} - static inline bool kvm_available_flush_tlb_with_range(void) { return kvm_x86_ops.tlb_remote_flush_with_range; @@ -714,6 +711,9 @@ static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) { + if (sp->role.passthrough) + return sp->gfn; + if (!sp->role.direct) return sp->gfns[index]; @@ -722,6 +722,11 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) { + if (sp->role.passthrough) { + WARN_ON_ONCE(gfn != sp->gfn); + return; + } + if (!sp->role.direct) { sp->gfns[index] = gfn; return; @@ -1478,9 +1483,11 @@ static bool slot_rmap_walk_okay(struct slot_rmap_walk_iterator *iterator) static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) { - if (++iterator->rmap <= iterator->end_rmap) { + while (++iterator->rmap <= iterator->end_rmap) { iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level)); - return; + + if (iterator->rmap->val) + return; } if (++iterator->level > iterator->end_level) { @@ -1833,27 +1840,35 @@ static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); +static bool sp_has_gptes(struct kvm_mmu_page *sp) +{ + if (sp->role.direct) + return false; + + if (sp->role.passthrough) + return false; + + return true; +} + #define for_each_valid_sp(_kvm, _sp, _list) \ hlist_for_each_entry(_sp, _list, hash_link) \ if (is_obsolete_sp((_kvm), (_sp))) { \ } else -#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ +#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \ for_each_valid_sp(_kvm, _sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \ - if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else + if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else -static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, +static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { int ret = vcpu->arch.mmu->sync_page(vcpu, sp); - if (ret < 0) { + if (ret < 0) kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); - return false; - } - - return !!ret; + return ret; } static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, @@ -1975,7 +1990,7 @@ static int mmu_sync_children(struct kvm_vcpu *vcpu, for_each_sp(pages, sp, parents, i) { kvm_unlink_unsync_page(vcpu->kvm, sp); - flush |= kvm_sync_page(vcpu, sp, &invalid_list); + flush |= kvm_sync_page(vcpu, sp, &invalid_list) > 0; mmu_pages_clear_parents(&parents); } if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) { @@ -2011,15 +2026,16 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, int direct, unsigned int access) { - bool direct_mmu = vcpu->arch.mmu->direct_map; + bool direct_mmu = vcpu->arch.mmu->root_role.direct; union kvm_mmu_page_role role; struct hlist_head *sp_list; unsigned quadrant; struct kvm_mmu_page *sp; + int ret; int collisions = 0; LIST_HEAD(invalid_list); - role = vcpu->arch.mmu->mmu_role.base; + role = vcpu->arch.mmu->root_role; role.level = level; role.direct = direct; role.access = access; @@ -2028,6 +2044,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; role.quadrant = quadrant; } + if (level <= vcpu->arch.mmu->cpu_role.base.level) + role.passthrough = 0; sp_list = &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]; for_each_valid_sp(vcpu->kvm, sp, sp_list) { @@ -2068,11 +2086,13 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, * If the sync fails, the page is zapped. If so, break * in order to rebuild it. */ - if (!kvm_sync_page(vcpu, sp, &invalid_list)) + ret = kvm_sync_page(vcpu, sp, &invalid_list); + if (ret < 0) break; WARN_ON(!list_empty(&invalid_list)); - kvm_flush_remote_tlbs(vcpu->kvm); + if (ret > 0) + kvm_flush_remote_tlbs(vcpu->kvm); } __clear_sp_write_flooding_count(sp); @@ -2089,7 +2109,7 @@ trace_get_page: sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, sp_list); - if (!direct) { + if (sp_has_gptes(sp)) { account_shadowed(vcpu->kvm, sp); if (level == PG_LEVEL_4K && kvm_vcpu_write_protect_gfn(vcpu, gfn)) kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1); @@ -2109,11 +2129,11 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato { iterator->addr = addr; iterator->shadow_addr = root; - iterator->level = vcpu->arch.mmu->shadow_root_level; + iterator->level = vcpu->arch.mmu->root_role.level; if (iterator->level >= PT64_ROOT_4LEVEL && - vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL && - !vcpu->arch.mmu->direct_map) + vcpu->arch.mmu->cpu_role.base.level < PT64_ROOT_4LEVEL && + !vcpu->arch.mmu->root_role.direct) iterator->level = PT32E_ROOT_LEVEL; if (iterator->level == PT32E_ROOT_LEVEL) { @@ -2298,7 +2318,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, /* Zapping children means active_mmu_pages has become unstable. */ list_unstable = *nr_zapped; - if (!sp->role.invalid && !sp->role.direct) + if (!sp->role.invalid && sp_has_gptes(sp)) unaccount_shadowed(kvm, sp); if (sp->unsync) @@ -2478,7 +2498,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) pgprintk("%s: looking for gfn %llx\n", __func__, gfn); r = 0; write_lock(&kvm->mmu_lock); - for_each_gfn_indirect_valid_sp(kvm, sp, gfn) { + for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) { pgprintk("%s: gfn %llx role %x\n", __func__, gfn, sp->role.word); r = 1; @@ -2495,7 +2515,7 @@ static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) gpa_t gpa; int r; - if (vcpu->arch.mmu->direct_map) + if (vcpu->arch.mmu->root_role.direct) return 0; gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); @@ -2540,7 +2560,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, * that case, KVM must complete emulation of the guest TLB flush before * allowing shadow pages to become unsync (writable by the guest). */ - for_each_gfn_indirect_valid_sp(kvm, sp, gfn) { + for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) { if (!can_unsync) return -EPERM; @@ -2642,6 +2662,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, *sptep, write_fault, gfn); if (unlikely(is_noslot_pfn(pfn))) { + vcpu->stat.pf_mmio_spte_created++; mark_mmio_spte(vcpu, sptep, gfn, pte_access); return RET_PF_EMULATE; } @@ -2962,7 +2983,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) return ret; direct_pte_prefetch(vcpu, it.sptep); - ++vcpu->stat.pf_fixed; return ret; } @@ -2989,14 +3009,12 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) return -EFAULT; } -static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, - unsigned int access, int *ret_val) +static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, + unsigned int access) { /* The pfn is invalid, report the error! */ - if (unlikely(is_error_pfn(fault->pfn))) { - *ret_val = kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn); - return true; - } + if (unlikely(is_error_pfn(fault->pfn))) + return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn); if (unlikely(!fault->slot)) { gva_t gva = fault->is_tdp ? 0 : fault->addr; @@ -3013,44 +3031,48 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa * and only if L1's MAXPHYADDR is inaccurate with respect to * the hardware's). */ - if (unlikely(!shadow_mmio_value) || - unlikely(fault->gfn > kvm_mmu_max_gfn())) { - *ret_val = RET_PF_EMULATE; - return true; - } + if (unlikely(!enable_mmio_caching) || + unlikely(fault->gfn > kvm_mmu_max_gfn())) + return RET_PF_EMULATE; } - return false; + return RET_PF_CONTINUE; } static bool page_fault_can_be_fast(struct kvm_page_fault *fault) { /* - * Do not fix the mmio spte with invalid generation number which - * need to be updated by slow page fault path. + * Page faults with reserved bits set, i.e. faults on MMIO SPTEs, only + * reach the common page fault handler if the SPTE has an invalid MMIO + * generation number. Refreshing the MMIO generation needs to go down + * the slow path. Note, EPT Misconfigs do NOT set the PRESENT flag! */ if (fault->rsvd) return false; - /* See if the page fault is due to an NX violation */ - if (unlikely(fault->exec && fault->present)) - return false; - /* * #PF can be fast if: - * 1. The shadow page table entry is not present, which could mean that - * the fault is potentially caused by access tracking (if enabled). - * 2. The shadow page table entry is present and the fault - * is caused by write-protect, that means we just need change the W - * bit of the spte which can be done out of mmu-lock. * - * However, if access tracking is disabled we know that a non-present - * page must be a genuine page fault where we have to create a new SPTE. - * So, if access tracking is disabled, we return true only for write - * accesses to a present page. + * 1. The shadow page table entry is not present and A/D bits are + * disabled _by KVM_, which could mean that the fault is potentially + * caused by access tracking (if enabled). If A/D bits are enabled + * by KVM, but disabled by L1 for L2, KVM is forced to disable A/D + * bits for L2 and employ access tracking, but the fast page fault + * mechanism only supports direct MMUs. + * 2. The shadow page table entry is present, the access is a write, + * and no reserved bits are set (MMIO SPTEs cannot be "fixed"), i.e. + * the fault was caused by a write-protection violation. If the + * SPTE is MMU-writable (determined later), the fault can be fixed + * by setting the Writable bit, which can be done out of mmu_lock. */ + if (!fault->present) + return !kvm_ad_enabled(); - return shadow_acc_track_mask != 0 || (fault->write && fault->present); + /* + * Note, instruction fetches and writes are mutually exclusive, ignore + * the "exec" flag. + */ + return fault->write; } /* @@ -3165,13 +3187,25 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) new_spte = spte; - if (is_access_track_spte(spte)) + /* + * KVM only supports fixing page faults outside of MMU lock for + * direct MMUs, nested MMUs are always indirect, and KVM always + * uses A/D bits for non-nested MMUs. Thus, if A/D bits are + * enabled, the SPTE can't be an access-tracked SPTE. + */ + if (unlikely(!kvm_ad_enabled()) && is_access_track_spte(spte)) new_spte = restore_acc_track_spte(new_spte); /* - * Currently, to simplify the code, write-protection can - * be removed in the fast path only if the SPTE was - * write-protected for dirty-logging or access tracking. + * To keep things simple, only SPTEs that are MMU-writable can + * be made fully writable outside of mmu_lock, e.g. only SPTEs + * that were write-protected for dirty-logging or access + * tracking are handled here. Don't bother checking if the + * SPTE is writable to prioritize running with A/D bits enabled. + * The is_access_allowed() check above handles the common case + * of the fault being spurious, and the SPTE is known to be + * shadow-present, i.e. except for access tracking restoration + * making the new SPTE writable, the check is wasteful. */ if (fault->write && is_mmu_writable_spte(spte)) { new_spte |= PT_WRITABLE_MASK; @@ -3217,6 +3251,9 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) trace_fast_page_fault(vcpu, fault, sptep, spte, ret); walk_shadow_page_lockless_end(vcpu); + if (ret != RET_PF_INVALID) + vcpu->stat.pf_fast++; + return ret; } @@ -3303,7 +3340,7 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu) * This should not be called while L2 is active, L2 can't invalidate * _only_ its own roots, e.g. INVVPID unconditionally exits. */ - WARN_ON_ONCE(mmu->mmu_role.base.guest_mode); + WARN_ON_ONCE(mmu->root_role.guest_mode); for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { root_hpa = mmu->prev_roots[i].hpa; @@ -3346,7 +3383,7 @@ static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva, static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; - u8 shadow_root_level = mmu->shadow_root_level; + u8 shadow_root_level = mmu->root_role.level; hpa_t root; unsigned i; int r; @@ -3470,7 +3507,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * On SVM, reading PDPTRs might access guest memory, which might fault * and thus might sleep. Grab the PDPTRs before acquiring mmu_lock. */ - if (mmu->root_level == PT32E_ROOT_LEVEL) { + if (mmu->cpu_role.base.level == PT32E_ROOT_LEVEL) { for (i = 0; i < 4; ++i) { pdptrs[i] = mmu->get_pdptr(vcpu, i); if (!(pdptrs[i] & PT_PRESENT_MASK)) @@ -3494,9 +3531,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * Do we shadow a long mode page table? If so we need to * write-protect the guests page table root. */ - if (mmu->root_level >= PT64_ROOT_4LEVEL) { + if (mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) { root = mmu_alloc_root(vcpu, root_gfn, 0, - mmu->shadow_root_level, false); + mmu->root_role.level, false); mmu->root.hpa = root; goto set_root_pgd; } @@ -3511,8 +3548,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * or a PAE 3-level page table. In either case we need to be aware that * the shadow page table may be a PAE or a long mode page table. */ - pm_mask = PT_PRESENT_MASK | shadow_me_mask; - if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL) { + pm_mask = PT_PRESENT_MASK | shadow_me_value; + if (mmu->root_role.level >= PT64_ROOT_4LEVEL) { pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; if (WARN_ON_ONCE(!mmu->pml4_root)) { @@ -3521,7 +3558,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) } mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask; - if (mmu->shadow_root_level == PT64_ROOT_5LEVEL) { + if (mmu->root_role.level == PT64_ROOT_5LEVEL) { if (WARN_ON_ONCE(!mmu->pml5_root)) { r = -EIO; goto out_unlock; @@ -3533,7 +3570,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) for (i = 0; i < 4; ++i) { WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i])); - if (mmu->root_level == PT32E_ROOT_LEVEL) { + if (mmu->cpu_role.base.level == PT32E_ROOT_LEVEL) { if (!(pdptrs[i] & PT_PRESENT_MASK)) { mmu->pae_root[i] = INVALID_PAE_ROOT; continue; @@ -3546,9 +3583,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) mmu->pae_root[i] = root | pm_mask; } - if (mmu->shadow_root_level == PT64_ROOT_5LEVEL) + if (mmu->root_role.level == PT64_ROOT_5LEVEL) mmu->root.hpa = __pa(mmu->pml5_root); - else if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) + else if (mmu->root_role.level == PT64_ROOT_4LEVEL) mmu->root.hpa = __pa(mmu->pml4_root); else mmu->root.hpa = __pa(mmu->pae_root); @@ -3564,7 +3601,7 @@ out_unlock: static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; - bool need_pml5 = mmu->shadow_root_level > PT64_ROOT_4LEVEL; + bool need_pml5 = mmu->root_role.level > PT64_ROOT_4LEVEL; u64 *pml5_root = NULL; u64 *pml4_root = NULL; u64 *pae_root; @@ -3575,8 +3612,9 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) * equivalent level in the guest's NPT to shadow. Allocate the tables * on demand, as running a 32-bit L1 VMM on 64-bit KVM is very rare. */ - if (mmu->direct_map || mmu->root_level >= PT64_ROOT_4LEVEL || - mmu->shadow_root_level < PT64_ROOT_4LEVEL) + if (mmu->root_role.direct || + mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL || + mmu->root_role.level < PT64_ROOT_4LEVEL) return 0; /* @@ -3672,7 +3710,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) int i; struct kvm_mmu_page *sp; - if (vcpu->arch.mmu->direct_map) + if (vcpu->arch.mmu->root_role.direct) return; if (!VALID_PAGE(vcpu->arch.mmu->root.hpa)) @@ -3680,7 +3718,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); - if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { + if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu->root.hpa; sp = to_shadow_page(root); @@ -3902,14 +3940,33 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, arch.token = alloc_apf_token(vcpu); arch.gfn = gfn; - arch.direct_map = vcpu->arch.mmu->direct_map; + arch.direct_map = vcpu->arch.mmu->root_role.direct; arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); return kvm_setup_async_pf(vcpu, cr2_or_gpa, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } -static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r) +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) +{ + int r; + + if ((vcpu->arch.mmu->root_role.direct != work->arch.direct_map) || + work->wakeup_all) + return; + + r = kvm_mmu_reload(vcpu); + if (unlikely(r)) + return; + + if (!vcpu->arch.mmu->root_role.direct && + work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu)) + return; + + kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true); +} + +static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_memory_slot *slot = fault->slot; bool async; @@ -3920,7 +3977,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, * be zapped before KVM inserts a new MMIO SPTE for the gfn. */ if (slot && (slot->flags & KVM_MEMSLOT_INVALID)) - goto out_retry; + return RET_PF_RETRY; if (!kvm_is_visible_memslot(slot)) { /* Don't expose private memslots to L2. */ @@ -3928,7 +3985,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, fault->slot = NULL; fault->pfn = KVM_PFN_NOSLOT; fault->map_writable = false; - return false; + return RET_PF_CONTINUE; } /* * If the APIC access page exists but is disabled, go directly @@ -3937,10 +3994,8 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, * when the AVIC is re-enabled. */ if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT && - !kvm_apicv_activated(vcpu->kvm)) { - *r = RET_PF_EMULATE; - return true; - } + !kvm_apicv_activated(vcpu->kvm)) + return RET_PF_EMULATE; } async = false; @@ -3948,26 +4003,23 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, fault->write, &fault->map_writable, &fault->hva); if (!async) - return false; /* *pfn has correct page already */ + return RET_PF_CONTINUE; /* *pfn has correct page already */ if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(fault->addr, fault->gfn); if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) { trace_kvm_async_pf_doublefault(fault->addr, fault->gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); - goto out_retry; - } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) - goto out_retry; + return RET_PF_RETRY; + } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) { + return RET_PF_RETRY; + } } fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL, fault->write, &fault->map_writable, &fault->hva); - return false; - -out_retry: - *r = RET_PF_RETRY; - return true; + return RET_PF_CONTINUE; } /* @@ -4022,10 +4074,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (kvm_faultin_pfn(vcpu, fault, &r)) + r = kvm_faultin_pfn(vcpu, fault); + if (r != RET_PF_CONTINUE) return r; - if (handle_abnormal_pfn(vcpu, fault, ACC_ALL, &r)) + r = handle_abnormal_pfn(vcpu, fault, ACC_ALL); + if (r != RET_PF_CONTINUE) return r; r = RET_PF_RETRY; @@ -4120,7 +4174,6 @@ static void nonpaging_init_context(struct kvm_mmu *context) context->gva_to_gpa = nonpaging_gva_to_gpa; context->sync_page = nonpaging_sync_page; context->invlpg = NULL; - context->direct_map = true; } static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd, @@ -4214,7 +4267,7 @@ static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu, void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd) { struct kvm_mmu *mmu = vcpu->arch.mmu; - union kvm_mmu_page_role new_role = mmu->mmu_role.base; + union kvm_mmu_page_role new_role = mmu->root_role; if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) { /* kvm_mmu_ensure_valid_pgd will set up a new root. */ @@ -4391,12 +4444,12 @@ static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu) guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES); } -static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) +static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) { __reset_rsvds_bits_mask(&context->guest_rsvd_check, vcpu->arch.reserved_gpa_bits, - context->root_level, is_efer_nx(context), + context->cpu_role.base.level, is_efer_nx(context), guest_can_use_gbpages(vcpu), is_cr4_pse(context), guest_cpuid_is_amd_or_hygon(vcpu)); @@ -4461,16 +4514,6 @@ static inline u64 reserved_hpa_bits(void) static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - /* - * KVM uses NX when TDP is disabled to handle a variety of scenarios, - * notably for huge SPTEs if iTLB multi-hit mitigation is enabled and - * to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0. - * The iTLB multi-hit workaround can be toggled at any time, so assume - * NX can be used by any non-nested shadow MMU to avoid having to reset - * MMU contexts. Note, KVM forces EFER.NX=1 when TDP is disabled. - */ - bool uses_nx = is_efer_nx(context) || !tdp_enabled; - /* @amd adds a check on bit of SPTEs, which KVM shouldn't use anyways. */ bool is_amd = true; /* KVM doesn't use 2-level page tables for the shadow MMU. */ @@ -4478,19 +4521,28 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct rsvd_bits_validate *shadow_zero_check; int i; - WARN_ON_ONCE(context->shadow_root_level < PT32E_ROOT_LEVEL); + WARN_ON_ONCE(context->root_role.level < PT32E_ROOT_LEVEL); shadow_zero_check = &context->shadow_zero_check; __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), - context->shadow_root_level, uses_nx, + context->root_role.level, + context->root_role.efer_nx, guest_can_use_gbpages(vcpu), is_pse, is_amd); if (!shadow_me_mask) return; - for (i = context->shadow_root_level; --i >= 0;) { - shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; - shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; + for (i = context->root_role.level; --i >= 0;) { + /* + * So far shadow_me_value is a constant during KVM's life + * time. Bits in shadow_me_value are allowed to be set. + * Bits in shadow_me_mask but not in shadow_me_value are + * not allowed to be set. + */ + shadow_zero_check->rsvd_bits_mask[0][i] |= shadow_me_mask; + shadow_zero_check->rsvd_bits_mask[1][i] |= shadow_me_mask; + shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_value; + shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_value; } } @@ -4515,7 +4567,7 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context) if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), - context->shadow_root_level, false, + context->root_role.level, false, boot_cpu_has(X86_FEATURE_GBPAGES), false, true); else @@ -4526,7 +4578,7 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context) if (!shadow_me_mask) return; - for (i = context->shadow_root_level; --i >= 0;) { + for (i = context->root_role.level; --i >= 0;) { shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; } @@ -4700,7 +4752,7 @@ static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu, if (!is_cr0_pg(mmu)) return; - reset_rsvds_bits_mask(vcpu, mmu); + reset_guest_rsvds_bits_mask(vcpu, mmu); update_permission_bitmask(mmu, false); update_pkru_bitmask(mmu); } @@ -4711,7 +4763,6 @@ static void paging64_init_context(struct kvm_mmu *context) context->gva_to_gpa = paging64_gva_to_gpa; context->sync_page = paging64_sync_page; context->invlpg = paging64_invlpg; - context->direct_map = false; } static void paging32_init_context(struct kvm_mmu *context) @@ -4720,51 +4771,45 @@ static void paging32_init_context(struct kvm_mmu *context) context->gva_to_gpa = paging32_gva_to_gpa; context->sync_page = paging32_sync_page; context->invlpg = paging32_invlpg; - context->direct_map = false; -} - -static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu, - struct kvm_mmu_role_regs *regs) -{ - union kvm_mmu_extended_role ext = {0}; - - if (____is_cr0_pg(regs)) { - ext.cr0_pg = 1; - ext.cr4_pae = ____is_cr4_pae(regs); - ext.cr4_smep = ____is_cr4_smep(regs); - ext.cr4_smap = ____is_cr4_smap(regs); - ext.cr4_pse = ____is_cr4_pse(regs); - - /* PKEY and LA57 are active iff long mode is active. */ - ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs); - ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs); - ext.efer_lma = ____is_efer_lma(regs); - } - - ext.valid = 1; - - return ext; } -static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu, - struct kvm_mmu_role_regs *regs, - bool base_only) +static union kvm_cpu_role +kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs) { - union kvm_mmu_role role = {0}; + union kvm_cpu_role role = {0}; role.base.access = ACC_ALL; - if (____is_cr0_pg(regs)) { - role.base.efer_nx = ____is_efer_nx(regs); - role.base.cr0_wp = ____is_cr0_wp(regs); - } role.base.smm = is_smm(vcpu); role.base.guest_mode = is_guest_mode(vcpu); + role.ext.valid = 1; - if (base_only) + if (!____is_cr0_pg(regs)) { + role.base.direct = 1; return role; + } - role.ext = kvm_calc_mmu_role_ext(vcpu, regs); + role.base.efer_nx = ____is_efer_nx(regs); + role.base.cr0_wp = ____is_cr0_wp(regs); + role.base.smep_andnot_wp = ____is_cr4_smep(regs) && !____is_cr0_wp(regs); + role.base.smap_andnot_wp = ____is_cr4_smap(regs) && !____is_cr0_wp(regs); + role.base.has_4_byte_gpte = !____is_cr4_pae(regs); + if (____is_efer_lma(regs)) + role.base.level = ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL + : PT64_ROOT_4LEVEL; + else if (____is_cr4_pae(regs)) + role.base.level = PT32E_ROOT_LEVEL; + else + role.base.level = PT32_ROOT_LEVEL; + + role.ext.cr4_smep = ____is_cr4_smep(regs); + role.ext.cr4_smap = ____is_cr4_smap(regs); + role.ext.cr4_pse = ____is_cr4_pse(regs); + + /* PKEY and LA57 are active iff long mode is active. */ + role.ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs); + role.ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs); + role.ext.efer_lma = ____is_efer_lma(regs); return role; } @@ -4781,40 +4826,43 @@ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu) return max_tdp_level; } -static union kvm_mmu_role +static union kvm_mmu_page_role kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, - struct kvm_mmu_role_regs *regs, bool base_only) + union kvm_cpu_role cpu_role) { - union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, regs, base_only); + union kvm_mmu_page_role role = {0}; - role.base.ad_disabled = (shadow_accessed_mask == 0); - role.base.level = kvm_mmu_get_tdp_level(vcpu); - role.base.direct = true; - role.base.has_4_byte_gpte = false; + role.access = ACC_ALL; + role.cr0_wp = true; + role.efer_nx = true; + role.smm = cpu_role.base.smm; + role.guest_mode = cpu_role.base.guest_mode; + role.ad_disabled = !kvm_ad_enabled(); + role.level = kvm_mmu_get_tdp_level(vcpu); + role.direct = true; + role.has_4_byte_gpte = false; return role; } -static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) +static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu, + union kvm_cpu_role cpu_role) { struct kvm_mmu *context = &vcpu->arch.root_mmu; - struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu); - union kvm_mmu_role new_role = - kvm_calc_tdp_mmu_root_page_role(vcpu, ®s, false); + union kvm_mmu_page_role root_role = kvm_calc_tdp_mmu_root_page_role(vcpu, cpu_role); - if (new_role.as_u64 == context->mmu_role.as_u64) + if (cpu_role.as_u64 == context->cpu_role.as_u64 && + root_role.word == context->root_role.word) return; - context->mmu_role.as_u64 = new_role.as_u64; + context->cpu_role.as_u64 = cpu_role.as_u64; + context->root_role.word = root_role.word; context->page_fault = kvm_tdp_page_fault; context->sync_page = nonpaging_sync_page; context->invlpg = NULL; - context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu); - context->direct_map = true; context->get_guest_pgd = get_cr3; context->get_pdptr = kvm_pdptr_read; context->inject_page_fault = kvm_inject_page_fault; - context->root_level = role_regs_to_root_level(®s); if (!is_cr0_pg(context)) context->gva_to_gpa = nonpaging_gva_to_gpa; @@ -4827,46 +4875,16 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) reset_tdp_shadow_zero_bits_mask(context); } -static union kvm_mmu_role -kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu, - struct kvm_mmu_role_regs *regs, bool base_only) -{ - union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, regs, base_only); - - role.base.smep_andnot_wp = role.ext.cr4_smep && !____is_cr0_wp(regs); - role.base.smap_andnot_wp = role.ext.cr4_smap && !____is_cr0_wp(regs); - role.base.has_4_byte_gpte = ____is_cr0_pg(regs) && !____is_cr4_pae(regs); - - return role; -} - -static union kvm_mmu_role -kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, - struct kvm_mmu_role_regs *regs, bool base_only) -{ - union kvm_mmu_role role = - kvm_calc_shadow_root_page_role_common(vcpu, regs, base_only); - - role.base.direct = !____is_cr0_pg(regs); - - if (!____is_efer_lma(regs)) - role.base.level = PT32E_ROOT_LEVEL; - else if (____is_cr4_la57(regs)) - role.base.level = PT64_ROOT_5LEVEL; - else - role.base.level = PT64_ROOT_4LEVEL; - - return role; -} - static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context, - struct kvm_mmu_role_regs *regs, - union kvm_mmu_role new_role) + union kvm_cpu_role cpu_role, + union kvm_mmu_page_role root_role) { - if (new_role.as_u64 == context->mmu_role.as_u64) + if (cpu_role.as_u64 == context->cpu_role.as_u64 && + root_role.word == context->root_role.word) return; - context->mmu_role.as_u64 = new_role.as_u64; + context->cpu_role.as_u64 = cpu_role.as_u64; + context->root_role.word = root_role.word; if (!is_cr0_pg(context)) nonpaging_init_context(context); @@ -4874,35 +4892,34 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte paging64_init_context(context); else paging32_init_context(context); - context->root_level = role_regs_to_root_level(regs); reset_guest_paging_metadata(vcpu, context); - context->shadow_root_level = new_role.base.level; - reset_shadow_zero_bits_mask(vcpu, context); } static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, - struct kvm_mmu_role_regs *regs) + union kvm_cpu_role cpu_role) { struct kvm_mmu *context = &vcpu->arch.root_mmu; - union kvm_mmu_role new_role = - kvm_calc_shadow_mmu_root_page_role(vcpu, regs, false); + union kvm_mmu_page_role root_role; - shadow_mmu_init_context(vcpu, context, regs, new_role); -} + root_role = cpu_role.base; -static union kvm_mmu_role -kvm_calc_shadow_npt_root_page_role(struct kvm_vcpu *vcpu, - struct kvm_mmu_role_regs *regs) -{ - union kvm_mmu_role role = - kvm_calc_shadow_root_page_role_common(vcpu, regs, false); + /* KVM uses PAE paging whenever the guest isn't using 64-bit paging. */ + root_role.level = max_t(u32, root_role.level, PT32E_ROOT_LEVEL); - role.base.direct = false; - role.base.level = kvm_mmu_get_tdp_level(vcpu); + /* + * KVM forces EFER.NX=1 when TDP is disabled, reflect it in the MMU role. + * KVM uses NX when TDP is disabled to handle a variety of scenarios, + * notably for huge SPTEs if iTLB multi-hit mitigation is enabled and + * to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0. + * The iTLB multi-hit workaround can be toggled at any time, so assume + * NX can be used by any non-nested shadow MMU to avoid having to reset + * MMU contexts. + */ + root_role.efer_nx = true; - return role; + shadow_mmu_init_context(vcpu, context, cpu_role, root_role); } void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, @@ -4914,24 +4931,34 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, .cr4 = cr4 & ~X86_CR4_PKE, .efer = efer, }; - union kvm_mmu_role new_role; + union kvm_cpu_role cpu_role = kvm_calc_cpu_role(vcpu, ®s); + union kvm_mmu_page_role root_role; + + /* NPT requires CR0.PG=1. */ + WARN_ON_ONCE(cpu_role.base.direct); - new_role = kvm_calc_shadow_npt_root_page_role(vcpu, ®s); + root_role = cpu_role.base; + root_role.level = kvm_mmu_get_tdp_level(vcpu); + if (root_role.level == PT64_ROOT_5LEVEL && + cpu_role.base.level == PT64_ROOT_4LEVEL) + root_role.passthrough = 1; - shadow_mmu_init_context(vcpu, context, ®s, new_role); + shadow_mmu_init_context(vcpu, context, cpu_role, root_role); kvm_mmu_new_pgd(vcpu, nested_cr3); } EXPORT_SYMBOL_GPL(kvm_init_shadow_npt_mmu); -static union kvm_mmu_role +static union kvm_cpu_role kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, bool execonly, u8 level) { - union kvm_mmu_role role = {0}; - - /* SMM flag is inherited from root_mmu */ - role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm; + union kvm_cpu_role role = {0}; + /* + * KVM does not support SMM transfer monitors, and consequently does not + * support the "entry to SMM" control either. role.base.smm is always 0. + */ + WARN_ON_ONCE(is_smm(vcpu)); role.base.level = level; role.base.has_4_byte_gpte = false; role.base.direct = false; @@ -4939,7 +4966,6 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, role.base.guest_mode = true; role.base.access = ACC_ALL; - /* EPT, and thus nested EPT, does not consume CR0, CR4, nor EFER. */ role.ext.word = 0; role.ext.execonly = execonly; role.ext.valid = 1; @@ -4953,22 +4979,20 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, { struct kvm_mmu *context = &vcpu->arch.guest_mmu; u8 level = vmx_eptp_page_walk_level(new_eptp); - union kvm_mmu_role new_role = + union kvm_cpu_role new_mode = kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty, execonly, level); - if (new_role.as_u64 != context->mmu_role.as_u64) { - context->mmu_role.as_u64 = new_role.as_u64; + if (new_mode.as_u64 != context->cpu_role.as_u64) { + /* EPT, and thus nested EPT, does not consume CR0, CR4, nor EFER. */ + context->cpu_role.as_u64 = new_mode.as_u64; + context->root_role.word = new_mode.base.word; - context->shadow_root_level = level; - - context->ept_ad = accessed_dirty; context->page_fault = ept_page_fault; context->gva_to_gpa = ept_gva_to_gpa; context->sync_page = ept_sync_page; context->invlpg = ept_invlpg; - context->root_level = level; - context->direct_map = false; + update_permission_bitmask(context, true); context->pkru_mask = 0; reset_rsvds_bits_mask_ept(vcpu, context, execonly, huge_page_level); @@ -4979,49 +5003,30 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, } EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); -static void init_kvm_softmmu(struct kvm_vcpu *vcpu) +static void init_kvm_softmmu(struct kvm_vcpu *vcpu, + union kvm_cpu_role cpu_role) { struct kvm_mmu *context = &vcpu->arch.root_mmu; - struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu); - kvm_init_shadow_mmu(vcpu, ®s); + kvm_init_shadow_mmu(vcpu, cpu_role); context->get_guest_pgd = get_cr3; context->get_pdptr = kvm_pdptr_read; context->inject_page_fault = kvm_inject_page_fault; } -static union kvm_mmu_role -kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu, struct kvm_mmu_role_regs *regs) +static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu, + union kvm_cpu_role new_mode) { - union kvm_mmu_role role; - - role = kvm_calc_shadow_root_page_role_common(vcpu, regs, false); - - /* - * Nested MMUs are used only for walking L2's gva->gpa, they never have - * shadow pages of their own and so "direct" has no meaning. Set it - * to "true" to try to detect bogus usage of the nested MMU. - */ - role.base.direct = true; - role.base.level = role_regs_to_root_level(regs); - return role; -} - -static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) -{ - struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu); - union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu, ®s); struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; - if (new_role.as_u64 == g_context->mmu_role.as_u64) + if (new_mode.as_u64 == g_context->cpu_role.as_u64) return; - g_context->mmu_role.as_u64 = new_role.as_u64; + g_context->cpu_role.as_u64 = new_mode.as_u64; g_context->get_guest_pgd = get_cr3; g_context->get_pdptr = kvm_pdptr_read; g_context->inject_page_fault = kvm_inject_page_fault; - g_context->root_level = new_role.base.level; /* * L2 page tables are never shadowed, so there is no need to sync @@ -5051,12 +5056,15 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) void kvm_init_mmu(struct kvm_vcpu *vcpu) { + struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu); + union kvm_cpu_role cpu_role = kvm_calc_cpu_role(vcpu, ®s); + if (mmu_is_nested(vcpu)) - init_kvm_nested_mmu(vcpu); + init_kvm_nested_mmu(vcpu, cpu_role); else if (tdp_enabled) - init_kvm_tdp_mmu(vcpu); + init_kvm_tdp_mmu(vcpu, cpu_role); else - init_kvm_softmmu(vcpu); + init_kvm_softmmu(vcpu, cpu_role); } EXPORT_SYMBOL_GPL(kvm_init_mmu); @@ -5074,9 +5082,12 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) * problem is swept under the rug; KVM's CPUID API is horrific and * it's all but impossible to solve it without introducing a new API. */ - vcpu->arch.root_mmu.mmu_role.ext.valid = 0; - vcpu->arch.guest_mmu.mmu_role.ext.valid = 0; - vcpu->arch.nested_mmu.mmu_role.ext.valid = 0; + vcpu->arch.root_mmu.root_role.word = 0; + vcpu->arch.guest_mmu.root_role.word = 0; + vcpu->arch.nested_mmu.root_role.word = 0; + vcpu->arch.root_mmu.cpu_role.ext.valid = 0; + vcpu->arch.guest_mmu.cpu_role.ext.valid = 0; + vcpu->arch.nested_mmu.cpu_role.ext.valid = 0; kvm_mmu_reset_context(vcpu); /* @@ -5097,13 +5108,13 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) { int r; - r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->direct_map); + r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->root_role.direct); if (r) goto out; r = mmu_alloc_special_roots(vcpu); if (r) goto out; - if (vcpu->arch.mmu->direct_map) + if (vcpu->arch.mmu->root_role.direct) r = mmu_alloc_direct_roots(vcpu); else r = mmu_alloc_shadow_roots(vcpu); @@ -5330,7 +5341,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ++vcpu->kvm->stat.mmu_pte_write; - for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { + for_each_gfn_valid_sp_with_gptes(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); @@ -5356,11 +5367,11 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, write_unlock(&vcpu->kvm->mmu_lock); } -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, +int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { int r, emulation_type = EMULTYPE_PF; - bool direct = vcpu->arch.mmu->direct_map; + bool direct = vcpu->arch.mmu->root_role.direct; if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; @@ -5391,7 +5402,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, * paging in both guests. If true, we simply unprotect the page * and resume the guest. */ - if (vcpu->arch.mmu->direct_map && + if (vcpu->arch.mmu->root_role.direct && (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); return 1; @@ -5625,7 +5636,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) if (!tdp_enabled) set_memory_decrypted((unsigned long)mmu->pae_root, 1); else - WARN_ON_ONCE(shadow_me_mask); + WARN_ON_ONCE(shadow_me_value); for (i = 0; i < 4; ++i) mmu->pae_root[i] = INVALID_PAE_ROOT; @@ -6287,7 +6298,7 @@ int kvm_mmu_vendor_module_init(void) */ BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32)); BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32)); - BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64)); + BUILD_BUG_ON(sizeof(union kvm_cpu_role) != sizeof(u64)); kvm_mmu_reset_all_pte_masks(); |