From 84e5ffd045f33e4fa32370135436d987478d0bf7 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 20 Apr 2022 21:12:04 +0800 Subject: KVM: X86/MMU: Fix shadowing 5-level NPT for 4-level NPT L1 guest When shadowing 5-level NPT for 4-level NPT L1 guest, the root_sp is allocated with role.level = 5 and the guest pagetable's root gfn. And root_sp->spt[0] is also allocated with the same gfn and the same role except role.level = 4. Luckily that they are different shadow pages, but only root_sp->spt[0] is the real translation of the guest pagetable. Here comes a problem: If the guest switches from gCR4_LA57=0 to gCR4_LA57=1 (or vice verse) and uses the same gfn as the root page for nested NPT before and after switching gCR4_LA57. The host (hCR4_LA57=1) might use the same root_sp for the guest even the guest switches gCR4_LA57. The guest will see unexpected page mapped and L2 may exploit the bug and hurt L1. It is lucky that the problem can't hurt L0. And three special cases need to be handled: The root_sp should be like role.direct=1 sometimes: its contents are not backed by gptes, root_sp->gfns is meaningless. (For a normal high level sp in shadow paging, sp->gfns is often unused and kept zero, but it could be relevant and meaningful if sp->gfns is used because they are backed by concrete gptes.) For such root_sp in the case, root_sp is just a portal to contribute root_sp->spt[0], and root_sp->gfns should not be used and root_sp->spt[0] should not be dropped if gpte[0] of the guest root pagetable is changed. Such root_sp should not be accounted too. So add role.passthrough to distinguish the shadow pages in the hash when gCR4_LA57 is toggled and fix above special cases by using it in kvm_mmu_page_{get|set}_gfn() and sp_has_gptes(). Signed-off-by: Lai Jiangshan Message-Id: <20220420131204.2850-3-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 16 ++++++++++++++++ arch/x86/kvm/mmu/paging_tmpl.h | 1 + 2 files changed, 17 insertions(+) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index caa208a3fcdc..abdce06f6880 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -734,6 +734,9 @@ static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) { + if (sp->role.passthrough) + return sp->gfn; + if (!sp->role.direct) return sp->gfns[index]; @@ -742,6 +745,11 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) { + if (sp->role.passthrough) { + WARN_ON_ONCE(gfn != sp->gfn); + return; + } + if (!sp->role.direct) { sp->gfns[index] = gfn; return; @@ -1858,6 +1866,9 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp) if (sp->role.direct) return false; + if (sp->role.passthrough) + return false; + return true; } @@ -2054,6 +2065,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; role.quadrant = quadrant; } + if (level <= vcpu->arch.mmu->cpu_role.base.level) + role.passthrough = 0; sp_list = &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]; for_each_valid_sp(vcpu->kvm, sp, sp_list) { @@ -4907,6 +4920,9 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, root_role = cpu_role.base; root_role.level = kvm_mmu_get_tdp_level(vcpu); + if (root_role.level == PT64_ROOT_5LEVEL && + cpu_role.base.level == PT64_ROOT_4LEVEL) + root_role.passthrough = 1; shadow_mmu_init_context(vcpu, context, cpu_role, root_role); kvm_mmu_new_pgd(vcpu, nested_cr3); diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 66f1acf153c4..b025decf610d 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -1007,6 +1007,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) .level = 0xf, .access = 0x7, .quadrant = 0x3, + .passthrough = 0x1, }; /* -- cgit v1.2.3