From bb3541f175a977198d128f3a4e13534e019754a3 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 14:14:44 +0200 Subject: KVM: x86: Fix typos Signed-off-by: Andrea Gelmini Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index def97b3a392b..837bf23c5b06 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -523,7 +523,7 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte) } /* Rules for using mmu_spte_update: - * Update the state bits, it means the mapped pfn is not changged. + * Update the state bits, it means the mapped pfn is not changed. * * Whenever we overwrite a writable spte with a read-only one we * should flush remote TLBs. Otherwise rmap_write_protect -- cgit v1.2.3 From 8d5cf1610da526c3c1286bd7b3ac9f35f96ed43d Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 12 Jul 2016 18:18:48 -0400 Subject: kvm: mmu: extend the is_present check to 32 bits This is safe because this function is called on host controlled page table and non-present/non-MMIO sptes never use bits 1..31. For the EPT case, this ensures that cases where only the execute bit is set is marked valid. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 837bf23c5b06..55a20c0524fe 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -304,7 +304,7 @@ static int is_nx(struct kvm_vcpu *vcpu) static int is_shadow_present_pte(u64 pte) { - return pte & PT_PRESENT_MASK && !is_mmio_spte(pte); + return (pte & 0xFFFFFFFFull) && !is_mmio_spte(pte); } static int is_large_pte(u64 pte) -- cgit v1.2.3 From 812f30b234539ccb0139f92dfdbec1e8158cf535 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 12 Jul 2016 18:18:50 -0400 Subject: kvm: mmu: remove is_present_gpte() We have two versions of the above function. To prevent confusion and bugs in the future, remove the non-FNAME version entirely and replace all calls with the actual check. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/mmu.h | 5 ----- arch/x86/kvm/paging_tmpl.h | 2 +- arch/x86/kvm/x86.c | 2 +- 4 files changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 55a20c0524fe..6471f8788bd2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3189,7 +3189,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) MMU_WARN_ON(VALID_PAGE(root)); if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i); - if (!is_present_gpte(pdptr)) { + if (!(pdptr & PT_PRESENT_MASK)) { vcpu->arch.mmu.pae_root[i] = 0; continue; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 66b33b96a31b..ddc56e91f2e4 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -93,11 +93,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) return kvm_mmu_load(vcpu); } -static inline int is_present_gpte(unsigned long pte) -{ - return pte & PT_PRESENT_MASK; -} - /* * Currently, we have two sorts of write-protection, a) the first one * write-protects guest page to sync the guest modification, b) another one is diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index bc019f70e0b6..fda5b64ae8f1 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -131,7 +131,7 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) static inline int FNAME(is_present_gpte)(unsigned long pte) { #if PTTYPE != PTTYPE_EPT - return is_present_gpte(pte); + return pte & PT_PRESENT_MASK; #else return pte & 7; #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0cc6cf834cdd..bb6e8bfaee3b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -540,7 +540,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) goto out; } for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { - if (is_present_gpte(pdpte[i]) && + if ((pdpte[i] & PT_PRESENT_MASK) && (pdpte[i] & vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) { ret = 0; -- cgit v1.2.3 From ffb128c89b77b44da18ccf51844a8e750e2c427a Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 12 Jul 2016 18:18:49 -0400 Subject: kvm: mmu: don't set the present bit unconditionally To support execute only mappings on behalf of L1 hypervisors, we need to teach set_spte() to honor all three of L1's XWR bits. As a start, add a new variable "shadow_present_mask" that will be set for non-EPT shadow paging and clear for EPT. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 13 +++++++------ arch/x86/kvm/vmx.c | 1 + arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7a628fb6a2c2..d0845b289adb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1031,7 +1031,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6471f8788bd2..b8628e905806 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -175,6 +175,7 @@ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_mmio_mask; +static u64 __read_mostly shadow_present_mask; static void mmu_spte_set(u64 *sptep, u64 spte); static void mmu_free_roots(struct kvm_vcpu *vcpu); @@ -282,13 +283,14 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) } void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask) + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask) { shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; shadow_dirty_mask = dirty_mask; shadow_nx_mask = nx_mask; shadow_x_mask = x_mask; + shadow_present_mask = p_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -2245,10 +2247,9 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, { u64 spte; - BUILD_BUG_ON(VMX_EPT_READABLE_MASK != PT_PRESENT_MASK || - VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); + BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); - spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | + spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK | shadow_user_mask | shadow_x_mask | shadow_accessed_mask; mmu_spte_set(sptep, spte); @@ -2515,13 +2516,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, kvm_pfn_t pfn, bool speculative, bool can_unsync, bool host_writable) { - u64 spte; + u64 spte = 0; int ret = 0; if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) return 0; - spte = PT_PRESENT_MASK; + spte |= shadow_present_mask; if (!speculative) spte |= shadow_accessed_mask; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e51503063181..a75d09d2a799 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6473,6 +6473,7 @@ static __init int hardware_setup(void) (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK); + 0ull, VMX_EPT_EXECUTABLE_MASK, VMX_EPT_READABLE_MASK); ept_set_mmio_spte_mask(); kvm_enable_tdp(); } else diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bb6e8bfaee3b..0c1fbb8d9d11 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5878,8 +5878,8 @@ int kvm_arch_init(void *opaque) kvm_x86_ops = ops; kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, - PT_DIRTY_MASK, PT64_NX_MASK, 0); - + PT_DIRTY_MASK, PT64_NX_MASK, 0, + PT_PRESENT_MASK); kvm_timer_init(); perf_register_guest_info_callbacks(&kvm_guest_cbs); -- cgit v1.2.3 From d95c55687e11febe3ab1aacfe82b58b1822c52c4 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 12 Jul 2016 18:18:51 -0400 Subject: kvm: mmu: track read permission explicitly for shadow EPT page tables To support execute only mappings on behalf of L1 hypervisors, reuse ACC_USER_MASK to signify if the L1 hypervisor has the R bit set. For the nested EPT case, we assumed that the U bit was always set since there was no equivalent in EPT page tables. Strictly speaking, this was not necessary because handle_ept_violation never set PFERR_USER_MASK in the error code (uf=0 in the parlance of update_permission_bitmask). We now have to set both U and UF correctly, respectively in FNAME(gpte_access) and in handle_ept_violation. Also in handle_ept_violation bit 3 of the exit qualification is not enough to detect a present PTE; all three bits 3-5 have to be checked. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 10 +++++++--- arch/x86/kvm/paging_tmpl.h | 8 +++++++- arch/x86/kvm/vmx.c | 15 +++++++++------ 3 files changed, 23 insertions(+), 10 deletions(-) (limited to 'arch/x86/kvm/mmu.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b8628e905806..3041902ec827 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2522,6 +2522,12 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) return 0; + /* + * For the EPT case, shadow_present_mask is 0 if hardware + * supports exec-only page table entries. In that case, + * ACC_USER_MASK and shadow_user_mask are used to represent + * read access. See FNAME(gpte_access) in paging_tmpl.h. + */ spte |= shadow_present_mask; if (!speculative) spte |= shadow_accessed_mask; @@ -3915,9 +3921,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, * clearer. */ smap = cr4_smap && u && !uf && !ff; - } else - /* Not really needed: no U/S accesses on ept */ - u = 1; + } fault = (ff && !x) || (uf && !u) || (wf && !w) || (smapf && smap); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index fda5b64ae8f1..a01105485315 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -181,13 +181,19 @@ no_present: return true; } +/* + * For PTTYPE_EPT, a page table can be executable but not readable + * on supported processors. Therefore, set_spte does not automatically + * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK + * to signify readability since it isn't used in the EPT case + */ static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) { unsigned access; #if PTTYPE == PTTYPE_EPT access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | - ACC_USER_MASK; + ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0); #else BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK); BUILD_BUG_ON(ACC_EXEC_MASK != 1); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a75d09d2a799..bd7d60f66b93 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6117,12 +6117,14 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); - /* It is a write fault? */ - error_code = exit_qualification & PFERR_WRITE_MASK; + /* it is a read fault? */ + error_code = (exit_qualification << 2) & PFERR_USER_MASK; + /* it is a write fault? */ + error_code |= exit_qualification & PFERR_WRITE_MASK; /* It is a fetch fault? */ error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK; /* ept page table is present? */ - error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK; + error_code |= (exit_qualification & 0x38) != 0; vcpu->arch.exit_qualification = exit_qualification; @@ -6469,11 +6471,12 @@ static __init int hardware_setup(void) vmx_disable_intercept_msr_write_x2apic(0x83f); if (enable_ept) { - kvm_mmu_set_mask_ptes(0ull, + kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, - 0ull, VMX_EPT_EXECUTABLE_MASK); - 0ull, VMX_EPT_EXECUTABLE_MASK, VMX_EPT_READABLE_MASK); + 0ull, VMX_EPT_EXECUTABLE_MASK, + cpu_has_vmx_ept_execute_only() ? + 0ull : VMX_EPT_READABLE_MASK); ept_set_mmio_spte_mask(); kvm_enable_tdp(); } else -- cgit v1.2.3