diff options
Diffstat (limited to 'arch/arm64/kvm/mmu.c')
-rw-r--r-- | arch/arm64/kvm/mmu.c | 67 |
1 files changed, 66 insertions, 1 deletions
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..c6a97d463892 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -822,6 +822,45 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, return PAGE_SIZE; } +/* + * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be + * able to see the page's tags and therefore they must be initialised first. If + * PG_mte_tagged is set, tags have already been initialised. + * + * The race in the test/set of the PG_mte_tagged flag is handled by: + * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs + * racing to santise the same page + * - mmap_lock protects between a VM faulting a page in and the VMM performing + * an mprotect() to add VM_MTE + */ +static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long size) +{ + unsigned long i, nr_pages = size >> PAGE_SHIFT; + struct page *page; + + if (!kvm_has_mte(kvm)) + return 0; + + /* + * pfn_to_online_page() is used to reject ZONE_DEVICE pages + * that may not support tags. + */ + page = pfn_to_online_page(pfn); + + if (!page) + return -EFAULT; + + for (i = 0; i < nr_pages; i++, page++) { + if (!test_bit(PG_mte_tagged, &page->flags)) { + mte_clear_page_tags(page_address(page)); + set_bit(PG_mte_tagged, &page->flags); + } + } + + return 0; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -830,6 +869,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, bool write_fault, writable, force_pte = false; bool exec_fault; bool device = false; + bool shared; unsigned long mmu_seq; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; @@ -873,6 +913,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = PAGE_SHIFT; } + shared = (vma->vm_flags & VM_PFNMAP); + switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: @@ -971,8 +1013,18 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) prot |= KVM_PGTABLE_PROT_W; - if (fault_status != FSC_PERM && !device) + if (fault_status != FSC_PERM && !device) { + /* Check the VMM hasn't introduced a new VM_SHARED VMA */ + if (kvm_has_mte(kvm) && shared) { + ret = -EFAULT; + goto out_unlock; + } + ret = sanitise_mte_tags(kvm, pfn, vma_pagesize); + if (ret) + goto out_unlock; + clean_dcache_guest_page(pfn, vma_pagesize); + } if (exec_fault) { prot |= KVM_PGTABLE_PROT_X; @@ -1168,12 +1220,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_pfn_t pfn = pte_pfn(range->pte); + int ret; if (!kvm->arch.mmu.pgt) return false; WARN_ON(range->end - range->start != 1); + ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE); + if (ret) + return false; + /* * We've moved a page around, probably through CoW, so let's treat it * just like a translation fault and clean the cache to the PoC. @@ -1382,6 +1439,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, break; /* + * VM_SHARED mappings are not allowed with MTE to avoid races + * when updating the PG_mte_tagged page flag, see + * sanitise_mte_tags for more details. + */ + if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) + return -EINVAL; + + /* * Take the intersection of this VMA with the memory region */ vm_start = max(hva, vma->vm_start); |