From 628c28eefd6f2cef03b212081b466ae43fd093a3 Mon Sep 17 00:00:00 2001
From: David Vrabel
Date: Wed, 11 Mar 2015 14:49:56 +0000
Subject: xen: unify foreign GFN map/unmap for auto-xlated physmap guests

Auto-translated physmap guests (arm, arm64 and x86 PVHVM/PVH) map and
unmap foreign GFNs using the same method (updating the physmap).
Unify the arm and x86 implementations into a single common one.

Note that on arm and arm64 the correct error code will now be returned
(instead of always -EFAULT), and map/unmap failure warnings are no
longer printed.  These changes are required if the foreign domain is
paging (-ENOENT failures are expected and must be propagated up to the
caller).

Signed-off-by: David Vrabel
Reviewed-by: Stefano Stabellini
---
 arch/arm/xen/enlighten.c | 90 ++----------------------------------------------
 1 file changed, 3 insertions(+), 87 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 263a2044c65b..5c04389fc9ef 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -53,105 +53,21 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 
 static __read_mostly int xen_events_irq = -1;
 
-/* map fgmfn of domid to lpfn in the current domain */
-static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
-                            unsigned int domid)
-{
-        int rc;
-        struct xen_add_to_physmap_range xatp = {
-                .domid = DOMID_SELF,
-                .foreign_domid = domid,
-                .size = 1,
-                .space = XENMAPSPACE_gmfn_foreign,
-        };
-        xen_ulong_t idx = fgmfn;
-        xen_pfn_t gpfn = lpfn;
-        int err = 0;
-
-        set_xen_guest_handle(xatp.idxs, &idx);
-        set_xen_guest_handle(xatp.gpfns, &gpfn);
-        set_xen_guest_handle(xatp.errs, &err);
-
-        rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
-        if (rc || err) {
-                pr_warn("Failed to map pfn to mfn rc:%d:%d pfn:%lx mfn:%lx\n",
-                        rc, err, lpfn, fgmfn);
-                return 1;
-        }
-        return 0;
-}
-
-struct remap_data {
-        xen_pfn_t fgmfn; /* foreign domain's gmfn */
-        pgprot_t prot;
-        domid_t domid;
-        struct vm_area_struct *vma;
-        int index;
-        struct page **pages;
-        struct xen_remap_mfn_info *info;
-};
-
-static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
-                        void *data)
-{
-        struct remap_data *info = data;
-        struct page *page = info->pages[info->index++];
-        unsigned long pfn = page_to_pfn(page);
-        pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
-
-        if (map_foreign_page(pfn, info->fgmfn, info->domid))
-                return -EFAULT;
-        set_pte_at(info->vma->vm_mm, addr, ptep, pte);
-
-        return 0;
-}
-
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                                unsigned long addr,
                                xen_pfn_t mfn, int nr,
                                pgprot_t prot, unsigned domid,
                                struct page **pages)
 {
-        int err;
-        struct remap_data data;
-
-        /* TBD: Batching, current sole caller only does page at a time */
-        if (nr > 1)
-                return -EINVAL;
-
-        data.fgmfn = mfn;
-        data.prot = prot;
-        data.domid = domid;
-        data.vma = vma;
-        data.index = 0;
-        data.pages = pages;
-        err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
-                                  remap_pte_fn, &data);
-        return err;
+        return xen_xlate_remap_gfn_range(vma, addr, mfn, nr,
+                                         prot, domid, pages);
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
                                int nr, struct page **pages)
 {
-        int i;
-
-        for (i = 0; i < nr; i++) {
-                struct xen_remove_from_physmap xrp;
-                unsigned long rc, pfn;
-
-                pfn = page_to_pfn(pages[i]);
-
-                xrp.domid = DOMID_SELF;
-                xrp.gpfn = pfn;
-                rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
-                if (rc) {
-                        pr_warn("Failed to unmap pfn:%lx rc:%ld\n",
-                                pfn, rc);
-                        return rc;
-                }
-        }
-        return 0;
+        return xen_xlate_unmap_gfn_range(vma, nr, pages);
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
--
cgit v1.2.3


From 4e8c0c8c4bf3a5b5c98046e146ab3884bf7a7d0e Mon Sep 17 00:00:00 2001
From: David Vrabel
Date: Wed, 11 Mar 2015 14:49:57 +0000
Subject: xen/privcmd: improve performance of MMAPBATCH_V2

Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version) map
multiple frames at a time rather than one at a time, even though the
frames are non-consecutive GFNs.

xen_remap_domain_mfn_array() is added, which maps an array of GFNs
(instead of a consecutive range of GFNs).

Since per-frame errors are returned in an array, privcmd must set the
MMAPBATCH_V1 error bits as part of the "report errors" phase, after
all the frames are mapped.

Migrate times are significantly improved (when using a PV toolstack
domain).  For example, for an idle 12 GiB PV guest:

        Before     After
  real  0m38.179s  0m26.868s
  user  0m15.096s  0m13.652s
  sys   0m28.988s  0m18.732s

Signed-off-by: David Vrabel
Reviewed-by: Stefano Stabellini
---
 arch/arm/xen/enlighten.c |  20 ++++++--
 arch/x86/xen/mmu.c       | 101 ++++++++++++++++++++++++++++++++--------
 drivers/xen/privcmd.c    | 117 +++++++++++++++++++++++++++++++++--------------
 drivers/xen/xlate_mmu.c  |  46 +++++++++++--------
 include/xen/xen-ops.h    |  45 ++++++++++++++++--
 5 files changed, 249 insertions(+), 80 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 5c04389fc9ef..224081ccc92f 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -53,15 +53,27 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 
 static __read_mostly int xen_events_irq = -1;
 
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
                                unsigned long addr,
-                               xen_pfn_t mfn, int nr,
-                               pgprot_t prot, unsigned domid,
+                               xen_pfn_t *mfn, int nr,
+                               int *err_ptr, pgprot_t prot,
+                               unsigned domid,
                                struct page **pages)
 {
-        return xen_xlate_remap_gfn_range(vma, addr, mfn, nr,
+        return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
                                          prot, domid, pages);
 }
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+/* Not used by XENFEAT_auto_translated guests. */
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+                               unsigned long addr,
+                               xen_pfn_t mfn, int nr,
+                               pgprot_t prot, unsigned domid,
+                               struct page **pages)
+{
+        return -ENOSYS;
+}
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3d536a56ddf1..29b3be230ede 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void)
 #define REMAP_BATCH_SIZE 16
 
 struct remap_data {
-        unsigned long mfn;
+        xen_pfn_t *mfn;
+        bool contiguous;
         pgprot_t prot;
         struct mmu_update *mmu_update;
 };
@@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
                                  unsigned long addr, void *data)
 {
         struct remap_data *rmd = data;
-        pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
+        pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
+
+        /* If we have a contiguous range, just update the mfn itself,
+           else update the pointer to be "next mfn". */
+        if (rmd->contiguous)
+                (*rmd->mfn)++;
+        else
+                rmd->mfn++;
 
         rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
         rmd->mmu_update->val = pte_val_ma(pte);
@@ -2457,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
         return 0;
 }
 
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
-                               unsigned long addr,
-                               xen_pfn_t mfn, int nr,
-                               pgprot_t prot, unsigned domid,
-                               struct page **pages)
-
+static int do_remap_mfn(struct vm_area_struct *vma,
+                        unsigned long addr,
+                        xen_pfn_t *mfn, int nr,
+                        int *err_ptr, pgprot_t prot,
+                        unsigned domid,
+                        struct page **pages)
 {
+        int err = 0;
         struct remap_data rmd;
         struct mmu_update mmu_update[REMAP_BATCH_SIZE];
-        int batch;
         unsigned long range;
-        int err = 0;
+        int mapped = 0;
 
         BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
         if (xen_feature(XENFEAT_auto_translated_physmap)) {
 #ifdef CONFIG_XEN_PVH
                 /* We need to update the local page tables and the xen HAP */
-                return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
-                                                 domid, pages);
+                return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
+                                                 prot, domid, pages);
 #else
                 return -EINVAL;
 #endif
@@ -2484,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 
         rmd.mfn = mfn;
         rmd.prot = prot;
+        /* We use err_ptr to indicate whether we are doing a contiguous
+         * mapping or a discontiguous mapping. */
+        rmd.contiguous = !err_ptr;
 
         while (nr) {
-                batch = min(REMAP_BATCH_SIZE, nr);
+                int index = 0;
+                int done = 0;
+                int batch = min(REMAP_BATCH_SIZE, nr);
+                int batch_left = batch;
                 range = (unsigned long)batch << PAGE_SHIFT;
 
                 rmd.mmu_update = mmu_update;
@@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                 if (err)
                         goto out;
 
-                err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
-                if (err < 0)
-                        goto out;
+                /* We record the error for each page that gives an error, but
+                 * continue mapping until the whole set is done. */
+                do {
+                        int i;
+
+                        err = HYPERVISOR_mmu_update(&mmu_update[index],
+                                                    batch_left, &done, domid);
+
+                        /*
+                         * @err_ptr may be the same buffer as @mfn, so
+                         * only clear it after each chunk of @mfn is
+                         * used.
+                         */
+                        if (err_ptr) {
+                                for (i = index; i < index + done; i++)
+                                        err_ptr[i] = 0;
+                        }
+                        if (err < 0) {
+                                if (!err_ptr)
+                                        goto out;
+                                err_ptr[i] = err;
+                                done++; /* Skip failed frame. */
+                        } else
+                                mapped += done;
+                        batch_left -= done;
+                        index += done;
+                } while (batch_left);
 
                 nr -= batch;
                 addr += range;
+                if (err_ptr)
+                        err_ptr += batch;
         }
-
-        err = 0;
 out:
 
         xen_flush_tlb_all();
 
-        return err;
+        return err < 0 ? err : mapped;
+}
+
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+                               unsigned long addr,
+                               xen_pfn_t mfn, int nr,
+                               pgprot_t prot, unsigned domid,
+                               struct page **pages)
+{
+        return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+                               unsigned long addr,
+                               xen_pfn_t *mfn, int nr,
+                               int *err_ptr, pgprot_t prot,
+                               unsigned domid, struct page **pages)
+{
+        /* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
+         * and the resulting "wrong memory was mapped in" failures are very
+         * hard to diagnose later.
+         */
+        BUG_ON(err_ptr == NULL);
+        return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
+}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+
 /* Returns: 0 success */
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
                                int numpgs, struct page **pages)
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 59ac71c4a043..5a296161d843 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size,
         return ret;
 }
 
+/*
+ * Similar to traverse_pages, but use each page as a "block" of
+ * data to be processed as one unit.
+ */
+static int traverse_pages_block(unsigned nelem, size_t size,
+                                struct list_head *pos,
+                                int (*fn)(void *data, int nr, void *state),
+                                void *state)
+{
+        void *pagedata;
+        unsigned pageidx;
+        int ret = 0;
+
+        BUG_ON(size > PAGE_SIZE);
+
+        pageidx = PAGE_SIZE;
+
+        while (nelem) {
+                int nr = (PAGE_SIZE/size);
+                struct page *page;
+                if (nr > nelem)
+                        nr = nelem;
+                pos = pos->next;
+                page = list_entry(pos, struct page, lru);
+                pagedata = page_address(page);
+                ret = (*fn)(pagedata, nr, state);
+                if (ret)
+                        break;
+                nelem -= nr;
+        }
+
+        return ret;
+}
+
 struct mmap_mfn_state {
         unsigned long va;
         struct vm_area_struct *vma;
@@ -274,39 +308,25 @@ struct mmap_batch_state {
 
 /* auto translated dom0 note: if domU being created is PV, then mfn is
  * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
  */
-static int mmap_batch_fn(void *data, void *state)
+static int mmap_batch_fn(void *data, int nr, void *state)
 {
         xen_pfn_t *mfnp = data;
         struct mmap_batch_state *st = state;
         struct vm_area_struct *vma = st->vma;
         struct page **pages = vma->vm_private_data;
-        struct page *cur_page = NULL;
+        struct page **cur_pages = NULL;
         int ret;
 
         if (xen_feature(XENFEAT_auto_translated_physmap))
-                cur_page = pages[st->index++];
+                cur_pages = &pages[st->index];
 
-        ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-                                         st->vma->vm_page_prot, st->domain,
-                                         &cur_page);
+        BUG_ON(nr < 0);
+        ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
+                                         (int *)mfnp, st->vma->vm_page_prot,
+                                         st->domain, cur_pages);
 
-        /* Store error code for second pass. */
-        if (st->version == 1) {
-                if (ret < 0) {
-                        /*
-                         * V1 encodes the error codes in the 32bit top nibble of the
-                         * mfn (with its known limitations vis-a-vis 64 bit callers).
-                         */
-                        *mfnp |= (ret == -ENOENT) ?
-                                PRIVCMD_MMAPBATCH_PAGED_ERROR :
-                                PRIVCMD_MMAPBATCH_MFN_ERROR;
-                }
-        } else { /* st->version == 2 */
-                *((int *) mfnp) = ret;
-        }
-
-        /* And see if it affects the global_error. */
-        if (ret < 0) {
+        /* Adjust the global_error? */
+        if (ret != nr) {
                 if (ret == -ENOENT)
                         st->global_error = -ENOENT;
                 else {
@@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state)
                         st->global_error = 1;
                 }
         }
-        st->va += PAGE_SIZE;
+        st->va += PAGE_SIZE * nr;
+        st->index += nr;
 
         return 0;
 }
 
-static int mmap_return_errors(void *data, void *state)
+static int mmap_return_error(int err, struct mmap_batch_state *st)
 {
-        struct mmap_batch_state *st = state;
+        int ret;
 
         if (st->version == 1) {
-                xen_pfn_t mfnp = *((xen_pfn_t *) data);
-                if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
-                        return __put_user(mfnp, st->user_mfn++);
-                else
+                if (err) {
+                        xen_pfn_t mfn;
+
+                        ret = get_user(mfn, st->user_mfn);
+                        if (ret < 0)
+                                return ret;
+                        /*
+                         * V1 encodes the error codes in the 32bit top
+                         * nibble of the mfn (with its known
+                         * limitations vis-a-vis 64 bit callers).
+                         */
+                        mfn |= (err == -ENOENT) ?
+                                PRIVCMD_MMAPBATCH_PAGED_ERROR :
+                                PRIVCMD_MMAPBATCH_MFN_ERROR;
+                        return __put_user(mfn, st->user_mfn++);
+                } else
                         st->user_mfn++;
         } else { /* st->version == 2 */
-                int err = *((int *) data);
-
                 if (err)
                         return __put_user(err, st->user_err++);
                 else
@@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state)
         return 0;
 }
 
+static int mmap_return_errors(void *data, int nr, void *state)
+{
+        struct mmap_batch_state *st = state;
+        int *errs = data;
+        int i;
+        int ret;
+
+        for (i = 0; i < nr; i++) {
+                ret = mmap_return_error(errs[i], st);
+                if (ret < 0)
+                        return ret;
+        }
+        return 0;
+}
+
 /* Allocate pfns that are then mapped with gmfns from foreign domid. Update
  * the vma with the page info to use later.
  * Returns: 0 if success, otherwise -errno
@@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
         state.version = version;
 
         /* mmap_batch_fn guarantees ret == 0 */
-        BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
-                              &pagelist, mmap_batch_fn, &state));
+        BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
+                                    &pagelist, mmap_batch_fn, &state));
 
         up_write(&mm->mmap_sem);
 
@@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
                 /* Write back errors in second pass. */
                 state.user_mfn = (xen_pfn_t *)m.arr;
                 state.user_err = m.err;
-                ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-                                     &pagelist, mmap_return_errors, &state);
+                ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
+                                           &pagelist, mmap_return_errors, &state);
         } else
                 ret = 0;
 
diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
index 7724d90fc697..58a5389aec89 100644
--- a/drivers/xen/xlate_mmu.c
+++ b/drivers/xen/xlate_mmu.c
@@ -62,13 +62,15 @@ static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
 }
 
 struct remap_data {
-        xen_pfn_t fgmfn; /* foreign domain's gmfn */
+        xen_pfn_t *fgmfn; /* foreign domain's gmfn */
         pgprot_t prot;
         domid_t domid;
         struct vm_area_struct *vma;
         int index;
         struct page **pages;
         struct xen_remap_mfn_info *info;
+        int *err_ptr;
+        int mapped;
 };
 
 static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
@@ -80,38 +82,46 @@ static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
         pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
         int rc;
 
-        rc = map_foreign_page(pfn, info->fgmfn, info->domid);
-        if (rc < 0)
-                return rc;
-        set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+        rc = map_foreign_page(pfn, *info->fgmfn, info->domid);
+        *info->err_ptr++ = rc;
+        if (!rc) {
+                set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+                info->mapped++;
+        }
+        info->fgmfn++;
 
         return 0;
 }
 
-int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
                               unsigned long addr,
-                              xen_pfn_t gfn, int nr,
-                              pgprot_t prot, unsigned domid,
+                              xen_pfn_t *mfn, int nr,
+                              int *err_ptr, pgprot_t prot,
+                              unsigned domid,
                               struct page **pages)
 {
         int err;
         struct remap_data data;
+        unsigned long range = nr << PAGE_SHIFT;
 
-        /* TBD: Batching, current sole caller only does page at a time */
-        if (nr > 1)
-                return -EINVAL;
+        /* Kept here for the purpose of making sure code doesn't break
+           x86 PVOPS */
+        BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
-        data.fgmfn = gfn;
-        data.prot = prot;
+        data.fgmfn = mfn;
+        data.prot  = prot;
         data.domid = domid;
-        data.vma = vma;
-        data.index = 0;
+        data.vma   = vma;
         data.pages = pages;
-        err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+        data.index = 0;
+        data.err_ptr = err_ptr;
+        data.mapped = 0;
+
+        err = apply_to_page_range(vma->vm_mm, addr, range,
                                   remap_pte_fn, &data);
-        return err;
+        return err < 0 ? err : data.mapped;
 }
-EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_range);
+EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array);
 
 int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
                               int nr, struct page **pages)
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 9eb88a4512bd..c643e6a94c9a 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -27,17 +27,54 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
 
 struct vm_area_struct;
+
+/*
+ * xen_remap_domain_mfn_array() - map an array of foreign frames
+ * @vma: VMA to map the pages into
+ * @addr: Address at which to map the pages
+ * @gfn: Array of GFNs to map
+ * @nr: Number of entries in the GFN array
+ * @err_ptr: Returns per-GFN error status.
+ * @prot: page protection mask
+ * @domid: Domain owning the pages
+ * @pages: Array of pages if this domain has an auto-translated physmap
+ *
+ * @gfn and @err_ptr may point to the same buffer; the GFNs will be
+ * overwritten by the error codes after they are mapped.
+ *
+ * Returns the number of successfully mapped frames, or a -ve error
+ * code.
+ */
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+                               unsigned long addr,
+                               xen_pfn_t *gfn, int nr,
+                               int *err_ptr, pgprot_t prot,
+                               unsigned domid,
+                               struct page **pages);
+
+/* xen_remap_domain_mfn_range() - map a range of foreign frames
+ * @vma: VMA to map the pages into
+ * @addr: Address at which to map the pages
+ * @gfn: First GFN to map.
+ * @nr: Number of frames to map
+ * @prot: page protection mask
+ * @domid: Domain owning the pages
+ * @pages: Array of pages if this domain has an auto-translated physmap
+ *
+ * Returns the number of successfully mapped frames, or a -ve error
+ * code.
+ */
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                                unsigned long addr,
-                               xen_pfn_t mfn, int nr,
+                               xen_pfn_t gfn, int nr,
                                pgprot_t prot, unsigned domid,
                                struct page **pages);
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
                                int numpgs, struct page **pages);
-int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
                               unsigned long addr,
-                              xen_pfn_t gfn, int nr,
-                              pgprot_t prot,
+                              xen_pfn_t *gfn, int nr,
+                              int *err_ptr, pgprot_t prot,
                               unsigned domid,
                               struct page **pages);
 int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
--
cgit v1.2.3
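
As a rough illustration of the interface introduced by the second patch (this sketch is not part of either commit, and the helper name map_foreign_gfns() is invented for the example), an in-kernel caller of xen_remap_domain_mfn_array() might look like the following, relying only on the declarations added to include/xen/xen-ops.h:

#include <linux/mm.h>
#include <xen/xen-ops.h>

/*
 * Hypothetical example: map nr foreign GFNs from domid into a VMA and
 * report whether any frame failed.  gfns and errs may alias, as the
 * xen_remap_domain_mfn_array() contract allows the GFNs to be
 * overwritten by per-frame error codes.
 */
static int map_foreign_gfns(struct vm_area_struct *vma, unsigned long addr,
                            xen_pfn_t *gfns, int *errs, int nr,
                            unsigned int domid, struct page **pages)
{
        int mapped, i, failed = 0;

        mapped = xen_remap_domain_mfn_array(vma, addr, gfns, nr, errs,
                                            vma->vm_page_prot, domid, pages);
        if (mapped < 0)
                return mapped;          /* global failure, nothing mapped */

        for (i = 0; i < nr; i++)
                if (errs[i])            /* e.g. -ENOENT if the frame is paged out */
                        failed++;

        return failed ? -EFAULT : 0;
}

A V1/V2 MMAPBATCH-style caller would then walk errs[] to report per-frame status, much as privcmd does in its second "report errors" pass.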