summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Vrabel <david.vrabel@citrix.com>2015-03-11 14:49:57 +0000
committerDavid Vrabel <david.vrabel@citrix.com>2015-03-16 14:49:15 +0000
commit4e8c0c8c4bf3a5b5c98046e146ab3884bf7a7d0e (patch)
treee2da5980fd405c7109a342b13f0a2a1214f94b61
parent628c28eefd6f2cef03b212081b466ae43fd093a3 (diff)
downloadlinux-4e8c0c8c4bf3a5b5c98046e146ab3884bf7a7d0e.tar.bz2
xen/privcmd: improve performance of MMAPBATCH_V2
Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version) map multiple frames at a time rather than one at a time, despite the pages being non-consecutive GFNs. xen_remap_foreign_mfn_array() is added which maps an array of GFNs (instead of a consecutive range of GFNs). Since per-frame errors are returned in an array, privcmd must set the MMAPBATCH_V1 error bits as part of the "report errors" phase, after all the frames are mapped. Migrate times are significantly improved (when using a PV toolstack domain). For example, for an idle 12 GiB PV guest: Before After real 0m38.179s 0m26.868s user 0m15.096s 0m13.652s sys 0m28.988s 0m18.732s Signed-off-by: David Vrabel <david.vrabel@citrix.com> Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
-rw-r--r--arch/arm/xen/enlighten.c20
-rw-r--r--arch/x86/xen/mmu.c101
-rw-r--r--drivers/xen/privcmd.c117
-rw-r--r--drivers/xen/xlate_mmu.c46
-rw-r--r--include/xen/xen-ops.h45
5 files changed, 249 insertions, 80 deletions
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 5c04389fc9ef..224081ccc92f 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -53,15 +53,27 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
static __read_mostly int xen_events_irq = -1;
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t mfn, int nr,
- pgprot_t prot, unsigned domid,
+ xen_pfn_t *mfn, int nr,
+ int *err_ptr, pgprot_t prot,
+ unsigned domid,
struct page **pages)
{
- return xen_xlate_remap_gfn_range(vma, addr, mfn, nr,
+ return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
prot, domid, pages);
}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+/* Not used by XENFEAT_auto_translated guests. */
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+ unsigned long addr,
+ xen_pfn_t mfn, int nr,
+ pgprot_t prot, unsigned domid,
+ struct page **pages)
+{
+ return -ENOSYS;
+}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3d536a56ddf1..29b3be230ede 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void)
#define REMAP_BATCH_SIZE 16
struct remap_data {
- unsigned long mfn;
+ xen_pfn_t *mfn;
+ bool contiguous;
pgprot_t prot;
struct mmu_update *mmu_update;
};
@@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
struct remap_data *rmd = data;
- pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
+ pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
+
+ /* If we have a contigious range, just update the mfn itself,
+ else update pointer to be "next mfn". */
+ if (rmd->contiguous)
+ (*rmd->mfn)++;
+ else
+ rmd->mfn++;
rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->val = pte_val_ma(pte);
@@ -2457,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
return 0;
}
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
- unsigned long addr,
- xen_pfn_t mfn, int nr,
- pgprot_t prot, unsigned domid,
- struct page **pages)
-
+static int do_remap_mfn(struct vm_area_struct *vma,
+ unsigned long addr,
+ xen_pfn_t *mfn, int nr,
+ int *err_ptr, pgprot_t prot,
+ unsigned domid,
+ struct page **pages)
{
+ int err = 0;
struct remap_data rmd;
struct mmu_update mmu_update[REMAP_BATCH_SIZE];
- int batch;
unsigned long range;
- int err = 0;
+ int mapped = 0;
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH
/* We need to update the local page tables and the xen HAP */
- return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
- domid, pages);
+ return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
+ prot, domid, pages);
#else
return -EINVAL;
#endif
@@ -2484,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
rmd.mfn = mfn;
rmd.prot = prot;
+ /* We use the err_ptr to indicate if there we are doing a contigious
+ * mapping or a discontigious mapping. */
+ rmd.contiguous = !err_ptr;
while (nr) {
- batch = min(REMAP_BATCH_SIZE, nr);
+ int index = 0;
+ int done = 0;
+ int batch = min(REMAP_BATCH_SIZE, nr);
+ int batch_left = batch;
range = (unsigned long)batch << PAGE_SHIFT;
rmd.mmu_update = mmu_update;
@@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
if (err)
goto out;
- err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
- if (err < 0)
- goto out;
+ /* We record the error for each page that gives an error, but
+ * continue mapping until the whole set is done */
+ do {
+ int i;
+
+ err = HYPERVISOR_mmu_update(&mmu_update[index],
+ batch_left, &done, domid);
+
+ /*
+ * @err_ptr may be the same buffer as @mfn, so
+ * only clear it after each chunk of @mfn is
+ * used.
+ */
+ if (err_ptr) {
+ for (i = index; i < index + done; i++)
+ err_ptr[i] = 0;
+ }
+ if (err < 0) {
+ if (!err_ptr)
+ goto out;
+ err_ptr[i] = err;
+ done++; /* Skip failed frame. */
+ } else
+ mapped += done;
+ batch_left -= done;
+ index += done;
+ } while (batch_left);
nr -= batch;
addr += range;
+ if (err_ptr)
+ err_ptr += batch;
}
-
- err = 0;
out:
xen_flush_tlb_all();
- return err;
+ return err < 0 ? err : mapped;
+}
+
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+ unsigned long addr,
+ xen_pfn_t mfn, int nr,
+ pgprot_t prot, unsigned domid,
+ struct page **pages)
+{
+ return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+ unsigned long addr,
+ xen_pfn_t *mfn, int nr,
+ int *err_ptr, pgprot_t prot,
+ unsigned domid, struct page **pages)
+{
+ /* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
+ * and the consequences later is quite hard to detect what the actual
+ * cause of "wrong memory was mapped in".
+ */
+ BUG_ON(err_ptr == NULL);
+ return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
+}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+
/* Returns: 0 success */
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages)
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 59ac71c4a043..5a296161d843 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size,
return ret;
}
+/*
+ * Similar to traverse_pages, but use each page as a "block" of
+ * data to be processed as one unit.
+ */
+static int traverse_pages_block(unsigned nelem, size_t size,
+ struct list_head *pos,
+ int (*fn)(void *data, int nr, void *state),
+ void *state)
+{
+ void *pagedata;
+ unsigned pageidx;
+ int ret = 0;
+
+ BUG_ON(size > PAGE_SIZE);
+
+ pageidx = PAGE_SIZE;
+
+ while (nelem) {
+ int nr = (PAGE_SIZE/size);
+ struct page *page;
+ if (nr > nelem)
+ nr = nelem;
+ pos = pos->next;
+ page = list_entry(pos, struct page, lru);
+ pagedata = page_address(page);
+ ret = (*fn)(pagedata, nr, state);
+ if (ret)
+ break;
+ nelem -= nr;
+ }
+
+ return ret;
+}
+
struct mmap_mfn_state {
unsigned long va;
struct vm_area_struct *vma;
@@ -274,39 +308,25 @@ struct mmap_batch_state {
/* auto translated dom0 note: if domU being created is PV, then mfn is
* mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
*/
-static int mmap_batch_fn(void *data, void *state)
+static int mmap_batch_fn(void *data, int nr, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
struct vm_area_struct *vma = st->vma;
struct page **pages = vma->vm_private_data;
- struct page *cur_page = NULL;
+ struct page **cur_pages = NULL;
int ret;
if (xen_feature(XENFEAT_auto_translated_physmap))
- cur_page = pages[st->index++];
+ cur_pages = &pages[st->index];
- ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
- st->vma->vm_page_prot, st->domain,
- &cur_page);
+ BUG_ON(nr < 0);
+ ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
+ (int *)mfnp, st->vma->vm_page_prot,
+ st->domain, cur_pages);
- /* Store error code for second pass. */
- if (st->version == 1) {
- if (ret < 0) {
- /*
- * V1 encodes the error codes in the 32bit top nibble of the
- * mfn (with its known limitations vis-a-vis 64 bit callers).
- */
- *mfnp |= (ret == -ENOENT) ?
- PRIVCMD_MMAPBATCH_PAGED_ERROR :
- PRIVCMD_MMAPBATCH_MFN_ERROR;
- }
- } else { /* st->version == 2 */
- *((int *) mfnp) = ret;
- }
-
- /* And see if it affects the global_error. */
- if (ret < 0) {
+ /* Adjust the global_error? */
+ if (ret != nr) {
if (ret == -ENOENT)
st->global_error = -ENOENT;
else {
@@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state)
st->global_error = 1;
}
}
- st->va += PAGE_SIZE;
+ st->va += PAGE_SIZE * nr;
+ st->index += nr;
return 0;
}
-static int mmap_return_errors(void *data, void *state)
+static int mmap_return_error(int err, struct mmap_batch_state *st)
{
- struct mmap_batch_state *st = state;
+ int ret;
if (st->version == 1) {
- xen_pfn_t mfnp = *((xen_pfn_t *) data);
- if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
- return __put_user(mfnp, st->user_mfn++);
- else
+ if (err) {
+ xen_pfn_t mfn;
+
+ ret = get_user(mfn, st->user_mfn);
+ if (ret < 0)
+ return ret;
+ /*
+ * V1 encodes the error codes in the 32bit top
+ * nibble of the mfn (with its known
+ * limitations vis-a-vis 64 bit callers).
+ */
+ mfn |= (err == -ENOENT) ?
+ PRIVCMD_MMAPBATCH_PAGED_ERROR :
+ PRIVCMD_MMAPBATCH_MFN_ERROR;
+ return __put_user(mfn, st->user_mfn++);
+ } else
st->user_mfn++;
} else { /* st->version == 2 */
- int err = *((int *) data);
if (err)
return __put_user(err, st->user_err++);
else
@@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state)
return 0;
}
+static int mmap_return_errors(void *data, int nr, void *state)
+{
+ struct mmap_batch_state *st = state;
+ int *errs = data;
+ int i;
+ int ret;
+
+ for (i = 0; i < nr; i++) {
+ ret = mmap_return_error(errs[i], st);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
* the vma with the page info to use later.
* Returns: 0 if success, otherwise -errno
@@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
state.version = version;
/* mmap_batch_fn guarantees ret == 0 */
- BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist, mmap_batch_fn, &state));
+ BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
+ &pagelist, mmap_batch_fn, &state));
up_write(&mm->mmap_sem);
@@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
/* Write back errors in second pass. */
state.user_mfn = (xen_pfn_t *)m.arr;
state.user_err = m.err;
- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist, mmap_return_errors, &state);
+ ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
+ &pagelist, mmap_return_errors, &state);
} else
ret = 0;
diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
index 7724d90fc697..58a5389aec89 100644
--- a/drivers/xen/xlate_mmu.c
+++ b/drivers/xen/xlate_mmu.c
@@ -62,13 +62,15 @@ static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
}
struct remap_data {
- xen_pfn_t fgmfn; /* foreign domain's gmfn */
+ xen_pfn_t *fgmfn; /* foreign domain's gmfn */
pgprot_t prot;
domid_t domid;
struct vm_area_struct *vma;
int index;
struct page **pages;
struct xen_remap_mfn_info *info;
+ int *err_ptr;
+ int mapped;
};
static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
@@ -80,38 +82,46 @@ static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
int rc;
- rc = map_foreign_page(pfn, info->fgmfn, info->domid);
- if (rc < 0)
- return rc;
- set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+ rc = map_foreign_page(pfn, *info->fgmfn, info->domid);
+ *info->err_ptr++ = rc;
+ if (!rc) {
+ set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+ info->mapped++;
+ }
+ info->fgmfn++;
return 0;
}
-int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t gfn, int nr,
- pgprot_t prot, unsigned domid,
+ xen_pfn_t *mfn, int nr,
+ int *err_ptr, pgprot_t prot,
+ unsigned domid,
struct page **pages)
{
int err;
struct remap_data data;
+ unsigned long range = nr << PAGE_SHIFT;
- /* TBD: Batching, current sole caller only does page at a time */
- if (nr > 1)
- return -EINVAL;
+ /* Kept here for the purpose of making sure code doesn't break
+ x86 PVOPS */
+ BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
- data.fgmfn = gfn;
- data.prot = prot;
+ data.fgmfn = mfn;
+ data.prot = prot;
data.domid = domid;
- data.vma = vma;
- data.index = 0;
+ data.vma = vma;
data.pages = pages;
- err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+ data.index = 0;
+ data.err_ptr = err_ptr;
+ data.mapped = 0;
+
+ err = apply_to_page_range(vma->vm_mm, addr, range,
remap_pte_fn, &data);
- return err;
+ return err < 0 ? err : data.mapped;
}
-EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_range);
+EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array);
int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages)
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 9eb88a4512bd..c643e6a94c9a 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -27,17 +27,54 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
struct vm_area_struct;
+
+/*
+ * xen_remap_domain_mfn_array() - map an array of foreign frames
+ * @vma: VMA to map the pages into
+ * @addr: Address at which to map the pages
+ * @gfn: Array of GFNs to map
+ * @nr: Number entries in the GFN array
+ * @err_ptr: Returns per-GFN error status.
+ * @prot: page protection mask
+ * @domid: Domain owning the pages
+ * @pages: Array of pages if this domain has an auto-translated physmap
+ *
+ * @gfn and @err_ptr may point to the same buffer, the GFNs will be
+ * overwritten by the error codes after they are mapped.
+ *
+ * Returns the number of successfully mapped frames, or a -ve error
+ * code.
+ */
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+ unsigned long addr,
+ xen_pfn_t *gfn, int nr,
+ int *err_ptr, pgprot_t prot,
+ unsigned domid,
+ struct page **pages);
+
+/* xen_remap_domain_mfn_range() - map a range of foreign frames
+ * @vma: VMA to map the pages into
+ * @addr: Address at which to map the pages
+ * @gfn: First GFN to map.
+ * @nr: Number frames to map
+ * @prot: page protection mask
+ * @domid: Domain owning the pages
+ * @pages: Array of pages if this domain has an auto-translated physmap
+ *
+ * Returns the number of successfully mapped frames, or a -ve error
+ * code.
+ */
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t mfn, int nr,
+ xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages);
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages);
-int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
- xen_pfn_t gfn, int nr,
- pgprot_t prot,
+ xen_pfn_t *gfn, int nr,
+ int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages);
int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,