From e36acfe6c86d13eec62321e1e86a1ce287e52e7d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 23 May 2019 09:41:19 -0300 Subject: mm/hmm: Use hmm_mirror not mm as an argument for hmm_range_register Ralph observes that hmm_range_register() can only be called by a driver while a mirror is registered. Make this clear in the API by passing in the mirror structure as a parameter. This also simplifies understanding the lifetime model for struct hmm, as the hmm pointer must be valid as part of a registered mirror so all we need in hmm_register_range() is a simple kref_get. Suggested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Reviewed-by: Ralph Campbell Reviewed-by: Ira Weiny Reviewed-by: Christoph Hellwig Tested-by: Philip Yang --- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 93ed43c413f0..8c92374afcf2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -649,7 +649,7 @@ nouveau_svm_fault(struct nvif_notify *notify) range.values = nouveau_svm_pfn_values; range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; again: - ret = hmm_vma_fault(&range, true); + ret = hmm_vma_fault(&svmm->mirror, &range, true); if (ret == 0) { mutex_lock(&svmm->mutex); if (!hmm_vma_range_done(&range)) { -- cgit v1.2.3 From 3ed2dcdf54d5bf1f9823b5faf1a702e7cee53982 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:07 +0200 Subject: memremap: validate the pagemap type passed to devm_memremap_pages Most pgmap types are only supported when certain config options are enabled. Check for a type that is valid for the current configuration before setting up the pagemap. For this the usage of the 0 type for device dax gets replaced with an explicit MEMORY_DEVICE_DEVDAX type. Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Reviewed-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/dax/device.c | 1 + include/linux/memremap.h | 8 ++++++++ kernel/memremap.c | 22 ++++++++++++++++++++++ 3 files changed, 31 insertions(+) (limited to 'drivers') diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 8465d12fecba..79014baa782d 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -468,6 +468,7 @@ int dev_dax_probe(struct device *dev) dev_dax->pgmap.ref = &dev_dax->ref; dev_dax->pgmap.kill = dev_dax_percpu_kill; dev_dax->pgmap.cleanup = dev_dax_percpu_exit; + dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX; addr = devm_memremap_pages(dev, &dev_dax->pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 995c62c5a48b..0c86f2c5ac9c 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -45,13 +45,21 @@ struct vmem_altmap { * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * + * MEMORY_DEVICE_DEVDAX: + * Host memory that has similar access semantics as System RAM i.e. DMA + * coherent and supports page pinning. In contrast to + * MEMORY_DEVICE_FS_DAX, this memory is access via a device-dax + * character device. + * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer * transactions. */ enum memory_type { + /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_FS_DAX, + MEMORY_DEVICE_DEVDAX, MEMORY_DEVICE_PCI_P2PDMA, }; diff --git a/kernel/memremap.c b/kernel/memremap.c index 6e1970719dc2..abda62d1e5a3 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -157,6 +157,28 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) pgprot_t pgprot = PAGE_KERNEL; int error, nid, is_ram; + switch (pgmap->type) { + case MEMORY_DEVICE_PRIVATE: + if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) { + WARN(1, "Device private memory not supported\n"); + return ERR_PTR(-EINVAL); + } + break; + case MEMORY_DEVICE_FS_DAX: + if (!IS_ENABLED(CONFIG_ZONE_DEVICE) || + IS_ENABLED(CONFIG_FS_DAX_LIMITED)) { + WARN(1, "File system DAX not supported\n"); + return ERR_PTR(-EINVAL); + } + break; + case MEMORY_DEVICE_DEVDAX: + case MEMORY_DEVICE_PCI_P2PDMA: + break; + default: + WARN(1, "Invalid pgmap type %d\n", pgmap->type); + break; + } + if (!pgmap->ref || !pgmap->kill || !pgmap->cleanup) { WARN(1, "Missing reference count teardown definition\n"); return ERR_PTR(-EINVAL); -- cgit v1.2.3 From 1e240e8d4a7d92232b6214e02a0a4197a53afd6c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:08 +0200 Subject: memremap: move dev_pagemap callbacks into a separate structure The dev_pagemap is a growing too many callbacks. Move them into a separate ops structure so that they are not duplicated for multiple instances, and an attacker can't easily overwrite them. Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/dax/device.c | 11 +++++++---- drivers/dax/pmem/core.c | 2 +- drivers/nvdimm/pmem.c | 19 +++++++++++-------- drivers/pci/p2pdma.c | 8 ++++++-- include/linux/memremap.h | 36 ++++++++++++++++++++---------------- kernel/memremap.c | 18 +++++++++--------- mm/hmm.c | 10 +++++++--- tools/testing/nvdimm/test/iomap.c | 7 ++++--- 8 files changed, 65 insertions(+), 46 deletions(-) (limited to 'drivers') diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 79014baa782d..f390083a64d7 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -36,9 +36,8 @@ static void dev_dax_percpu_exit(struct percpu_ref *ref) percpu_ref_exit(ref); } -static void dev_dax_percpu_kill(struct percpu_ref *data) +static void dev_dax_percpu_kill(struct percpu_ref *ref) { - struct percpu_ref *ref = data; struct dev_dax *dev_dax = ref_to_dev_dax(ref); dev_dbg(&dev_dax->dev, "%s\n", __func__); @@ -442,6 +441,11 @@ static void dev_dax_kill(void *dev_dax) kill_dev_dax(dev_dax); } +static const struct dev_pagemap_ops dev_dax_pagemap_ops = { + .kill = dev_dax_percpu_kill, + .cleanup = dev_dax_percpu_exit, +}; + int dev_dax_probe(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); @@ -466,9 +470,8 @@ int dev_dax_probe(struct device *dev) return rc; dev_dax->pgmap.ref = &dev_dax->ref; - dev_dax->pgmap.kill = dev_dax_percpu_kill; - dev_dax->pgmap.cleanup = dev_dax_percpu_exit; dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX; + dev_dax->pgmap.ops = &dev_dax_pagemap_ops; addr = devm_memremap_pages(dev, &dev_dax->pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c index f9f51786d556..6eb6dfdf19bf 100644 --- a/drivers/dax/pmem/core.c +++ b/drivers/dax/pmem/core.c @@ -16,7 +16,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) struct dev_dax *dev_dax; struct nd_namespace_io *nsio; struct dax_region *dax_region; - struct dev_pagemap pgmap = { 0 }; + struct dev_pagemap pgmap = { }; struct nd_namespace_common *ndns; struct nd_dax *nd_dax = to_nd_dax(dev); struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 24d7fe7c74ed..c2449af2b388 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -303,7 +303,7 @@ static const struct attribute_group *pmem_attribute_groups[] = { NULL, }; -static void __pmem_release_queue(struct percpu_ref *ref) +static void pmem_pagemap_cleanup(struct percpu_ref *ref) { struct request_queue *q; @@ -313,10 +313,10 @@ static void __pmem_release_queue(struct percpu_ref *ref) static void pmem_release_queue(void *ref) { - __pmem_release_queue(ref); + pmem_pagemap_cleanup(ref); } -static void pmem_freeze_queue(struct percpu_ref *ref) +static void pmem_pagemap_kill(struct percpu_ref *ref) { struct request_queue *q; @@ -339,19 +339,24 @@ static void pmem_release_pgmap_ops(void *__pgmap) dev_pagemap_put_ops(); } -static void fsdax_pagefree(struct page *page, void *data) +static void pmem_pagemap_page_free(struct page *page, void *data) { wake_up_var(&page->_refcount); } +static const struct dev_pagemap_ops fsdax_pagemap_ops = { + .page_free = pmem_pagemap_page_free, + .kill = pmem_pagemap_kill, + .cleanup = pmem_pagemap_cleanup, +}; + static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap) { dev_pagemap_get_ops(); if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap)) return -ENOMEM; pgmap->type = MEMORY_DEVICE_FS_DAX; - pgmap->page_free = fsdax_pagefree; - + pgmap->ops = &fsdax_pagemap_ops; return 0; } @@ -409,8 +414,6 @@ static int pmem_attach_disk(struct device *dev, pmem->pfn_flags = PFN_DEV; pmem->pgmap.ref = &q->q_usage_counter; - pmem->pgmap.kill = pmem_freeze_queue; - pmem->pgmap.cleanup = __pmem_release_queue; if (is_nd_pfn(dev)) { if (setup_pagemap_fsdax(dev, &pmem->pgmap)) return -ENOMEM; diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index a4994aa3acc0..fb039259d463 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -153,6 +153,11 @@ out: return error; } +static const struct dev_pagemap_ops pci_p2pdma_pagemap_ops = { + .kill = pci_p2pdma_percpu_kill, + .cleanup = pci_p2pdma_percpu_cleanup, +}; + /** * pci_p2pdma_add_resource - add memory for use as p2p memory * @pdev: the device to add the memory to @@ -208,8 +213,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) - pci_resource_start(pdev, bar); - pgmap->kill = pci_p2pdma_percpu_kill; - pgmap->cleanup = pci_p2pdma_percpu_cleanup; + pgmap->ops = &pci_p2pdma_pagemap_ops; addr = devm_memremap_pages(&pdev->dev, pgmap); if (IS_ERR(addr)) { diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 0c86f2c5ac9c..919755f48c7e 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -63,41 +63,45 @@ enum memory_type { MEMORY_DEVICE_PCI_P2PDMA, }; -/* - * Additional notes about MEMORY_DEVICE_PRIVATE may be found in - * include/linux/hmm.h and Documentation/vm/hmm.rst. There is also a brief - * explanation in include/linux/memory_hotplug.h. - * - * The page_free() callback is called once the page refcount reaches 1 - * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug. - * This allows the device driver to implement its own memory management.) - */ -typedef void (*dev_page_free_t)(struct page *page, void *data); +struct dev_pagemap_ops { + /* + * Called once the page refcount reaches 1. (ZONE_DEVICE pages never + * reach 0 refcount unless there is a refcount bug. This allows the + * device driver to implement its own memory management.) + */ + void (*page_free)(struct page *page, void *data); + + /* + * Transition the refcount in struct dev_pagemap to the dead state. + */ + void (*kill)(struct percpu_ref *ref); + + /* + * Wait for refcount in struct dev_pagemap to be idle and reap it. + */ + void (*cleanup)(struct percpu_ref *ref); +}; /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings - * @page_free: free page callback when page refcount reaches 1 * @altmap: pre-allocated/reserved memory for vmemmap allocations * @res: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping - * @kill: callback to transition @ref to the dead state - * @cleanup: callback to wait for @ref to be idle and reap it * @dev: host device of the mapping for debug * @data: private data pointer for page_free() * @type: memory type: see MEMORY_* in memory_hotplug.h + * @ops: method table */ struct dev_pagemap { - dev_page_free_t page_free; struct vmem_altmap altmap; bool altmap_valid; struct resource res; struct percpu_ref *ref; - void (*kill)(struct percpu_ref *ref); - void (*cleanup)(struct percpu_ref *ref); struct device *dev; void *data; enum memory_type type; u64 pci_p2pdma_bus_offset; + const struct dev_pagemap_ops *ops; }; #ifdef CONFIG_ZONE_DEVICE diff --git a/kernel/memremap.c b/kernel/memremap.c index abda62d1e5a3..0824237ef979 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -92,10 +92,10 @@ static void devm_memremap_pages_release(void *data) unsigned long pfn; int nid; - pgmap->kill(pgmap->ref); + pgmap->ops->kill(pgmap->ref); for_each_device_pfn(pfn, pgmap) put_page(pfn_to_page(pfn)); - pgmap->cleanup(pgmap->ref); + pgmap->ops->cleanup(pgmap->ref); /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); @@ -128,8 +128,8 @@ static void devm_memremap_pages_release(void *data) * @pgmap: pointer to a struct dev_pagemap * * Notes: - * 1/ At a minimum the res, ref and type members of @pgmap must be initialized - * by the caller before passing it to this function + * 1/ At a minimum the res, ref and type and ops members of @pgmap must be + * initialized by the caller before passing it to this function * * 2/ The altmap field may optionally be initialized, in which case altmap_valid * must be set to true @@ -179,7 +179,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) break; } - if (!pgmap->ref || !pgmap->kill || !pgmap->cleanup) { + if (!pgmap->ref || !pgmap->ops || !pgmap->ops->kill || + !pgmap->ops->cleanup) { WARN(1, "Missing reference count teardown definition\n"); return ERR_PTR(-EINVAL); } @@ -293,9 +294,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) err_pfn_remap: pgmap_array_delete(res); err_array: - pgmap->kill(pgmap->ref); - pgmap->cleanup(pgmap->ref); - + pgmap->ops->kill(pgmap->ref); + pgmap->ops->cleanup(pgmap->ref); return ERR_PTR(error); } EXPORT_SYMBOL_GPL(devm_memremap_pages); @@ -388,7 +388,7 @@ void __put_devmap_managed_page(struct page *page) mem_cgroup_uncharge(page); - page->pgmap->page_free(page, page->pgmap->data); + page->pgmap->ops->page_free(page, page->pgmap->data); } else if (!count) __put_page(page); } diff --git a/mm/hmm.c b/mm/hmm.c index 48574f8485bb..583a02a16872 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -1384,6 +1384,12 @@ static void hmm_devmem_free(struct page *page, void *data) devmem->ops->free(devmem, page); } +static const struct dev_pagemap_ops hmm_pagemap_ops = { + .page_free = hmm_devmem_free, + .kill = hmm_devmem_ref_kill, + .cleanup = hmm_devmem_ref_exit, +}; + /* * hmm_devmem_add() - hotplug ZONE_DEVICE memory for device memory * @@ -1438,12 +1444,10 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; devmem->pagemap.res = *devmem->resource; - devmem->pagemap.page_free = hmm_devmem_free; + devmem->pagemap.ops = &hmm_pagemap_ops; devmem->pagemap.altmap_valid = false; devmem->pagemap.ref = &devmem->ref; devmem->pagemap.data = devmem; - devmem->pagemap.kill = hmm_devmem_ref_kill; - devmem->pagemap.cleanup = hmm_devmem_ref_exit; result = devm_memremap_pages(devmem->device, &devmem->pagemap); if (IS_ERR(result)) diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 076df22e4bda..cf3f064a697d 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -100,9 +100,10 @@ static void nfit_test_kill(void *_pgmap) { struct dev_pagemap *pgmap = _pgmap; - WARN_ON(!pgmap || !pgmap->ref || !pgmap->kill || !pgmap->cleanup); - pgmap->kill(pgmap->ref); - pgmap->cleanup(pgmap->ref); + WARN_ON(!pgmap || !pgmap->ref || !pgmap->ops || !pgmap->ops->kill || + !pgmap->ops->cleanup); + pgmap->ops->kill(pgmap->ref); + pgmap->ops->cleanup(pgmap->ref); } void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) -- cgit v1.2.3 From d8668bb0451c3c45b59dbcde2654e0539aad1d2a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:09 +0200 Subject: memremap: pass a struct dev_pagemap to ->kill and ->cleanup Passing the actual typed structure leads to more understandable code vs just passing the ref member. Reported-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/dax/device.c | 12 ++++++------ drivers/nvdimm/pmem.c | 18 +++++++++--------- drivers/pci/p2pdma.c | 9 +++++---- include/linux/memremap.h | 4 ++-- kernel/memremap.c | 8 ++++---- mm/hmm.c | 10 +++++----- tools/testing/nvdimm/test/iomap.c | 4 ++-- 7 files changed, 33 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/dax/device.c b/drivers/dax/device.c index f390083a64d7..b5257038c188 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -27,21 +27,21 @@ static void dev_dax_percpu_release(struct percpu_ref *ref) complete(&dev_dax->cmp); } -static void dev_dax_percpu_exit(struct percpu_ref *ref) +static void dev_dax_percpu_exit(struct dev_pagemap *pgmap) { - struct dev_dax *dev_dax = ref_to_dev_dax(ref); + struct dev_dax *dev_dax = container_of(pgmap, struct dev_dax, pgmap); dev_dbg(&dev_dax->dev, "%s\n", __func__); wait_for_completion(&dev_dax->cmp); - percpu_ref_exit(ref); + percpu_ref_exit(pgmap->ref); } -static void dev_dax_percpu_kill(struct percpu_ref *ref) +static void dev_dax_percpu_kill(struct dev_pagemap *pgmap) { - struct dev_dax *dev_dax = ref_to_dev_dax(ref); + struct dev_dax *dev_dax = container_of(pgmap, struct dev_dax, pgmap); dev_dbg(&dev_dax->dev, "%s\n", __func__); - percpu_ref_kill(ref); + percpu_ref_kill(pgmap->ref); } static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index c2449af2b388..9dac48359353 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -303,24 +303,24 @@ static const struct attribute_group *pmem_attribute_groups[] = { NULL, }; -static void pmem_pagemap_cleanup(struct percpu_ref *ref) +static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap) { - struct request_queue *q; + struct request_queue *q = + container_of(pgmap->ref, struct request_queue, q_usage_counter); - q = container_of(ref, typeof(*q), q_usage_counter); blk_cleanup_queue(q); } -static void pmem_release_queue(void *ref) +static void pmem_release_queue(void *pgmap) { - pmem_pagemap_cleanup(ref); + pmem_pagemap_cleanup(pgmap); } -static void pmem_pagemap_kill(struct percpu_ref *ref) +static void pmem_pagemap_kill(struct dev_pagemap *pgmap) { - struct request_queue *q; + struct request_queue *q = + container_of(pgmap->ref, struct request_queue, q_usage_counter); - q = container_of(ref, typeof(*q), q_usage_counter); blk_freeze_queue_start(q); } @@ -435,7 +435,7 @@ static int pmem_attach_disk(struct device *dev, memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); } else { if (devm_add_action_or_reset(dev, pmem_release_queue, - &q->q_usage_counter)) + &pmem->pgmap)) return -ENOMEM; addr = devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index fb039259d463..fa6249e4ed5f 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -91,14 +91,15 @@ static void pci_p2pdma_percpu_release(struct percpu_ref *ref) complete(&p2p_pgmap->ref_done); } -static void pci_p2pdma_percpu_kill(struct percpu_ref *ref) +static void pci_p2pdma_percpu_kill(struct dev_pagemap *pgmap) { - percpu_ref_kill(ref); + percpu_ref_kill(pgmap->ref); } -static void pci_p2pdma_percpu_cleanup(struct percpu_ref *ref) +static void pci_p2pdma_percpu_cleanup(struct dev_pagemap *pgmap) { - struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref); + struct p2pdma_pagemap *p2p_pgmap = + container_of(pgmap, struct p2pdma_pagemap, pgmap); wait_for_completion(&p2p_pgmap->ref_done); percpu_ref_exit(&p2p_pgmap->ref); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 919755f48c7e..b8666a0d8665 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -74,12 +74,12 @@ struct dev_pagemap_ops { /* * Transition the refcount in struct dev_pagemap to the dead state. */ - void (*kill)(struct percpu_ref *ref); + void (*kill)(struct dev_pagemap *pgmap); /* * Wait for refcount in struct dev_pagemap to be idle and reap it. */ - void (*cleanup)(struct percpu_ref *ref); + void (*cleanup)(struct dev_pagemap *pgmap); }; /** diff --git a/kernel/memremap.c b/kernel/memremap.c index 0824237ef979..00c1ceb60c19 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -92,10 +92,10 @@ static void devm_memremap_pages_release(void *data) unsigned long pfn; int nid; - pgmap->ops->kill(pgmap->ref); + pgmap->ops->kill(pgmap); for_each_device_pfn(pfn, pgmap) put_page(pfn_to_page(pfn)); - pgmap->ops->cleanup(pgmap->ref); + pgmap->ops->cleanup(pgmap); /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); @@ -294,8 +294,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) err_pfn_remap: pgmap_array_delete(res); err_array: - pgmap->ops->kill(pgmap->ref); - pgmap->ops->cleanup(pgmap->ref); + pgmap->ops->kill(pgmap); + pgmap->ops->cleanup(pgmap); return ERR_PTR(error); } EXPORT_SYMBOL_GPL(devm_memremap_pages); diff --git a/mm/hmm.c b/mm/hmm.c index 583a02a16872..987793fba923 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -1352,18 +1352,18 @@ static void hmm_devmem_ref_release(struct percpu_ref *ref) complete(&devmem->completion); } -static void hmm_devmem_ref_exit(struct percpu_ref *ref) +static void hmm_devmem_ref_exit(struct dev_pagemap *pgmap) { struct hmm_devmem *devmem; - devmem = container_of(ref, struct hmm_devmem, ref); + devmem = container_of(pgmap, struct hmm_devmem, pagemap); wait_for_completion(&devmem->completion); - percpu_ref_exit(ref); + percpu_ref_exit(pgmap->ref); } -static void hmm_devmem_ref_kill(struct percpu_ref *ref) +static void hmm_devmem_ref_kill(struct dev_pagemap *pgmap) { - percpu_ref_kill(ref); + percpu_ref_kill(pgmap->ref); } static vm_fault_t hmm_devmem_fault(struct vm_area_struct *vma, diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index cf3f064a697d..82f901569e06 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -102,8 +102,8 @@ static void nfit_test_kill(void *_pgmap) WARN_ON(!pgmap || !pgmap->ref || !pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup); - pgmap->ops->kill(pgmap->ref); - pgmap->ops->cleanup(pgmap->ref); + pgmap->ops->kill(pgmap); + pgmap->ops->cleanup(pgmap); } void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) -- cgit v1.2.3 From f6a55e1a3fe6b3bb294a80a05437fcf86488d819 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:10 +0200 Subject: memremap: lift the devmap_enable manipulation into devm_memremap_pages Just check if there is a ->page_free operation set and take care of the static key enable, as well as the put using device managed resources. Also check that a ->page_free is provided for the pgmaps types that require it, and check for a valid type as well while we are at it. Note that this also fixes the fact that hmm never called dev_pagemap_put_ops and thus would leave the slow path enabled forever, even after a device driver unload or disable. Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Reviewed-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/nvdimm/pmem.c | 23 ++++---------------- include/linux/mm.h | 10 --------- kernel/memremap.c | 59 ++++++++++++++++++++++++++++++++------------------- mm/hmm.c | 2 -- 4 files changed, 41 insertions(+), 53 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 9dac48359353..48767171a4df 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -334,11 +334,6 @@ static void pmem_release_disk(void *__pmem) put_disk(pmem->disk); } -static void pmem_release_pgmap_ops(void *__pgmap) -{ - dev_pagemap_put_ops(); -} - static void pmem_pagemap_page_free(struct page *page, void *data) { wake_up_var(&page->_refcount); @@ -350,16 +345,6 @@ static const struct dev_pagemap_ops fsdax_pagemap_ops = { .cleanup = pmem_pagemap_cleanup, }; -static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap) -{ - dev_pagemap_get_ops(); - if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap)) - return -ENOMEM; - pgmap->type = MEMORY_DEVICE_FS_DAX; - pgmap->ops = &fsdax_pagemap_ops; - return 0; -} - static int pmem_attach_disk(struct device *dev, struct nd_namespace_common *ndns) { @@ -415,8 +400,8 @@ static int pmem_attach_disk(struct device *dev, pmem->pfn_flags = PFN_DEV; pmem->pgmap.ref = &q->q_usage_counter; if (is_nd_pfn(dev)) { - if (setup_pagemap_fsdax(dev, &pmem->pgmap)) - return -ENOMEM; + pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; + pmem->pgmap.ops = &fsdax_pagemap_ops; addr = devm_memremap_pages(dev, &pmem->pgmap); pfn_sb = nd_pfn->pfn_sb; pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); @@ -428,8 +413,8 @@ static int pmem_attach_disk(struct device *dev, } else if (pmem_should_map_pages(dev)) { memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); pmem->pgmap.altmap_valid = false; - if (setup_pagemap_fsdax(dev, &pmem->pgmap)) - return -ENOMEM; + pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; + pmem->pgmap.ops = &fsdax_pagemap_ops; addr = devm_memremap_pages(dev, &pmem->pgmap); pmem->pfn_flags |= PFN_MAP; memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); diff --git a/include/linux/mm.h b/include/linux/mm.h index 7399f9f08de6..2425f4167ec2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -932,8 +932,6 @@ static inline bool is_zone_device_page(const struct page *page) #endif #ifdef CONFIG_DEV_PAGEMAP_OPS -void dev_pagemap_get_ops(void); -void dev_pagemap_put_ops(void); void __put_devmap_managed_page(struct page *page); DECLARE_STATIC_KEY_FALSE(devmap_managed_key); static inline bool put_devmap_managed_page(struct page *page) @@ -973,14 +971,6 @@ static inline bool is_pci_p2pdma_page(const struct page *page) #endif /* CONFIG_PCI_P2PDMA */ #else /* CONFIG_DEV_PAGEMAP_OPS */ -static inline void dev_pagemap_get_ops(void) -{ -} - -static inline void dev_pagemap_put_ops(void) -{ -} - static inline bool put_devmap_managed_page(struct page *page) { return false; diff --git a/kernel/memremap.c b/kernel/memremap.c index 00c1ceb60c19..3219a4c91d07 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -17,6 +17,35 @@ static DEFINE_XARRAY(pgmap_array); #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) #define SECTION_SIZE (1UL << PA_SECTION_SHIFT) +#ifdef CONFIG_DEV_PAGEMAP_OPS +DEFINE_STATIC_KEY_FALSE(devmap_managed_key); +EXPORT_SYMBOL(devmap_managed_key); +static atomic_t devmap_managed_enable; + +static void devmap_managed_enable_put(void *data) +{ + if (atomic_dec_and_test(&devmap_managed_enable)) + static_branch_disable(&devmap_managed_key); +} + +static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap) +{ + if (!pgmap->ops->page_free) { + WARN(1, "Missing page_free method\n"); + return -EINVAL; + } + + if (atomic_inc_return(&devmap_managed_enable) == 1) + static_branch_enable(&devmap_managed_key); + return devm_add_action_or_reset(dev, devmap_managed_enable_put, NULL); +} +#else +static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap) +{ + return -EINVAL; +} +#endif /* CONFIG_DEV_PAGEMAP_OPS */ + #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) vm_fault_t device_private_entry_fault(struct vm_area_struct *vma, unsigned long addr, @@ -156,6 +185,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) }; pgprot_t pgprot = PAGE_KERNEL; int error, nid, is_ram; + bool need_devmap_managed = true; switch (pgmap->type) { case MEMORY_DEVICE_PRIVATE: @@ -173,6 +203,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) break; case MEMORY_DEVICE_DEVDAX: case MEMORY_DEVICE_PCI_P2PDMA: + need_devmap_managed = false; break; default: WARN(1, "Invalid pgmap type %d\n", pgmap->type); @@ -185,6 +216,12 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) return ERR_PTR(-EINVAL); } + if (need_devmap_managed) { + error = devmap_managed_enable_get(dev, pgmap); + if (error) + return ERR_PTR(error); + } + align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) - align_start; @@ -351,28 +388,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, EXPORT_SYMBOL_GPL(get_dev_pagemap); #ifdef CONFIG_DEV_PAGEMAP_OPS -DEFINE_STATIC_KEY_FALSE(devmap_managed_key); -EXPORT_SYMBOL(devmap_managed_key); -static atomic_t devmap_enable; - -/* - * Toggle the static key for ->page_free() callbacks when dev_pagemap - * pages go idle. - */ -void dev_pagemap_get_ops(void) -{ - if (atomic_inc_return(&devmap_enable) == 1) - static_branch_enable(&devmap_managed_key); -} -EXPORT_SYMBOL_GPL(dev_pagemap_get_ops); - -void dev_pagemap_put_ops(void) -{ - if (atomic_dec_and_test(&devmap_enable)) - static_branch_disable(&devmap_managed_key); -} -EXPORT_SYMBOL_GPL(dev_pagemap_put_ops); - void __put_devmap_managed_page(struct page *page) { int count = page_ref_dec_return(page); diff --git a/mm/hmm.c b/mm/hmm.c index 987793fba923..5b0bd5f6a74f 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -1415,8 +1415,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, void *result; int ret; - dev_pagemap_get_ops(); - devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL); if (!devmem) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 80a72d0af05ae97a8b106c172e431072ba587492 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:12 +0200 Subject: memremap: remove the data field in struct dev_pagemap struct dev_pagemap is always embedded into a containing structure, so there is no need to an additional private data field. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/nvdimm/pmem.c | 2 +- include/linux/memremap.h | 3 +-- kernel/memremap.c | 2 +- mm/hmm.c | 9 +++++---- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 48767171a4df..093408ce40ad 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -334,7 +334,7 @@ static void pmem_release_disk(void *__pmem) put_disk(pmem->disk); } -static void pmem_pagemap_page_free(struct page *page, void *data) +static void pmem_pagemap_page_free(struct page *page) { wake_up_var(&page->_refcount); } diff --git a/include/linux/memremap.h b/include/linux/memremap.h index ac985bd03a7f..336eca601dad 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -69,7 +69,7 @@ struct dev_pagemap_ops { * reach 0 refcount unless there is a refcount bug. This allows the * device driver to implement its own memory management.) */ - void (*page_free)(struct page *page, void *data); + void (*page_free)(struct page *page); /* * Transition the refcount in struct dev_pagemap to the dead state. @@ -104,7 +104,6 @@ struct dev_pagemap { struct resource res; struct percpu_ref *ref; struct device *dev; - void *data; enum memory_type type; u64 pci_p2pdma_bus_offset; const struct dev_pagemap_ops *ops; diff --git a/kernel/memremap.c b/kernel/memremap.c index c06a5487dda7..6c3dbb692037 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -376,7 +376,7 @@ void __put_devmap_managed_page(struct page *page) mem_cgroup_uncharge(page); - page->pgmap->ops->page_free(page, page->pgmap->data); + page->pgmap->ops->page_free(page); } else if (!count) __put_page(page); } diff --git a/mm/hmm.c b/mm/hmm.c index 96633ee066d8..36e25cdbdac1 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -1368,15 +1368,17 @@ static void hmm_devmem_ref_kill(struct dev_pagemap *pgmap) static vm_fault_t hmm_devmem_migrate_to_ram(struct vm_fault *vmf) { - struct hmm_devmem *devmem = vmf->page->pgmap->data; + struct hmm_devmem *devmem = + container_of(vmf->page->pgmap, struct hmm_devmem, pagemap); return devmem->ops->fault(devmem, vmf->vma, vmf->address, vmf->page, vmf->flags, vmf->pmd); } -static void hmm_devmem_free(struct page *page, void *data) +static void hmm_devmem_free(struct page *page) { - struct hmm_devmem *devmem = data; + struct hmm_devmem *devmem = + container_of(page->pgmap, struct hmm_devmem, pagemap); devmem->ops->free(devmem, page); } @@ -1442,7 +1444,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, devmem->pagemap.ops = &hmm_pagemap_ops; devmem->pagemap.altmap_valid = false; devmem->pagemap.ref = &devmem->ref; - devmem->pagemap.data = devmem; result = devm_memremap_pages(devmem->device, &devmem->pagemap); if (IS_ERR(result)) -- cgit v1.2.3 From 514caf23a70fd697fa2ece238b2cd8dcc73fb16f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:13 +0200 Subject: memremap: replace the altmap_valid field with a PGMAP_ALTMAP_VALID flag Add a flags field to struct dev_pagemap to replace the altmap_valid boolean to be a little more extensible. Also add a pgmap_altmap() helper to find the optional altmap and clean up the code using the altmap using it. Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Reviewed-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jason Gunthorpe --- arch/powerpc/mm/mem.c | 10 +--------- arch/x86/mm/init_64.c | 8 ++------ drivers/nvdimm/pfn_devs.c | 3 +-- drivers/nvdimm/pmem.c | 1 - include/linux/memremap.h | 12 +++++++++++- kernel/memremap.c | 26 ++++++++++---------------- mm/hmm.c | 1 - mm/memory_hotplug.c | 6 ++---- mm/page_alloc.c | 5 ++--- 9 files changed, 29 insertions(+), 43 deletions(-) (limited to 'drivers') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2540d3b2588c..a2923c5c1982 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -131,17 +131,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page; + struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); int ret; - /* - * If we have an altmap then we need to skip over any reserved PFNs - * when querying the zone. - */ - page = pfn_to_page(start_pfn); - if (altmap) - page += vmem_altmap_offset(altmap); - __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); /* Remove htab bolted mappings for this section of memory */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0f01c7b1d217..08bbf648827b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1213,13 +1213,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn); - struct zone *zone; + struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); + struct zone *zone = page_zone(page); - /* With altmap the first mapped page is offset from @start */ - if (altmap) - page += vmem_altmap_offset(altmap); - zone = page_zone(page); __remove_pages(zone, start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); } diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 0f81fc56bbfd..55fb6b7433ed 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -622,7 +622,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) if (offset < reserve) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); - pgmap->altmap_valid = false; } else if (nd_pfn->mode == PFN_MODE_PMEM) { nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) - offset) / PAGE_SIZE); @@ -634,7 +633,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) memcpy(altmap, &__altmap, sizeof(*altmap)); altmap->free = PHYS_PFN(offset - reserve); altmap->alloc = 0; - pgmap->altmap_valid = true; + pgmap->flags |= PGMAP_ALTMAP_VALID; } else return -ENXIO; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 093408ce40ad..e7d8cc9f41e8 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -412,7 +412,6 @@ static int pmem_attach_disk(struct device *dev, bb_res.start += pmem->data_offset; } else if (pmem_should_map_pages(dev)) { memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); - pmem->pgmap.altmap_valid = false; pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; pmem->pgmap.ops = &fsdax_pagemap_ops; addr = devm_memremap_pages(dev, &pmem->pgmap); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 336eca601dad..e25685b878e9 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -88,6 +88,8 @@ struct dev_pagemap_ops { vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf); }; +#define PGMAP_ALTMAP_VALID (1 << 0) + /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations @@ -96,19 +98,27 @@ struct dev_pagemap_ops { * @dev: host device of the mapping for debug * @data: private data pointer for page_free() * @type: memory type: see MEMORY_* in memory_hotplug.h + * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table */ struct dev_pagemap { struct vmem_altmap altmap; - bool altmap_valid; struct resource res; struct percpu_ref *ref; struct device *dev; enum memory_type type; + unsigned int flags; u64 pci_p2pdma_bus_offset; const struct dev_pagemap_ops *ops; }; +static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) +{ + if (pgmap->flags & PGMAP_ALTMAP_VALID) + return &pgmap->altmap; + return NULL; +} + #ifdef CONFIG_ZONE_DEVICE void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); diff --git a/kernel/memremap.c b/kernel/memremap.c index 6c3dbb692037..eee490e7d7e1 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -54,14 +54,8 @@ static void pgmap_array_delete(struct resource *res) static unsigned long pfn_first(struct dev_pagemap *pgmap) { - const struct resource *res = &pgmap->res; - struct vmem_altmap *altmap = &pgmap->altmap; - unsigned long pfn; - - pfn = res->start >> PAGE_SHIFT; - if (pgmap->altmap_valid) - pfn += vmem_altmap_offset(altmap); - return pfn; + return (pgmap->res.start >> PAGE_SHIFT) + + vmem_altmap_offset(pgmap_altmap(pgmap)); } static unsigned long pfn_end(struct dev_pagemap *pgmap) @@ -109,7 +103,7 @@ static void devm_memremap_pages_release(void *data) align_size >> PAGE_SHIFT, NULL); } else { arch_remove_memory(nid, align_start, align_size, - pgmap->altmap_valid ? &pgmap->altmap : NULL); + pgmap_altmap(pgmap)); kasan_remove_zero_shadow(__va(align_start), align_size); } mem_hotplug_done(); @@ -129,8 +123,8 @@ static void devm_memremap_pages_release(void *data) * 1/ At a minimum the res, ref and type and ops members of @pgmap must be * initialized by the caller before passing it to this function * - * 2/ The altmap field may optionally be initialized, in which case altmap_valid - * must be set to true + * 2/ The altmap field may optionally be initialized, in which case + * PGMAP_ALTMAP_VALID must be set in pgmap->flags. * * 3/ pgmap->ref must be 'live' on entry and will be killed and reaped * at devm_memremap_pages_release() time, or if this routine fails. @@ -142,15 +136,13 @@ static void devm_memremap_pages_release(void *data) void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { resource_size_t align_start, align_size, align_end; - struct vmem_altmap *altmap = pgmap->altmap_valid ? - &pgmap->altmap : NULL; struct resource *res = &pgmap->res; struct dev_pagemap *conflict_pgmap; struct mhp_restrictions restrictions = { /* * We do not want any optional features only our own memmap */ - .altmap = altmap, + .altmap = pgmap_altmap(pgmap), }; pgprot_t pgprot = PAGE_KERNEL; int error, nid, is_ram; @@ -274,7 +266,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; move_pfn_range_to_zone(zone, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, altmap); + align_size >> PAGE_SHIFT, pgmap_altmap(pgmap)); } mem_hotplug_done(); @@ -319,7 +311,9 @@ EXPORT_SYMBOL_GPL(devm_memunmap_pages); unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) { /* number of pfns from base where pfn_to_page() is valid */ - return altmap->reserve + altmap->free; + if (altmap) + return altmap->reserve + altmap->free; + return 0; } void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns) diff --git a/mm/hmm.c b/mm/hmm.c index 36e25cdbdac1..e4470462298f 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -1442,7 +1442,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; devmem->pagemap.res = *devmem->resource; devmem->pagemap.ops = &hmm_pagemap_ops; - devmem->pagemap.altmap_valid = false; devmem->pagemap.ref = &devmem->ref; result = devm_memremap_pages(devmem->device, &devmem->pagemap); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e096c987d261..6166ba5a15f3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -557,10 +557,8 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, int sections_to_remove; /* In the ZONE_DEVICE case device driver owns the memory region */ - if (is_dev_zone(zone)) { - if (altmap) - map_offset = vmem_altmap_offset(altmap); - } + if (is_dev_zone(zone)) + map_offset = vmem_altmap_offset(altmap); clear_zone_contiguous(zone); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d66bc8abe0af..17a39d40a556 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5853,6 +5853,7 @@ void __ref memmap_init_zone_device(struct zone *zone, { unsigned long pfn, end_pfn = start_pfn + size; struct pglist_data *pgdat = zone->zone_pgdat; + struct vmem_altmap *altmap = pgmap_altmap(pgmap); unsigned long zone_idx = zone_idx(zone); unsigned long start = jiffies; int nid = pgdat->node_id; @@ -5865,9 +5866,7 @@ void __ref memmap_init_zone_device(struct zone *zone, * of the pages reserved for the memmap, so we can just jump to * the end of that region and start processing the device pages. */ - if (pgmap->altmap_valid) { - struct vmem_altmap *altmap = &pgmap->altmap; - + if (altmap) { start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap); size = end_pfn - start_pfn; } -- cgit v1.2.3 From ea31d5859f5862fc915ccf3fa34f6c6c5f63f336 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:15 +0200 Subject: device-dax: use the dev_pagemap internal refcount The functionality is identical to the one currently open coded in device-dax. Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Reviewed-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/dax/dax-private.h | 4 ---- drivers/dax/device.c | 43 ------------------------------------------- 2 files changed, 47 deletions(-) (limited to 'drivers') diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index b4177aafbbd1..c915889d1769 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -43,8 +43,6 @@ struct dax_region { * @target_node: effective numa node if dev_dax memory range is onlined * @dev - device core * @pgmap - pgmap for memmap setup / lifetime (driver owned) - * @ref: pgmap reference count (driver owned) - * @cmp: @ref final put completion (driver owned) */ struct dev_dax { struct dax_region *region; @@ -52,8 +50,6 @@ struct dev_dax { int target_node; struct device dev; struct dev_pagemap pgmap; - struct percpu_ref ref; - struct completion cmp; }; static inline struct dev_dax *to_dev_dax(struct device *dev) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index b5257038c188..1af823b2fe6b 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -14,36 +14,6 @@ #include "dax-private.h" #include "bus.h" -static struct dev_dax *ref_to_dev_dax(struct percpu_ref *ref) -{ - return container_of(ref, struct dev_dax, ref); -} - -static void dev_dax_percpu_release(struct percpu_ref *ref) -{ - struct dev_dax *dev_dax = ref_to_dev_dax(ref); - - dev_dbg(&dev_dax->dev, "%s\n", __func__); - complete(&dev_dax->cmp); -} - -static void dev_dax_percpu_exit(struct dev_pagemap *pgmap) -{ - struct dev_dax *dev_dax = container_of(pgmap, struct dev_dax, pgmap); - - dev_dbg(&dev_dax->dev, "%s\n", __func__); - wait_for_completion(&dev_dax->cmp); - percpu_ref_exit(pgmap->ref); -} - -static void dev_dax_percpu_kill(struct dev_pagemap *pgmap) -{ - struct dev_dax *dev_dax = container_of(pgmap, struct dev_dax, pgmap); - - dev_dbg(&dev_dax->dev, "%s\n", __func__); - percpu_ref_kill(pgmap->ref); -} - static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, const char *func) { @@ -441,11 +411,6 @@ static void dev_dax_kill(void *dev_dax) kill_dev_dax(dev_dax); } -static const struct dev_pagemap_ops dev_dax_pagemap_ops = { - .kill = dev_dax_percpu_kill, - .cleanup = dev_dax_percpu_exit, -}; - int dev_dax_probe(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); @@ -463,15 +428,7 @@ int dev_dax_probe(struct device *dev) return -EBUSY; } - init_completion(&dev_dax->cmp); - rc = percpu_ref_init(&dev_dax->ref, dev_dax_percpu_release, 0, - GFP_KERNEL); - if (rc) - return rc; - - dev_dax->pgmap.ref = &dev_dax->ref; dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX; - dev_dax->pgmap.ops = &dev_dax_pagemap_ops; addr = devm_memremap_pages(dev, &dev_dax->pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); -- cgit v1.2.3 From d0b3517dbcf3f632f7554d878f85943439aade64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:16 +0200 Subject: PCI/P2PDMA: use the dev_pagemap internal refcount The functionality is identical to the one currently open coded in p2pdma.c. Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Reviewed-by: Logan Gunthorpe Tested-by: Logan Gunthorpe Reviewed-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/pci/p2pdma.c | 57 ++++------------------------------------------------ 1 file changed, 4 insertions(+), 53 deletions(-) (limited to 'drivers') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index fa6249e4ed5f..a3073ce16520 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -25,12 +25,6 @@ struct pci_p2pdma { bool p2pmem_published; }; -struct p2pdma_pagemap { - struct dev_pagemap pgmap; - struct percpu_ref ref; - struct completion ref_done; -}; - static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -79,32 +73,6 @@ static const struct attribute_group p2pmem_group = { .name = "p2pmem", }; -static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref) -{ - return container_of(ref, struct p2pdma_pagemap, ref); -} - -static void pci_p2pdma_percpu_release(struct percpu_ref *ref) -{ - struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref); - - complete(&p2p_pgmap->ref_done); -} - -static void pci_p2pdma_percpu_kill(struct dev_pagemap *pgmap) -{ - percpu_ref_kill(pgmap->ref); -} - -static void pci_p2pdma_percpu_cleanup(struct dev_pagemap *pgmap) -{ - struct p2pdma_pagemap *p2p_pgmap = - container_of(pgmap, struct p2pdma_pagemap, pgmap); - - wait_for_completion(&p2p_pgmap->ref_done); - percpu_ref_exit(&p2p_pgmap->ref); -} - static void pci_p2pdma_release(void *data) { struct pci_dev *pdev = data; @@ -154,11 +122,6 @@ out: return error; } -static const struct dev_pagemap_ops pci_p2pdma_pagemap_ops = { - .kill = pci_p2pdma_percpu_kill, - .cleanup = pci_p2pdma_percpu_cleanup, -}; - /** * pci_p2pdma_add_resource - add memory for use as p2p memory * @pdev: the device to add the memory to @@ -172,7 +135,6 @@ static const struct dev_pagemap_ops pci_p2pdma_pagemap_ops = { int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset) { - struct p2pdma_pagemap *p2p_pgmap; struct dev_pagemap *pgmap; void *addr; int error; @@ -195,26 +157,15 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, return error; } - p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL); - if (!p2p_pgmap) + pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL); + if (!pgmap) return -ENOMEM; - - init_completion(&p2p_pgmap->ref_done); - error = percpu_ref_init(&p2p_pgmap->ref, - pci_p2pdma_percpu_release, 0, GFP_KERNEL); - if (error) - goto pgmap_free; - - pgmap = &p2p_pgmap->pgmap; - pgmap->res.start = pci_resource_start(pdev, bar) + offset; pgmap->res.end = pgmap->res.start + size - 1; pgmap->res.flags = pci_resource_flags(pdev, bar); - pgmap->ref = &p2p_pgmap->ref; pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) - pci_resource_start(pdev, bar); - pgmap->ops = &pci_p2pdma_pagemap_ops; addr = devm_memremap_pages(&pdev->dev, pgmap); if (IS_ERR(addr)) { @@ -225,7 +176,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr, pci_bus_address(pdev, bar) + offset, resource_size(&pgmap->res), dev_to_node(&pdev->dev), - &p2p_pgmap->ref); + pgmap->ref); if (error) goto pages_free; @@ -237,7 +188,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, pages_free: devm_memunmap_pages(&pdev->dev, pgmap); pgmap_free: - devm_kfree(&pdev->dev, p2p_pgmap); + devm_kfree(&pdev->dev, pgmap); return error; } EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource); -- cgit v1.2.3 From 721be868142cb95888847dfaaf3d1c5b8c65b943 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:17 +0200 Subject: nouveau: use alloc_page_vma directly hmm_vma_alloc_locked_page is scheduled to go away, use the proper mm function directly. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 40c47d6a7d78..a50f6fd2fe24 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -148,11 +148,12 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) continue; - dpage = hmm_vma_alloc_locked_page(vma, addr); + dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr); if (!dpage) { dst_pfns[i] = MIGRATE_PFN_ERROR; continue; } + lock_page(dpage); dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; -- cgit v1.2.3 From 4239f267e3cd31e6e592d26a9fa6834b5a11560b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:18 +0200 Subject: nouveau: use devm_memremap_pages directly Just use devm_memremap_pages instead of hmm_devmem_add pages to allow killing that wrapper which doesn't provide a whole lot of benefits. Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 82 ++++++++++++++++------------------ 1 file changed, 38 insertions(+), 44 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index a50f6fd2fe24..0fb7a44b8bc4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -72,7 +72,8 @@ struct nouveau_dmem_migrate { }; struct nouveau_dmem { - struct hmm_devmem *devmem; + struct nouveau_drm *drm; + struct dev_pagemap pagemap; struct nouveau_dmem_migrate migrate; struct list_head chunk_free; struct list_head chunk_full; @@ -80,6 +81,11 @@ struct nouveau_dmem { struct mutex mutex; }; +static inline struct nouveau_dmem *page_to_dmem(struct page *page) +{ + return container_of(page->pgmap, struct nouveau_dmem, pagemap); +} + struct nouveau_dmem_fault { struct nouveau_drm *drm; struct nouveau_fence *fence; @@ -96,8 +102,7 @@ struct nouveau_migrate { unsigned long dma_nr; }; -static void -nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page) +static void nouveau_dmem_page_free(struct page *page) { struct nouveau_dmem_chunk *chunk; unsigned long idx; @@ -260,29 +265,21 @@ static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = { .finalize_and_map = nouveau_dmem_fault_finalize_and_map, }; -static vm_fault_t -nouveau_dmem_fault(struct hmm_devmem *devmem, - struct vm_area_struct *vma, - unsigned long addr, - const struct page *page, - unsigned int flags, - pmd_t *pmdp) +static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) { - struct drm_device *drm_dev = dev_get_drvdata(devmem->device); + struct nouveau_dmem *dmem = page_to_dmem(vmf->page); unsigned long src[1] = {0}, dst[1] = {0}; - struct nouveau_dmem_fault fault = {0}; + struct nouveau_dmem_fault fault = { .drm = dmem->drm }; int ret; - - /* * FIXME what we really want is to find some heuristic to migrate more * than just one page on CPU fault. When such fault happens it is very * likely that more surrounding page will CPU fault too. */ - fault.drm = nouveau_drm(drm_dev); - ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vma, addr, - addr + PAGE_SIZE, src, dst, &fault); + ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma, + vmf->address, vmf->address + PAGE_SIZE, + src, dst, &fault); if (ret) return VM_FAULT_SIGBUS; @@ -292,10 +289,9 @@ nouveau_dmem_fault(struct hmm_devmem *devmem, return 0; } -static const struct hmm_devmem_ops -nouveau_dmem_devmem_ops = { - .free = nouveau_dmem_free, - .fault = nouveau_dmem_fault, +static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { + .page_free = nouveau_dmem_page_free, + .migrate_to_ram = nouveau_dmem_migrate_to_ram, }; static int @@ -581,7 +577,8 @@ void nouveau_dmem_init(struct nouveau_drm *drm) { struct device *device = drm->dev->dev; - unsigned long i, size; + struct resource *res; + unsigned long i, size, pfn_first; int ret; /* This only make sense on PASCAL or newer */ @@ -591,6 +588,7 @@ nouveau_dmem_init(struct nouveau_drm *drm) if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL))) return; + drm->dmem->drm = drm; mutex_init(&drm->dmem->mutex); INIT_LIST_HEAD(&drm->dmem->chunk_free); INIT_LIST_HEAD(&drm->dmem->chunk_full); @@ -600,11 +598,8 @@ nouveau_dmem_init(struct nouveau_drm *drm) /* Initialize migration dma helpers before registering memory */ ret = nouveau_dmem_migrate_init(drm); - if (ret) { - kfree(drm->dmem); - drm->dmem = NULL; - return; - } + if (ret) + goto out_free; /* * FIXME we need some kind of policy to decide how much VRAM we @@ -612,14 +607,16 @@ nouveau_dmem_init(struct nouveau_drm *drm) * and latter if we want to do thing like over commit then we * could revisit this. */ - drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops, - device, size); - if (IS_ERR(drm->dmem->devmem)) { - kfree(drm->dmem); - drm->dmem = NULL; - return; - } - + res = devm_request_free_mem_region(device, &iomem_resource, size); + if (IS_ERR(res)) + goto out_free; + drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE; + drm->dmem->pagemap.res = *res; + drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops; + if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap))) + goto out_free; + + pfn_first = res->start >> PAGE_SHIFT; for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) { struct nouveau_dmem_chunk *chunk; struct page *page; @@ -632,8 +629,7 @@ nouveau_dmem_init(struct nouveau_drm *drm) } chunk->drm = drm; - chunk->pfn_first = drm->dmem->devmem->pfn_first; - chunk->pfn_first += (i * DMEM_CHUNK_NPAGES); + chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES); list_add_tail(&chunk->list, &drm->dmem->chunk_empty); page = pfn_to_page(chunk->pfn_first); @@ -643,6 +639,10 @@ nouveau_dmem_init(struct nouveau_drm *drm) } NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20); + return; +out_free: + kfree(drm->dmem); + drm->dmem = NULL; } static void @@ -833,13 +833,7 @@ out: static inline bool nouveau_dmem_page(struct nouveau_drm *drm, struct page *page) { - if (!is_device_private_page(page)) - return false; - - if (drm->dmem->devmem != page->pgmap->data) - return false; - - return true; + return is_device_private_page(page) && drm->dmem == page_to_dmem(page); } void -- cgit v1.2.3 From 8a164fef9c4ccf6ff7757170397222860e40d192 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:21 +0200 Subject: mm: simplify ZONE_DEVICE page private data Remove the clumsy hmm_devmem_page_{get,set}_drvdata helpers, and instead just access the page directly. Also make the page data a void pointer, and thus much easier to use. Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 18 +++++++----------- include/linux/hmm.h | 32 -------------------------------- include/linux/mm_types.h | 2 +- mm/page_alloc.c | 8 ++++---- 4 files changed, 12 insertions(+), 48 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 0fb7a44b8bc4..42c026010938 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -104,11 +104,8 @@ struct nouveau_migrate { static void nouveau_dmem_page_free(struct page *page) { - struct nouveau_dmem_chunk *chunk; - unsigned long idx; - - chunk = (void *)hmm_devmem_page_get_drvdata(page); - idx = page_to_pfn(page) - chunk->pfn_first; + struct nouveau_dmem_chunk *chunk = page->zone_device_data; + unsigned long idx = page_to_pfn(page) - chunk->pfn_first; /* * FIXME: @@ -200,7 +197,7 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, dst_addr = fault->dma[fault->npages++]; - chunk = (void *)hmm_devmem_page_get_drvdata(spage); + chunk = spage->zone_device_data; src_addr = page_to_pfn(spage) - chunk->pfn_first; src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset; @@ -633,9 +630,8 @@ nouveau_dmem_init(struct nouveau_drm *drm) list_add_tail(&chunk->list, &drm->dmem->chunk_empty); page = pfn_to_page(chunk->pfn_first); - for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) { - hmm_devmem_page_set_drvdata(page, (long)chunk); - } + for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) + page->zone_device_data = chunk; } NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20); @@ -698,7 +694,7 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) continue; - chunk = (void *)hmm_devmem_page_get_drvdata(dpage); + chunk = dpage->zone_device_data; dst_addr = page_to_pfn(dpage) - chunk->pfn_first; dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset; @@ -862,7 +858,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, continue; } - chunk = (void *)hmm_devmem_page_get_drvdata(page); + chunk = page->zone_device_data; addr = page_to_pfn(page) - chunk->pfn_first; addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 86aa4ec3404c..3d00e9550e77 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -584,36 +584,4 @@ static inline void hmm_mm_destroy(struct mm_struct *mm) {} static inline void hmm_mm_init(struct mm_struct *mm) {} #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) -/* - * hmm_devmem_page_set_drvdata - set per-page driver data field - * - * @page: pointer to struct page - * @data: driver data value to set - * - * Because page can not be on lru we have an unsigned long that driver can use - * to store a per page field. This just a simple helper to do that. - */ -static inline void hmm_devmem_page_set_drvdata(struct page *page, - unsigned long data) -{ - page->hmm_data = data; -} - -/* - * hmm_devmem_page_get_drvdata - get per page driver data field - * - * @page: pointer to struct page - * Return: driver data value - */ -static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page) -{ - return page->hmm_data; -} -#endif /* CONFIG_DEVICE_PRIVATE */ -#else /* IS_ENABLED(CONFIG_HMM) */ -static inline void hmm_mm_destroy(struct mm_struct *mm) {} -static inline void hmm_mm_init(struct mm_struct *mm) {} -#endif /* IS_ENABLED(CONFIG_HMM) */ - #endif /* LINUX_HMM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8ec38b11b361..f33a1289c101 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -158,7 +158,7 @@ struct page { struct { /* ZONE_DEVICE pages */ /** @pgmap: Points to the hosting device page map. */ struct dev_pagemap *pgmap; - unsigned long hmm_data; + void *zone_device_data; unsigned long _zd_pad_1; /* uses mapping */ }; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 17a39d40a556..c0e031c52db5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5886,12 +5886,12 @@ void __ref memmap_init_zone_device(struct zone *zone, __SetPageReserved(page); /* - * ZONE_DEVICE pages union ->lru with a ->pgmap back - * pointer and hmm_data. It is a bug if a ZONE_DEVICE - * page is ever freed or placed on a driver-private list. + * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer + * and zone_device_data. It is a bug if a ZONE_DEVICE page is + * ever freed or placed on a driver-private list. */ page->pgmap = pgmap; - page->hmm_data = 0; + page->zone_device_data = NULL; /* * Mark the block movable so that blocks are reserved for -- cgit v1.2.3 From 7328d9cc1b9fdc52105901072e9a4a1234e29b11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:22 +0200 Subject: mm: sort out the DEVICE_PRIVATE Kconfig mess The ZONE_DEVICE support doesn't depend on anything HMM related, just on various bits of arch support as indicated by the architecture. Also don't select the option from nouveau as it isn't present in many setups, and depend on it instead. Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Reviewed-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/Kconfig | 2 +- mm/Kconfig | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index dba2613f7180..6303d203ab1d 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -85,10 +85,10 @@ config DRM_NOUVEAU_BACKLIGHT config DRM_NOUVEAU_SVM bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support" depends on ARCH_HAS_HMM + depends on DEVICE_PRIVATE depends on DRM_NOUVEAU depends on STAGING select HMM_MIRROR - select DEVICE_PRIVATE default n help Say Y here if you want to enable experimental support for diff --git a/mm/Kconfig b/mm/Kconfig index 6f35b85b3052..eecf037a54b3 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -677,13 +677,13 @@ config ARCH_HAS_HMM_MIRROR config ARCH_HAS_HMM bool - default y depends on (X86_64 || PPC64) depends on ZONE_DEVICE depends on MMU && 64BIT depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on SPARSEMEM_VMEMMAP + default y config MIGRATE_VMA_HELPER bool @@ -709,8 +709,7 @@ config HMM_MIRROR config DEVICE_PRIVATE bool "Unaddressable device memory (GPU memory, ...)" - depends on ARCH_HAS_HMM - select HMM + depends on ZONE_DEVICE select DEV_PAGEMAP_OPS help -- cgit v1.2.3 From 43535b0aefab29ea6564e608de4c783ed2ab6c49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:23 +0200 Subject: mm: remove the HMM config option All the mm/hmm.c code is better keyed off HMM_MIRROR. Also let nouveau depend on it instead of the mix of a dummy dependency symbol plus the actually selected one. Drop various odd dependencies, as the code is pretty portable. Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/Kconfig | 3 +-- include/linux/hmm.h | 5 +---- include/linux/mm_types.h | 2 +- mm/Kconfig | 27 ++++----------------------- mm/Makefile | 2 +- mm/hmm.c | 2 -- 6 files changed, 8 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 6303d203ab1d..66c839d8e9d1 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -84,11 +84,10 @@ config DRM_NOUVEAU_BACKLIGHT config DRM_NOUVEAU_SVM bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support" - depends on ARCH_HAS_HMM depends on DEVICE_PRIVATE depends on DRM_NOUVEAU + depends on HMM_MIRROR depends on STAGING - select HMM_MIRROR default n help Say Y here if you want to enable experimental support for diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 3d00e9550e77..b697496e85ba 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -62,7 +62,7 @@ #include #include -#if IS_ENABLED(CONFIG_HMM) +#ifdef CONFIG_HMM_MIRROR #include #include @@ -332,9 +332,6 @@ static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range, return hmm_device_entry_from_pfn(range, pfn); } - - -#if IS_ENABLED(CONFIG_HMM_MIRROR) /* * Mirroring: how to synchronize device page table with CPU page table. * diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f33a1289c101..8d37182f8dbe 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -501,7 +501,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#if IS_ENABLED(CONFIG_HMM) +#ifdef CONFIG_HMM_MIRROR /* HMM needs to track a few things per mm */ struct hmm *hmm; #endif diff --git a/mm/Kconfig b/mm/Kconfig index eecf037a54b3..1e426c26b1d6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -669,37 +669,18 @@ config ZONE_DEVICE If FS_DAX is enabled, then say Y. -config ARCH_HAS_HMM_MIRROR - bool - default y - depends on (X86_64 || PPC64) - depends on MMU && 64BIT - -config ARCH_HAS_HMM - bool - depends on (X86_64 || PPC64) - depends on ZONE_DEVICE - depends on MMU && 64BIT - depends on MEMORY_HOTPLUG - depends on MEMORY_HOTREMOVE - depends on SPARSEMEM_VMEMMAP - default y - config MIGRATE_VMA_HELPER bool config DEV_PAGEMAP_OPS bool -config HMM - bool - select MMU_NOTIFIER - select MIGRATE_VMA_HELPER - config HMM_MIRROR bool "HMM mirror CPU page table into a device page table" - depends on ARCH_HAS_HMM - select HMM + depends on (X86_64 || PPC64) + depends on MMU && 64BIT + select MMU_NOTIFIER + select MIGRATE_VMA_HELPER help Select HMM_MIRROR if you want to mirror range of the CPU page table of a process into a device page table. Here, mirror means "keep synchronized". diff --git a/mm/Makefile b/mm/Makefile index ac5e5ba78874..91c99040065c 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -102,5 +102,5 @@ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o -obj-$(CONFIG_HMM) += hmm.o +obj-$(CONFIG_HMM_MIRROR) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o diff --git a/mm/hmm.c b/mm/hmm.c index 90ca0cdab9db..d62ce64d6bca 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -25,7 +25,6 @@ #include #include -#if IS_ENABLED(CONFIG_HMM_MIRROR) static const struct mmu_notifier_ops hmm_mmu_notifier_ops; static inline struct hmm *mm_get_hmm(struct mm_struct *mm) @@ -1326,4 +1325,3 @@ long hmm_range_dma_unmap(struct hmm_range *range, return cpages; } EXPORT_SYMBOL(hmm_range_dma_unmap); -#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -- cgit v1.2.3 From b6b346a0665a8bf8b28fd851217c435a3eec4af9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:24 +0200 Subject: mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR The migrate_vma helper is only used by noveau to migrate device private pages around. Other HMM_MIRROR users like amdgpu or infiniband don't need it. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/Kconfig | 1 + mm/Kconfig | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 66c839d8e9d1..96b9814e6d06 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -88,6 +88,7 @@ config DRM_NOUVEAU_SVM depends on DRM_NOUVEAU depends on HMM_MIRROR depends on STAGING + select MIGRATE_VMA_HELPER default n help Say Y here if you want to enable experimental support for diff --git a/mm/Kconfig b/mm/Kconfig index 1e426c26b1d6..40cf0562412d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -680,7 +680,6 @@ config HMM_MIRROR depends on (X86_64 || PPC64) depends on MMU && 64BIT select MMU_NOTIFIER - select MIGRATE_VMA_HELPER help Select HMM_MIRROR if you want to mirror range of the CPU page table of a process into a device page table. Here, mirror means "keep synchronized". -- cgit v1.2.3