summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlistair Popple <apopple@nvidia.com>2022-09-28 22:01:19 +1000
committerAndrew Morton <akpm@linux-foundation.org>2022-10-12 18:51:49 -0700
commite778406b40dbb1342a1888cd751ca9d2982a12e2 (patch)
tree0d5d3f806002f99dae2dcf67ea2c98fffa1c15a1
parent241f68859656836ae3e85179cc224cc4c5e4e6a7 (diff)
downloadlinux-e778406b40dbb1342a1888cd751ca9d2982a12e2.tar.bz2
mm/migrate_device.c: add migrate_device_range()
Device drivers can use the migrate_vma family of functions to migrate existing private anonymous mappings to device private pages. These pages are backed by memory on the device with drivers being responsible for copying data to and from device memory. Device private pages are freed via the pgmap->page_free() callback when they are unmapped and their refcount drops to zero. Alternatively they may be freed indirectly via migration back to CPU memory in response to a pgmap->migrate_to_ram() callback called whenever the CPU accesses an address mapped to a device private page. In other words drivers cannot control the lifetime of data allocated on the devices and must wait until these pages are freed from userspace. This causes issues when memory needs to reclaimed on the device, either because the device is going away due to a ->release() callback or because another user needs to use the memory. Drivers could use the existing migrate_vma functions to migrate data off the device. However this would require them to track the mappings of each page which is both complicated and not always possible. Instead drivers need to be able to migrate device pages directly so they can free up device memory. To allow that this patch introduces the migrate_device family of functions which are functionally similar to migrate_vma but which skips the initial lookup based on mapping. Link: https://lkml.kernel.org/r/868116aab70b0c8ee467d62498bb2cf0ef907295.1664366292.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple <apopple@nvidia.com> Cc: "Huang, Ying" <ying.huang@intel.com> Cc: Zi Yan <ziy@nvidia.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Yang Shi <shy828301@gmail.com> Cc: David Hildenbrand <david@redhat.com> Cc: Ralph Campbell <rcampbell@nvidia.com> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: Alex Sierra <alex.sierra@amd.com> Cc: Ben Skeggs <bskeggs@redhat.com> Cc: Christian König <christian.koenig@amd.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Felix Kuehling <Felix.Kuehling@amd.com> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Lyude Paul <lyude@redhat.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r--include/linux/migrate.h7
-rw-r--r--mm/migrate_device.c89
2 files changed, 89 insertions, 7 deletions
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 52090d1f9230..3ef77f52a4f0 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -210,6 +210,13 @@ struct migrate_vma {
int migrate_vma_setup(struct migrate_vma *args);
void migrate_vma_pages(struct migrate_vma *migrate);
void migrate_vma_finalize(struct migrate_vma *migrate);
+int migrate_device_range(unsigned long *src_pfns, unsigned long start,
+ unsigned long npages);
+void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
+ unsigned long npages);
+void migrate_device_finalize(unsigned long *src_pfns,
+ unsigned long *dst_pfns, unsigned long npages);
+
#endif /* CONFIG_MIGRATION */
#endif /* _LINUX_MIGRATE_H */
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 7707c1d898f5..6fa682eef7a0 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -693,7 +693,7 @@ abort:
*src &= ~MIGRATE_PFN_MIGRATE;
}
-static void migrate_device_pages(unsigned long *src_pfns,
+static void __migrate_device_pages(unsigned long *src_pfns,
unsigned long *dst_pfns, unsigned long npages,
struct migrate_vma *migrate)
{
@@ -715,6 +715,9 @@ static void migrate_device_pages(unsigned long *src_pfns,
if (!page) {
unsigned long addr;
+ if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
+ continue;
+
/*
* The only time there is no vma is when called from
* migrate_device_coherent_page(). However this isn't
@@ -722,8 +725,6 @@ static void migrate_device_pages(unsigned long *src_pfns,
*/
VM_BUG_ON(!migrate);
addr = migrate->start + i*PAGE_SIZE;
- if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
- continue;
if (!notified) {
notified = true;
@@ -779,6 +780,22 @@ static void migrate_device_pages(unsigned long *src_pfns,
}
/**
+ * migrate_device_pages() - migrate meta-data from src page to dst page
+ * @src_pfns: src_pfns returned from migrate_device_range()
+ * @dst_pfns: array of pfns allocated by the driver to migrate memory to
+ * @npages: number of pages in the range
+ *
+ * Equivalent to migrate_vma_pages(). This is called to migrate struct page
+ * meta-data from source struct page to destination.
+ */
+void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
+ unsigned long npages)
+{
+ __migrate_device_pages(src_pfns, dst_pfns, npages, NULL);
+}
+EXPORT_SYMBOL(migrate_device_pages);
+
+/**
* migrate_vma_pages() - migrate meta-data from src page to dst page
* @migrate: migrate struct containing all migration information
*
@@ -788,12 +805,22 @@ static void migrate_device_pages(unsigned long *src_pfns,
*/
void migrate_vma_pages(struct migrate_vma *migrate)
{
- migrate_device_pages(migrate->src, migrate->dst, migrate->npages, migrate);
+ __migrate_device_pages(migrate->src, migrate->dst, migrate->npages, migrate);
}
EXPORT_SYMBOL(migrate_vma_pages);
-static void migrate_device_finalize(unsigned long *src_pfns,
- unsigned long *dst_pfns, unsigned long npages)
+/*
+ * migrate_device_finalize() - complete page migration
+ * @src_pfns: src_pfns returned from migrate_device_range()
+ * @dst_pfns: array of pfns allocated by the driver to migrate memory to
+ * @npages: number of pages in the range
+ *
+ * Completes migration of the page by removing special migration entries.
+ * Drivers must ensure copying of page data is complete and visible to the CPU
+ * before calling this.
+ */
+void migrate_device_finalize(unsigned long *src_pfns,
+ unsigned long *dst_pfns, unsigned long npages)
{
unsigned long i;
@@ -837,6 +864,7 @@ static void migrate_device_finalize(unsigned long *src_pfns,
}
}
}
+EXPORT_SYMBOL(migrate_device_finalize);
/**
* migrate_vma_finalize() - restore CPU page table entry
@@ -855,6 +883,53 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
}
EXPORT_SYMBOL(migrate_vma_finalize);
+/**
+ * migrate_device_range() - migrate device private pfns to normal memory.
+ * @src_pfns: array large enough to hold migrating source device private pfns.
+ * @start: starting pfn in the range to migrate.
+ * @npages: number of pages to migrate.
+ *
+ * migrate_vma_setup() is similar in concept to migrate_vma_setup() except that
+ * instead of looking up pages based on virtual address mappings a range of
+ * device pfns that should be migrated to system memory is used instead.
+ *
+ * This is useful when a driver needs to free device memory but doesn't know the
+ * virtual mappings of every page that may be in device memory. For example this
+ * is often the case when a driver is being unloaded or unbound from a device.
+ *
+ * Like migrate_vma_setup() this function will take a reference and lock any
+ * migrating pages that aren't free before unmapping them. Drivers may then
+ * allocate destination pages and start copying data from the device to CPU
+ * memory before calling migrate_device_pages().
+ */
+int migrate_device_range(unsigned long *src_pfns, unsigned long start,
+ unsigned long npages)
+{
+ unsigned long i, pfn;
+
+ for (pfn = start, i = 0; i < npages; pfn++, i++) {
+ struct page *page = pfn_to_page(pfn);
+
+ if (!get_page_unless_zero(page)) {
+ src_pfns[i] = 0;
+ continue;
+ }
+
+ if (!trylock_page(page)) {
+ src_pfns[i] = 0;
+ put_page(page);
+ continue;
+ }
+
+ src_pfns[i] = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
+ }
+
+ migrate_device_unmap(src_pfns, npages, NULL);
+
+ return 0;
+}
+EXPORT_SYMBOL(migrate_device_range);
+
/*
* Migrate a device coherent page back to normal memory. The caller should have
* a reference on page which will be copied to the new page if migration is
@@ -885,7 +960,7 @@ int migrate_device_coherent_page(struct page *page)
dst_pfn = migrate_pfn(page_to_pfn(dpage));
}
- migrate_device_pages(&src_pfn, &dst_pfn, 1, NULL);
+ migrate_device_pages(&src_pfn, &dst_pfn, 1);
if (src_pfn & MIGRATE_PFN_MIGRATE)
copy_highpage(dpage, page);
migrate_device_finalize(&src_pfn, &dst_pfn, 1);