From 1b88b99bdff85e75df1cfaa418ed2ef8e1bffeb2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Jul 2019 08:52:56 +0200 Subject: nouveau: return -EBUSY when hmm_range_wait_until_valid fails -EAGAIN has a magic meaning for non-blocking faults, so don't overload it. Given that the caller doesn't check for specific error codes this change is purely cosmetic. Link: https://lore.kernel.org/r/20190724065258.16603-6-hch@lst.de Tested-by: Ralph Campbell Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index a835cebb6d90..545100f7c594 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -502,7 +502,7 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { up_read(&range->vma->vm_mm->mmap_sem); - return -EAGAIN; + return -EBUSY; } ret = hmm_range_fault(range, true); -- cgit v1.2.3 From f32471e2cf87112b8f5dc10469b27c39c1a41722 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Jul 2019 08:52:57 +0200 Subject: mm/hmm: remove the legacy hmm_pfn_* APIs Switch the one remaining user in nouveau over to its replacement, and remove all the wrappers. Link: https://lore.kernel.org/r/20190724065258.16603-7-hch@lst.de Tested-by: Ralph Campbell Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +- include/linux/hmm.h | 34 ---------------------------------- 2 files changed, 1 insertion(+), 35 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 1333220787a1..345c63cb752a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -845,7 +845,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, struct page *page; uint64_t addr; - page = hmm_pfn_to_page(range, range->pfns[i]); + page = hmm_device_entry_to_page(range, range->pfns[i]); if (page == NULL) continue; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 7ef56dc18050..9f32586684c9 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -290,40 +290,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, range->flags[HMM_PFN_VALID]; } -/* - * Old API: - * hmm_pfn_to_page() - * hmm_pfn_to_pfn() - * hmm_pfn_from_page() - * hmm_pfn_from_pfn() - * - * This are the OLD API please use new API, it is here to avoid cross-tree - * merge painfullness ie we convert things to new API in stages. - */ -static inline struct page *hmm_pfn_to_page(const struct hmm_range *range, - uint64_t pfn) -{ - return hmm_device_entry_to_page(range, pfn); -} - -static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range, - uint64_t pfn) -{ - return hmm_device_entry_to_pfn(range, pfn); -} - -static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range, - struct page *page) -{ - return hmm_device_entry_from_page(range, page); -} - -static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range, - unsigned long pfn) -{ - return hmm_device_entry_from_pfn(range, pfn); -} - /* * Mirroring: how to synchronize device page table with CPU page table. * -- cgit v1.2.3 From 1f961807925032daa90267d8a23ff730e7ede07a Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 25 Jul 2019 17:56:44 -0700 Subject: mm/hmm: replace hmm_update with mmu_notifier_range The hmm_mirror_ops callback function sync_cpu_device_pagetables() passes a struct hmm_update which is a simplified version of struct mmu_notifier_range. This is unnecessary so replace hmm_update with mmu_notifier_range directly. Link: https://lore.kernel.org/r/20190726005650.2566-2-rcampbell@nvidia.com Signed-off-by: Ralph Campbell Reviewed: Christoph Hellwig Reviewed-by: Jason Gunthorpe [jgg: white space tuning] Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 14 ++++++++------ drivers/gpu/drm/nouveau/nouveau_svm.c | 4 ++-- include/linux/hmm.h | 34 ++++++---------------------------- mm/hmm.c | 13 ++++--------- 4 files changed, 20 insertions(+), 45 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 3971c201f320..b698b423b25d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -195,13 +195,14 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, - const struct hmm_update *update) +static int +amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, + const struct mmu_notifier_range *update) { struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); unsigned long start = update->start; unsigned long end = update->end; - bool blockable = update->blockable; + bool blockable = mmu_notifier_range_blockable(update); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ @@ -243,13 +244,14 @@ static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, * necessitates evicting all user-mode queues of the process. The BOs * are restorted in amdgpu_mn_invalidate_range_end_hsa. */ -static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, - const struct hmm_update *update) +static int +amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, + const struct mmu_notifier_range *update) { struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); unsigned long start = update->start; unsigned long end = update->end; - bool blockable = update->blockable; + bool blockable = mmu_notifier_range_blockable(update); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 545100f7c594..79b29c918717 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -252,13 +252,13 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) static int nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror, - const struct hmm_update *update) + const struct mmu_notifier_range *update) { struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror); unsigned long start = update->start; unsigned long limit = update->end; - if (!update->blockable) + if (!mmu_notifier_range_blockable(update)) return -EAGAIN; SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit); diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 9f32586684c9..4f870f7017cb 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -340,29 +340,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, struct hmm_mirror; -/* - * enum hmm_update_event - type of update - * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why) - */ -enum hmm_update_event { - HMM_UPDATE_INVALIDATE, -}; - -/* - * struct hmm_update - HMM update information for callback - * - * @start: virtual start address of the range to update - * @end: virtual end address of the range to update - * @event: event triggering the update (what is happening) - * @blockable: can the callback block/sleep ? - */ -struct hmm_update { - unsigned long start; - unsigned long end; - enum hmm_update_event event; - bool blockable; -}; - /* * struct hmm_mirror_ops - HMM mirror device operations callback * @@ -383,9 +360,9 @@ struct hmm_mirror_ops { /* sync_cpu_device_pagetables() - synchronize page tables * * @mirror: pointer to struct hmm_mirror - * @update: update information (see struct hmm_update) - * Return: -EAGAIN if update.blockable false and callback need to - * block, 0 otherwise. + * @update: update information (see struct mmu_notifier_range) + * Return: -EAGAIN if mmu_notifier_range_blockable(update) is false + * and callback needs to block, 0 otherwise. * * This callback ultimately originates from mmu_notifiers when the CPU * page table is updated. The device driver must update its page table @@ -396,8 +373,9 @@ struct hmm_mirror_ops { * page tables are completely updated (TLBs flushed, etc); this is a * synchronous call. */ - int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror, - const struct hmm_update *update); + int (*sync_cpu_device_pagetables)( + struct hmm_mirror *mirror, + const struct mmu_notifier_range *update); }; /* diff --git a/mm/hmm.c b/mm/hmm.c index 54b3a4162ae9..4040b4427635 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -165,7 +165,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, { struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); struct hmm_mirror *mirror; - struct hmm_update update; struct hmm_range *range; unsigned long flags; int ret = 0; @@ -173,15 +172,10 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, if (!kref_get_unless_zero(&hmm->kref)) return 0; - update.start = nrange->start; - update.end = nrange->end; - update.event = HMM_UPDATE_INVALIDATE; - update.blockable = mmu_notifier_range_blockable(nrange); - spin_lock_irqsave(&hmm->ranges_lock, flags); hmm->notifiers++; list_for_each_entry(range, &hmm->ranges, list) { - if (update.end < range->start || update.start >= range->end) + if (nrange->end < range->start || nrange->start >= range->end) continue; range->valid = false; @@ -198,9 +192,10 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, list_for_each_entry(mirror, &hmm->mirrors, list) { int rc; - rc = mirror->ops->sync_cpu_device_pagetables(mirror, &update); + rc = mirror->ops->sync_cpu_device_pagetables(mirror, nrange); if (rc) { - if (WARN_ON(update.blockable || rc != -EAGAIN)) + if (WARN_ON(mmu_notifier_range_blockable(nrange) || + rc != -EAGAIN)) continue; ret = -EAGAIN; break; -- cgit v1.2.3 From 9a4903e49e495bfd2650862dfae4178bebe4db9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Jul 2019 17:56:46 -0700 Subject: mm/hmm: replace the block argument to hmm_range_fault with a flags value This allows easier expansion to other flags, and also makes the callers a little easier to read. Link: https://lore.kernel.org/r/20190726005650.2566-4-rcampbell@nvidia.com Signed-off-by: Christoph Hellwig Signed-off-by: Ralph Campbell Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- include/linux/hmm.h | 11 ++++- mm/hmm.c | 74 ++++++++++++++++----------------- 4 files changed, 48 insertions(+), 41 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e51b48ac48eb..12a59ac83f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -832,7 +832,7 @@ retry: down_read(&mm->mmap_sem); - r = hmm_range_fault(range, true); + r = hmm_range_fault(range, 0); if (unlikely(r < 0)) { if (likely(r == -EAGAIN)) { /* diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 79b29c918717..49b520c60fc5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -505,7 +505,7 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) return -EBUSY; } - ret = hmm_range_fault(range, true); + ret = hmm_range_fault(range, 0); if (ret <= 0) { if (ret == 0) ret = -EBUSY; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 4f870f7017cb..89a141060e5b 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -407,12 +407,19 @@ int hmm_range_register(struct hmm_range *range, unsigned long end, unsigned page_shift); void hmm_range_unregister(struct hmm_range *range); + +/* + * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. + */ +#define HMM_FAULT_ALLOW_RETRY (1 << 0) + long hmm_range_snapshot(struct hmm_range *range); -long hmm_range_fault(struct hmm_range *range, bool block); +long hmm_range_fault(struct hmm_range *range, unsigned int flags); + long hmm_range_dma_map(struct hmm_range *range, struct device *device, dma_addr_t *daddrs, - bool block); + unsigned int flags); long hmm_range_dma_unmap(struct hmm_range *range, struct vm_area_struct *vma, struct device *device, diff --git a/mm/hmm.c b/mm/hmm.c index 362944b0fbca..84f2791d3510 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -281,7 +281,7 @@ struct hmm_vma_walk { struct dev_pagemap *pgmap; unsigned long last; bool fault; - bool block; + unsigned int flags; }; static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr, @@ -293,8 +293,11 @@ static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr, struct vm_area_struct *vma = walk->vma; vm_fault_t ret; - flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY; - flags |= write_fault ? FAULT_FLAG_WRITE : 0; + if (hmm_vma_walk->flags & HMM_FAULT_ALLOW_RETRY) + flags |= FAULT_FLAG_ALLOW_RETRY; + if (write_fault) + flags |= FAULT_FLAG_WRITE; + ret = handle_mm_fault(vma, addr, flags); if (ret & VM_FAULT_RETRY) { /* Note, handle_mm_fault did up_read(&mm->mmap_sem)) */ @@ -1012,26 +1015,26 @@ long hmm_range_snapshot(struct hmm_range *range) } EXPORT_SYMBOL(hmm_range_snapshot); -/* - * hmm_range_fault() - try to fault some address in a virtual address range - * @range: range being faulted - * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem) - * Return: number of valid pages in range->pfns[] (from range start - * address). This may be zero. If the return value is negative, - * then one of the following values may be returned: +/** + * hmm_range_fault - try to fault some address in a virtual address range + * @range: range being faulted + * @flags: HMM_FAULT_* flags * - * -EINVAL invalid arguments or mm or virtual address are in an - * invalid vma (for instance device file vma). - * -ENOMEM: Out of memory. - * -EPERM: Invalid permission (for instance asking for write and - * range is read only). - * -EAGAIN: If you need to retry and mmap_sem was drop. This can only - * happens if block argument is false. - * -EBUSY: If the the range is being invalidated and you should wait - * for invalidation to finish. - * -EFAULT: Invalid (ie either no valid vma or it is illegal to access - * that range), number of valid pages in range->pfns[] (from - * range start address). + * Return: the number of valid pages in range->pfns[] (from range start + * address), which may be zero. On error one of the following status codes + * can be returned: + * + * -EINVAL: Invalid arguments or mm or virtual address is in an invalid vma + * (e.g., device file vma). + * -ENOMEM: Out of memory. + * -EPERM: Invalid permission (e.g., asking for write and range is read + * only). + * -EAGAIN: A page fault needs to be retried and mmap_sem was dropped. + * -EBUSY: The range has been invalidated and the caller needs to wait for + * the invalidation to finish. + * -EFAULT: Invalid (i.e., either no valid vma or it is illegal to access + * that range) number of valid pages in range->pfns[] (from + * range start address). * * This is similar to a regular CPU page fault except that it will not trigger * any memory migration if the memory being faulted is not accessible by CPUs @@ -1040,7 +1043,7 @@ EXPORT_SYMBOL(hmm_range_snapshot); * On error, for one virtual address in the range, the function will mark the * corresponding HMM pfn entry with an error flag. */ -long hmm_range_fault(struct hmm_range *range, bool block) +long hmm_range_fault(struct hmm_range *range, unsigned int flags) { const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP; unsigned long start = range->start, end; @@ -1086,7 +1089,7 @@ long hmm_range_fault(struct hmm_range *range, bool block) hmm_vma_walk.pgmap = NULL; hmm_vma_walk.last = start; hmm_vma_walk.fault = true; - hmm_vma_walk.block = block; + hmm_vma_walk.flags = flags; hmm_vma_walk.range = range; mm_walk.private = &hmm_vma_walk; end = min(range->end, vma->vm_end); @@ -1125,25 +1128,22 @@ long hmm_range_fault(struct hmm_range *range, bool block) EXPORT_SYMBOL(hmm_range_fault); /** - * hmm_range_dma_map() - hmm_range_fault() and dma map page all in one. - * @range: range being faulted - * @device: device against to dma map page to - * @daddrs: dma address of mapped pages - * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem) - * Return: number of pages mapped on success, -EAGAIN if mmap_sem have been - * drop and you need to try again, some other error value otherwise + * hmm_range_dma_map - hmm_range_fault() and dma map page all in one. + * @range: range being faulted + * @device: device to map page to + * @daddrs: array of dma addresses for the mapped pages + * @flags: HMM_FAULT_* * - * Note same usage pattern as hmm_range_fault(). + * Return: the number of pages mapped on success (including zero), or any + * status return from hmm_range_fault() otherwise. */ -long hmm_range_dma_map(struct hmm_range *range, - struct device *device, - dma_addr_t *daddrs, - bool block) +long hmm_range_dma_map(struct hmm_range *range, struct device *device, + dma_addr_t *daddrs, unsigned int flags) { unsigned long i, npages, mapped; long ret; - ret = hmm_range_fault(range, block); + ret = hmm_range_fault(range, flags); if (ret <= 0) return ret ? ret : -EBUSY; -- cgit v1.2.3 From cc374377a19d2a49d693997b62dc3a6f5fac6d61 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 25 Jul 2019 17:56:50 -0700 Subject: mm/hmm: remove hmm_range vma Since hmm_range_fault() doesn't use the struct hmm_range vma field, remove it. Link: https://lore.kernel.org/r/20190726005650.2566-8-rcampbell@nvidia.com Suggested-by: Jason Gunthorpe Signed-off-by: Ralph Campbell Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_svm.c | 7 +++---- include/linux/hmm.h | 1 - mm/hmm.c | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 49b520c60fc5..a74530b5a523 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -496,12 +496,12 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) range->start, range->end, PAGE_SHIFT); if (ret) { - up_read(&range->vma->vm_mm->mmap_sem); + up_read(&range->hmm->mm->mmap_sem); return (int)ret; } if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { - up_read(&range->vma->vm_mm->mmap_sem); + up_read(&range->hmm->mm->mmap_sem); return -EBUSY; } @@ -509,7 +509,7 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) if (ret <= 0) { if (ret == 0) ret = -EBUSY; - up_read(&range->vma->vm_mm->mmap_sem); + up_read(&range->hmm->mm->mmap_sem); hmm_range_unregister(range); return ret; } @@ -682,7 +682,6 @@ nouveau_svm_fault(struct nvif_notify *notify) args.i.p.addr + args.i.p.size, fn - fi); /* Have HMM fault pages within the fault window to the GPU. */ - range.vma = vma; range.start = args.i.p.addr; range.end = args.i.p.addr + args.i.p.size; range.pfns = args.phys; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 90dc5944b1bc..82265118d94a 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -164,7 +164,6 @@ enum hmm_pfn_value_e { */ struct hmm_range { struct hmm *hmm; - struct vm_area_struct *vma; struct list_head list; unsigned long start; unsigned long end; diff --git a/mm/hmm.c b/mm/hmm.c index 6111c0a3c12d..9a908902e4cc 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -1002,7 +1002,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) return -EPERM; } - range->vma = vma; hmm_vma_walk.pgmap = NULL; hmm_vma_walk.last = start; hmm_vma_walk.flags = flags; -- cgit v1.2.3 From 5aa0acb374edd3c5001328276d34ac86f14a3867 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Aug 2019 19:05:41 +0300 Subject: nouveau: pass struct nouveau_svmm to nouveau_range_fault We'll need the nouveau_svmm structure to improve the function soon. For now this allows using the svmm->mm reference to unlock the mmap_sem, and thus the same dereference chain that the caller uses to lock and unlock it. Link: https://lore.kernel.org/r/20190806160554.14046-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_svm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index a74530b5a523..98072fd48cf7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -485,23 +485,23 @@ nouveau_range_done(struct hmm_range *range) } static int -nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) +nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range) { long ret; range->default_flags = 0; range->pfn_flags_mask = -1UL; - ret = hmm_range_register(range, mirror, + ret = hmm_range_register(range, &svmm->mirror, range->start, range->end, PAGE_SHIFT); if (ret) { - up_read(&range->hmm->mm->mmap_sem); + up_read(&svmm->mm->mmap_sem); return (int)ret; } if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { - up_read(&range->hmm->mm->mmap_sem); + up_read(&svmm->mm->mmap_sem); return -EBUSY; } @@ -509,7 +509,7 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) if (ret <= 0) { if (ret == 0) ret = -EBUSY; - up_read(&range->hmm->mm->mmap_sem); + up_read(&svmm->mm->mmap_sem); hmm_range_unregister(range); return ret; } @@ -689,7 +689,7 @@ nouveau_svm_fault(struct nvif_notify *notify) range.values = nouveau_svm_pfn_values; range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; again: - ret = nouveau_range_fault(&svmm->mirror, &range); + ret = nouveau_range_fault(svmm, &range); if (ret == 0) { mutex_lock(&svmm->mutex); if (!nouveau_range_done(&range)) { -- cgit v1.2.3 From fac555ac93d453a0d2265eef88bf4c249dd63e07 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Aug 2019 19:05:44 +0300 Subject: mm/hmm: remove superfluous arguments from hmm_range_register The start, end and page_shift values are all saved in the range structure, so we might as well use that for argument passing. Link: https://lore.kernel.org/r/20190806160554.14046-7-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Felix Kuehling Signed-off-by: Jason Gunthorpe --- Documentation/vm/hmm.rst | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +++++-- drivers/gpu/drm/nouveau/nouveau_svm.c | 5 ++--- include/linux/hmm.h | 6 +----- mm/hmm.c | 20 +++++--------------- 5 files changed, 14 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst index ddcb5ca8b296..e63c11f7e0e0 100644 --- a/Documentation/vm/hmm.rst +++ b/Documentation/vm/hmm.rst @@ -222,7 +222,7 @@ The usage pattern is:: range.flags = ...; range.values = ...; range.pfn_shift = ...; - hmm_range_register(&range); + hmm_range_register(&range, mirror); /* * Just wait for range to be valid, safe to ignore return value as we diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index f0821638bbc6..71d6e7087b0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -818,8 +818,11 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) 0 : range->flags[HMM_PFN_WRITE]; range->pfn_flags_mask = 0; range->pfns = pfns; - hmm_range_register(range, mirror, start, - start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); + range->page_shift = PAGE_SHIFT; + range->start = start; + range->end = start + ttm->num_pages * PAGE_SIZE; + + hmm_range_register(range, mirror); /* * Just wait for range to be valid, safe to ignore return value as we diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 98072fd48cf7..41fad4719ac6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -492,9 +492,7 @@ nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range) range->default_flags = 0; range->pfn_flags_mask = -1UL; - ret = hmm_range_register(range, &svmm->mirror, - range->start, range->end, - PAGE_SHIFT); + ret = hmm_range_register(range, &svmm->mirror); if (ret) { up_read(&svmm->mm->mmap_sem); return (int)ret; @@ -682,6 +680,7 @@ nouveau_svm_fault(struct nvif_notify *notify) args.i.p.addr + args.i.p.size, fn - fi); /* Have HMM fault pages within the fault window to the GPU. */ + range.page_shift = PAGE_SHIFT; range.start = args.i.p.addr; range.end = args.i.p.addr + args.i.p.size; range.pfns = args.phys; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 59be0aa2476d..c5b51376b453 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -400,11 +400,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror); /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ -int hmm_range_register(struct hmm_range *range, - struct hmm_mirror *mirror, - unsigned long start, - unsigned long end, - unsigned page_shift); +int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror); void hmm_range_unregister(struct hmm_range *range); /* diff --git a/mm/hmm.c b/mm/hmm.c index 160ecb47d04b..ab22fdb655c8 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -850,35 +850,25 @@ static void hmm_pfns_clear(struct hmm_range *range, * hmm_range_register() - start tracking change to CPU page table over a range * @range: range * @mm: the mm struct for the range of virtual address - * @start: start virtual address (inclusive) - * @end: end virtual address (exclusive) - * @page_shift: expect page shift for the range + * * Return: 0 on success, -EFAULT if the address space is no longer valid * * Track updates to the CPU page table see include/linux/hmm.h */ -int hmm_range_register(struct hmm_range *range, - struct hmm_mirror *mirror, - unsigned long start, - unsigned long end, - unsigned page_shift) +int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) { - unsigned long mask = ((1UL << page_shift) - 1UL); + unsigned long mask = ((1UL << range->page_shift) - 1UL); struct hmm *hmm = mirror->hmm; unsigned long flags; range->valid = false; range->hmm = NULL; - if ((start & mask) || (end & mask)) + if ((range->start & mask) || (range->end & mask)) return -EINVAL; - if (start >= end) + if (range->start >= range->end) return -EINVAL; - range->page_shift = page_shift; - range->start = start; - range->end = end; - /* Prevent hmm_release() from running while the range is valid */ if (!mmget_not_zero(hmm->mm)) return -EFAULT; -- cgit v1.2.3 From 7f08263d9bc6627382da14f9e81d643d0329d5d1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Aug 2019 19:05:45 +0300 Subject: mm/hmm: remove the page_shift member from struct hmm_range All users pass PAGE_SIZE here, and if we wanted to support single entries for huge pages we should really just add a HMM_FAULT_HUGEPAGE flag instead that uses the huge page size instead of having the caller calculate that size once, just for the hmm code to verify it. Link: https://lore.kernel.org/r/20190806160554.14046-8-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Felix Kuehling Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 - drivers/gpu/drm/nouveau/nouveau_svm.c | 1 - include/linux/hmm.h | 22 ----------------- mm/hmm.c | 42 +++++++-------------------------- 4 files changed, 9 insertions(+), 57 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 71d6e7087b0b..8bf79288c4e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -818,7 +818,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) 0 : range->flags[HMM_PFN_WRITE]; range->pfn_flags_mask = 0; range->pfns = pfns; - range->page_shift = PAGE_SHIFT; range->start = start; range->end = start + ttm->num_pages * PAGE_SIZE; diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 41fad4719ac6..668d4bd0c118 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -680,7 +680,6 @@ nouveau_svm_fault(struct nvif_notify *notify) args.i.p.addr + args.i.p.size, fn - fi); /* Have HMM fault pages within the fault window to the GPU. */ - range.page_shift = PAGE_SHIFT; range.start = args.i.p.addr; range.end = args.i.p.addr + args.i.p.size; range.pfns = args.phys; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index c5b51376b453..51e18fbb8953 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -158,7 +158,6 @@ enum hmm_pfn_value_e { * @values: pfn value for some special case (none, special, error, ...) * @default_flags: default flags for the range (write, read, ... see hmm doc) * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter - * @page_shift: device virtual address shift value (should be >= PAGE_SHIFT) * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT) * @valid: pfns array did not change since it has been fill by an HMM function */ @@ -172,31 +171,10 @@ struct hmm_range { const uint64_t *values; uint64_t default_flags; uint64_t pfn_flags_mask; - uint8_t page_shift; uint8_t pfn_shift; bool valid; }; -/* - * hmm_range_page_shift() - return the page shift for the range - * @range: range being queried - * Return: page shift (page size = 1 << page shift) for the range - */ -static inline unsigned hmm_range_page_shift(const struct hmm_range *range) -{ - return range->page_shift; -} - -/* - * hmm_range_page_size() - return the page size for the range - * @range: range being queried - * Return: page size for the range in bytes - */ -static inline unsigned long hmm_range_page_size(const struct hmm_range *range) -{ - return 1UL << hmm_range_page_shift(range); -} - /* * hmm_range_wait_until_valid() - wait for range to be valid * @range: range affected by invalidation to wait on diff --git a/mm/hmm.c b/mm/hmm.c index ab22fdb655c8..9e0052e037fe 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -345,13 +345,12 @@ static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end, struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; uint64_t *pfns = range->pfns; - unsigned long i, page_size; + unsigned long i; hmm_vma_walk->last = addr; - page_size = hmm_range_page_size(range); - i = (addr - range->start) >> range->page_shift; + i = (addr - range->start) >> PAGE_SHIFT; - for (; addr < end; addr += page_size, i++) { + for (; addr < end; addr += PAGE_SIZE, i++) { pfns[i] = range->values[HMM_PFN_NONE]; if (fault || write_fault) { int ret; @@ -779,7 +778,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, struct mm_walk *walk) { #ifdef CONFIG_HUGETLB_PAGE - unsigned long addr = start, i, pfn, mask, size, pfn_inc; + unsigned long addr = start, i, pfn, mask; struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; struct vm_area_struct *vma = walk->vma; @@ -790,24 +789,12 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, pte_t entry; int ret = 0; - size = huge_page_size(h); - mask = size - 1; - if (range->page_shift != PAGE_SHIFT) { - /* Make sure we are looking at a full page. */ - if (start & mask) - return -EINVAL; - if (end < (start + size)) - return -EINVAL; - pfn_inc = size >> PAGE_SHIFT; - } else { - pfn_inc = 1; - size = PAGE_SIZE; - } + mask = huge_page_size(h) - 1; ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); entry = huge_ptep_get(pte); - i = (start - range->start) >> range->page_shift; + i = (start - range->start) >> PAGE_SHIFT; orig_pfn = range->pfns[i]; range->pfns[i] = range->values[HMM_PFN_NONE]; cpu_flags = pte_to_hmm_pfn_flags(range, entry); @@ -819,8 +806,8 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, goto unlock; } - pfn = pte_pfn(entry) + ((start & mask) >> range->page_shift); - for (; addr < end; addr += size, i++, pfn += pfn_inc) + pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT); + for (; addr < end; addr += PAGE_SIZE, i++, pfn++) range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; hmm_vma_walk->last = end; @@ -857,14 +844,13 @@ static void hmm_pfns_clear(struct hmm_range *range, */ int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) { - unsigned long mask = ((1UL << range->page_shift) - 1UL); struct hmm *hmm = mirror->hmm; unsigned long flags; range->valid = false; range->hmm = NULL; - if ((range->start & mask) || (range->end & mask)) + if ((range->start & (PAGE_SIZE - 1)) || (range->end & (PAGE_SIZE - 1))) return -EINVAL; if (range->start >= range->end) return -EINVAL; @@ -971,16 +957,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) if (vma == NULL || (vma->vm_flags & device_vma)) return -EFAULT; - if (is_vm_hugetlb_page(vma)) { - if (huge_page_shift(hstate_vma(vma)) != - range->page_shift && - range->page_shift != PAGE_SHIFT) - return -EINVAL; - } else { - if (range->page_shift != PAGE_SHIFT) - return -EINVAL; - } - if (!(vma->vm_flags & VM_READ)) { /* * If vma do not allow read access, then assume that it -- cgit v1.2.3 From 9c240a7bb33787d4fed56508425ae0b2936b1674 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Aug 2019 19:05:52 +0300 Subject: mm/hmm: make HMM_MIRROR an implicit option Make HMM_MIRROR an option that is selected by drivers wanting to use it instead of a user visible option as it is just a low-level implementation detail. Link: https://lore.kernel.org/r/20190806160554.14046-15-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/amd/amdgpu/Kconfig | 4 +++- drivers/gpu/drm/nouveau/Kconfig | 4 +++- mm/Kconfig | 14 ++++++-------- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index f6e5c0282fc1..2e98c016cb47 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -27,7 +27,9 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - depends on HMM_MIRROR + depends on MMU + select HMM_MIRROR + select MMU_NOTIFIER help This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it isn't already selected to enabled full userptr support. diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 96b9814e6d06..df4352c279ba 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -86,9 +86,11 @@ config DRM_NOUVEAU_SVM bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support" depends on DEVICE_PRIVATE depends on DRM_NOUVEAU - depends on HMM_MIRROR + depends on MMU depends on STAGING + select HMM_MIRROR select MIGRATE_VMA_HELPER + select MMU_NOTIFIER default n help Say Y here if you want to enable experimental support for diff --git a/mm/Kconfig b/mm/Kconfig index b18782be969c..563436dc1f24 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -675,16 +675,14 @@ config MIGRATE_VMA_HELPER config DEV_PAGEMAP_OPS bool +# +# Helpers to mirror range of the CPU page tables of a process into device page +# tables. +# config HMM_MIRROR - bool "HMM mirror CPU page table into a device page table" + bool depends on MMU - select MMU_NOTIFIER - help - Select HMM_MIRROR if you want to mirror range of the CPU page table of a - process into a device page table. Here, mirror means "keep synchronized". - Prerequisites: the device must provide the ability to write-protect its - page tables (at PAGE_SIZE granularity), and must be able to recover from - the resulting potential page faults. + depends on MMU_NOTIFIER config DEVICE_PRIVATE bool "Unaddressable device memory (GPU memory, ...)" -- cgit v1.2.3 From c7d8b7824ff9de866a356e1892dbe9f191aa5d06 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Aug 2019 20:15:42 -0300 Subject: hmm: use mmu_notifier_get/put for 'struct hmm' This is a significant simplification, it eliminates all the remaining 'hmm' stuff in mm_struct, eliminates krefing along the critical notifier paths, and takes away all the ugly locking and abuse of page_table_lock. mmu_notifier_get() provides the single struct hmm per struct mm which eliminates mm->hmm. It also directly guarantees that no mmu_notifier op callback is callable while concurrent free is possible, this eliminates all the krefs inside the mmu_notifier callbacks. The remaining krefs in the range code were overly cautious, drivers are already not permitted to free the mirror while a range exists. Link: https://lore.kernel.org/r/20190806231548.25242-6-jgg@ziepe.ca Reviewed-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 + drivers/gpu/drm/nouveau/nouveau_drm.c | 3 + include/linux/hmm.h | 12 +--- include/linux/mm_types.h | 6 -- kernel/fork.c | 1 - mm/hmm.c | 121 ++++++++------------------------ 6 files changed, 34 insertions(+), 111 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f2e8b4238efd..abc233088201 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_irq.h" @@ -1464,6 +1465,7 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + mmu_notifier_synchronize(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 7c2fcaba42d6..a0e48a482452 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1292,6 +1293,8 @@ nouveau_drm_exit(void) #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER platform_driver_unregister(&nouveau_platform_driver); #endif + if (IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM)) + mmu_notifier_synchronize(); } module_init(nouveau_drm_init); diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 51e18fbb8953..3fec513b9c00 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -84,15 +84,12 @@ * @notifiers: count of active mmu notifiers */ struct hmm { - struct mm_struct *mm; - struct kref kref; + struct mmu_notifier mmu_notifier; spinlock_t ranges_lock; struct list_head ranges; struct list_head mirrors; - struct mmu_notifier mmu_notifier; struct rw_semaphore mirrors_sem; wait_queue_head_t wq; - struct rcu_head rcu; long notifiers; }; @@ -409,13 +406,6 @@ long hmm_range_dma_unmap(struct hmm_range *range, */ #define HMM_RANGE_DEFAULT_TIMEOUT 1000 -/* Below are for HMM internal use only! Not to be used by device driver! */ -static inline void hmm_mm_init(struct mm_struct *mm) -{ - mm->hmm = NULL; -} -#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -static inline void hmm_mm_init(struct mm_struct *mm) {} #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ #endif /* LINUX_HMM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3a37a89eb7a7..525d25d93330 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -25,7 +25,6 @@ struct address_space; struct mem_cgroup; -struct hmm; /* * Each physical page in the system has a struct page associated with @@ -502,11 +501,6 @@ struct mm_struct { atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; - -#ifdef CONFIG_HMM_MIRROR - /* HMM needs to track a few things per mm */ - struct hmm *hmm; -#endif } __randomize_layout; /* diff --git a/kernel/fork.c b/kernel/fork.c index d8ae0f1b4148..bd4a0762f12f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1007,7 +1007,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_owner(mm, p); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_mm_init(mm); - hmm_mm_init(mm); init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; diff --git a/mm/hmm.c b/mm/hmm.c index ef553e664061..49eace16f9f8 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -26,101 +26,37 @@ #include #include -static const struct mmu_notifier_ops hmm_mmu_notifier_ops; - -/** - * hmm_get_or_create - register HMM against an mm (HMM internal) - * - * @mm: mm struct to attach to - * Return: an HMM object, either by referencing the existing - * (per-process) object, or by creating a new one. - * - * This is not intended to be used directly by device drivers. If mm already - * has an HMM struct then it get a reference on it and returns it. Otherwise - * it allocates an HMM struct, initializes it, associate it with the mm and - * returns it. - */ -static struct hmm *hmm_get_or_create(struct mm_struct *mm) +static struct mmu_notifier *hmm_alloc_notifier(struct mm_struct *mm) { struct hmm *hmm; - lockdep_assert_held_write(&mm->mmap_sem); - - /* Abuse the page_table_lock to also protect mm->hmm. */ - spin_lock(&mm->page_table_lock); - hmm = mm->hmm; - if (mm->hmm && kref_get_unless_zero(&mm->hmm->kref)) - goto out_unlock; - spin_unlock(&mm->page_table_lock); - - hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); + hmm = kzalloc(sizeof(*hmm), GFP_KERNEL); if (!hmm) - return NULL; + return ERR_PTR(-ENOMEM); + init_waitqueue_head(&hmm->wq); INIT_LIST_HEAD(&hmm->mirrors); init_rwsem(&hmm->mirrors_sem); - hmm->mmu_notifier.ops = NULL; INIT_LIST_HEAD(&hmm->ranges); spin_lock_init(&hmm->ranges_lock); - kref_init(&hmm->kref); hmm->notifiers = 0; - hmm->mm = mm; - - hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; - if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) { - kfree(hmm); - return NULL; - } - - mmgrab(hmm->mm); - - /* - * We hold the exclusive mmap_sem here so we know that mm->hmm is - * still NULL or 0 kref, and is safe to update. - */ - spin_lock(&mm->page_table_lock); - mm->hmm = hmm; - -out_unlock: - spin_unlock(&mm->page_table_lock); - return hmm; + return &hmm->mmu_notifier; } -static void hmm_free_rcu(struct rcu_head *rcu) +static void hmm_free_notifier(struct mmu_notifier *mn) { - struct hmm *hmm = container_of(rcu, struct hmm, rcu); + struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); - mmdrop(hmm->mm); + WARN_ON(!list_empty(&hmm->ranges)); + WARN_ON(!list_empty(&hmm->mirrors)); kfree(hmm); } -static void hmm_free(struct kref *kref) -{ - struct hmm *hmm = container_of(kref, struct hmm, kref); - - spin_lock(&hmm->mm->page_table_lock); - if (hmm->mm->hmm == hmm) - hmm->mm->hmm = NULL; - spin_unlock(&hmm->mm->page_table_lock); - - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm); - mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu); -} - -static inline void hmm_put(struct hmm *hmm) -{ - kref_put(&hmm->kref, hmm_free); -} - static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); struct hmm_mirror *mirror; - /* Bail out if hmm is in the process of being freed */ - if (!kref_get_unless_zero(&hmm->kref)) - return; - /* * Since hmm_range_register() holds the mmget() lock hmm_release() is * prevented as long as a range exists. @@ -137,8 +73,6 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) mirror->ops->release(mirror); } up_read(&hmm->mirrors_sem); - - hmm_put(hmm); } static void notifiers_decrement(struct hmm *hmm) @@ -169,9 +103,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, unsigned long flags; int ret = 0; - if (!kref_get_unless_zero(&hmm->kref)) - return 0; - spin_lock_irqsave(&hmm->ranges_lock, flags); hmm->notifiers++; list_for_each_entry(range, &hmm->ranges, list) { @@ -206,7 +137,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, out: if (ret) notifiers_decrement(hmm); - hmm_put(hmm); return ret; } @@ -215,17 +145,15 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn, { struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); - if (!kref_get_unless_zero(&hmm->kref)) - return; - notifiers_decrement(hmm); - hmm_put(hmm); } static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { .release = hmm_release, .invalidate_range_start = hmm_invalidate_range_start, .invalidate_range_end = hmm_invalidate_range_end, + .alloc_notifier = hmm_alloc_notifier, + .free_notifier = hmm_free_notifier, }; /* @@ -237,18 +165,27 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { * * To start mirroring a process address space, the device driver must register * an HMM mirror struct. + * + * The caller cannot unregister the hmm_mirror while any ranges are + * registered. + * + * Callers using this function must put a call to mmu_notifier_synchronize() + * in their module exit functions. */ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) { + struct mmu_notifier *mn; + lockdep_assert_held_write(&mm->mmap_sem); /* Sanity check */ if (!mm || !mirror || !mirror->ops) return -EINVAL; - mirror->hmm = hmm_get_or_create(mm); - if (!mirror->hmm) - return -ENOMEM; + mn = mmu_notifier_get_locked(&hmm_mmu_notifier_ops, mm); + if (IS_ERR(mn)) + return PTR_ERR(mn); + mirror->hmm = container_of(mn, struct hmm, mmu_notifier); down_write(&mirror->hmm->mirrors_sem); list_add(&mirror->list, &mirror->hmm->mirrors); @@ -272,7 +209,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror) down_write(&hmm->mirrors_sem); list_del(&mirror->list); up_write(&hmm->mirrors_sem); - hmm_put(hmm); + mmu_notifier_put(&hmm->mmu_notifier); } EXPORT_SYMBOL(hmm_mirror_unregister); @@ -854,14 +791,13 @@ int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) return -EINVAL; /* Prevent hmm_release() from running while the range is valid */ - if (!mmget_not_zero(hmm->mm)) + if (!mmget_not_zero(hmm->mmu_notifier.mm)) return -EFAULT; /* Initialize range to track CPU page table updates. */ spin_lock_irqsave(&hmm->ranges_lock, flags); range->hmm = hmm; - kref_get(&hmm->kref); list_add(&range->list, &hmm->ranges); /* @@ -893,8 +829,7 @@ void hmm_range_unregister(struct hmm_range *range) spin_unlock_irqrestore(&hmm->ranges_lock, flags); /* Drop reference taken by hmm_range_register() */ - mmput(hmm->mm); - hmm_put(hmm); + mmput(hmm->mmu_notifier.mm); /* * The range is now invalid and the ref on the hmm is dropped, so @@ -944,14 +879,14 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) struct mm_walk mm_walk; int ret; - lockdep_assert_held(&hmm->mm->mmap_sem); + lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem); do { /* If range is no longer valid force retry. */ if (!range->valid) return -EBUSY; - vma = find_vma(hmm->mm, start); + vma = find_vma(hmm->mmu_notifier.mm, start); if (vma == NULL || (vma->vm_flags & device_vma)) return -EFAULT; -- cgit v1.2.3 From a7d1f22bb74f32cf3cd93f52776007e161f1a738 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Aug 2019 09:59:19 +0200 Subject: mm: turn migrate_vma upside down There isn't any good reason to pass callbacks to migrate_vma. Instead we can just export the three steps done by this function to drivers and let them sequence the operation without callbacks. This removes a lot of boilerplate code as-is, and will allow the drivers to drastically improve code flow and error handling further on. Link: https://lore.kernel.org/r/20190814075928.23766-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- Documentation/vm/hmm.rst | 54 +------- drivers/gpu/drm/nouveau/nouveau_dmem.c | 122 +++++++++-------- include/linux/migrate.h | 118 +++------------- mm/migrate.c | 244 +++++++++++++++------------------ 4 files changed, 194 insertions(+), 344 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst index e63c11f7e0e0..0a5960beccf7 100644 --- a/Documentation/vm/hmm.rst +++ b/Documentation/vm/hmm.rst @@ -339,58 +339,8 @@ Migration to and from device memory =================================== Because the CPU cannot access device memory, migration must use the device DMA -engine to perform copy from and to device memory. For this we need a new -migration helper:: - - int migrate_vma(const struct migrate_vma_ops *ops, - struct vm_area_struct *vma, - unsigned long mentries, - unsigned long start, - unsigned long end, - unsigned long *src, - unsigned long *dst, - void *private); - -Unlike other migration functions it works on a range of virtual address, there -are two reasons for that. First, device DMA copy has a high setup overhead cost -and thus batching multiple pages is needed as otherwise the migration overhead -makes the whole exercise pointless. The second reason is because the -migration might be for a range of addresses the device is actively accessing. - -The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy()) -controls destination memory allocation and copy operation. Second one is there -to allow the device driver to perform cleanup operations after migration:: - - struct migrate_vma_ops { - void (*alloc_and_copy)(struct vm_area_struct *vma, - const unsigned long *src, - unsigned long *dst, - unsigned long start, - unsigned long end, - void *private); - void (*finalize_and_map)(struct vm_area_struct *vma, - const unsigned long *src, - const unsigned long *dst, - unsigned long start, - unsigned long end, - void *private); - }; - -It is important to stress that these migration helpers allow for holes in the -virtual address range. Some pages in the range might not be migrated for all -the usual reasons (page is pinned, page is locked, ...). This helper does not -fail but just skips over those pages. - -The alloc_and_copy() might decide to not migrate all pages in the -range (for reasons under the callback control). For those, the callback just -has to leave the corresponding dst entry empty. - -Finally, the migration of the struct page might fail (for file backed page) for -various reasons (failure to freeze reference, or update page cache, ...). If -that happens, then the finalize_and_map() can catch any pages that were not -migrated. Note those pages were still copied to a new page and thus we wasted -bandwidth but this is considered as a rare event and a price that we are -willing to pay to keep all the code simpler. +engine to perform copy from and to device memory. For this we need to use +migrate_vma_setup(), migrate_vma_pages(), and migrate_vma_finalize() helpers. Memory cgroup (memcg) and rss accounting diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 345c63cb752a..38416798abd4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -131,9 +131,8 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, unsigned long *dst_pfns, unsigned long start, unsigned long end, - void *private) + struct nouveau_dmem_fault *fault) { - struct nouveau_dmem_fault *fault = private; struct nouveau_drm *drm = fault->drm; struct device *dev = drm->dev->dev; unsigned long addr, i, npages = 0; @@ -230,14 +229,9 @@ error: } } -void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, - const unsigned long *src_pfns, - const unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static void +nouveau_dmem_fault_finalize_and_map(struct nouveau_dmem_fault *fault) { - struct nouveau_dmem_fault *fault = private; struct nouveau_drm *drm = fault->drm; if (fault->fence) { @@ -257,29 +251,35 @@ void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, kfree(fault->dma); } -static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = { - .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy, - .finalize_and_map = nouveau_dmem_fault_finalize_and_map, -}; - static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) { struct nouveau_dmem *dmem = page_to_dmem(vmf->page); unsigned long src[1] = {0}, dst[1] = {0}; + struct migrate_vma args = { + .vma = vmf->vma, + .start = vmf->address, + .end = vmf->address + PAGE_SIZE, + .src = src, + .dst = dst, + }; struct nouveau_dmem_fault fault = { .drm = dmem->drm }; - int ret; /* * FIXME what we really want is to find some heuristic to migrate more * than just one page on CPU fault. When such fault happens it is very * likely that more surrounding page will CPU fault too. */ - ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma, - vmf->address, vmf->address + PAGE_SIZE, - src, dst, &fault); - if (ret) + if (migrate_vma_setup(&args) < 0) return VM_FAULT_SIGBUS; + if (!args.cpages) + return 0; + + nouveau_dmem_fault_alloc_and_copy(args.vma, src, dst, args.start, + args.end, &fault); + migrate_vma_pages(&args); + nouveau_dmem_fault_finalize_and_map(&fault); + migrate_vma_finalize(&args); if (dst[0] == MIGRATE_PFN_ERROR) return VM_FAULT_SIGBUS; @@ -648,9 +648,8 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, unsigned long *dst_pfns, unsigned long start, unsigned long end, - void *private) + struct nouveau_migrate *migrate) { - struct nouveau_migrate *migrate = private; struct nouveau_drm *drm = migrate->drm; struct device *dev = drm->dev->dev; unsigned long addr, i, npages = 0; @@ -747,14 +746,9 @@ error: } } -void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, - const unsigned long *src_pfns, - const unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static void +nouveau_dmem_migrate_finalize_and_map(struct nouveau_migrate *migrate) { - struct nouveau_migrate *migrate = private; struct nouveau_drm *drm = migrate->drm; if (migrate->fence) { @@ -779,10 +773,15 @@ void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, */ } -static const struct migrate_vma_ops nouveau_dmem_migrate_ops = { - .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy, - .finalize_and_map = nouveau_dmem_migrate_finalize_and_map, -}; +static void nouveau_dmem_migrate_chunk(struct migrate_vma *args, + struct nouveau_migrate *migrate) +{ + nouveau_dmem_migrate_alloc_and_copy(args->vma, args->src, args->dst, + args->start, args->end, migrate); + migrate_vma_pages(args); + nouveau_dmem_migrate_finalize_and_map(migrate); + migrate_vma_finalize(args); +} int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, @@ -790,40 +789,45 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, unsigned long start, unsigned long end) { - unsigned long *src_pfns, *dst_pfns, npages; - struct nouveau_migrate migrate = {0}; - unsigned long i, c, max; - int ret = 0; - - npages = (end - start) >> PAGE_SHIFT; - max = min(SG_MAX_SINGLE_ALLOC, npages); - src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); - if (src_pfns == NULL) - return -ENOMEM; - dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); - if (dst_pfns == NULL) { - kfree(src_pfns); - return -ENOMEM; - } + unsigned long npages = (end - start) >> PAGE_SHIFT; + unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages); + struct migrate_vma args = { + .vma = vma, + .start = start, + }; + struct nouveau_migrate migrate = { + .drm = drm, + .vma = vma, + .npages = npages, + }; + unsigned long c, i; + int ret = -ENOMEM; + + args.src = kzalloc(sizeof(long) * max, GFP_KERNEL); + if (!args.src) + goto out; + args.dst = kzalloc(sizeof(long) * max, GFP_KERNEL); + if (!args.dst) + goto out_free_src; - migrate.drm = drm; - migrate.vma = vma; - migrate.npages = npages; for (i = 0; i < npages; i += c) { - unsigned long next; - c = min(SG_MAX_SINGLE_ALLOC, npages); - next = start + (c << PAGE_SHIFT); - ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start, - next, src_pfns, dst_pfns, &migrate); + args.end = start + (c << PAGE_SHIFT); + ret = migrate_vma_setup(&args); if (ret) - goto out; - start = next; + goto out_free_dst; + + if (args.cpages) + nouveau_dmem_migrate_chunk(&args, &migrate); + args.start = args.end; } + ret = 0; +out_free_dst: + kfree(args.dst); +out_free_src: + kfree(args.src); out: - kfree(dst_pfns); - kfree(src_pfns); return ret; } diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 7f04754c7f2b..18156d379ebf 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -182,107 +182,27 @@ static inline unsigned long migrate_pfn(unsigned long pfn) return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID; } -/* - * struct migrate_vma_ops - migrate operation callback - * - * @alloc_and_copy: alloc destination memory and copy source memory to it - * @finalize_and_map: allow caller to map the successfully migrated pages - * - * - * The alloc_and_copy() callback happens once all source pages have been locked, - * unmapped and checked (checked whether pinned or not). All pages that can be - * migrated will have an entry in the src array set with the pfn value of the - * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set (other - * flags might be set but should be ignored by the callback). - * - * The alloc_and_copy() callback can then allocate destination memory and copy - * source memory to it for all those entries (ie with MIGRATE_PFN_VALID and - * MIGRATE_PFN_MIGRATE flag set). Once these are allocated and copied, the - * callback must update each corresponding entry in the dst array with the pfn - * value of the destination page and with the MIGRATE_PFN_VALID and - * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct pages - * locked, via lock_page()). - * - * At this point the alloc_and_copy() callback is done and returns. - * - * Note that the callback does not have to migrate all the pages that are - * marked with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration - * from device memory to system memory (ie the MIGRATE_PFN_DEVICE flag is also - * set in the src array entry). If the device driver cannot migrate a device - * page back to system memory, then it must set the corresponding dst array - * entry to MIGRATE_PFN_ERROR. This will trigger a SIGBUS if CPU tries to - * access any of the virtual addresses originally backed by this page. Because - * a SIGBUS is such a severe result for the userspace process, the device - * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an - * unrecoverable state. - * - * For empty entry inside CPU page table (pte_none() or pmd_none() is true) we - * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus - * allowing device driver to allocate device memory for those unback virtual - * address. For this the device driver simply have to allocate device memory - * and properly set the destination entry like for regular migration. Note that - * this can still fails and thus inside the device driver must check if the - * migration was successful for those entry inside the finalize_and_map() - * callback just like for regular migration. - * - * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES - * OR BAD THINGS WILL HAPPEN ! - * - * - * The finalize_and_map() callback happens after struct page migration from - * source to destination (destination struct pages are the struct pages for the - * memory allocated by the alloc_and_copy() callback). Migration can fail, and - * thus the finalize_and_map() allows the driver to inspect which pages were - * successfully migrated, and which were not. Successfully migrated pages will - * have the MIGRATE_PFN_MIGRATE flag set for their src array entry. - * - * It is safe to update device page table from within the finalize_and_map() - * callback because both destination and source page are still locked, and the - * mmap_sem is held in read mode (hence no one can unmap the range being - * migrated). - * - * Once callback is done cleaning up things and updating its page table (if it - * chose to do so, this is not an obligation) then it returns. At this point, - * the HMM core will finish up the final steps, and the migration is complete. - * - * THE finalize_and_map() CALLBACK MUST NOT CHANGE ANY OF THE SRC OR DST ARRAY - * ENTRIES OR BAD THINGS WILL HAPPEN ! - */ -struct migrate_vma_ops { - void (*alloc_and_copy)(struct vm_area_struct *vma, - const unsigned long *src, - unsigned long *dst, - unsigned long start, - unsigned long end, - void *private); - void (*finalize_and_map)(struct vm_area_struct *vma, - const unsigned long *src, - const unsigned long *dst, - unsigned long start, - unsigned long end, - void *private); +struct migrate_vma { + struct vm_area_struct *vma; + /* + * Both src and dst array must be big enough for + * (end - start) >> PAGE_SHIFT entries. + * + * The src array must not be modified by the caller after + * migrate_vma_setup(), and must not change the dst array after + * migrate_vma_pages() returns. + */ + unsigned long *dst; + unsigned long *src; + unsigned long cpages; + unsigned long npages; + unsigned long start; + unsigned long end; }; -#if defined(CONFIG_MIGRATE_VMA_HELPER) -int migrate_vma(const struct migrate_vma_ops *ops, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - unsigned long *src, - unsigned long *dst, - void *private); -#else -static inline int migrate_vma(const struct migrate_vma_ops *ops, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - unsigned long *src, - unsigned long *dst, - void *private) -{ - return -EINVAL; -} -#endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */ +int migrate_vma_setup(struct migrate_vma *args); +void migrate_vma_pages(struct migrate_vma *migrate); +void migrate_vma_finalize(struct migrate_vma *migrate); #endif /* CONFIG_MIGRATION */ diff --git a/mm/migrate.c b/mm/migrate.c index 8992741f10aa..8111e031fa2b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2118,16 +2118,6 @@ out_unlock: #endif /* CONFIG_NUMA */ #if defined(CONFIG_MIGRATE_VMA_HELPER) -struct migrate_vma { - struct vm_area_struct *vma; - unsigned long *dst; - unsigned long *src; - unsigned long cpages; - unsigned long npages; - unsigned long start; - unsigned long end; -}; - static int migrate_vma_collect_hole(unsigned long start, unsigned long end, struct mm_walk *walk) @@ -2578,6 +2568,110 @@ restore: } } +/** + * migrate_vma_setup() - prepare to migrate a range of memory + * @args: contains the vma, start, and and pfns arrays for the migration + * + * Returns: negative errno on failures, 0 when 0 or more pages were migrated + * without an error. + * + * Prepare to migrate a range of memory virtual address range by collecting all + * the pages backing each virtual address in the range, saving them inside the + * src array. Then lock those pages and unmap them. Once the pages are locked + * and unmapped, check whether each page is pinned or not. Pages that aren't + * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the + * corresponding src array entry. Then restores any pages that are pinned, by + * remapping and unlocking those pages. + * + * The caller should then allocate destination memory and copy source memory to + * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE + * flag set). Once these are allocated and copied, the caller must update each + * corresponding entry in the dst array with the pfn value of the destination + * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set + * (destination pages must have their struct pages locked, via lock_page()). + * + * Note that the caller does not have to migrate all the pages that are marked + * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from + * device memory to system memory. If the caller cannot migrate a device page + * back to system memory, then it must return VM_FAULT_SIGBUS, which has severe + * consequences for the userspace process, so it must be avoided if at all + * possible. + * + * For empty entries inside CPU page table (pte_none() or pmd_none() is true) we + * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus + * allowing the caller to allocate device memory for those unback virtual + * address. For this the caller simply has to allocate device memory and + * properly set the destination entry like for regular migration. Note that + * this can still fails and thus inside the device driver must check if the + * migration was successful for those entries after calling migrate_vma_pages() + * just like for regular migration. + * + * After that, the callers must call migrate_vma_pages() to go over each entry + * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag + * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set, + * then migrate_vma_pages() to migrate struct page information from the source + * struct page to the destination struct page. If it fails to migrate the + * struct page information, then it clears the MIGRATE_PFN_MIGRATE flag in the + * src array. + * + * At this point all successfully migrated pages have an entry in the src + * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst + * array entry with MIGRATE_PFN_VALID flag set. + * + * Once migrate_vma_pages() returns the caller may inspect which pages were + * successfully migrated, and which were not. Successfully migrated pages will + * have the MIGRATE_PFN_MIGRATE flag set for their src array entry. + * + * It is safe to update device page table after migrate_vma_pages() because + * both destination and source page are still locked, and the mmap_sem is held + * in read mode (hence no one can unmap the range being migrated). + * + * Once the caller is done cleaning up things and updating its page table (if it + * chose to do so, this is not an obligation) it finally calls + * migrate_vma_finalize() to update the CPU page table to point to new pages + * for successfully migrated pages or otherwise restore the CPU page table to + * point to the original source pages. + */ +int migrate_vma_setup(struct migrate_vma *args) +{ + long nr_pages = (args->end - args->start) >> PAGE_SHIFT; + + args->start &= PAGE_MASK; + args->end &= PAGE_MASK; + if (!args->vma || is_vm_hugetlb_page(args->vma) || + (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma)) + return -EINVAL; + if (nr_pages <= 0) + return -EINVAL; + if (args->start < args->vma->vm_start || + args->start >= args->vma->vm_end) + return -EINVAL; + if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end) + return -EINVAL; + if (!args->src || !args->dst) + return -EINVAL; + + memset(args->src, 0, sizeof(*args->src) * nr_pages); + args->cpages = 0; + args->npages = 0; + + migrate_vma_collect(args); + + if (args->cpages) + migrate_vma_prepare(args); + if (args->cpages) + migrate_vma_unmap(args); + + /* + * At this point pages are locked and unmapped, and thus they have + * stable content and can safely be copied to destination memory that + * is allocated by the drivers. + */ + return 0; + +} +EXPORT_SYMBOL(migrate_vma_setup); + static void migrate_vma_insert_page(struct migrate_vma *migrate, unsigned long addr, struct page *page, @@ -2709,7 +2803,7 @@ abort: *src &= ~MIGRATE_PFN_MIGRATE; } -/* +/** * migrate_vma_pages() - migrate meta-data from src page to dst page * @migrate: migrate struct containing all migration information * @@ -2717,7 +2811,7 @@ abort: * struct page. This effectively finishes the migration from source page to the * destination page. */ -static void migrate_vma_pages(struct migrate_vma *migrate) +void migrate_vma_pages(struct migrate_vma *migrate) { const unsigned long npages = migrate->npages; const unsigned long start = migrate->start; @@ -2791,8 +2885,9 @@ static void migrate_vma_pages(struct migrate_vma *migrate) if (notified) mmu_notifier_invalidate_range_only_end(&range); } +EXPORT_SYMBOL(migrate_vma_pages); -/* +/** * migrate_vma_finalize() - restore CPU page table entry * @migrate: migrate struct containing all migration information * @@ -2803,7 +2898,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate) * This also unlocks the pages and puts them back on the lru, or drops the extra * refcount, for device pages. */ -static void migrate_vma_finalize(struct migrate_vma *migrate) +void migrate_vma_finalize(struct migrate_vma *migrate) { const unsigned long npages = migrate->npages; unsigned long i; @@ -2846,124 +2941,5 @@ static void migrate_vma_finalize(struct migrate_vma *migrate) } } } - -/* - * migrate_vma() - migrate a range of memory inside vma - * - * @ops: migration callback for allocating destination memory and copying - * @vma: virtual memory area containing the range to be migrated - * @start: start address of the range to migrate (inclusive) - * @end: end address of the range to migrate (exclusive) - * @src: array of hmm_pfn_t containing source pfns - * @dst: array of hmm_pfn_t containing destination pfns - * @private: pointer passed back to each of the callback - * Returns: 0 on success, error code otherwise - * - * This function tries to migrate a range of memory virtual address range, using - * callbacks to allocate and copy memory from source to destination. First it - * collects all the pages backing each virtual address in the range, saving this - * inside the src array. Then it locks those pages and unmaps them. Once the pages - * are locked and unmapped, it checks whether each page is pinned or not. Pages - * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) - * in the corresponding src array entry. It then restores any pages that are - * pinned, by remapping and unlocking those pages. - * - * At this point it calls the alloc_and_copy() callback. For documentation on - * what is expected from that callback, see struct migrate_vma_ops comments in - * include/linux/migrate.h - * - * After the alloc_and_copy() callback, this function goes over each entry in - * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag - * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set, - * then the function tries to migrate struct page information from the source - * struct page to the destination struct page. If it fails to migrate the struct - * page information, then it clears the MIGRATE_PFN_MIGRATE flag in the src - * array. - * - * At this point all successfully migrated pages have an entry in the src - * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst - * array entry with MIGRATE_PFN_VALID flag set. - * - * It then calls the finalize_and_map() callback. See comments for "struct - * migrate_vma_ops", in include/linux/migrate.h for details about - * finalize_and_map() behavior. - * - * After the finalize_and_map() callback, for successfully migrated pages, this - * function updates the CPU page table to point to new pages, otherwise it - * restores the CPU page table to point to the original source pages. - * - * Function returns 0 after the above steps, even if no pages were migrated - * (The function only returns an error if any of the arguments are invalid.) - * - * Both src and dst array must be big enough for (end - start) >> PAGE_SHIFT - * unsigned long entries. - */ -int migrate_vma(const struct migrate_vma_ops *ops, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - unsigned long *src, - unsigned long *dst, - void *private) -{ - struct migrate_vma migrate; - - /* Sanity check the arguments */ - start &= PAGE_MASK; - end &= PAGE_MASK; - if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) || - vma_is_dax(vma)) - return -EINVAL; - if (start < vma->vm_start || start >= vma->vm_end) - return -EINVAL; - if (end <= vma->vm_start || end > vma->vm_end) - return -EINVAL; - if (!ops || !src || !dst || start >= end) - return -EINVAL; - - memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT)); - migrate.src = src; - migrate.dst = dst; - migrate.start = start; - migrate.npages = 0; - migrate.cpages = 0; - migrate.end = end; - migrate.vma = vma; - - /* Collect, and try to unmap source pages */ - migrate_vma_collect(&migrate); - if (!migrate.cpages) - return 0; - - /* Lock and isolate page */ - migrate_vma_prepare(&migrate); - if (!migrate.cpages) - return 0; - - /* Unmap pages */ - migrate_vma_unmap(&migrate); - if (!migrate.cpages) - return 0; - - /* - * At this point pages are locked and unmapped, and thus they have - * stable content and can safely be copied to destination memory that - * is allocated by the callback. - * - * Note that migration can fail in migrate_vma_struct_page() for each - * individual page. - */ - ops->alloc_and_copy(vma, src, dst, start, end, private); - - /* This does the real migration of struct page */ - migrate_vma_pages(&migrate); - - ops->finalize_and_map(vma, src, dst, start, end, private); - - /* Unlock and remap pages */ - migrate_vma_finalize(&migrate); - - return 0; -} -EXPORT_SYMBOL(migrate_vma); +EXPORT_SYMBOL(migrate_vma_finalize); #endif /* defined(MIGRATE_VMA_HELPER) */ -- cgit v1.2.3 From dea027f28209aa7a23a4e56b2ae270f16d48055d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Aug 2019 09:59:20 +0200 Subject: nouveau: reset dma_nr in nouveau_dmem_migrate_alloc_and_copy When we start a new batch of dma_map operations we need to reset dma_nr, as we start filling a newly allocated array. Link: https://lore.kernel.org/r/20190814075928.23766-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 38416798abd4..e696157f771e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -682,6 +682,7 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL); if (!migrate->dma) goto error; + migrate->dma_nr = 0; /* Copy things over */ copy = drm->dmem->migrate.copy_func; -- cgit v1.2.3 From 64de8b8d65f9bfd0fced9029bcec37257614ccaf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Aug 2019 09:59:21 +0200 Subject: nouveau: factor out device memory address calculation Factor out the repeated device memory address calculation into a helper. Link: https://lore.kernel.org/r/20190814075928.23766-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 42 ++++++++++++++-------------------- 1 file changed, 17 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index e696157f771e..d469bc334438 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -102,6 +102,14 @@ struct nouveau_migrate { unsigned long dma_nr; }; +static unsigned long nouveau_dmem_page_addr(struct page *page) +{ + struct nouveau_dmem_chunk *chunk = page->zone_device_data; + unsigned long idx = page_to_pfn(page) - chunk->pfn_first; + + return (idx << PAGE_SHIFT) + chunk->bo->bo.offset; +} + static void nouveau_dmem_page_free(struct page *page) { struct nouveau_dmem_chunk *chunk = page->zone_device_data; @@ -169,9 +177,7 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, /* Copy things over */ copy = drm->dmem->migrate.copy_func; for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct nouveau_dmem_chunk *chunk; struct page *spage, *dpage; - u64 src_addr, dst_addr; dpage = migrate_pfn_to_page(dst_pfns[i]); if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) @@ -194,14 +200,10 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, continue; } - dst_addr = fault->dma[fault->npages++]; - - chunk = spage->zone_device_data; - src_addr = page_to_pfn(spage) - chunk->pfn_first; - src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset; - - ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr, - NOUVEAU_APER_VRAM, src_addr); + ret = copy(drm, 1, NOUVEAU_APER_HOST, + fault->dma[fault->npages++], + NOUVEAU_APER_VRAM, + nouveau_dmem_page_addr(spage)); if (ret) { dst_pfns[i] = MIGRATE_PFN_ERROR; __free_page(dpage); @@ -687,18 +689,12 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, /* Copy things over */ copy = drm->dmem->migrate.copy_func; for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct nouveau_dmem_chunk *chunk; struct page *spage, *dpage; - u64 src_addr, dst_addr; dpage = migrate_pfn_to_page(dst_pfns[i]); if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) continue; - chunk = dpage->zone_device_data; - dst_addr = page_to_pfn(dpage) - chunk->pfn_first; - dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset; - spage = migrate_pfn_to_page(src_pfns[i]); if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { nouveau_dmem_page_free_locked(drm, dpage); @@ -716,10 +712,10 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, continue; } - src_addr = migrate->dma[migrate->dma_nr++]; - - ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr, - NOUVEAU_APER_HOST, src_addr); + ret = copy(drm, 1, NOUVEAU_APER_VRAM, + nouveau_dmem_page_addr(dpage), + NOUVEAU_APER_HOST, + migrate->dma[migrate->dma_nr++]); if (ret) { nouveau_dmem_page_free_locked(drm, dpage); dst_pfns[i] = 0; @@ -846,7 +842,6 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, npages = (range->end - range->start) >> PAGE_SHIFT; for (i = 0; i < npages; ++i) { - struct nouveau_dmem_chunk *chunk; struct page *page; uint64_t addr; @@ -864,10 +859,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, continue; } - chunk = page->zone_device_data; - addr = page_to_pfn(page) - chunk->pfn_first; - addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT; - + addr = nouveau_dmem_page_addr(page); range->pfns[i] &= ((1UL << range->pfn_shift) - 1); range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; } -- cgit v1.2.3 From 2ab2bda53c986a93e46228bff4362b6267202f81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Aug 2019 09:59:22 +0200 Subject: nouveau: factor out dmem fence completion Factor out the end of fencing logic from the two migration routines. Link: https://lore.kernel.org/r/20190814075928.23766-5-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index d469bc334438..21052a4aaf69 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -133,6 +133,19 @@ static void nouveau_dmem_page_free(struct page *page) spin_unlock(&chunk->lock); } +static void nouveau_dmem_fence_done(struct nouveau_fence **fence) +{ + if (fence) { + nouveau_fence_wait(*fence, true, false); + nouveau_fence_unref(fence); + } else { + /* + * FIXME wait for channel to be IDLE before calling finalizing + * the hmem object. + */ + } +} + static void nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, const unsigned long *src_pfns, @@ -236,15 +249,7 @@ nouveau_dmem_fault_finalize_and_map(struct nouveau_dmem_fault *fault) { struct nouveau_drm *drm = fault->drm; - if (fault->fence) { - nouveau_fence_wait(fault->fence, true, false); - nouveau_fence_unref(&fault->fence); - } else { - /* - * FIXME wait for channel to be IDLE before calling finalizing - * the hmem object below (nouveau_migrate_hmem_fini()). - */ - } + nouveau_dmem_fence_done(&fault->fence); while (fault->npages--) { dma_unmap_page(drm->dev->dev, fault->dma[fault->npages], @@ -748,15 +753,7 @@ nouveau_dmem_migrate_finalize_and_map(struct nouveau_migrate *migrate) { struct nouveau_drm *drm = migrate->drm; - if (migrate->fence) { - nouveau_fence_wait(migrate->fence, true, false); - nouveau_fence_unref(&migrate->fence); - } else { - /* - * FIXME wait for channel to be IDLE before finalizing - * the hmem object below (nouveau_migrate_hmem_fini()) ? - */ - } + nouveau_dmem_fence_done(&migrate->fence); while (migrate->dma_nr--) { dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr], -- cgit v1.2.3 From 107ba59fc6d761ca996243dd0a1ee89addc295af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Aug 2019 09:59:23 +0200 Subject: nouveau: remove a few function stubs nouveau_dmem_migrate_vma and nouveau_dmem_convert_pfn are only called when CONFIG_DRM_NOUVEAU_SVM is enabled, so there is no need to provide !CONFIG_DRM_NOUVEAU_SVM stubs for them. Link: https://lore.kernel.org/r/20190814075928.23766-6-hch@lst.de Signed-off-by: Christoph Hellwig Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.h b/drivers/gpu/drm/nouveau/nouveau_dmem.h index 9d97d756fb7d..92394be5d649 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.h +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.h @@ -45,16 +45,5 @@ static inline void nouveau_dmem_init(struct nouveau_drm *drm) {} static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {} static inline void nouveau_dmem_suspend(struct nouveau_drm *drm) {} static inline void nouveau_dmem_resume(struct nouveau_drm *drm) {} - -static inline int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, - struct hmm_range *range) {} #endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ #endif -- cgit v1.2.3 From bfe69ef94aeeae76531b8f37799b8ac6fca56957 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Aug 2019 09:59:24 +0200 Subject: nouveau: simplify nouveau_dmem_migrate_to_ram Factor the main copy page to ram routine out into a helper that acts on a single page and which doesn't require the nouveau_dmem_fault structure for argument passing. Also remove the loop over multiple pages as we only handle one at the moment, although the structure of the main worker function makes it relatively easy to add multi page support back if needed in the future. But at least for now this avoid the needed to dynamically allocate memory for the dma addresses in what is essentially the page fault path. Link: https://lore.kernel.org/r/20190814075928.23766-7-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 161 ++++++++------------------------- 1 file changed, 40 insertions(+), 121 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 21052a4aaf69..7dded864022c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -86,13 +86,6 @@ static inline struct nouveau_dmem *page_to_dmem(struct page *page) return container_of(page->pgmap, struct nouveau_dmem, pagemap); } -struct nouveau_dmem_fault { - struct nouveau_drm *drm; - struct nouveau_fence *fence; - dma_addr_t *dma; - unsigned long npages; -}; - struct nouveau_migrate { struct vm_area_struct *vma; struct nouveau_drm *drm; @@ -146,130 +139,55 @@ static void nouveau_dmem_fence_done(struct nouveau_fence **fence) } } -static void -nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - struct nouveau_dmem_fault *fault) +static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm, + struct vm_fault *vmf, struct migrate_vma *args, + dma_addr_t *dma_addr) { - struct nouveau_drm *drm = fault->drm; struct device *dev = drm->dev->dev; - unsigned long addr, i, npages = 0; - nouveau_migrate_copy_t copy; - int ret; - - - /* First allocate new memory */ - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *dpage, *spage; - - dst_pfns[i] = 0; - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) - continue; - - dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr); - if (!dpage) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - continue; - } - lock_page(dpage); - - dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED; - npages++; - } - - /* Allocate storage for DMA addresses, so we can unmap later. */ - fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL); - if (!fault->dma) - goto error; - - /* Copy things over */ - copy = drm->dmem->migrate.copy_func; - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *spage, *dpage; - - dpage = migrate_pfn_to_page(dst_pfns[i]); - if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; - - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } + struct page *dpage, *spage; - fault->dma[fault->npages] = - dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, fault->dma[fault->npages])) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } - - ret = copy(drm, 1, NOUVEAU_APER_HOST, - fault->dma[fault->npages++], - NOUVEAU_APER_VRAM, - nouveau_dmem_page_addr(spage)); - if (ret) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } - } - - nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence); - - return; - -error: - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { - struct page *page; - - if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; + spage = migrate_pfn_to_page(args->src[0]); + if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE)) + return 0; - page = migrate_pfn_to_page(dst_pfns[i]); - dst_pfns[i] = MIGRATE_PFN_ERROR; - if (page == NULL) - continue; + dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address); + if (!dpage) + return VM_FAULT_SIGBUS; + lock_page(dpage); - __free_page(page); - } -} + *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) + goto error_free_page; -static void -nouveau_dmem_fault_finalize_and_map(struct nouveau_dmem_fault *fault) -{ - struct nouveau_drm *drm = fault->drm; + if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr, + NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage))) + goto error_dma_unmap; - nouveau_dmem_fence_done(&fault->fence); + args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + return 0; - while (fault->npages--) { - dma_unmap_page(drm->dev->dev, fault->dma[fault->npages], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - } - kfree(fault->dma); +error_dma_unmap: + dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +error_free_page: + __free_page(dpage); + return VM_FAULT_SIGBUS; } static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) { struct nouveau_dmem *dmem = page_to_dmem(vmf->page); - unsigned long src[1] = {0}, dst[1] = {0}; + struct nouveau_drm *drm = dmem->drm; + struct nouveau_fence *fence; + unsigned long src = 0, dst = 0; + dma_addr_t dma_addr = 0; + vm_fault_t ret; struct migrate_vma args = { .vma = vmf->vma, .start = vmf->address, .end = vmf->address + PAGE_SIZE, - .src = src, - .dst = dst, + .src = &src, + .dst = &dst, }; - struct nouveau_dmem_fault fault = { .drm = dmem->drm }; /* * FIXME what we really want is to find some heuristic to migrate more @@ -281,16 +199,17 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) if (!args.cpages) return 0; - nouveau_dmem_fault_alloc_and_copy(args.vma, src, dst, args.start, - args.end, &fault); - migrate_vma_pages(&args); - nouveau_dmem_fault_finalize_and_map(&fault); + ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr); + if (ret || dst == 0) + goto done; + nouveau_fence_new(dmem->migrate.chan, false, &fence); + migrate_vma_pages(&args); + nouveau_dmem_fence_done(&fence); + dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +done: migrate_vma_finalize(&args); - if (dst[0] == MIGRATE_PFN_ERROR) - return VM_FAULT_SIGBUS; - - return 0; + return ret; } static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { -- cgit v1.2.3 From f268307ec7c728fbc042608c7505926460117816 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Aug 2019 09:59:25 +0200 Subject: nouveau: simplify nouveau_dmem_migrate_vma Factor the main copy page to vram routine out into a helper that acts on a single page and which doesn't require the nouveau_dmem_migrate structure for argument passing. As an added benefit the new version only allocates the dma address array once and reuses it for each subsequent chunk of work. Link: https://lore.kernel.org/r/20190814075928.23766-8-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 184 ++++++++++----------------------- 1 file changed, 55 insertions(+), 129 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 7dded864022c..d96b987b9982 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -44,8 +44,6 @@ #define DMEM_CHUNK_SIZE (2UL << 20) #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) -struct nouveau_migrate; - enum nouveau_aper { NOUVEAU_APER_VIRT, NOUVEAU_APER_VRAM, @@ -86,15 +84,6 @@ static inline struct nouveau_dmem *page_to_dmem(struct page *page) return container_of(page->pgmap, struct nouveau_dmem, pagemap); } -struct nouveau_migrate { - struct vm_area_struct *vma; - struct nouveau_drm *drm; - struct nouveau_fence *fence; - unsigned long npages; - dma_addr_t *dma; - unsigned long dma_nr; -}; - static unsigned long nouveau_dmem_page_addr(struct page *page) { struct nouveau_dmem_chunk *chunk = page->zone_device_data; @@ -568,131 +557,66 @@ out_free: drm->dmem = NULL; } -static void -nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - struct nouveau_migrate *migrate) +static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, + unsigned long src, dma_addr_t *dma_addr) { - struct nouveau_drm *drm = migrate->drm; struct device *dev = drm->dev->dev; - unsigned long addr, i, npages = 0; - nouveau_migrate_copy_t copy; - int ret; - - /* First allocate new memory */ - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *dpage, *spage; - - dst_pfns[i] = 0; - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) - continue; - - dpage = nouveau_dmem_page_alloc_locked(drm); - if (!dpage) - continue; - - dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED | - MIGRATE_PFN_DEVICE; - npages++; - } - - if (!npages) - return; - - /* Allocate storage for DMA addresses, so we can unmap later. */ - migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL); - if (!migrate->dma) - goto error; - migrate->dma_nr = 0; - - /* Copy things over */ - copy = drm->dmem->migrate.copy_func; - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *spage, *dpage; - - dpage = migrate_pfn_to_page(dst_pfns[i]); - if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; - - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - - migrate->dma[migrate->dma_nr] = - dma_map_page_attrs(dev, spage, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - - ret = copy(drm, 1, NOUVEAU_APER_VRAM, - nouveau_dmem_page_addr(dpage), - NOUVEAU_APER_HOST, - migrate->dma[migrate->dma_nr++]); - if (ret) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - } + struct page *dpage, *spage; - nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence); + spage = migrate_pfn_to_page(src); + if (!spage || !(src & MIGRATE_PFN_MIGRATE)) + goto out; - return; + dpage = nouveau_dmem_page_alloc_locked(drm); + if (!dpage) + return 0; -error: - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { - struct page *page; + *dma_addr = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) + goto out_free_page; - if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; + if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_VRAM, + nouveau_dmem_page_addr(dpage), NOUVEAU_APER_HOST, + *dma_addr)) + goto out_dma_unmap; - page = migrate_pfn_to_page(dst_pfns[i]); - dst_pfns[i] = MIGRATE_PFN_ERROR; - if (page == NULL) - continue; + return migrate_pfn(page_to_pfn(dpage)) | + MIGRATE_PFN_LOCKED | MIGRATE_PFN_DEVICE; - __free_page(page); - } +out_dma_unmap: + dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +out_free_page: + nouveau_dmem_page_free_locked(drm, dpage); +out: + return 0; } -static void -nouveau_dmem_migrate_finalize_and_map(struct nouveau_migrate *migrate) +static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm, + struct migrate_vma *args, dma_addr_t *dma_addrs) { - struct nouveau_drm *drm = migrate->drm; + struct nouveau_fence *fence; + unsigned long addr = args->start, nr_dma = 0, i; + + for (i = 0; addr < args->end; i++) { + args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i], + dma_addrs + nr_dma); + if (args->dst[i]) + nr_dma++; + addr += PAGE_SIZE; + } - nouveau_dmem_fence_done(&migrate->fence); + nouveau_fence_new(drm->dmem->migrate.chan, false, &fence); + migrate_vma_pages(args); + nouveau_dmem_fence_done(&fence); - while (migrate->dma_nr--) { - dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + while (nr_dma--) { + dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE, + DMA_BIDIRECTIONAL); } - kfree(migrate->dma); - /* - * FIXME optimization: update GPU page table to point to newly - * migrated memory. + * FIXME optimization: update GPU page table to point to newly migrated + * memory. */ -} - -static void nouveau_dmem_migrate_chunk(struct migrate_vma *args, - struct nouveau_migrate *migrate) -{ - nouveau_dmem_migrate_alloc_and_copy(args->vma, args->src, args->dst, - args->start, args->end, migrate); - migrate_vma_pages(args); - nouveau_dmem_migrate_finalize_and_map(migrate); migrate_vma_finalize(args); } @@ -704,38 +628,40 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, { unsigned long npages = (end - start) >> PAGE_SHIFT; unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages); + dma_addr_t *dma_addrs; struct migrate_vma args = { .vma = vma, .start = start, }; - struct nouveau_migrate migrate = { - .drm = drm, - .vma = vma, - .npages = npages, - }; unsigned long c, i; int ret = -ENOMEM; - args.src = kzalloc(sizeof(long) * max, GFP_KERNEL); + args.src = kcalloc(max, sizeof(args.src), GFP_KERNEL); if (!args.src) goto out; - args.dst = kzalloc(sizeof(long) * max, GFP_KERNEL); + args.dst = kcalloc(max, sizeof(args.dst), GFP_KERNEL); if (!args.dst) goto out_free_src; + dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL); + if (!dma_addrs) + goto out_free_dst; + for (i = 0; i < npages; i += c) { c = min(SG_MAX_SINGLE_ALLOC, npages); args.end = start + (c << PAGE_SHIFT); ret = migrate_vma_setup(&args); if (ret) - goto out_free_dst; + goto out_free_dma; if (args.cpages) - nouveau_dmem_migrate_chunk(&args, &migrate); + nouveau_dmem_migrate_chunk(drm, &args, dma_addrs); args.start = args.end; } ret = 0; +out_free_dma: + kfree(dma_addrs); out_free_dst: kfree(args.dst); out_free_src: -- cgit v1.2.3 From 06d462beb470d361ffa8bd7b3d865509a8606987 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Aug 2019 09:59:27 +0200 Subject: mm: remove the unused MIGRATE_PFN_DEVICE flag No one ever checks this flag, and we could easily get that information from the page if needed. Link: https://lore.kernel.org/r/20190814075928.23766-10-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Reviewed-by: Jason Gunthorpe Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 3 +-- include/linux/migrate.h | 1 - mm/migrate.c | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index d96b987b9982..fa1439941596 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -580,8 +580,7 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, *dma_addr)) goto out_dma_unmap; - return migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED | MIGRATE_PFN_DEVICE; + return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; out_dma_unmap: dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 1e67dcfd318f..72120061b7d4 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -166,7 +166,6 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm, #define MIGRATE_PFN_MIGRATE (1UL << 1) #define MIGRATE_PFN_LOCKED (1UL << 2) #define MIGRATE_PFN_WRITE (1UL << 3) -#define MIGRATE_PFN_DEVICE (1UL << 4) #define MIGRATE_PFN_SHIFT 6 static inline struct page *migrate_pfn_to_page(unsigned long mpfn) diff --git a/mm/migrate.c b/mm/migrate.c index 8111e031fa2b..f4f5ae5ae44f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2237,8 +2237,8 @@ again: goto next; page = device_private_entry_to_page(entry); - mpfn = migrate_pfn(page_to_pfn(page))| - MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE; + mpfn = migrate_pfn(page_to_pfn(page)) | + MIGRATE_PFN_MIGRATE; if (is_write_device_private_entry(entry)) mpfn |= MIGRATE_PFN_WRITE; } else { -- cgit v1.2.3 From 9b2ed9cb975c63f5534daaebeb225ab52b589372 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Aug 2019 09:59:28 +0200 Subject: mm: remove CONFIG_MIGRATE_VMA_HELPER CONFIG_MIGRATE_VMA_HELPER guards helpers that are required for proper devic private memory support. Remove the option and just check for CONFIG_DEVICE_PRIVATE instead. Link: https://lore.kernel.org/r/20190814075928.23766-11-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Tested-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/Kconfig | 1 - mm/Kconfig | 3 --- mm/migrate.c | 4 ++-- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/nouveau') diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index df4352c279ba..3558df043592 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -89,7 +89,6 @@ config DRM_NOUVEAU_SVM depends on MMU depends on STAGING select HMM_MIRROR - select MIGRATE_VMA_HELPER select MMU_NOTIFIER default n help diff --git a/mm/Kconfig b/mm/Kconfig index 563436dc1f24..2fe4902ad755 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -669,9 +669,6 @@ config ZONE_DEVICE If FS_DAX is enabled, then say Y. -config MIGRATE_VMA_HELPER - bool - config DEV_PAGEMAP_OPS bool diff --git a/mm/migrate.c b/mm/migrate.c index f4f5ae5ae44f..2cff57e7116e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2117,7 +2117,7 @@ out_unlock: #endif /* CONFIG_NUMA */ -#if defined(CONFIG_MIGRATE_VMA_HELPER) +#ifdef CONFIG_DEVICE_PRIVATE static int migrate_vma_collect_hole(unsigned long start, unsigned long end, struct mm_walk *walk) @@ -2942,4 +2942,4 @@ void migrate_vma_finalize(struct migrate_vma *migrate) } } EXPORT_SYMBOL(migrate_vma_finalize); -#endif /* defined(MIGRATE_VMA_HELPER) */ +#endif /* CONFIG_DEVICE_PRIVATE */ -- cgit v1.2.3