author		Maxime Ripard <maxime.ripard@bootlin.com>	2019-07-22 21:24:10 +0200
committer	Maxime Ripard <maxime.ripard@bootlin.com>	2019-07-22 21:24:10 +0200
commit		03b0f2ce735e97e9f49790d4563c82515b8fa702 (patch)
tree		da561805bffd06bfba81c867f83cacb28f3a64e5 /drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
parent		e4f86e43716443e934d705952902d40de0fa9a05 (diff)
parent		5f9e832c137075045d15cd6899ab0505cfb2ca4b (diff)
download	linux-03b0f2ce735e97e9f49790d4563c82515b8fa702.tar.bz2
Merge v5.3-rc1 into drm-misc-next
Noralf needs some SPI patches in 5.3 to merge some work on tinydrm.
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c	215
1 file changed, 122 insertions(+), 93 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d81bebf76310..e51b48ac48eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -391,6 +391,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
 							     src->mem);
 			src_node_size = (src_mm->size << PAGE_SHIFT);
+			src_page_offset = 0;
 		} else {
 			src_node_start += cur_size;
 			src_page_offset = src_node_start & (PAGE_SIZE - 1);
@@ -400,6 +401,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
 							     dst->mem);
 			dst_node_size = (dst_mm->size << PAGE_SHIFT);
+			dst_page_offset = 0;
 		} else {
 			dst_node_start += cur_size;
 			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
@@ -487,6 +489,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 	if (unlikely(r)) {
+		pr_err("Failed to find GTT space for blit from VRAM\n");
 		return r;
 	}
 
@@ -545,6 +548,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
 	if (unlikely(r)) {
+		pr_err("Failed to find GTT space for blit to VRAM\n");
 		return r;
 	}
 
@@ -565,6 +569,30 @@ out_cleanup:
 }
 
 /**
+ * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_mem_visible(struct amdgpu_device *adev,
+			       struct ttm_mem_reg *mem)
+{
+	struct drm_mm_node *nodes = mem->mm_node;
+
+	if (mem->mem_type == TTM_PL_SYSTEM ||
+	    mem->mem_type == TTM_PL_TT)
+		return true;
+	if (mem->mem_type != TTM_PL_VRAM)
+		return false;
+
+	/* ttm_mem_reg_ioremap only supports contiguous memory */
+	if (nodes->size != mem->num_pages)
+		return false;
+
+	return ((nodes->start + nodes->size) << PAGE_SHIFT)
+		<= adev->gmc.visible_vram_size;
+}
+
+/**
  * amdgpu_bo_move - Move a buffer object to a new memory location
  *
  * Called by ttm_bo_handle_move_mem()
@@ -608,8 +636,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 		return 0;
 	}
 
-	if (!adev->mman.buffer_funcs_enabled)
+	if (!adev->mman.buffer_funcs_enabled) {
+		r = -ENODEV;
 		goto memcpy;
+	}
 
 	if (old_mem->mem_type == TTM_PL_VRAM &&
 	    new_mem->mem_type == TTM_PL_SYSTEM) {
@@ -624,10 +654,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
-		if (r) {
+		/* Check that all memory is CPU accessible */
+		if (!amdgpu_mem_visible(adev, old_mem) ||
+		    !amdgpu_mem_visible(adev, new_mem)) {
+			pr_err("Move buffer fallback to memcpy unavailable\n");
 			return r;
 		}
+
+		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
+		if (r)
+			return r;
 	}
 
 	if (bo->type == ttm_bo_type_device &&
@@ -716,8 +752,7 @@ struct amdgpu_ttm_tt {
 	struct task_struct	*usertask;
 	uint32_t		userflags;
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-	struct hmm_range	*ranges;
-	int			nr_ranges;
+	struct hmm_range	*range;
 #endif
 };
 
@@ -730,57 +765,44 @@ struct amdgpu_ttm_tt {
  */
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
 
-/* Support Userptr pages cross max 16 vmas */
-#define MAX_NR_VMAS	(16)
+#define MAX_RETRY_HMM_RANGE_FAULT	16
 
-int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 {
+	struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
+	struct ttm_tt *ttm = bo->tbo.ttm;
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct mm_struct *mm = gtt->usertask->mm;
 	unsigned long start = gtt->userptr;
-	unsigned long end = start + ttm->num_pages * PAGE_SIZE;
-	struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
-	struct hmm_range *ranges;
-	unsigned long nr_pages, i;
-	uint64_t *pfns, f;
+	struct vm_area_struct *vma;
+	struct hmm_range *range;
+	unsigned long i;
+	uint64_t *pfns;
+	int retry = 0;
 	int r = 0;
 
 	if (!mm) /* Happens during process shutdown */
 		return -ESRCH;
 
-	down_read(&mm->mmap_sem);
-
-	/* user pages may cross multiple VMAs */
-	gtt->nr_ranges = 0;
-	do {
-		unsigned long vm_start;
-
-		if (gtt->nr_ranges >= MAX_NR_VMAS) {
-			DRM_ERROR("Too many VMAs in userptr range\n");
-			r = -EFAULT;
-			goto out;
-		}
-
-		vm_start = vma ? vma->vm_end : start;
-		vma = find_vma(mm, vm_start);
-		if (unlikely(!vma || vm_start < vma->vm_start)) {
-			r = -EFAULT;
-			goto out;
-		}
-		vmas[gtt->nr_ranges++] = vma;
-	} while (end > vma->vm_end);
-
-	DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
-		start, gtt->nr_ranges, ttm->num_pages);
+	if (unlikely(!mirror)) {
+		DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
+		r = -EFAULT;
+		goto out;
+	}
 
+	vma = find_vma(mm, start);
+	if (unlikely(!vma || start < vma->vm_start)) {
+		r = -EFAULT;
+		goto out;
+	}
 	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
-		vmas[0]->vm_file)) {
+		vma->vm_file)) {
 		r = -EPERM;
 		goto out;
 	}
 
-	ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
-	if (unlikely(!ranges)) {
+	range = kzalloc(sizeof(*range), GFP_KERNEL);
+	if (unlikely(!range)) {
 		r = -ENOMEM;
 		goto out;
 	}
@@ -791,61 +813,67 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 		goto out_free_ranges;
 	}
 
-	for (i = 0; i < gtt->nr_ranges; i++)
-		amdgpu_hmm_init_range(&ranges[i]);
+	amdgpu_hmm_init_range(range);
+	range->default_flags = range->flags[HMM_PFN_VALID];
+	range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
+				0 : range->flags[HMM_PFN_WRITE];
+	range->pfn_flags_mask = 0;
+	range->pfns = pfns;
+	hmm_range_register(range, mirror, start,
+			   start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
 
-	f = ranges[0].flags[HMM_PFN_VALID];
-	f |= amdgpu_ttm_tt_is_readonly(ttm) ?
-				0 : ranges[0].flags[HMM_PFN_WRITE];
-	memset64(pfns, f, ttm->num_pages);
+retry:
+	/*
+	 * Just wait for range to be valid, safe to ignore return value as we
+	 * will use the return value of hmm_range_fault() below under the
+	 * mmap_sem to ascertain the validity of the range.
+	 */
+	hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
 
-	for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
-		ranges[i].vma = vmas[i];
-		ranges[i].start = max(start, vmas[i]->vm_start);
-		ranges[i].end = min(end, vmas[i]->vm_end);
-		ranges[i].pfns = pfns + nr_pages;
-		nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
+	down_read(&mm->mmap_sem);
 
-		r = hmm_vma_fault(&ranges[i], true);
-		if (unlikely(r))
-			break;
-	}
-	if (unlikely(r)) {
-		while (i--)
-			hmm_vma_range_done(&ranges[i]);
+	r = hmm_range_fault(range, true);
+	if (unlikely(r < 0)) {
+		if (likely(r == -EAGAIN)) {
+			/*
+			 * return -EAGAIN, mmap_sem is dropped
+			 */
+			if (retry++ < MAX_RETRY_HMM_RANGE_FAULT)
+				goto retry;
+			else
+				pr_err("Retry hmm fault too many times\n");
+		}
 
-		goto out_free_pfns;
+		goto out_up_read;
 	}
 
 	up_read(&mm->mmap_sem);
 
 	for (i = 0; i < ttm->num_pages; i++) {
-		pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
-		if (!pages[i]) {
+		pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+		if (unlikely(!pages[i])) {
 			pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
 			       i, pfns[i]);
-			goto out_invalid_pfn;
+			r = -ENOMEM;
+
+			goto out_free_pfns;
 		}
 	}
-	gtt->ranges = ranges;
 
+	gtt->range = range;
 	return 0;
 
+out_up_read:
+	if (likely(r != -EAGAIN))
+		up_read(&mm->mmap_sem);
 out_free_pfns:
+	hmm_range_unregister(range);
 	kvfree(pfns);
 out_free_ranges:
-	kvfree(ranges);
+	kfree(range);
 out:
-	up_read(&mm->mmap_sem);
-
 	return r;
-
-out_invalid_pfn:
-	for (i = 0; i < gtt->nr_ranges; i++)
-		hmm_vma_range_done(&ranges[i]);
-	kvfree(pfns);
-	kvfree(ranges);
-	return -ENOMEM;
 }
 
 /**
@@ -858,23 +886,23 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	bool r = false;
-	int i;
 
 	if (!gtt || !gtt->userptr)
 		return false;
 
-	DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
-		gtt->userptr, gtt->nr_ranges, ttm->num_pages);
+	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
+		gtt->userptr, ttm->num_pages);
 
-	WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
+	WARN_ONCE(!gtt->range || !gtt->range->pfns,
 		"No user pages to check\n");
 
-	if (gtt->ranges) {
-		for (i = 0; i < gtt->nr_ranges; i++)
-			r |= hmm_vma_range_done(&gtt->ranges[i]);
-		kvfree(gtt->ranges[0].pfns);
-		kvfree(gtt->ranges);
-		gtt->ranges = NULL;
+	if (gtt->range) {
+		r = hmm_range_valid(gtt->range);
+		hmm_range_unregister(gtt->range);
+
+		kvfree(gtt->range->pfns);
+		kfree(gtt->range);
+		gtt->range = NULL;
 	}
 
 	return r;
@@ -958,9 +986,9 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 	sg_free_table(ttm->sg);
 
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-	if (gtt->ranges &&
-	    ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
-					     gtt->ranges[0].pfns[0]))
+	if (gtt->range &&
+	    ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
+						      gtt->range->pfns[0]))
 		WARN_ONCE(1, "Missing get_user_page_done\n");
 #endif
 }
@@ -983,8 +1011,8 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 			goto gart_bind_fail;
 
 		/* Patch mtype of the second part BO */
-		flags &= ~AMDGPU_PTE_MTYPE_MASK;
-		flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+		flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
 
 		r = amdgpu_gart_bind(adev,
 				gtt->offset + (page_idx << PAGE_SHIFT),
@@ -2067,9 +2095,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 	mm_node = bo->tbo.mem.mm_node;
 	num_loops = 0;
 	while (num_pages) {
-		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
+		uint64_t byte_count = mm_node->size << PAGE_SHIFT;
 
-		num_loops += DIV_ROUND_UP(byte_count, max_bytes);
+		num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes);
 		num_pages -= mm_node->size;
 		++mm_node;
 	}
@@ -2095,12 +2123,13 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 	mm_node = bo->tbo.mem.mm_node;
 
 	while (num_pages) {
-		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
+		uint64_t byte_count = mm_node->size << PAGE_SHIFT;
 		uint64_t dst_addr;
 
 		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
 		while (byte_count) {
-			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+			uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count,
+							   max_bytes);
 
 			amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
 						dst_addr, cur_size_in_bytes);
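
The new amdgpu_mem_visible() helper above gates the ttm_bo_move_memcpy() fallback: system and GTT placements are always CPU accessible, while VRAM qualifies only when the buffer is a single contiguous node that ends inside the CPU-visible (BAR) window. The same predicate can be restated as a minimal userspace sketch; the types, the placement enum, the 4 KiB page size, and the values in main() are stand-ins for the demo, not the driver's definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* 4 KiB pages assumed for the demo */

/* Stand-ins for the TTM placement domains the check cares about. */
enum mem_type { PL_SYSTEM, PL_TT, PL_VRAM };

/* Minimal stand-in for one contiguous drm_mm allocation node (units: pages). */
struct mm_node {
	uint64_t start;
	uint64_t size;
};

/*
 * Restates the predicate of amdgpu_mem_visible(): system and GTT memory
 * are always CPU accessible; VRAM only when the allocation is a single
 * contiguous node that ends below the visible (BAR) window.
 */
static bool mem_visible(enum mem_type type, const struct mm_node *node,
			uint64_t num_pages, uint64_t visible_vram_size)
{
	if (type == PL_SYSTEM || type == PL_TT)
		return true;
	if (type != PL_VRAM)
		return false;

	/* ttm_mem_reg_ioremap() only supports contiguous memory */
	if (node->size != num_pages)
		return false;

	return ((node->start + node->size) << PAGE_SHIFT) <= visible_vram_size;
}

int main(void)
{
	uint64_t visible = 256ULL << 20;		/* assume a 256 MiB BAR */
	struct mm_node in  = { .start = 0,       .size = 64 };
	struct mm_node out = { .start = 1 << 16, .size = 64 }; /* starts at 256 MiB */

	printf("node inside BAR:  %d\n", mem_visible(PL_VRAM, &in, 64, visible));
	printf("node outside BAR: %d\n", mem_visible(PL_VRAM, &out, 64, visible));
	return 0;
}

With those illustrative numbers, the node at offset 0 passes while the one crossing the end of the visible window fails, which is the case the "Move buffer fallback to memcpy unavailable" pr_err() path reports.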
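
The reworked userptr path registers one hmm_range, waits for it to become valid, then calls hmm_range_fault() under mmap_sem; per the comment in the diff, -EAGAIN means the range was invalidated and the lock dropped, so the driver loops back, giving up after MAX_RETRY_HMM_RANGE_FAULT attempts. The shape of that bounded goto-retry loop, reduced to a runnable sketch in which fake_range_fault() is a stub standing in for hmm_range_fault():

#include <errno.h>
#include <stdio.h>

#define MAX_RETRY_HMM_RANGE_FAULT	16

/*
 * Stub standing in for hmm_range_fault(): pretend the range snapshot
 * is invalidated twice before a fault finally sticks.
 */
static int fake_range_fault(void)
{
	static int calls;

	return (calls++ < 2) ? -EAGAIN : 0;
}

int main(void)
{
	int retry = 0;
	int r;

retry:
	/* In the driver: hmm_range_wait_until_valid() + down_read(mmap_sem). */
	r = fake_range_fault();
	if (r < 0) {
		if (r == -EAGAIN) {
			/* Snapshot went stale; take another bounded try. */
			if (retry++ < MAX_RETRY_HMM_RANGE_FAULT)
				goto retry;
			fprintf(stderr, "Retry hmm fault too many times\n");
		}
		return 1;
	}
	printf("range faulted after %d retries\n", retry);
	return 0;
}

Bounding the retries is what keeps a perpetually-invalidated range (e.g. a racing workload constantly remapping the pages) from turning the fault path into an unbounded loop.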
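
The last two hunks widen byte_count from uint32_t to uint64_t: mm_node->size counts pages, so size << PAGE_SHIFT stops fitting in 32 bits once a single node reaches 4 GiB, and for an exactly-4 GiB node the truncated value is zero, so DIV_ROUND_UP planned zero fill commands. A small demo of the truncation and of why DIV_ROUND_UP becomes DIV_ROUND_UP_ULL to match; the macros mirror the kernel's rounding, while PAGE_SHIFT and the 2 MiB per-command limit are illustrative stand-ins.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* 4 KiB pages assumed */

/* Same round-up division the kernel's DIV_ROUND_UP/DIV_ROUND_UP_ULL perform. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define DIV_ROUND_UP_ULL(n, d)	((((unsigned long long)(n)) + (d) - 1) / (d))

int main(void)
{
	uint64_t node_pages = 1ULL << 20;	/* one 4 GiB drm_mm node */
	uint32_t max_bytes = 2 << 20;		/* per-command limit (illustrative) */

	uint32_t old_count = node_pages << PAGE_SHIFT;	/* truncates to 0 */
	uint64_t new_count = node_pages << PAGE_SHIFT;	/* keeps all 33 bits */

	/* The narrow type plans zero fill commands for this node... */
	printf("old: byte_count=%u num_loops=%u\n",
	       old_count, DIV_ROUND_UP(old_count, max_bytes));
	/* ...the widened type plans the correct 2048. */
	printf("new: byte_count=%llu num_loops=%llu\n",
	       (unsigned long long)new_count,
	       DIV_ROUND_UP_ULL(new_count, max_bytes));
	return 0;
}

The inner loop keeps cur_size_in_bytes as uint32_t, which is why the diff switches min() to min_t(uint64_t, ...): the comparison must happen at 64 bits before the clamped per-command size, which always fits, is assigned back.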