summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Philip Yang <Philip.Yang@amd.com> 2019-06-14 14:03:36 -0400
committer: Alex Deucher <alexander.deucher@amd.com> 2019-06-20 11:33:41 -0500
commit: e82fdb16a0650250e27f7241133682342d476ad3 (patch)
tree: 1ef548a587852f7b25c3d5b9ec62364227827649 /drivers
parent: c1d827d62f71eb6eaa0b2ffad4d893faa99e4c87 (diff)
download: linux-e82fdb16a0650250e27f7241133682342d476ad3.tar.bz2
drm/amdgpu: improve HMM error -ENOMEM and -EBUSY handling
Under memory pressure, hmm_range_fault may return the error code -ENOMEM or -EBUSY. Change pr_info to pr_debug to remove the unnecessary kernel log message, because the restore will be retried anyway. Calling get_user_pages_done after TTM get user pages has failed triggers a WARN_ONCE kernel call stack dump in the log, so return the error instead and only call get_user_pages_done when getting the user pages succeeded. Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 38
1 files changed, 6 insertions, 32 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 74e86952553f..10abae398e51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1731,35 +1731,17 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
bo->tbo.ttm->pages);
if (ret) {
- bo->tbo.ttm->pages[0] = NULL;
- pr_info("%s: Failed to get user pages: %d\n",
+ pr_debug("%s: Failed to get user pages: %d\n",
__func__, ret);
- /* Pretend it succeeded. It will fail later
- * with a VM fault if the GPU tries to access
- * it. Better than hanging indefinitely with
- * stalled user mode queues.
- */
- }
- }
-
- return 0;
-}
-/* Remove invalid userptr BOs from hmm track list
- *
- * Stop HMM track the userptr update
- */
-static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info)
-{
- struct kgd_mem *mem, *tmp_mem;
- struct amdgpu_bo *bo;
+ /* Return error -EBUSY or -ENOMEM, retry restore */
+ return ret;
+ }
- list_for_each_entry_safe(mem, tmp_mem,
- &process_info->userptr_inval_list,
- validate_list.head) {
- bo = mem->bo;
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
}
+
+ return 0;
}
/* Validate invalid userptr BOs
@@ -1841,13 +1823,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);
- /* Stop HMM track the userptr update. We dont check the return
- * value for concurrent CPU page table update because we will
- * reschedule the restore worker if process_info->evicted_bos
- * is updated.
- */
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
-
/* Update mapping. If the BO was not validated
* (because we couldn't get user pages), this will
* clear the page table entries, which will result in
@@ -1946,7 +1921,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
}
unlock_out:
- untrack_invalid_user_pages(process_info);
mutex_unlock(&process_info->lock);
mmput(mm);
put_task_struct(usertask);