diff options
author | Philip Yang <Philip.Yang@amd.com> | 2019-06-14 14:03:36 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2019-06-20 11:33:41 -0500 |
commit | e82fdb16a0650250e27f7241133682342d476ad3 (patch) | |
tree | 1ef548a587852f7b25c3d5b9ec62364227827649 /drivers | |
parent | c1d827d62f71eb6eaa0b2ffad4d893faa99e4c87 (diff) | |
download | linux-e82fdb16a0650250e27f7241133682342d476ad3.tar.bz2 |
drm/amdgpu: improve HMM error -ENOMEM and -EBUSY handling
Under memory pressure, hmm_range_fault may return error code -ENOMEM
or -EBUSY, change pr_info to pr_debug to remove unnecessary kernel log
message because we will retry restore again.
Call get_user_pages_done if TTM get user pages failed will have
WARN_ONCE kernel calling stack dump log.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 38 |
1 files changed, 6 insertions, 32 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 74e86952553f..10abae398e51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1731,35 +1731,17 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { - bo->tbo.ttm->pages[0] = NULL; - pr_info("%s: Failed to get user pages: %d\n", + pr_debug("%s: Failed to get user pages: %d\n", __func__, ret); - /* Pretend it succeeded. It will fail later - * with a VM fault if the GPU tries to access - * it. Better than hanging indefinitely with - * stalled user mode queues. - */ - } - } - - return 0; -} -/* Remove invalid userptr BOs from hmm track list - * - * Stop HMM track the userptr update - */ -static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info) -{ - struct kgd_mem *mem, *tmp_mem; - struct amdgpu_bo *bo; + /* Return error -EBUSY or -ENOMEM, retry restore */ + return ret; + } - list_for_each_entry_safe(mem, tmp_mem, - &process_info->userptr_inval_list, - validate_list.head) { - bo = mem->bo; amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); } + + return 0; } /* Validate invalid userptr BOs @@ -1841,13 +1823,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); - /* Stop HMM track the userptr update. We dont check the return - * value for concurrent CPU page table update because we will - * reschedule the restore worker if process_info->evicted_bos - * is updated. - */ - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -1946,7 +1921,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) } unlock_out: - untrack_invalid_user_pages(process_info); mutex_unlock(&process_info->lock); mmput(mm); put_task_struct(usertask); |