summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2019-04-18 11:00:21 -0400
committerAlex Deucher <alexander.deucher@amd.com>2019-05-02 15:45:48 -0500
commit5918045c4ed492fb5813f980dcf89a90fefd0a4e (patch)
treea59c6f9bb006645b8fc46164d7fb53cb38b24294 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parentb3198c38f02d54a5e964258a2180d502abe6eaf0 (diff)
downloadlinux-5918045c4ed492fb5813f980dcf89a90fefd0a4e.tar.bz2
drm/scheduler: rework job destruction
We now destroy finished jobs from the worker thread to make sure that we never destroy a job currently in timeout processing. By this we avoid holding lock around ring mirror list in drm_sched_stop which should solve a deadlock reported by a user. v2: Remove unused variable. v4: Move guilty job free into sched code. v5: Move sched->hw_rq_count to drm_sched_start to account for counter decrement in drm_sched_stop even when we don't call resubmit jobs if guily job did signal. v6: remove unused variable Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109692 Acked-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/1555599624-12285-3-git-send-email-andrey.grodzovsky@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c9
1 files changed, 3 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7cee269ec3e3..a0e165c91a78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3334,7 +3334,7 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (!ring || !ring->sched.thread)
continue;
- drm_sched_stop(&ring->sched);
+ drm_sched_stop(&ring->sched, &job->base);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
@@ -3343,8 +3343,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if(job)
drm_sched_increase_karma(&job->base);
-
-
if (!amdgpu_sriov_vf(adev)) {
if (!need_full_reset)
@@ -3482,8 +3480,7 @@ end:
return r;
}
-static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
- struct amdgpu_job *job)
+static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
{
int i;
@@ -3623,7 +3620,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
/* Post ASIC reset for all devs .*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
+ amdgpu_device_post_asic_reset(tmp_adev);
if (r) {
/* bad news, how to tell it to userspace ? */