summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Zhao <Victor.Zhao@amd.com>2022-06-24 12:00:06 +0800
committerAlex Deucher <alexander.deucher@amd.com>2022-08-16 18:14:31 -0400
commit194eb174cbe4fe2b3376ac30acca2dc8c8beca00 (patch)
tree7b47276a92eb81bb4e62e8fdc54c2fc470c46633
parent72fadb13674f807f10a168fb7d020dde58ce6b0b (diff)
downloadlinux-194eb174cbe4fe2b3376ac30acca2dc8c8beca00.tar.bz2
drm/amdgpu: reduce reset time
In multi container use case, reset time is important, so skip ring tests and cp halt wait during ip suspending for reset as they are going to fail and cost more time on reset v2: add a hang flag to indicate the reset comes from a job timeout, skip ring test and cp halt wait in this case v3: move hang flag to adev Signed-off-by: Victor Zhao <Victor.Zhao@amd.com> Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c9
4 files changed, 11 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 66a26f2c51c1..79bb6fd83094 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1065,6 +1065,7 @@ struct amdgpu_device {
struct work_struct reset_work;
uint32_t amdgpu_reset_level_mask;
+ bool job_hang;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 222d3d7ea076..454a78ba60d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -477,7 +477,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
RESET_QUEUES, 0, 0);
- if (adev->gfx.kiq.ring.sched.ready)
+ if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 36f49da15800..8f51adf3b329 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -49,6 +49,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
memset(&ti, 0, sizeof(struct amdgpu_task_info));
+ adev->job_hang = true;
if (amdgpu_gpu_recovery &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
@@ -83,6 +84,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
exit:
+ adev->job_hang = false;
drm_dev_exit(idx);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index a2a4dc1844c0..1a915edccb92 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5971,6 +5971,9 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
}
+ if (adev->job_hang && !enable)
+ return 0;
+
for (i = 0; i < adev->usec_timeout; i++) {
if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
break;
@@ -7569,8 +7572,10 @@ static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
PREEMPT_QUEUES, 0, 0);
-
- return amdgpu_ring_test_helper(kiq_ring);
+ if (!adev->job_hang)
+ return amdgpu_ring_test_helper(kiq_ring);
+ else
+ return 0;
}
#endif