summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd
diff options
context:
space:
mode:
authorAndrey Grodzovsky <andrey.grodzovsky@amd.com>2021-12-20 17:27:37 -0500
committerAndrey Grodzovsky <andrey.grodzovsky@amd.com>2022-02-09 12:16:06 -0500
commit02599bc7f7047f2b316ab499f41d72ca14e3b3d3 (patch)
tree0c507db460b008654a35c454d145184d23e5414b /drivers/gpu/drm/amd
parent54f329cc7a7a7ea265c45b206d45e3d09192aba7 (diff)
downloadlinux-02599bc7f7047f2b316ab499f41d72ca14e3b3d3.tar.bz2
drm/amd/virt: For SRIOV send GPU reset directly to TDR queue.
No need to to trigger another work queue inside the work queue. v3: Problem: Extra reset caused by host side FLR notification following guest side triggered reset. Fix: Preven qeuing flr_work from mailbox irq if guest already executing a reset. Suggested-by: Liu Shaoyun <Shaoyun.Liu@amd.com> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Reviewed-by: Liu Shaoyun <Shaoyun.Liu@amd.com> Link: https://www.spinics.net/lists/amd-gfx/msg74114.html
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c9
3 files changed, 18 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 56da5ab82987..5869d51d8bee 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -282,7 +282,7 @@ flr_done:
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
- amdgpu_device_gpu_recover(adev, NULL);
+ amdgpu_device_gpu_recover_imp(adev, NULL);
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -307,8 +307,11 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
switch (event) {
case IDH_FLR_NOTIFICATION:
- if (amdgpu_sriov_runtime(adev))
- schedule_work(&adev->virt.flr_work);
+ if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
+ WARN_ONCE(!queue_work(adev->reset_domain.wq,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
break;
case IDH_QUERY_ALIVE:
xgpu_ai_mailbox_send_ack(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 477d0dde19c5..5728a6401d73 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -309,7 +309,7 @@ flr_done:
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
- amdgpu_device_gpu_recover(adev, NULL);
+ amdgpu_device_gpu_recover_imp(adev, NULL);
}
static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -337,8 +337,11 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
switch (event) {
case IDH_FLR_NOTIFICATION:
- if (amdgpu_sriov_runtime(adev))
- schedule_work(&adev->virt.flr_work);
+ if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
+ WARN_ONCE(!queue_work(adev->reset_domain.wq,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
break;
/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
* it byfar since that polling thread will handle it,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index aef9d059ae52..02290febfcf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -521,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
/* Trigger recovery due to world switch failure */
if (amdgpu_device_should_recover_gpu(adev))
- amdgpu_device_gpu_recover(adev, NULL);
+ amdgpu_device_gpu_recover_imp(adev, NULL);
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -550,8 +550,11 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
/* only handle FLR_NOTIFY now */
- if (!r)
- schedule_work(&adev->virt.flr_work);
+ if (!r && !amdgpu_in_reset(adev))
+ WARN_ONCE(!queue_work(adev->reset_domain.wq,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
}
return 0;