diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 |
1 files changed, 19 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6573e1112462..78fd2c9a7b7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1940,7 +1940,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) if (adev->ip_blocks[i].status.hw == true) break; - if (adev->in_gpu_reset || adev->in_suspend) { + if (amdgpu_in_reset(adev) || adev->in_suspend) { r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", @@ -2117,7 +2117,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) AMDGPU_RESET_MAGIC_NUM)) return true; - if (!adev->in_gpu_reset) + if (!amdgpu_in_reset(adev)) return false; /* @@ -3053,6 +3053,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); + atomic_set(&adev->in_gpu_reset, 0); mutex_init(&adev->lock_reset); mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); @@ -4200,16 +4201,14 @@ end: return r; } -static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock) +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev) { - if (trylock) { - if (!mutex_trylock(&adev->lock_reset)) - return false; - } else - mutex_lock(&adev->lock_reset); + if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) + return false; + + mutex_lock(&adev->lock_reset); atomic_inc(&adev->gpu_reset_counter); - adev->in_gpu_reset = true; switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: adev->mp1_state = PP_MP1_STATE_SHUTDOWN; @@ -4229,7 +4228,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) { amdgpu_vf_error_trans_all(adev); adev->mp1_state = PP_MP1_STATE_NONE; - adev->in_gpu_reset = false; + atomic_set(&adev->in_gpu_reset, 0); mutex_unlock(&adev->lock_reset); } @@ -4340,12 +4339,14 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * We always reset all schedulers for device and all devices for XGMI * hive so that should take care of them too. */ - hive = amdgpu_get_xgmi_hive(adev, true); - if (hive && !mutex_trylock(&hive->reset_lock)) { - DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", - job ? job->base.id : -1, hive->hive_id); - mutex_unlock(&hive->hive_lock); - return 0; + hive = amdgpu_get_xgmi_hive(adev, false); + if (hive) { + if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) { + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", + job ? job->base.id : -1, hive->hive_id); + return 0; + } + mutex_lock(&hive->hive_lock); } /* @@ -4367,7 +4368,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - if (!amdgpu_device_lock_adev(tmp_adev, !hive)) { + if (!amdgpu_device_lock_adev(tmp_adev)) { DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", job ? job->base.id : -1); mutex_unlock(&hive->hive_lock); @@ -4505,7 +4506,7 @@ skip_sched_resume: } if (hive) { - mutex_unlock(&hive->reset_lock); + atomic_set(&hive->in_reset, 0); mutex_unlock(&hive->hive_lock); } |