diff options
author | Tao Zhou <tao.zhou1@amd.com> | 2019-08-30 19:50:39 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2019-09-13 17:50:47 -0500 |
commit | 1a6fc071e1991321d3b6a00e0e7c733a462a4418 (patch) | |
tree | 341868da914a979cf38fd5bca6a2870e66ebb88c /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | 87d2b92f1e9df64a74f7fda0691d4041ba2727f9 (diff) | |
download | linux-1a6fc071e1991321d3b6a00e0e7c733a462a4418.tar.bz2 |
drm/amdgpu: move the call of ras recovery_init and bad page reserve to proper place
ras recovery_init should be called after ttm init, and bad page
reservation should be done before gpu reset, since i2c may be
unstable during gpu reset.
Add cleanup paths to recovery_init and recovery_fini.
v2: add more comments and prints;
remove cancel_work_sync from recovery_init.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 39 |
1 files changed, 26 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 53540e067d15..e9bd40ea7ce0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1493,16 +1493,17 @@ out: return 0; } -static int amdgpu_ras_recovery_init(struct amdgpu_device *adev) +int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data **data = &con->eh_data; int ret; - *data = kmalloc(sizeof(**data), - GFP_KERNEL|__GFP_ZERO); - if (!*data) - return -ENOMEM; + *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); + if (!*data) { + ret = -ENOMEM; + goto out; + } mutex_init(&con->recovery_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); @@ -1511,18 +1512,30 @@ static int amdgpu_ras_recovery_init(struct amdgpu_device *adev) ret = amdgpu_ras_eeprom_init(&adev->psp.ras.ras->eeprom_control); if (ret) - return ret; + goto free; if (adev->psp.ras.ras->eeprom_control.num_recs) { ret = amdgpu_ras_load_bad_pages(adev); if (ret) - return ret; + goto free; ret = amdgpu_ras_reserve_bad_pages(adev); if (ret) - return ret; + goto release; } return 0; + +release: + amdgpu_ras_release_bad_pages(adev); +free: + con->eh_data = NULL; + kfree((*data)->bps); + kfree((*data)->bps_bo); + kfree(*data); +out: + DRM_WARN("Failed to initialize ras recovery!\n"); + + return ret; } static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) @@ -1530,12 +1543,17 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data = con->eh_data; + /* recovery_init failed to init it, fini is useless */ + if (!data) + return 0; + cancel_work_sync(&con->recovery_work); amdgpu_ras_release_bad_pages(adev); mutex_lock(&con->recovery_lock); con->eh_data = NULL; kfree(data->bps); + kfree(data->bps_bo); kfree(data); mutex_unlock(&con->recovery_lock); @@ -1627,9 
+1645,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) return r; } - if (amdgpu_ras_recovery_init(adev)) - goto recovery_out; - amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK; if (amdgpu_ras_fs_init(adev)) @@ -1644,8 +1659,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) con->hw_supported, con->supported); return 0; fs_out: - amdgpu_ras_recovery_fini(adev); -recovery_out: amdgpu_ras_set_context(adev, NULL); kfree(con); |