diff options
author | Guchun Chen <guchun.chen@amd.com> | 2020-07-23 15:42:19 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2020-08-04 17:26:31 -0400 |
commit | b82e65a93510465cb4c203c938245f137a4e95dc (patch) | |
tree | 82187ca76db2ab385bf2369fe162b4c3f020be59 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | 1d6a9d122d2c7817ad1bae0e59c8a29450c2b14d (diff) | |
download | linux-b82e65a93510465cb4c203c938245f137a4e95dc.tar.bz2 |
drm/amdgpu: break driver init process when it's bad GPU(v5)
When retrieving bad gpu tag from eeprom, GPU init should
fail as the GPU needs to be retired for further check.
v2: Fix spelling typo, correct the condition to detect
bad gpu tag and refine error message.
v3: Refine function argument name.
v4: Fix missing check of returning value of i2c
initialization error case.
v5: Use dev_err to print PCI information in dmesg instead
of DRM_ERROR.
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 18 |
1 files changed, 16 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 6660094a1063..1a1652ea76b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1821,6 +1821,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data **data; uint32_t max_eeprom_records_len = 0; + bool exc_err_limit = false; int ret; if (con) @@ -1842,8 +1843,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length(); amdgpu_ras_validate_threshold(adev, max_eeprom_records_len); - ret = amdgpu_ras_eeprom_init(&con->eeprom_control); - if (ret) + ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit); + /* + * This calling fails when exc_err_limit is true or + * ret != 0. + */ + if (exc_err_limit || ret) goto free; if (con->eeprom_control.num_recs) { @@ -1867,6 +1872,15 @@ free: out: dev_warn(adev->dev, "Failed to initialize ras recovery!\n"); + /* + * Except error threshold exceeding case, other failure cases in this + * function would not fail amdgpu driver init. + */ + if (!exc_err_limit) + ret = 0; + else + ret = -EINVAL; + return ret; } |