diff options
author | Tao Zhou <tao.zhou1@amd.com> | 2019-08-15 16:15:08 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2019-09-13 17:50:40 -0500 |
commit | 87d2b92f1e9df64a74f7fda0691d4041ba2727f9 (patch) | |
tree | 76897717624eede2a434ee2ec376326211b2e816 /drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | |
parent | 78ad00c9030cf872621ef4c14c20fbe7d34d2c06 (diff) | |
download | linux-87d2b92f1e9df64a74f7fda0691d4041ba2727f9.tar.bz2 |
drm/amdgpu: save umc error records
save umc error records to ras bad page array
v2: add bad pages before gpu reset
v3: add NULL check for adev->umc.funcs
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 40 |
1 files changed, 31 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2b4eb0b9d9ce..9b06d775d137 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -247,21 +247,43 @@ static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, struct ras_err_data *err_data, struct amdgpu_iv_entry *entry) { - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->umc.funcs->query_ras_error_count) - adev->umc.funcs->query_ras_error_count(adev, err_data); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return AMDGPU_RAS_SUCCESS; + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, err_data); + + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + DRM_WARN("Failed to alloc memory for umc error address record!\n"); + /* umc query_ras_error_address is also responsible for clearing * error status */ - if (adev->umc.funcs->query_ras_error_address) - adev->umc.funcs->query_ras_error_address(adev, err_data); + adev->umc.funcs->query_ras_error_address(adev, err_data); + } + + /* only uncorrectable error needs gpu reset */ + if (err_data->ue_count) { + if (err_data->err_addr_cnt && + amdgpu_ras_add_bad_pages(adev, err_data->err_addr, + err_data->err_addr_cnt)) + DRM_WARN("Failed to add ras bad page!\n"); - /* only uncorrectable error needs gpu reset */ - if (err_data->ue_count) - amdgpu_ras_reset_gpu(adev, 0); + amdgpu_ras_reset_gpu(adev, 0); } + kfree(err_data->err_addr); return AMDGPU_RAS_SUCCESS; } |