diff options
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c | 16 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h | 16 |
3 files changed, 34 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index 830080ff90d8..b4789dfc2bb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -994,7 +994,7 @@ static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, return ret; } -static const struct soc15_reg_entry gfx_v9_4_rdrsp_status_regs = +static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 }; static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) @@ -1007,15 +1007,21 @@ static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_rdrsp_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_rdrsp_status_regs.instance; + for (i = 0; i < gfx_v9_4_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_ea_err_status_regs.instance; j++) { gfx_v9_4_select_se_sh(adev, i, 0, j); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( - gfx_v9_4_rdrsp_status_regs)); - if (reg_value) + gfx_v9_4_ea_err_status_regs)); + if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { + /* SDP read/write error/parity error in FUE_IS_FATAL mode + * can cause system fatal error in Arcturus. 
Harvest the error + * status before GPU reset */ dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n", j, reg_value); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 1a92177c522f..47c8dd9d1c78 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -1645,9 +1645,15 @@ static void mmhub_v9_4_query_ras_error_status(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_err_status_regs); i++) { reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_err_status_regs[i])); - if (reg_value) + if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { + /* SDP read/write error/parity error in FUE_IS_FATAL mode + * can cause system fatal error in Arcturus. Harvest the error + * status before GPU reset */ dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n", i, reg_value); + } } } diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h index 4089cfa081f5..849450caca15 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h @@ -617,6 +617,22 @@ #define GCEA_EDC_CNT3__MAM_A3MEM_SEC_COUNT_MASK 0x30000000L #define GCEA_EDC_CNT3__MAM_A3MEM_DED_COUNT_MASK 0xC0000000L +//GCEA_ERR_STATUS +#define GCEA_ERR_STATUS__SDP_RDRSP_STATUS__SHIFT 0x0 +#define GCEA_ERR_STATUS__SDP_WRRSP_STATUS__SHIFT 0x4 +#define GCEA_ERR_STATUS__SDP_RDRSP_DATASTATUS__SHIFT 0x8 +#define GCEA_ERR_STATUS__SDP_RDRSP_DATAPARITY_ERROR__SHIFT 0xa +#define GCEA_ERR_STATUS__CLEAR_ERROR_STATUS__SHIFT 0xb +#define GCEA_ERR_STATUS__BUSY_ON_ERROR__SHIFT 0xc +#define GCEA_ERR_STATUS__FUE_FLAG__SHIFT 0xd +#define GCEA_ERR_STATUS__SDP_RDRSP_STATUS_MASK 0x0000000FL 
+#define GCEA_ERR_STATUS__SDP_WRRSP_STATUS_MASK 0x000000F0L +#define GCEA_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK 0x00000300L +#define GCEA_ERR_STATUS__SDP_RDRSP_DATAPARITY_ERROR_MASK 0x00000400L +#define GCEA_ERR_STATUS__CLEAR_ERROR_STATUS_MASK 0x00000800L +#define GCEA_ERR_STATUS__BUSY_ON_ERROR_MASK 0x00001000L +#define GCEA_ERR_STATUS__FUE_FLAG_MASK 0x00002000L + // addressBlock: gc_gfxudec //GRBM_GFX_INDEX #define GRBM_GFX_INDEX__INSTANCE_INDEX__SHIFT 0x0 |