summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHawking Zhang <Hawking.Zhang@amd.com>2022-10-14 15:17:43 +0800
committerAlex Deucher <alexander.deucher@amd.com>2022-10-17 17:41:21 -0400
commit6c0ca748205dc815505c6de79ecf565953390b66 (patch)
treecf1f34297412197fa00576e14ded7e8ca7c50f51
parent027bf0cee89a27325a9a4f2240c21dd5fb81e4fa (diff)
downloadlinux-6c0ca748205dc815505c6de79ecf565953390b66.tar.bz2
drm/amdgpu: move convert_error_address out of umc_ras
The RAS error address translation algorithm is common across dGPU and A + A platforms as long as the SOC integrates the same generation of UMC IP. UMC RAS is managed by x86 MCA on the A + A platform, so umc_ras in the GPU driver is not initialized at all on the A + A platform. In such a case, any umc_ras callback implemented for the dGPU config shouldn't be invoked from an A + A specific callback. The change moves convert_error_address out of the dGPU umc_ras structure and shares it between the A + A and dGPU configs. Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Stanley Yang <Stanley.Yang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.h4
4 files changed, 18 insertions, 12 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a4b47e1bd111..21a47f2bb87b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -36,6 +36,7 @@
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "atom.h"
#include "amdgpu_reset.h"
+#include "umc_v6_7.h"
#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -2899,10 +2900,17 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
/*
* Translate UMC channel address to Physical address
*/
- if (adev->umc.ras &&
- adev->umc.ras->convert_ras_error_address)
- adev->umc.ras->convert_ras_error_address(adev,
- &err_data, m->addr, ch_inst, umc_inst);
+ switch (adev->ip_versions[UMC_HWIP][0]) {
+ case IP_VERSION(6, 7, 0):
+ umc_v6_7_convert_error_address(adev,
+ &err_data, m->addr, ch_inst, umc_inst);
+ break;
+ default:
+ dev_warn(adev->dev,
+ "UMC address to Physical address translation is not supported\n");
+ kfree(err_data.err_addr);
+ return NOTIFY_DONE;
+ }
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index e46439274f3a..3629d8f292ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -51,9 +51,6 @@ struct amdgpu_umc_ras {
struct amdgpu_ras_block_object ras_block;
void (*err_cnt_init)(struct amdgpu_device *adev);
bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
- void (*convert_ras_error_address)(struct amdgpu_device *adev,
- struct ras_err_data *err_data, uint64_t err_addr,
- uint32_t ch_inst, uint32_t umc_inst);
void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 5d5d031c9e7d..72fd963f178b 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -187,9 +187,9 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
}
}
-static void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
- struct ras_err_data *err_data, uint64_t err_addr,
- uint32_t ch_inst, uint32_t umc_inst)
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
{
uint32_t channel_index;
uint64_t soc_pa, retired_page, column;
@@ -553,5 +553,4 @@ struct amdgpu_umc_ras umc_v6_7_ras = {
.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
- .convert_ras_error_address = umc_v6_7_convert_error_address,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
index fe41ed2f5945..105245d5b6e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
@@ -71,5 +71,7 @@ extern const uint32_t
umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
extern const uint32_t
umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
-
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst);
#endif