summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2019-10-26 05:56:57 +1000
committerDave Airlie <airlied@redhat.com>2019-10-26 05:56:57 +1000
commit3275a71e76fac5bc276f0d60e027b18c2e8d7a5b (patch)
treef275ab1c98be91f5e0fda869819e09c05d0918ab /drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
parent2e79e22e092acd55da0b2db066e4826d7d152c41 (diff)
parent1cd4d9eead73c004d08a58536dc726bd172eaaec (diff)
downloadlinux-3275a71e76fac5bc276f0d60e027b18c2e8d7a5b.tar.bz2
Merge tag 'drm-next-5.5-2019-10-09' of git://people.freedesktop.org/~agd5f/linux into drm-next
drm-next-5.5-2019-10-09: amdgpu: - Additional RAS enablement for vega20 - RAS page retirement and bad page storage in EEPROM - No GPU reset with unrecoverable RAS errors - Reserve vram for page tables rather than trying to evict - Fix issues with GPU reset and xgmi hives - DC i2c over aux fixes - Direct submission for clears, PTE/PDE updates - Improvements to help support recoverable GPU page faults - Silence harmless SAD block messages - Clean up code for creating a bo at a fixed location - Initial DC HDCP support - Lots of documentation fixes - GPU reset for renoir - Add IH clockgating support for soc15 asics - Powerplay improvements - DC MST cleanups - Add support for MSI-X - Misc cleanups and bug fixes amdkfd: - Query KFD device info by asic type rather than pci ids - Add navi14 support - Add renoir support - Add navi12 support - gfx10 trap handler improvements - pasid cleanups - Check against device cgroup ttm: - Return -EBUSY with pipelining with no_gpu_wait radeon: - Silence harmless SAD block messages device_cgroup: - Export devcgroup_check_permission Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191010041713.3412-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c50
1 files changed, 50 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 65aae75f80fd..00371713c671 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_smu.h"
+#include "amdgpu_ras.h"
#include "df/df_3_6_offset.h"
static DEFINE_MUTEX(xgmi_mutex);
@@ -437,3 +438,52 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
mutex_unlock(&hive->hive_lock);
}
}
+
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "xgmi_wafl_err_count",
+ .debugfs_name = "xgmi_wafl_err_inject",
+ };
+
+ if (!adev->gmc.xgmi.supported ||
+ adev->gmc.xgmi.num_physical_nodes == 0)
+ return 0;
+
+ if (!adev->gmc.xgmi.ras_if) {
+ adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->gmc.xgmi.ras_if)
+ return -ENOMEM;
+ adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+ adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gmc.xgmi.ras_if->sub_block_index = 0;
+ strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl");
+ }
+ ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
+ kfree(adev->gmc.xgmi.ras_if);
+ adev->gmc.xgmi.ras_if = NULL;
+ }
+
+ return r;
+}
+
+void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
+ adev->gmc.xgmi.ras_if) {
+ struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}