diff options
author | Dave Airlie <airlied@redhat.com> | 2020-09-08 16:40:13 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2020-09-08 16:40:13 +1000 |
commit | 0c8d22fcae2f9590a07b000e1724f665820b77f7 (patch) | |
tree | 5a2405fe298358d861a58dc933184ee6d3415eb4 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | |
parent | ce5c207c6b8dd9cdeaeeb2345b8a69335c0d98bf (diff) | |
parent | 11bc98bd71fe2e0cb572988519e51bca9d58a18a (diff) | |
download | linux-0c8d22fcae2f9590a07b000e1724f665820b77f7.tar.bz2 |
Merge tag 'amd-drm-next-5.10-2020-09-03' of git://people.freedesktop.org/~agd5f/linux into drm-next
amd-drm-next-5.10-2020-09-03:
amdgpu:
- RAS fixes
- Sienna Cichlid updates
- Navy Flounder updates
- DCE6 (SI) support in DC
- Enable plane rotation
- Rework pre-OS vram reservation handling during driver init
- Add standard interface to dump GPU metrics table from SMU
- Rework tiling and tmz state handling in atomic commits
- Pstate fixes
- Add voltage and power hwmon interfaces for renoir
- SW CTF fixes
- S/G display fix for Raven
- Print client strings for vmfaults for vega and newer
- Manual fan control fixes
- Display updates
- Reorg power management directory structure
- Misc bug fixes
- Misc code cleanups
amdkfd:
- Topology fixes
- Add SMI events for thermal throttling and GPU resets
radeon:
- switch from pci_* to dma_* for dma allocations
- PLL fix
Scheduler:
- Clean up priority levels
UAPI:
- amdgpu INFO IOCTL query update for TMZ state
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6049
- amdkfd SMI event interface updates
https://github.com/RadeonOpenCompute/rocm_smi_lib/tree/therm_thrott
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200903222921.4152-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 22 |
1 files changed, 19 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index b2667342cf67..6b8d7bb83bb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -31,6 +31,10 @@ #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" +#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) +#define AMDGPU_RAS_FLAG_INIT_NEED_RESET (0x1 << 1) +#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV (0x1 << 2) + enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__SDMA, @@ -336,6 +340,12 @@ struct amdgpu_ras { struct amdgpu_ras_eeprom_control eeprom_control; bool error_query_ready; + + /* bad page count threshold */ + uint32_t bad_page_cnt_threshold; + + /* disable ras error count harvest in recovery */ + bool disable_ras_err_cnt_harvest; }; struct ras_fs_data { @@ -490,6 +500,8 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev); unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); +bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev); + /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, struct eeprom_table_record *bps, int pages); @@ -500,10 +512,14 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - /* save bad page to eeprom before gpu reset, - * i2c may be unstable in gpu reset + /* + * Save bad page to eeprom before gpu reset, i2c may be unstable + * in gpu reset. + * + * Also, exclude the case when ras recovery issuer is + * eeprom page write itself. */ - if (in_task()) + if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task()) amdgpu_ras_reserve_bad_pages(adev); if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) |