summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2019-10-26 05:56:57 +1000
committerDave Airlie <airlied@redhat.com>2019-10-26 05:56:57 +1000
commit3275a71e76fac5bc276f0d60e027b18c2e8d7a5b (patch)
treef275ab1c98be91f5e0fda869819e09c05d0918ab /drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
parent2e79e22e092acd55da0b2db066e4826d7d152c41 (diff)
parent1cd4d9eead73c004d08a58536dc726bd172eaaec (diff)
downloadlinux-3275a71e76fac5bc276f0d60e027b18c2e8d7a5b.tar.bz2
Merge tag 'drm-next-5.5-2019-10-09' of git://people.freedesktop.org/~agd5f/linux into drm-next
drm-next-5.5-2019-10-09: amdgpu: - Additional RAS enablement for vega20 - RAS page retirement and bad page storage in EEPROM - No GPU reset with unrecoverable RAS errors - Reserve vram for page tables rather than trying to evict - Fix issues with GPU reset and xgmi hives - DC i2c over aux fixes - Direct submission for clears, PTE/PDE updates - Improvements to help support recoverable GPU page faults - Silence harmless SAD block messages - Clean up code for creating a bo at a fixed location - Initial DC HDCP support - Lots of documentation fixes - GPU reset for renoir - Add IH clockgating support for soc15 asics - Powerplay improvements - DC MST cleanups - Add support for MSI-X - Misc cleanups and bug fixes amdkfd: - Query KFD device info by asic type rather than pci ids - Add navi14 support - Add renoir support - Add navi12 support - gfx10 trap handler improvements - pasid cleanups - Check against device cgroup ttm: - Return -EBUSY with pipelining with no_gpu_wait radeon: - Silence harmless SAD block messages device_cgroup: - Export devcgroup_check_permission Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20191010041713.3412-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c200
1 files changed, 109 insertions, 91 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 8a32b5c93778..20af0a17d00b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -100,7 +100,101 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
return ret;
}
-static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control);
+
+
+static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
+{
+ int i;
+ uint32_t tbl_sum = 0;
+
+ /* Header checksum, skip checksum field in the calculation */
+ for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
+ tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
+
+ return tbl_sum;
+}
+
+static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records,
+ int num)
+{
+ int i, j;
+ uint32_t tbl_sum = 0;
+
+ /* Records checksum */
+ for (i = 0; i < num; i++) {
+ struct eeprom_table_record *record = &records[i];
+
+ for (j = 0; j < sizeof(*record); j++) {
+ tbl_sum += *(((unsigned char *)record) + j);
+ }
+ }
+
+ return tbl_sum;
+}
+
+static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, int num)
+{
+ return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
+}
+
+/* Checksum = 256 -((sum of all table entries) mod 256) */
+static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, int num,
+ uint32_t old_hdr_byte_sum)
+{
+ /*
+ * This will update the table sum with new records.
+ *
+ * TODO: What happens when the EEPROM table is to be wrapped around
+ * and old records from start will get overridden.
+ */
+
+ /* need to recalculate updated header byte sum */
+ control->tbl_byte_sum -= old_hdr_byte_sum;
+ control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
+
+ control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
+}
+
+/* table sum mod 256 + checksum must equals 256 */
+static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, int num)
+{
+ control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
+
+ if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
+ DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
+ return false;
+ }
+
+ return true;
+}
+
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
+{
+ unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ int ret = 0;
+
+ mutex_lock(&control->tbl_mutex);
+
+ hdr->header = EEPROM_TABLE_HDR_VAL;
+ hdr->version = EEPROM_TABLE_VER;
+ hdr->first_rec_offset = EEPROM_RECORD_START;
+ hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
+
+ control->tbl_byte_sum = 0;
+ __update_tbl_checksum(control, NULL, 0, 0);
+ control->next_addr = EEPROM_RECORD_START;
+
+ ret = __update_table_header(control, buff);
+
+ mutex_unlock(&control->tbl_mutex);
+
+ return ret;
+
+}
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
{
@@ -143,25 +237,18 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
if (hdr->header == EEPROM_TABLE_HDR_VAL) {
control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) /
EEPROM_TABLE_RECORD_SIZE;
+ control->tbl_byte_sum = __calc_hdr_byte_sum(control);
+ control->next_addr = EEPROM_RECORD_START;
+
DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
control->num_recs);
} else {
DRM_INFO("Creating new EEPROM table");
- hdr->header = EEPROM_TABLE_HDR_VAL;
- hdr->version = EEPROM_TABLE_VER;
- hdr->first_rec_offset = EEPROM_RECORD_START;
- hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
-
- adev->psp.ras.ras->eeprom_control.tbl_byte_sum =
- __calc_hdr_byte_sum(&adev->psp.ras.ras->eeprom_control);
- ret = __update_table_header(control, buff);
+ ret = amdgpu_ras_eeprom_reset_table(control);
}
- /* Start inserting records from here */
- adev->psp.ras.ras->eeprom_control.next_addr = EEPROM_RECORD_START;
-
return ret == 1 ? 0 : -EIO;
}
@@ -226,8 +313,8 @@ static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *co
record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
i += 6;
- buff[i++] = record->mem_channel;
- buff[i++] = record->mcumc_id;
+ record->mem_channel = buff[i++];
+ record->mcumc_id = buff[i++];
memcpy(&tmp, buff + i, 6);
record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
@@ -266,84 +353,15 @@ static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
return curr_address;
}
-
-static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
-{
- int i;
- uint32_t tbl_sum = 0;
-
- /* Header checksum, skip checksum field in the calculation */
- for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
- tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
-
- return tbl_sum;
-}
-
-static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records,
- int num)
-{
- int i, j;
- uint32_t tbl_sum = 0;
-
- /* Records checksum */
- for (i = 0; i < num; i++) {
- struct eeprom_table_record *record = &records[i];
-
- for (j = 0; j < sizeof(*record); j++) {
- tbl_sum += *(((unsigned char *)record) + j);
- }
- }
-
- return tbl_sum;
-}
-
-static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records, int num)
-{
- return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
-}
-
-/* Checksum = 256 -((sum of all table entries) mod 256) */
-static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records, int num,
- uint32_t old_hdr_byte_sum)
-{
- /*
- * This will update the table sum with new records.
- *
- * TODO: What happens when the EEPROM table is to be wrapped around
- * and old records from start will get overridden.
- */
-
- /* need to recalculate updated header byte sum */
- control->tbl_byte_sum -= old_hdr_byte_sum;
- control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
-
- control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
-}
-
-/* table sum mod 256 + checksum must equals 256 */
-static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records, int num)
-{
- control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
-
- if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
- DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
- return false;
- }
-
- return true;
-}
-
int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *records,
bool write,
int num)
{
int i, ret = 0;
- struct i2c_msg *msgs;
- unsigned char *buffs;
+ struct i2c_msg *msgs, *msg;
+ unsigned char *buffs, *buff;
+ struct eeprom_table_record *record;
struct amdgpu_device *adev = to_amdgpu_device(control);
if (adev->asic_type != CHIP_VEGA20)
@@ -373,9 +391,9 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
* 256b
*/
for (i = 0; i < num; i++) {
- unsigned char *buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
- struct eeprom_table_record *record = &records[i];
- struct i2c_msg *msg = &msgs[i];
+ buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+ record = &records[i];
+ msg = &msgs[i];
control->next_addr = __correct_eeprom_dest_address(control->next_addr);
@@ -415,8 +433,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
if (!write) {
for (i = 0; i < num; i++) {
- unsigned char *buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
- struct eeprom_table_record *record = &records[i];
+ buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+ record = &records[i];
__decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
}