diff options
author | Dave Airlie <airlied@redhat.com> | 2022-08-26 10:01:44 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-08-26 10:03:43 +1000 |
commit | 2c2d7a67defa198a8b8148dbaddc9e5554efebc8 (patch) | |
tree | 2906871b6c5426ea9e4b845f779cc17d3be30491 /drivers/gpu/drm/i915/i915_gpu_error.c | |
parent | 1c23f9e627a7b412978b4e852793c5e3c3efc555 (diff) | |
parent | 5ece208ab05e4042c80ed1e6fe6d7ce236eee89b (diff) | |
download | linux-2c2d7a67defa198a8b8148dbaddc9e5554efebc8.tar.bz2 |
Merge tag 'drm-intel-gt-next-2022-08-24' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:
- Create gt/gtN/.defaults/ for per gt sysfs defaults
Create a gt/gtN/.defaults/ directory (similar to
engine/<engine-name>/.defaults/) to expose default parameter values for
each gt in sysfs. This allows userspace to restore default parameter values
after they have changed.
Driver Changes:
- Support GuC v69 in parallel to v70 (Daniele)
- Improve TLB invalidation to limit performance regression (Chris, Mauro)
- Expose per-gt RPS defaults in sysfs (Ashutosh)
- Suppress OOM warning for shmemfs object allocation failure (Chris, Nirmoy)
- Disable PCI resize on 32-bit machines (Nirmoy)
- Update DG2 to GuC v70.4.1 (John)
- Fix CCS data copying on DG2 during swapping (Matt A)
- Add DG2 performance tuning setting recommended by spec (Matt R)
- Add GuC <-> kernel time stamp translation information to error logs (John)
- Record GuC CTB info in error logs (John)
- Route semaphores to GuC for Gen12+ when enabled (Michal Wi, John)
- Improve resilency to bug #3575: Handle reset timeouts under unrelated kernel hangs (Chris, Ashutosh)
- Avoid system freeze by removing shared locking on freeing objects (Chris, Nirmoy)
- Demote GuC error "No response for request" into debug when expected (Zhanjun)
- Fix GuC capture size warning and bump the size (John)
- Use streaming loads to speed up dumping the GuC log (Chris, John)
- Don't abort on CTB_UNUSED status from GuC (John)
- Don't send spurious policy update for GuC child contexts (Daniele)
- Don't leak the CCS state (Matt A)
- Prefer drm_err over pr_err (John)
- Eliminate unused calc_ctrl_surf_instr_size (Matt A)
- Add dedicated function for non-ctx register tuning settings (Matt R)
- Style and typo fixes, documentation improvements (Jason Wang, Mauro)
- Selftest improvements (Matt B, Rahul, John)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YwYTCjA/Rhpd1n4A@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.c | 67 |
1 files changed, 60 insertions, 7 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 32e92651ef7c..543ba63f958e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -671,6 +671,18 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m, pdev->subsystem_device); } +static void err_print_guc_ctb(struct drm_i915_error_state_buf *m, + const char *name, + const struct intel_ctb_coredump *ctb) +{ + if (!ctb->size) + return; + + err_printf(m, "GuC %s CTB: raw: 0x%08X, 0x%08X/%08X, cached: 0x%08X/%08X, desc = 0x%08X, buf = 0x%08X x 0x%08X\n", + name, ctb->raw_status, ctb->raw_head, ctb->raw_tail, + ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size); +} + static void err_print_uc(struct drm_i915_error_state_buf *m, const struct intel_uc_coredump *error_uc) { @@ -678,7 +690,12 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); - intel_gpu_error_print_vma(m, NULL, error_uc->guc_log); + err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp); + intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log); + err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence); + err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0); + err_print_guc_ctb(m, "Recv", error_uc->guc.ctb + 1); + intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_ctb); } static void err_free_sgl(struct scatterlist *sgl) @@ -720,6 +737,8 @@ static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m, int i; err_printf(m, "GT awake: %s\n", str_yes_no(gt->awake)); + err_printf(m, "CS timestamp frequency: %u Hz, %d ns\n", + gt->clock_frequency, gt->clock_period_ns); err_printf(m, "EIR: 0x%08x\n", gt->eir); err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er); @@ -851,7 +870,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (error->gt) { bool print_guc_capture = false; - if (error->gt->uc && error->gt->uc->is_guc_capture) + if (error->gt->uc && error->gt->uc->guc.is_guc_capture) print_guc_capture = true; err_print_gt_display(m, error->gt); @@ -1006,7 +1025,8 @@ static void cleanup_uc(struct intel_uc_coredump *uc) { kfree(uc->guc_fw.path); kfree(uc->huc_fw.path); - i915_vma_coredump_free(uc->guc_log); + i915_vma_coredump_free(uc->guc.vma_log); + i915_vma_coredump_free(uc->guc.vma_ctb); kfree(uc); } @@ -1655,6 +1675,23 @@ gt_record_engines(struct intel_gt_coredump *gt, } } +static void gt_record_guc_ctb(struct intel_ctb_coredump *saved, + const struct intel_guc_ct_buffer *ctb, + const void *blob_ptr, struct intel_guc *guc) +{ + if (!ctb || !ctb->desc) + return; + + saved->raw_status = ctb->desc->status; + saved->raw_head = ctb->desc->head; + saved->raw_tail = ctb->desc->tail; + saved->head = ctb->head; + saved->tail = ctb->tail; + saved->size = ctb->size; + saved->desc_offset = ((void *)ctb->desc) - blob_ptr; + saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr; +} + static struct intel_uc_coredump * gt_record_uc(struct intel_gt_coredump *gt, struct i915_vma_compress *compress) @@ -1675,8 +1712,22 @@ gt_record_uc(struct intel_gt_coredump *gt, */ error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL); error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL); - error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma, - "GuC log buffer", compress); + + /* + * Save the GuC log and include a timestamp reference for converting the + * log times to system times (in conjunction with the error->boottime and + * gt->clock_frequency fields saved elsewhere). + */ + error_uc->guc.timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP); + error_uc->guc.vma_log = create_vma_coredump(gt->_gt, uc->guc.log.vma, + "GuC log buffer", compress); + error_uc->guc.vma_ctb = create_vma_coredump(gt->_gt, uc->guc.ct.vma, + "GuC CT buffer", compress); + error_uc->guc.last_fence = uc->guc.ct.requests.last_fence; + gt_record_guc_ctb(error_uc->guc.ctb + 0, &uc->guc.ct.ctbs.send, + uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); + gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv, + uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); return error_uc; } @@ -1833,6 +1884,8 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt) static void gt_record_info(struct intel_gt_coredump *gt) { memcpy(>->info, >->_gt->info, sizeof(struct intel_gt_info)); + gt->clock_frequency = gt->_gt->clock_frequency; + gt->clock_period_ns = gt->_gt->clock_period_ns; } /* @@ -2027,9 +2080,9 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du error->gt->uc = gt_record_uc(error->gt, compress); if (error->gt->uc) { if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE) - error->gt->uc->is_guc_capture = true; + error->gt->uc->guc.is_guc_capture = true; else - GEM_BUG_ON(error->gt->uc->is_guc_capture); + GEM_BUG_ON(error->gt->uc->guc.is_guc_capture); } } |