diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.c | 195 |
1 files changed, 115 insertions, 80 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 653fb69e7ecb..944059322daa 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -30,6 +30,8 @@ #include <generated/utsrelease.h> #include <linux/stop_machine.h> #include <linux/zlib.h> +#include <drm/drm_print.h> + #include "i915_drv.h" static const char *engine_str(int engine) @@ -175,6 +177,21 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e, #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__) #define err_puts(e, s) i915_error_puts(e, s) +static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf) +{ + i915_error_vprintf(p->arg, vaf->fmt, *vaf->va); +} + +static inline struct drm_printer +i915_error_printer(struct drm_i915_error_state_buf *e) +{ + struct drm_printer p = { + .printfn = __i915_printfn_error, + .arg = e, + }; + return p; +} + #ifdef CONFIG_DRM_I915_COMPRESS_ERROR struct compress { @@ -399,6 +416,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, int n; err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); + err_printf(m, " IDLE?: %s\n", yesno(ee->idle)); err_printf(m, " START: 0x%08x\n", ee->start); err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head); err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n", @@ -547,34 +565,17 @@ static void print_error_obj(struct drm_i915_error_state_buf *m, static void err_print_capabilities(struct drm_i915_error_state_buf *m, const struct intel_device_info *info) { -#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x)) - DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); -#undef PRINT_FLAG -} + struct drm_printer p = i915_error_printer(m); -static __always_inline void err_print_param(struct drm_i915_error_state_buf *m, - const char *name, - const char *type, - const void *x) -{ - if (!__builtin_strcmp(type, "bool")) - err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); - else if (!__builtin_strcmp(type, "int")) - err_printf(m, "i915.%s=%d\n", name, *(const int *)x); - else if (!__builtin_strcmp(type, "unsigned int")) - err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); - else if (!__builtin_strcmp(type, "char *")) - err_printf(m, "i915.%s=%s\n", name, *(const char **)x); - else - BUILD_BUG(); + intel_device_info_dump_flags(info, &p); } static void err_print_params(struct drm_i915_error_state_buf *m, - const struct i915_params *p) + const struct i915_params *params) { -#define PRINT(T, x, ...) err_print_param(m, #x, #T, &p->x); - I915_PARAMS_FOR_EACH(PRINT); -#undef PRINT + struct drm_printer p = i915_error_printer(m); + + i915_params_dump(params, &p); } static void err_print_pciid(struct drm_i915_error_state_buf *m, @@ -589,6 +590,21 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m, pdev->subsystem_device); } +static void err_print_uc(struct drm_i915_error_state_buf *m, + const struct i915_error_uc *error_uc) +{ + struct drm_printer p = i915_error_printer(m); + const struct i915_gpu_state *error = + container_of(error_uc, typeof(*error), uc); + + if (!error->device_info.has_guc) + return; + + intel_uc_fw_dump(&error_uc->guc_fw, &p); + intel_uc_fw_dump(&error_uc->huc_fw, &p); + print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log); +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, const struct i915_gpu_state *error) { @@ -759,11 +775,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, dev_priv->engine[i], "WA batchbuffer", ee->wa_batchbuffer); - } - - print_error_obj(m, NULL, "Semaphores", error->semaphore); - print_error_obj(m, NULL, "GuC log buffer", error->guc_log); + print_error_obj(m, dev_priv->engine[i], + "NULL context", ee->default_state); + } if (error->overlay) intel_overlay_print_error_state(m, error->overlay); @@ -773,6 +788,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_print_capabilities(m, &error->device_info); err_print_params(m, &error->params); + err_print_uc(m, &error->uc); if (m->bytes == 0 && m->err) return m->err; @@ -831,6 +847,22 @@ static __always_inline void free_param(const char *type, void *x) kfree(*(void **)x); } +static void cleanup_params(struct i915_gpu_state *error) +{ +#define FREE(T, x, ...) free_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(FREE); +#undef FREE +} + +static void cleanup_uc_state(struct i915_gpu_state *error) +{ + struct i915_error_uc *error_uc = &error->uc; + + kfree(error_uc->guc_fw.path); + kfree(error_uc->huc_fw.path); + i915_error_object_free(error_uc->guc_log); +} + void __i915_gpu_state_free(struct kref *error_ref) { struct i915_gpu_state *error = @@ -856,9 +888,6 @@ void __i915_gpu_state_free(struct kref *error_ref) kfree(ee->waiters); } - i915_error_object_free(error->semaphore); - i915_error_object_free(error->guc_log); - for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) kfree(error->active_bo[i]); kfree(error->pinned_bo); @@ -866,9 +895,8 @@ void __i915_gpu_state_free(struct kref *error_ref) kfree(error->overlay); kfree(error->display); -#define FREE(T, x, ...) free_param(#T, &error->params.x); - I915_PARAMS_FOR_EACH(FREE); -#undef FREE + cleanup_params(error); + cleanup_uc_state(error); kfree(error); } @@ -912,7 +940,7 @@ i915_error_object_create(struct drm_i915_private *i915, ggtt->base.insert_page(&ggtt->base, dma, slot, I915_CACHE_NONE, 0); - s = io_mapping_map_atomic_wc(&ggtt->mappable, slot); + s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); io_mapping_unmap_atomic(s); @@ -1071,34 +1099,6 @@ gen8_engine_sync_index(struct intel_engine_cs *engine, return idx; } -static void gen8_record_semaphore_state(struct i915_gpu_state *error, - struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_engine_cs *to; - enum intel_engine_id id; - - if (!error->semaphore) - return; - - for_each_engine(to, dev_priv, id) { - int idx; - u16 signal_offset; - u32 *tmp; - - if (engine == to) - continue; - - signal_offset = - (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; - tmp = error->semaphore->pages[0]; - idx = gen8_engine_sync_index(engine, to); - - ee->semaphore_mboxes[idx] = tmp[signal_offset]; - } -} - static void gen6_record_semaphore_state(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1172,11 +1172,12 @@ static void error_record_engine_registers(struct i915_gpu_state *error, if (INTEL_GEN(dev_priv) >= 6) { ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); - ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); - if (INTEL_GEN(dev_priv) >= 8) - gen8_record_semaphore_state(error, engine, ee); - else + if (INTEL_GEN(dev_priv) >= 8) { + ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG); + } else { gen6_record_semaphore_state(engine, ee); + ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); + } } if (INTEL_GEN(dev_priv) >= 4) { @@ -1239,6 +1240,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error, ee->hws = I915_READ(mmio); } + ee->idle = intel_engine_is_idle(engine); ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->hangcheck_action = engine->hangcheck.action; ee->hangcheck_stalled = engine->hangcheck.stalled; @@ -1400,15 +1402,30 @@ static void request_record_user_bo(struct drm_i915_gem_request *request, ee->user_bo_count = count; } +static struct drm_i915_error_object * +capture_object(struct drm_i915_private *dev_priv, + struct drm_i915_gem_object *obj) +{ + if (obj && i915_gem_object_has_pages(obj)) { + struct i915_vma fake = { + .node = { .start = U64_MAX, .size = obj->base.size }, + .size = obj->base.size, + .pages = obj->mm.pages, + .obj = obj, + }; + + return i915_error_object_create(dev_priv, &fake); + } else { + return NULL; + } +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; int i; - error->semaphore = - i915_error_object_create(dev_priv, dev_priv->semaphore); - for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = dev_priv->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; @@ -1474,6 +1491,9 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->wa_ctx = i915_error_object_create(dev_priv, engine->wa_ctx.vma); + + ee->default_state = + capture_object(dev_priv, engine->default_state); } } @@ -1559,15 +1579,25 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, error->pinned_bo = bo; } -static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_uc_state(struct i915_gpu_state *error) { - /* Capturing log buf contents won't be useful if logging was disabled */ - if (!dev_priv->guc.log.vma || (i915_modparams.guc_log_level < 0)) + struct drm_i915_private *i915 = error->i915; + struct i915_error_uc *error_uc = &error->uc; + + /* Capturing uC state won't be useful if there is no GuC */ + if (!error->device_info.has_guc) return; - error->guc_log = i915_error_object_create(dev_priv, - dev_priv->guc.log.vma); + error_uc->guc_fw = i915->guc.fw; + error_uc->huc_fw = i915->huc.fw; + + /* Non-default firmware paths will be specified by the modparam. + * As modparams are generally accesible from the userspace make + * explicit copies of the firmware paths. + */ + error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC); + error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC); + error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma); } /* Capture all registers which don't fit into another category. */ @@ -1695,6 +1725,14 @@ static __always_inline void dup_param(const char *type, void *x) *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); } +static void capture_params(struct i915_gpu_state *error) +{ + error->params = i915_modparams; +#define DUP(T, x, ...) dup_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(DUP); +#undef DUP +} + static int capture(void *data) { struct i915_gpu_state *error = data; @@ -1705,10 +1743,8 @@ static int capture(void *data) ktime_to_timeval(ktime_sub(ktime_get(), error->i915->gt.last_init_time)); - error->params = i915_modparams; -#define DUP(T, x, ...) dup_param(#T, &error->params.x); - I915_PARAMS_FOR_EACH(DUP); -#undef DUP + capture_params(error); + capture_uc_state(error); i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); @@ -1716,7 +1752,6 @@ static int capture(void *data) i915_gem_record_rings(error->i915, error); i915_capture_active_buffers(error->i915, error); i915_capture_pinned_buffers(error->i915, error); - i915_gem_capture_guc_log_buffer(error->i915, error); error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); |