From 4a15c75c42460252a63d30f03b4766a52945fb47 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Mon, 3 Dec 2018 13:33:41 +0000
Subject: drm/i915: Introduce per-engine workarounds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We stopped re-applying the GT workarounds after engine reset since
commit 59b449d5c82a ("drm/i915: Split out functions for different
kinds of workarounds").

The issue with this is that some of the GT workarounds live in MMIO
space which gets lost during engine resets. So far the registers in
the 0x2xxx and 0xbxxx address ranges have been identified as affected.

This loss of the applied workarounds has obvious negative effects and
can even lead to hard system hangs (see the linked Bugzilla).

Rather than just restoring the re-application, since we have also
observed that it is not safe to simply re-write all GT workarounds
after engine resets (the GPU might be live and weird hardware states
can happen), we introduce a new class of per-engine workarounds and
move only the affected GT workarounds over.

Using the framework introduced in the previous patch, we therefore
re-apply, after an engine reset, only the workarounds living in the
affected MMIO address ranges.

v2:
 * Move Wa_1406609255:icl to engine workarounds as well.
 * Rename API. (Chris Wilson)
 * Drop redundant IS_KABYLAKE. (Chris Wilson)
 * Re-order engine wa init so latest platforms are first. (Rodrigo Vivi)

Signed-off-by: Tvrtko Ursulin
Bugzilla: https://bugzilla.freedesktop.org/show_bug.cgi?id=107945
Fixes: 59b449d5c82a ("drm/i915: Split out functions for different kinds of workarounds")
Cc: Mika Kuoppala
Cc: Ville Syrjälä
Cc: Chris Wilson
Cc: Jani Nikula
Cc: Joonas Lahtinen
Cc: Rodrigo Vivi
Cc: intel-gfx@lists.freedesktop.org
Acked-by: Rodrigo Vivi
Reviewed-by: Chris Wilson
Link: https://patchwork.freedesktop.org/patch/msgid/20181203133341.10258-1-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu/drm/i915/intel_engine_cs.c')

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 759c0fd58f8c..ef5d202e9d45 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -723,6 +723,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	__intel_context_unpin(i915->kernel_context, engine);
 
 	i915_timeline_fini(&engine->timeline);
+
+	intel_wa_list_free(&engine->wa_list);
 }
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
--
cgit v1.2.3
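For illustration only, not part of the series: the mechanism above amounts
to recording each workaround once as a (register, mask, value) tuple and
replaying the list after an engine reset. A minimal userspace sketch with
a simulated register file; all names and offsets here are made up:

/* Illustrative sketch only -- not the driver's code. */
#include <stdint.h>
#include <stdio.h>

struct wa {
	uint32_t offset;	/* e.g. registers in the 0x2xxx/0xbxxx ranges */
	uint32_t mask;		/* bits owned by the workaround */
	uint32_t val;		/* value for those bits */
};

static uint32_t mmio[0x10000 / 4];	/* simulated register file */

static void wa_list_apply(const struct wa *wa, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++, wa++)	/* read-modify-write */
		mmio[wa->offset / 4] =
			(mmio[wa->offset / 4] & ~wa->mask) | wa->val;
}

int main(void)
{
	static const struct wa engine_was[] = {	/* made-up entries */
		{ 0x20e4, 0x0004, 0x0004 },
		{ 0xb11c, 0x0100, 0x0000 },
	};

	wa_list_apply(engine_was, 2);	/* initial application */
	mmio[0x20e4 / 4] = 0;		/* engine reset clobbers the state */
	wa_list_apply(engine_was, 2);	/* replay after reset */
	printf("0x20e4 = 0x%08x\n", mmio[0x20e4 / 4]);
	return 0;
}

From 69bcdecf1af5600dabbab890e3d8d9714638f91d Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Mon, 3 Dec 2018 12:50:12 +0000
Subject: drm/i915: Move register white-listing to the common workaround
 framework

Instead of having a separate list of white-listed registers, we can
trivially move this to the common workarounds framework. This brings
us one step closer to the goal of driving all workaround classes
using the same code.

v2:
 * Use GEM_DEBUG_WARN_ON for the sanity check. (Chris Wilson)

v3:
 * API rename.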
(Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20181203125014.3219-6-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/intel_lrc.c | 5 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + drivers/gpu/drm/i915/intel_workarounds.c | 83 +++++++++------------- drivers/gpu/drm/i915/intel_workarounds.h | 3 +- drivers/gpu/drm/i915/selftests/intel_workarounds.c | 40 +++++------ 6 files changed, 60 insertions(+), 73 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ef5d202e9d45..496462d77ebc 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -725,6 +725,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) i915_timeline_fini(&engine->timeline); intel_wa_list_free(&engine->wa_list); + intel_wa_list_free(&engine->whitelist); } u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a5d6663ceafd..92e1f08e1483 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1652,7 +1652,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - intel_whitelist_workarounds_apply(engine); + intel_engine_apply_whitelist(engine); /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. It should already be @@ -1675,7 +1675,7 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - intel_whitelist_workarounds_apply(engine); + intel_engine_apply_whitelist(engine); return 0; } @@ -2307,6 +2307,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine) ret); } + intel_engine_init_whitelist(engine); intel_engine_init_workarounds(engine); return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3abac391a739..7b110e221749 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -437,6 +437,7 @@ struct intel_engine_cs { struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; struct i915_wa_list wa_list; + struct i915_wa_list whitelist; struct i915_vma *scratch; u32 irq_keep_mask; /* always keep these interrupts */ diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index c53d4388930b..d920a6256c83 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -1011,29 +1011,20 @@ bool intel_gt_verify_workarounds(struct drm_i915_private *dev_priv, return wa_list_verify(dev_priv, &dev_priv->gt_wa_list, from); } -struct whitelist { - i915_reg_t reg[RING_MAX_NONPRIV_SLOTS]; - unsigned int count; - u32 nopid; -}; - -static void whitelist_reg(struct whitelist *w, i915_reg_t reg) +static void +whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) { - if (GEM_DEBUG_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS)) - return; - - w->reg[w->count++] = reg; -} + struct i915_wa wa = { + .reg = reg + }; -static void bdw_whitelist_build(struct whitelist *w) -{ -} + if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) + return; -static void chv_whitelist_build(struct whitelist *w) -{ + wal_add(wal, &wa); } -static void gen9_whitelist_build(struct whitelist *w) +static void 
gen9_whitelist_build(struct i915_wa_list *w) { /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ whitelist_reg(w, GEN9_CTX_PREEMPT_REG); @@ -1045,7 +1036,7 @@ static void gen9_whitelist_build(struct whitelist *w) whitelist_reg(w, GEN8_HDC_CHICKEN1); } -static void skl_whitelist_build(struct whitelist *w) +static void skl_whitelist_build(struct i915_wa_list *w) { gen9_whitelist_build(w); @@ -1053,12 +1044,12 @@ static void skl_whitelist_build(struct whitelist *w) whitelist_reg(w, GEN8_L3SQCREG4); } -static void bxt_whitelist_build(struct whitelist *w) +static void bxt_whitelist_build(struct i915_wa_list *w) { gen9_whitelist_build(w); } -static void kbl_whitelist_build(struct whitelist *w) +static void kbl_whitelist_build(struct i915_wa_list *w) { gen9_whitelist_build(w); @@ -1066,7 +1057,7 @@ static void kbl_whitelist_build(struct whitelist *w) whitelist_reg(w, GEN8_L3SQCREG4); } -static void glk_whitelist_build(struct whitelist *w) +static void glk_whitelist_build(struct i915_wa_list *w) { gen9_whitelist_build(w); @@ -1074,18 +1065,18 @@ static void glk_whitelist_build(struct whitelist *w) whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); } -static void cfl_whitelist_build(struct whitelist *w) +static void cfl_whitelist_build(struct i915_wa_list *w) { gen9_whitelist_build(w); } -static void cnl_whitelist_build(struct whitelist *w) +static void cnl_whitelist_build(struct i915_wa_list *w) { /* WaEnablePreemptionGranularityControlByUMD:cnl */ whitelist_reg(w, GEN8_CS_CHICKEN1); } -static void icl_whitelist_build(struct whitelist *w) +static void icl_whitelist_build(struct i915_wa_list *w) { /* WaAllowUMDToModifyHalfSliceChicken7:icl */ whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); @@ -1094,22 +1085,21 @@ static void icl_whitelist_build(struct whitelist *w) whitelist_reg(w, GEN10_SAMPLER_MODE); } -static struct whitelist *whitelist_build(struct intel_engine_cs *engine, - struct whitelist *w) +void intel_engine_init_whitelist(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *w = &engine->whitelist; GEM_BUG_ON(engine->id != RCS); - w->count = 0; - w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base)); + wa_init_start(w, "whitelist"); if (INTEL_GEN(i915) < 8) - return NULL; + return; else if (IS_BROADWELL(i915)) - bdw_whitelist_build(w); + return; else if (IS_CHERRYVIEW(i915)) - chv_whitelist_build(w); + return; else if (IS_SKYLAKE(i915)) skl_whitelist_build(w); else if (IS_BROXTON(i915)) @@ -1127,37 +1117,30 @@ static struct whitelist *whitelist_build(struct intel_engine_cs *engine, else MISSING_CASE(INTEL_GEN(i915)); - return w; + wa_init_finish(w); } -static void whitelist_apply(struct intel_engine_cs *engine, - const struct whitelist *w) +void intel_engine_apply_whitelist(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + const struct i915_wa_list *wal = &engine->whitelist; const u32 base = engine->mmio_base; + struct i915_wa *wa; unsigned int i; - if (!w) + if (!wal->count) return; - intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - - for (i = 0; i < w->count; i++) - I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), - i915_mmio_reg_offset(w->reg[i])); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + I915_WRITE(RING_FORCE_TO_NONPRIV(base, i), + i915_mmio_reg_offset(wa->reg)); /* And clear the rest just in case of garbage */ for (; i < RING_MAX_NONPRIV_SLOTS; i++) - I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid); - - intel_uncore_forcewake_put(engine->i915, 
FORCEWAKE_ALL); -} + I915_WRITE(RING_FORCE_TO_NONPRIV(base, i), + i915_mmio_reg_offset(RING_NOPID(base))); -void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine) -{ - struct whitelist w; - - whitelist_apply(engine, whitelist_build(engine, &w)); + DRM_DEBUG_DRIVER("Applied %u %s workarounds\n", wal->count, wal->name); } static void rcs_engine_wa_init(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index 8822e6035f8d..3f99bfcb4a03 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -35,7 +35,8 @@ void intel_gt_apply_workarounds(struct drm_i915_private *dev_priv); bool intel_gt_verify_workarounds(struct drm_i915_private *dev_priv, const char *from); -void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine); +void intel_engine_init_whitelist(struct intel_engine_cs *engine); +void intel_engine_apply_whitelist(struct intel_engine_cs *engine); void intel_engine_init_workarounds(struct intel_engine_cs *engine); void intel_engine_apply_workarounds(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index d76a048c3954..67017d5175b8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -94,17 +94,23 @@ err_obj: return ERR_PTR(err); } -static u32 get_whitelist_reg(const struct whitelist *w, unsigned int i) +static u32 +get_whitelist_reg(const struct intel_engine_cs *engine, unsigned int i) { - return i < w->count ? i915_mmio_reg_offset(w->reg[i]) : w->nopid; + i915_reg_t reg = i < engine->whitelist.count ? + engine->whitelist.list[i].reg : + RING_NOPID(engine->mmio_base); + + return i915_mmio_reg_offset(reg); } -static void print_results(const struct whitelist *w, const u32 *results) +static void +print_results(const struct intel_engine_cs *engine, const u32 *results) { unsigned int i; for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { - u32 expected = get_whitelist_reg(w, i); + u32 expected = get_whitelist_reg(engine, i); u32 actual = results[i]; pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n", @@ -112,8 +118,7 @@ static void print_results(const struct whitelist *w, const u32 *results) } } -static int check_whitelist(const struct whitelist *w, - struct i915_gem_context *ctx, +static int check_whitelist(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *results; @@ -141,11 +146,11 @@ static int check_whitelist(const struct whitelist *w, } for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { - u32 expected = get_whitelist_reg(w, i); + u32 expected = get_whitelist_reg(engine, i); u32 actual = vaddr[i]; if (expected != actual) { - print_results(w, vaddr); + print_results(engine, vaddr); pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n", i, expected, actual); @@ -217,7 +222,6 @@ err: static int check_whitelist_across_reset(struct intel_engine_cs *engine, int (*reset)(struct intel_engine_cs *), - const struct whitelist *w, const char *name) { struct drm_i915_private *i915 = engine->i915; @@ -227,7 +231,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, int err; pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n", - w->count, name); + engine->whitelist.count, name); if (want_spin) { err = igt_spinner_init(&spin, i915); @@ -239,7 +243,7 @@ static int 
check_whitelist_across_reset(struct intel_engine_cs *engine, if (IS_ERR(ctx)) return PTR_ERR(ctx); - err = check_whitelist(w, ctx, engine); + err = check_whitelist(ctx, engine); if (err) { pr_err("Invalid whitelist *before* %s reset!\n", name); goto out; @@ -263,7 +267,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, goto out; } - err = check_whitelist(w, ctx, engine); + err = check_whitelist(ctx, engine); if (err) { pr_err("Whitelist not preserved in context across %s reset!\n", name); @@ -276,7 +280,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, if (IS_ERR(ctx)) return PTR_ERR(ctx); - err = check_whitelist(w, ctx, engine); + err = check_whitelist(ctx, engine); if (err) { pr_err("Invalid whitelist *after* %s reset in fresh context!\n", name); @@ -292,22 +296,18 @@ static int live_reset_whitelist(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine = i915->engine[RCS]; - struct whitelist w; int err = 0; /* If we reset the gpu, we should not lose the RING_NONPRIV */ - if (!engine) - return 0; - - if (!whitelist_build(engine, &w)) + if (!engine || engine->whitelist.count == 0) return 0; igt_global_reset_lock(i915); if (intel_has_reset_engine(i915)) { err = check_whitelist_across_reset(engine, - do_engine_reset, &w, + do_engine_reset, "engine"); if (err) goto out; @@ -315,7 +315,7 @@ static int live_reset_whitelist(void *arg) if (intel_has_gpu_reset(i915)) { err = check_whitelist_across_reset(engine, - do_device_reset, &w, + do_device_reset, "device"); if (err) goto out; -- cgit v1.2.3 From 452420d22d5b41256a0bb82402a797295e525da9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 3 Dec 2018 13:33:57 +0000 Subject: drm/i915: Fuse per-context workaround handling with the common framework Convert the per context workaround handling code to run against the newly introduced common workaround framework and fuse the two to use the existing smarter list add helper, the one which does the sorted insert and merges registers where possible. This completes migration of all four classes of workarounds onto the common framework. Existing macros are kept untouched for smaller code churn. v2: * Rename to list name ctx_wa_list and move from dev_priv to engine. v3: * API rename and parameters tweaking. 
(Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20181203133357.10341-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 12 +- drivers/gpu/drm/i915/i915_drv.h | 15 -- drivers/gpu/drm/i915/i915_gem_context.c | 6 +- drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + drivers/gpu/drm/i915/intel_workarounds.c | 333 ++++++++++++++----------------- drivers/gpu/drm/i915/intel_workarounds.h | 5 +- 9 files changed, 168 insertions(+), 209 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 129b9a6f8309..38dcee1ca062 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3375,13 +3375,15 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) static int i915_wa_registers(struct seq_file *m, void *unused) { - struct i915_workarounds *wa = &node_to_i915(m->private)->workarounds; - int i; + struct drm_i915_private *i915 = node_to_i915(m->private); + const struct i915_wa_list *wal = &i915->engine[RCS]->ctx_wa_list; + struct i915_wa *wa; + unsigned int i; - seq_printf(m, "Workarounds applied: %d\n", wa->count); - for (i = 0; i < wa->count; ++i) + seq_printf(m, "Workarounds applied: %u\n", wal->count); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n", - wa->reg[i].addr, wa->reg[i].value, wa->reg[i].mask); + i915_mmio_reg_offset(wa->reg), wa->val, wa->mask); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d725390d5a48..23a3dc6f3907 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1190,20 +1190,6 @@ struct i915_frontbuffer_tracking { unsigned flip_bits; }; -struct i915_wa_reg { - u32 addr; - u32 value; - /* bitmask representing WA bits */ - u32 mask; -}; - -#define I915_MAX_WA_REGS 16 - -struct i915_workarounds { - struct i915_wa_reg reg[I915_MAX_WA_REGS]; - u32 count; -}; - struct i915_virtual_gpu { bool active; u32 caps; @@ -1653,7 +1639,6 @@ struct drm_i915_private { int dpio_phy_iosf_port[I915_NUM_PHYS_VLV]; - struct i915_workarounds workarounds; struct i915_wa_list gt_wa_list; struct i915_frontbuffer_tracking fb_tracking; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index b97963db0287..371c07087095 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -535,16 +535,12 @@ static bool needs_preempt_context(struct drm_i915_private *i915) int i915_gem_contexts_init(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; - int ret; /* Reassure ourselves we are only called once */ GEM_BUG_ON(dev_priv->kernel_context); GEM_BUG_ON(dev_priv->preempt_context); - ret = intel_ctx_workarounds_init(dev_priv); - if (ret) - return ret; - + intel_engine_init_ctx_wa(dev_priv->engine[RCS]); init_contexts(dev_priv); /* lowest priority; idle task */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 496462d77ebc..6b427bc52f78 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -724,6 +724,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) i915_timeline_fini(&engine->timeline); + 
intel_wa_list_free(&engine->ctx_wa_list); intel_wa_list_free(&engine->wa_list); intel_wa_list_free(&engine->whitelist); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 92e1f08e1483..87227fd9ae5f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2087,7 +2087,7 @@ static int gen8_init_rcs_context(struct i915_request *rq) { int ret; - ret = intel_ctx_workarounds_emit(rq); + ret = intel_engine_emit_ctx_wa(rq); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 81b10d85b738..7f88df5bff09 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -656,7 +656,7 @@ static int intel_rcs_ctx_init(struct i915_request *rq) { int ret; - ret = intel_ctx_workarounds_emit(rq); + ret = intel_engine_emit_ctx_wa(rq); if (ret != 0) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 7b110e221749..927bb21a2b0b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -436,6 +436,7 @@ struct intel_engine_cs { struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; + struct i915_wa_list ctx_wa_list; struct i915_wa_list wa_list; struct i915_wa_list whitelist; struct i915_vma *scratch; diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index d920a6256c83..e52bd3f5d526 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -59,61 +59,87 @@ static void wa_init_finish(struct i915_wa_list *wal) return; DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n", - wal->count, wal->name); + wal->wa_count, wal->name); } -static void wa_add(struct drm_i915_private *i915, - i915_reg_t reg, const u32 mask, const u32 val) +static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) { - struct i915_workarounds *wa = &i915->workarounds; - unsigned int start = 0, end = wa->count; - unsigned int addr = i915_mmio_reg_offset(reg); - struct i915_wa_reg *r; + unsigned int addr = i915_mmio_reg_offset(wa->reg); + unsigned int start = 0, end = wal->count; + const unsigned int grow = 1 << 4; + struct i915_wa *wa_; + + GEM_BUG_ON(!is_power_of_2(grow)); + + if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. 
*/ + struct i915_wa *list; + + list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), + GFP_KERNEL); + if (!list) { + DRM_ERROR("No space for workaround init!\n"); + return; + } + + if (wal->list) + memcpy(list, wal->list, sizeof(*wa) * wal->count); + + wal->list = list; + } while (start < end) { unsigned int mid = start + (end - start) / 2; - if (wa->reg[mid].addr < addr) { + if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) { start = mid + 1; - } else if (wa->reg[mid].addr > addr) { + } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) { end = mid; } else { - r = &wa->reg[mid]; + wa_ = &wal->list[mid]; - if ((mask & ~r->mask) == 0) { + if ((wa->mask & ~wa_->mask) == 0) { DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n", - addr, r->mask, r->value); + i915_mmio_reg_offset(wa_->reg), + wa_->mask, wa_->val); - r->value &= ~mask; + wa_->val &= ~wa->mask; } - r->value |= val; - r->mask |= mask; + wal->wa_count++; + wa_->val |= wa->val; + wa_->mask |= wa->mask; return; } } - if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) { - DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n", - addr, mask, val); - return; - } + wal->wa_count++; + wa_ = &wal->list[wal->count++]; + *wa_ = *wa; - r = &wa->reg[wa->count++]; - r->addr = addr; - r->value = val; - r->mask = mask; - - while (r-- > wa->reg) { - GEM_BUG_ON(r[0].addr == r[1].addr); - if (r[1].addr > r[0].addr) + while (wa_-- > wal->list) { + GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) == + i915_mmio_reg_offset(wa_[1].reg)); + if (i915_mmio_reg_offset(wa_[1].reg) > + i915_mmio_reg_offset(wa_[0].reg)) break; - swap(r[1], r[0]); + swap(wa_[1], wa_[0]); } } -#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val)) +static void +__wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = mask, + .val = val + }; + + _wa_add(wal, &wa); +} + +#define WA_REG(addr, mask, val) __wa_add(wal, (addr), (mask), (val)) #define WA_SET_BIT_MASKED(addr, mask) \ WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) @@ -124,8 +150,10 @@ static void wa_add(struct drm_i915_private *i915, #define WA_SET_FIELD_MASKED(addr, mask, value) \ WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) -static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) { + struct i915_wa_list *wal = &engine->ctx_wa_list; + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); /* WaDisableAsyncFlipPerfMode:bdw,chv */ @@ -169,17 +197,14 @@ static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv) WA_SET_FIELD_MASKED(GEN7_GT_MODE, GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4); - - return 0; } -static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine) { - int ret; + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; - ret = gen8_ctx_workarounds_init(dev_priv); - if (ret) - return ret; + gen8_ctx_workarounds_init(engine); /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); @@ -199,31 +224,28 @@ static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv) /* WaForceContextSaveRestoreNonCoherent:bdw */ HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(dev_priv) ? 
HDC_FENCE_DEST_SLM_DISABLE : 0)); - - return 0; + (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); } -static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void chv_ctx_workarounds_init(struct intel_engine_cs *engine) { - int ret; + struct i915_wa_list *wal = &engine->ctx_wa_list; - ret = gen8_ctx_workarounds_init(dev_priv); - if (ret) - return ret; + gen8_ctx_workarounds_init(engine); /* WaDisableThreadStallDopClockGating:chv */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* Improve HiZ throughput on CHV. */ WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - - return 0; } -static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine) { - if (HAS_LLC(dev_priv)) { + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + + if (HAS_LLC(i915)) { /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl * * Must match Display Engine. See @@ -242,7 +264,7 @@ static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv) PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - if (!IS_COFFEELAKE(dev_priv)) + if (!IS_COFFEELAKE(i915)) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); @@ -285,9 +307,7 @@ static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv) HDC_FORCE_NON_COHERENT); /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ - if (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv)) + if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); @@ -314,14 +334,14 @@ static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv) GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEN9_LP(i915)) WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); - - return 0; } -static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv) +static void skl_tune_iz_hashing(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; u8 vals[3] = { 0, 0, 0 }; unsigned int i; @@ -332,7 +352,7 @@ static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv) * Only consider slices where one, and only one, subslice has 7 * EUs */ - if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) + if (!is_power_of_2(INTEL_INFO(i915)->sseu.subslice_7eu[i])) continue; /* @@ -341,12 +361,12 @@ static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv) * * -> 0 <= ss <= 3; */ - ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; + ss = ffs(INTEL_INFO(i915)->sseu.subslice_7eu[i]) - 1; vals[i] = 3 - ss; } if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return 0; + return; /* Tune IZ hashing. 
See intel_device_info_runtime_init() */ WA_SET_FIELD_MASKED(GEN7_GT_MODE, @@ -356,28 +376,19 @@ static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv) GEN9_IZ_HASHING(2, vals[2]) | GEN9_IZ_HASHING(1, vals[1]) | GEN9_IZ_HASHING(0, vals[0])); - - return 0; } -static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void skl_ctx_workarounds_init(struct intel_engine_cs *engine) { - int ret; - - ret = gen9_ctx_workarounds_init(dev_priv); - if (ret) - return ret; - - return skl_tune_iz_hashing(dev_priv); + gen9_ctx_workarounds_init(engine); + skl_tune_iz_hashing(engine); } -static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine) { - int ret; + struct i915_wa_list *wal = &engine->ctx_wa_list; - ret = gen9_ctx_workarounds_init(dev_priv); - if (ret) - return ret; + gen9_ctx_workarounds_init(engine); /* WaDisableThreadStallDopClockGating:bxt */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, @@ -386,52 +397,41 @@ static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv) /* WaToEnableHwFixForPushConstHWBug:bxt */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - return 0; } -static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine) { - int ret; + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; - ret = gen9_ctx_workarounds_init(dev_priv); - if (ret) - return ret; + gen9_ctx_workarounds_init(engine); /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) + if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:kbl */ WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - return 0; } -static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void glk_ctx_workarounds_init(struct intel_engine_cs *engine) { - int ret; + struct i915_wa_list *wal = &engine->ctx_wa_list; - ret = gen9_ctx_workarounds_init(dev_priv); - if (ret) - return ret; + gen9_ctx_workarounds_init(engine); /* WaToEnableHwFixForPushConstHWBug:glk */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - return 0; } -static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine) { - int ret; + struct i915_wa_list *wal = &engine->ctx_wa_list; - ret = gen9_ctx_workarounds_init(dev_priv); - if (ret) - return ret; + gen9_ctx_workarounds_init(engine); /* WaToEnableHwFixForPushConstHWBug:cfl */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, @@ -440,18 +440,19 @@ static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv) /* WaDisableSbeCacheDispatchPortSharing:cfl */ WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - return 0; } -static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + /* WaForceContextSaveRestoreNonCoherent:cnl */ WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ - if 
(IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) + if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ @@ -459,7 +460,7 @@ static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv) GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0)) + if (IS_CNL_REVID(i915, 0, CNL_REVID_B0)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); @@ -479,16 +480,17 @@ static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv) /* WaDisableEarlyEOT:cnl */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); - - return 0; } -static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +static void icl_ctx_workarounds_init(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + /* Wa_1604370585:icl (pre-prod) * Formerly known as WaPushConstantDereferenceHoldDisable */ - if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) + if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); @@ -504,7 +506,7 @@ static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv) /* Wa_2006611047:icl (pre-prod) * Formerly known as WaDisableImprovedTdlClkGating */ - if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0)) + if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, GEN11_TDL_CLOCK_GATING_FIX_DISABLE); @@ -513,70 +515,67 @@ static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv) GEN11_STATE_CACHE_REDIRECT_TO_CS); /* Wa_2006665173:icl (pre-prod) */ - if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0)) + if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); - - return 0; } -int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv) +void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) { - int err = 0; - - dev_priv->workarounds.count = 0; - - if (INTEL_GEN(dev_priv) < 8) - err = 0; - else if (IS_BROADWELL(dev_priv)) - err = bdw_ctx_workarounds_init(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) - err = chv_ctx_workarounds_init(dev_priv); - else if (IS_SKYLAKE(dev_priv)) - err = skl_ctx_workarounds_init(dev_priv); - else if (IS_BROXTON(dev_priv)) - err = bxt_ctx_workarounds_init(dev_priv); - else if (IS_KABYLAKE(dev_priv)) - err = kbl_ctx_workarounds_init(dev_priv); - else if (IS_GEMINILAKE(dev_priv)) - err = glk_ctx_workarounds_init(dev_priv); - else if (IS_COFFEELAKE(dev_priv)) - err = cfl_ctx_workarounds_init(dev_priv); - else if (IS_CANNONLAKE(dev_priv)) - err = cnl_ctx_workarounds_init(dev_priv); - else if (IS_ICELAKE(dev_priv)) - err = icl_ctx_workarounds_init(dev_priv); + struct drm_i915_private *i915 = engine->i915; + struct i915_wa_list *wal = &engine->ctx_wa_list; + + wa_init_start(wal, "context"); + + if (INTEL_GEN(i915) < 8) + return; + else if (IS_BROADWELL(i915)) + bdw_ctx_workarounds_init(engine); + else if (IS_CHERRYVIEW(i915)) + chv_ctx_workarounds_init(engine); + else if (IS_SKYLAKE(i915)) + skl_ctx_workarounds_init(engine); + else if (IS_BROXTON(i915)) + bxt_ctx_workarounds_init(engine); + else if (IS_KABYLAKE(i915)) + kbl_ctx_workarounds_init(engine); + else if (IS_GEMINILAKE(i915)) + glk_ctx_workarounds_init(engine); + else if 
(IS_COFFEELAKE(i915)) + cfl_ctx_workarounds_init(engine); + else if (IS_CANNONLAKE(i915)) + cnl_ctx_workarounds_init(engine); + else if (IS_ICELAKE(i915)) + icl_ctx_workarounds_init(engine); else - MISSING_CASE(INTEL_GEN(dev_priv)); - if (err) - return err; + MISSING_CASE(INTEL_GEN(i915)); - DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n", - dev_priv->workarounds.count); - return 0; + wa_init_finish(wal); } -int intel_ctx_workarounds_emit(struct i915_request *rq) +int intel_engine_emit_ctx_wa(struct i915_request *rq) { - struct i915_workarounds *w = &rq->i915->workarounds; + struct i915_wa_list *wal = &rq->engine->ctx_wa_list; + struct i915_wa *wa; + unsigned int i; u32 *cs; - int ret, i; + int ret; - if (w->count == 0) + if (wal->count == 0) return 0; ret = rq->engine->emit_flush(rq, EMIT_BARRIER); if (ret) return ret; - cs = intel_ring_begin(rq, (w->count * 2 + 2)); + cs = intel_ring_begin(rq, (wal->count * 2 + 2)); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(w->count); - for (i = 0; i < w->count; i++) { - *cs++ = w->reg[i].addr; - *cs++ = w->reg[i].value; + *cs++ = MI_LOAD_REGISTER_IMM(wal->count); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + *cs++ = i915_mmio_reg_offset(wa->reg); + *cs++ = wa->val; } *cs++ = MI_NOOP; @@ -589,32 +588,6 @@ int intel_ctx_workarounds_emit(struct i915_request *rq) return 0; } -static void -wal_add(struct i915_wa_list *wal, const struct i915_wa *wa) -{ - const unsigned int grow = 1 << 4; - - GEM_BUG_ON(!is_power_of_2(grow)); - - if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */ - struct i915_wa *list; - - list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), - GFP_KERNEL); - if (!list) { - DRM_ERROR("No space for workaround init!\n"); - return; - } - - if (wal->list) - memcpy(list, wal->list, sizeof(*wa) * wal->count); - - wal->list = list; - } - - wal->list[wal->count++] = *wa; -} - static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { @@ -624,7 +597,7 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) .val = _MASKED_BIT_ENABLE(val) }; - wal_add(wal, &wa); + _wa_add(wal, &wa); } static void @@ -637,7 +610,7 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, .val = val }; - wal_add(wal, &wa); + _wa_add(wal, &wa); } static void @@ -1021,7 +994,7 @@ whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) return; - wal_add(wal, &wa); + _wa_add(wal, &wa); } static void gen9_whitelist_build(struct i915_wa_list *w) diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index 3f99bfcb4a03..7c734714b05e 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -19,6 +19,7 @@ struct i915_wa_list { const char *name; struct i915_wa *list; unsigned int count; + unsigned int wa_count; }; static inline void intel_wa_list_free(struct i915_wa_list *wal) @@ -27,8 +28,8 @@ static inline void intel_wa_list_free(struct i915_wa_list *wal) memset(wal, 0, sizeof(*wal)); } -int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv); -int intel_ctx_workarounds_emit(struct i915_request *rq); +void intel_engine_init_ctx_wa(struct intel_engine_cs *engine); +int intel_engine_emit_ctx_wa(struct i915_request *rq); void intel_gt_init_workarounds(struct drm_i915_private *dev_priv); void intel_gt_apply_workarounds(struct drm_i915_private *dev_priv); -- cgit v1.2.3 From 
5179749925933575a67f9d8f16d0cc204f98a29f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Dec 2018 14:15:16 +0000 Subject: drm/i915: Allocate a common scratch page Currently we allocate a scratch page for each engine, but since we only ever write into it for post-sync operations, it is not exposed to userspace nor do we care for coherency. As we then do not care about its contents, we can use one page for all, reducing our allocations and avoid complications by not assuming per-engine isolation. For later use, it simplifies engine initialisation (by removing the allocation that required struct_mutex!) and means that we can always rely on there being a scratch page. v2: Check that we allocated a large enough scratch for I830 w/a Fixes: 06e562e7f515 ("drm/i915/ringbuffer: Delay after EMIT_INVALIDATE for gen4/gen5") # v4.18.20 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108850 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20181204141522.13640-1-chris@chris-wilson.co.uk Cc: Joonas Lahtinen Cc: # v4.18.20+ --- drivers/gpu/drm/i915/i915_drv.h | 7 +++++ drivers/gpu/drm/i915/i915_gem.c | 50 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 42 --------------------------- drivers/gpu/drm/i915/intel_lrc.c | 17 ++++------- drivers/gpu/drm/i915/intel_ringbuffer.c | 37 ++++++++---------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ---- 7 files changed, 74 insertions(+), 86 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 23a3dc6f3907..c5f01964f0fb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1983,6 +1983,8 @@ struct drm_i915_private { struct delayed_work idle_work; ktime_t last_init_time; + + struct i915_vma *scratch; } gt; /* perform PHY state sanity checks? 
*/ @@ -3713,4 +3715,9 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) return I915_HWS_CSB_WRITE_INDEX; } +static inline u32 i915_scratch_offset(const struct drm_i915_private *i915) +{ + return i915_ggtt_offset(i915->gt.scratch); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 35ecfea4e903..d36a9755ad91 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5498,6 +5498,44 @@ err_active: goto out_ctx; } +static int +i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + obj = i915_gem_object_create_stolen(i915, size); + if (!obj) + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate scratch page\n"); + return PTR_ERR(obj); + } + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_unref; + + i915->gt.scratch = vma; + return 0; + +err_unref: + i915_gem_object_put(obj); + return ret; +} + +static void i915_gem_fini_scratch(struct drm_i915_private *i915) +{ + i915_vma_unpin_and_release(&i915->gt.scratch, 0); +} + int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -5544,12 +5582,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } - ret = i915_gem_contexts_init(dev_priv); + ret = i915_gem_init_scratch(dev_priv, + IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_ggtt; } + ret = i915_gem_contexts_init(dev_priv); + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_scratch; + } + ret = intel_engines_init(dev_priv); if (ret) { GEM_BUG_ON(ret == -EIO); @@ -5622,6 +5667,8 @@ err_pm: err_context: if (ret != -EIO) i915_gem_contexts_fini(dev_priv); +err_scratch: + i915_gem_fini_scratch(dev_priv); err_ggtt: err_unlock: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -5673,6 +5720,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); + i915_gem_fini_scratch(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); intel_wa_list_free(&dev_priv->gt_wa_list); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a6885a59568b..07465123c166 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1571,7 +1571,7 @@ static void gem_record_rings(struct i915_gpu_state *error) if (HAS_BROKEN_CS_TLB(i915)) ee->wa_batchbuffer = i915_error_object_create(i915, - engine->scratch); + i915->gt.scratch); request_record_user_bo(request, ee); ee->ctx = diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6b427bc52f78..af2873403009 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -493,46 +493,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_cmd_parser(engine); } -int intel_engine_create_scratch(struct intel_engine_cs *engine, - unsigned int size) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int ret; - - WARN_ON(engine->scratch); - - obj = i915_gem_object_create_stolen(engine->i915, size); - if (!obj) - obj = i915_gem_object_create_internal(engine->i915, size); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate scratch 
page\n"); - return PTR_ERR(obj); - } - - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_unref; - } - - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (ret) - goto err_unref; - - engine->scratch = vma; - return 0; - -err_unref: - i915_gem_object_put(obj); - return ret; -} - -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) -{ - i915_vma_unpin_and_release(&engine->scratch, 0); -} - static void cleanup_status_page(struct intel_engine_cs *engine) { if (HWS_NEEDS_PHYSICAL(engine->i915)) { @@ -707,8 +667,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - intel_engine_cleanup_scratch(engine); - cleanup_status_page(engine); intel_engine_fini_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 87227fd9ae5f..d7fa301b5ec7 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1288,9 +1288,10 @@ static int execlists_request_alloc(struct i915_request *request) static u32 * gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) { + /* NB no one else is allowed to scribble over scratch + 256! */ *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = i915_scratch_offset(engine->i915) + 256; *batch++ = 0; *batch++ = MI_LOAD_REGISTER_IMM(1); @@ -1304,7 +1305,7 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = i915_scratch_offset(engine->i915) + 256; *batch++ = 0; return batch; @@ -1341,7 +1342,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE, - i915_ggtt_offset(engine->scratch) + + i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES); *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -1973,7 +1974,7 @@ static int gen8_emit_flush_render(struct i915_request *request, { struct intel_engine_cs *engine = request->engine; u32 scratch_addr = - i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; + i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 *cs, flags = 0; int len; @@ -2292,10 +2293,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine) if (ret) return ret; - ret = intel_engine_create_scratch(engine, PAGE_SIZE); - if (ret) - goto err_cleanup_common; - ret = intel_init_workaround_bb(engine); if (ret) { /* @@ -2311,10 +2308,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine) intel_engine_init_workarounds(engine); return 0; - -err_cleanup_common: - intel_engine_cleanup_common(engine); - return ret; } int logical_xcs_ring_init(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7f88df5bff09..c5eb26a7ee79 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -150,8 +150,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) */ if (mode & EMIT_INVALIDATE) { *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_ggtt_offset(rq->engine->scratch) | - PIPE_CONTROL_GLOBAL_GTT; + 
*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; *cs++ = 0; @@ -159,8 +158,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) *cs++ = MI_FLUSH; *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_ggtt_offset(rq->engine->scratch) | - PIPE_CONTROL_GLOBAL_GTT; + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; *cs++ = 0; } @@ -212,8 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) static int intel_emit_post_sync_nonzero_flush(struct i915_request *rq) { - u32 scratch_addr = - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; u32 *cs; cs = intel_ring_begin(rq, 6); @@ -246,8 +243,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq) static int gen6_render_ring_flush(struct i915_request *rq, u32 mode) { - u32 scratch_addr = - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; int ret; @@ -316,8 +312,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq) static int gen7_render_ring_flush(struct i915_request *rq, u32 mode) { - u32 scratch_addr = - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; /* @@ -994,7 +989,7 @@ i965_emit_bb_start(struct i915_request *rq, } /* Just userspace ABI convention to limit the wa batch bo to a resonable size */ -#define I830_BATCH_LIMIT (256*1024) +#define I830_BATCH_LIMIT SZ_256K #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int @@ -1002,7 +997,9 @@ i830_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { - u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch); + u32 *cs, cs_offset = i915_scratch_offset(rq->i915); + + GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE); cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -1459,7 +1456,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct i915_timeline *timeline; struct intel_ring *ring; - unsigned int size; int err; intel_engine_setup_common(engine); @@ -1484,21 +1480,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) GEM_BUG_ON(engine->buffer); engine->buffer = ring; - size = PAGE_SIZE; - if (HAS_BROKEN_CS_TLB(engine->i915)) - size = I830_WA_SIZE; - err = intel_engine_create_scratch(engine, size); - if (err) - goto err_unpin; - err = intel_engine_init_common(engine); if (err) - goto err_scratch; + goto err_unpin; return 0; -err_scratch: - intel_engine_cleanup_scratch(engine); err_unpin: intel_ring_unpin(ring); err_ring: @@ -1572,7 +1559,7 @@ static int flush_pd_dir(struct i915_request *rq) /* Stall until the page table load is complete */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); - *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = i915_scratch_offset(rq->i915); *cs++ = MI_NOOP; intel_ring_advance(rq, cs); @@ -1681,7 +1668,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* Insert a delay before the next switch! 
*/ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = i915_scratch_offset(rq->i915); *cs++ = MI_NOOP; } *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 927bb21a2b0b..72edaa7ff411 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -439,7 +439,6 @@ struct intel_engine_cs { struct i915_wa_list ctx_wa_list; struct i915_wa_list wa_list; struct i915_wa_list whitelist; - struct i915_vma *scratch; u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ @@ -896,10 +895,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); -int intel_engine_create_scratch(struct intel_engine_cs *engine, - unsigned int size); -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine); - int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); -- cgit v1.2.3
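For illustration only: the white-listing patch above programs each
RING_FORCE_TO_NONPRIV slot with the offset of a register userspace may
touch, and parks the unused slots on a harmless register (RING_NOPID).
A minimal userspace sketch of that pattern; the slot stride, base
offsets, slot count and whitelist entries below are assumptions for the
example, not hardware documentation:

/* Illustrative sketch only -- not the driver's code. */
#include <stdint.h>
#include <stdio.h>

#define MAX_NONPRIV_SLOTS 12	/* assumed number of NONPRIV slots */

static void mmio_write(uint32_t reg, uint32_t val)
{
	printf("write 0x%05x <- 0x%08x\n", reg, val);	/* stands in for MMIO */
}

static uint32_t force_to_nonpriv(uint32_t base, unsigned int slot)
{
	return base + 0x4d0 + slot * 4;	/* assumed slot layout */
}

static void apply_whitelist(uint32_t base, const uint32_t *whitelist,
			    unsigned int count)
{
	unsigned int i;

	/* Point a slot at each white-listed register... */
	for (i = 0; i < count; i++)
		mmio_write(force_to_nonpriv(base, i), whitelist[i]);

	/* ...and clear the rest just in case of garbage. */
	for (; i < MAX_NONPRIV_SLOTS; i++)
		mmio_write(force_to_nonpriv(base, i), base + 0x94 /* NOPID */);
}

int main(void)
{
	static const uint32_t rcs_whitelist[] = { 0x20e4, 0x7014 };	/* made up */

	apply_whitelist(0x2000, rcs_whitelist, 2);
	return 0;
}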
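Also for illustration: the list management fused in the per-context
patch grows the array in power-of-two chunks, binary-searches for an
existing entry on the same register and merges masks/values into it,
and otherwise inserts while keeping the list sorted by offset. A
self-contained userspace sketch of that algorithm, where realloc
stands in for the kernel's kmalloc_array-plus-memcpy sequence:

/* Illustrative sketch only -- mirrors the algorithm, not the driver. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct wa {
	uint32_t reg;
	uint32_t mask;
	uint32_t val;
};

struct wa_list {
	struct wa *list;
	unsigned int count;
};

#define WA_LIST_CHUNK 16	/* must be a power of two */

static void wa_add(struct wa_list *wal, const struct wa *wa)
{
	unsigned int start = 0, end = wal->count;
	struct wa *wa_;

	/* count a multiple of the chunk size: uninitialized or full. */
	if ((wal->count & (WA_LIST_CHUNK - 1)) == 0) {
		struct wa *list = realloc(wal->list,
					  (wal->count + WA_LIST_CHUNK) *
					  sizeof(*list));
		if (!list)
			return;		/* drop the workaround on OOM */
		wal->list = list;
	}

	while (start < end) {	/* binary search by register offset */
		unsigned int mid = start + (end - start) / 2;

		if (wal->list[mid].reg < wa->reg) {
			start = mid + 1;
		} else if (wal->list[mid].reg > wa->reg) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];	/* same register: merge */
			wa_->val = (wa_->val & ~wa->mask) | wa->val;
			wa_->mask |= wa->mask;
			return;
		}
	}

	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {	/* bubble into sorted position */
		struct wa tmp;

		if (wa_[1].reg > wa_[0].reg)
			break;
		tmp = wa_[0];
		wa_[0] = wa_[1];
		wa_[1] = tmp;
	}
}

int main(void)
{
	struct wa_list wal = { NULL, 0 };
	const struct wa was[] = {	/* made-up entries; the third merges */
		{ 0x20e4, 0x4, 0x4 }, { 0x20d4, 0x1, 0x1 }, { 0x20e4, 0x8, 0x8 },
	};
	unsigned int i;

	for (i = 0; i < 3; i++)
		wa_add(&wal, &was[i]);
	for (i = 0; i < wal.count; i++)
		printf("0x%04x mask 0x%08x val 0x%08x\n",
		       wal.list[i].reg, wal.list[i].mask, wal.list[i].val);
	free(wal.list);
	return 0;
}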