From d1813ca2bbf1e68ad0280a6b76abeb2115c4740f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Jan 2020 13:17:04 +0000 Subject: drm/i915/gt: Clear LRC image inline When creating the initial LRC image, we also want to clear the MI_NOOPs and register values. Rather than use a blanket memset beforehand, apply the clears inline, close the context image and force inhibition of the uninitialised remainder. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200102131707.1463945-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 88 ++++++++++++++++++++-------------- drivers/gpu/drm/i915/gt/selftest_lrc.c | 13 +++-- 2 files changed, 58 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 029496d2dfb5..b36fd108f0c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -488,9 +488,15 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) return desc; } -static u32 *set_offsets(u32 *regs, +static inline unsigned int dword_in_page(void *addr) +{ + return offset_in_page(addr) / sizeof(u32); +} + +static void set_offsets(u32 *regs, const u8 *data, - const struct intel_engine_cs *engine) + const struct intel_engine_cs *engine, + bool clear) #define NOP(x) (BIT(7) | (x)) #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) #define POSTED BIT(0) @@ -498,7 +504,7 @@ static u32 *set_offsets(u32 *regs, #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END() 0 +#define END(x) 0, (x) { const u32 base = engine->mmio_base; @@ -506,7 +512,10 @@ static u32 *set_offsets(u32 *regs, u8 count, flags; if (*data & BIT(7)) { /* skip */ - regs += *data++ & ~BIT(7); + count = *data++ & ~BIT(7); + if (clear) + memset32(regs, MI_NOOP, count); + regs += count; continue; } @@ -532,12 +541,25 @@ static 
u32 *set_offsets(u32 *regs, offset |= v & ~BIT(7); } while (v & BIT(7)); - *regs = base + (offset << 2); + regs[0] = base + (offset << 2); + if (clear) + regs[1] = 0; regs += 2; } while (--count); } - return regs; + if (clear) { + u8 count = *++data; + + /* Clear past the tail for HW access */ + GEM_BUG_ON(dword_in_page(regs) > count); + memset32(regs, MI_NOOP, count - dword_in_page(regs)); + + /* Close the batch; used mainly by live_lrc_layout() */ + *regs = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + *regs |= BIT(0); + } } static const u8 gen8_xcs_offsets[] = { @@ -572,7 +594,7 @@ static const u8 gen8_xcs_offsets[] = { REG16(0x200), REG(0x028), - END(), + END(80) }; static const u8 gen9_xcs_offsets[] = { @@ -656,7 +678,7 @@ static const u8 gen9_xcs_offsets[] = { REG16(0x67c), REG(0x068), - END(), + END(176) }; static const u8 gen12_xcs_offsets[] = { @@ -688,7 +710,7 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END(), + END(80) }; static const u8 gen8_rcs_offsets[] = { @@ -725,7 +747,7 @@ static const u8 gen8_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(), + END(80) }; static const u8 gen9_rcs_offsets[] = { @@ -809,7 +831,7 @@ static const u8 gen9_rcs_offsets[] = { REG16(0x67c), REG(0x68), - END() + END(176) }; static const u8 gen11_rcs_offsets[] = { @@ -850,7 +872,7 @@ static const u8 gen11_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(), + END(80) }; static const u8 gen12_rcs_offsets[] = { @@ -891,7 +913,7 @@ static const u8 gen12_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(), + END(80) }; #undef END @@ -1529,7 +1551,7 @@ static bool can_merge_rq(const struct i915_request *prev, static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { - set_offsets(regs, reg_offsets(engine), engine); + set_offsets(regs, reg_offsets(engine), engine, false); } static bool virtual_matches(const struct virtual_engine *ve, @@ -4043,15 +4065,19 @@ static u32 intel_lr_indirect_ctx_offset(const struct 
intel_engine_cs *engine) static void init_common_reg_state(u32 * const regs, const struct intel_engine_cs *engine, - const struct intel_ring *ring) + const struct intel_ring *ring, + bool inhibit) { - regs[CTX_CONTEXT_CONTROL] = - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + u32 ctl; + + ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (inhibit) + ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; if (INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | - CTX_CTRL_RS_CTX_ENABLE); + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + regs[CTX_CONTEXT_CONTROL] = ctl; regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; } @@ -4109,7 +4135,7 @@ static void execlists_init_reg_state(u32 *regs, const struct intel_context *ce, const struct intel_engine_cs *engine, const struct intel_ring *ring, - bool close) + bool inhibit) { /* * A context is actually a big batch buffer with several @@ -4121,15 +4147,9 @@ static void execlists_init_reg_state(u32 *regs, * * Must keep consistent with virtual_update_register_offsets(). 
*/ - u32 *bbe = set_offsets(regs, reg_offsets(engine), engine); + set_offsets(regs, reg_offsets(engine), engine, inhibit); - if (close) { /* Close the batch; used mainly by live_lrc_layout() */ - *bbe = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - *bbe |= BIT(0); - } - - init_common_reg_state(regs, engine, ring); + init_common_reg_state(regs, engine, ring, inhibit); init_ppgtt_reg_state(regs, vm_alias(ce->vm)); init_wa_bb_reg_state(regs, engine, @@ -4148,7 +4168,6 @@ populate_lr_context(struct intel_context *ce, { bool inhibit = true; void *vaddr; - u32 *regs; int ret; vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); @@ -4178,11 +4197,8 @@ populate_lr_context(struct intel_context *ce, /* The second page of the context object contains some fields which must * be set up prior to the first execution. */ - regs = vaddr + LRC_STATE_PN * PAGE_SIZE; - execlists_init_reg_state(regs, ce, engine, ring, inhibit); - if (inhibit) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, + ce, engine, ring, inhibit); ret = 0; err_unpin_ctx: diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 943b623f00e9..627613d85db8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -3362,7 +3362,7 @@ static int live_lrc_layout(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - u32 *mem; + u32 *lrc; int err; /* @@ -3370,13 +3370,13 @@ static int live_lrc_layout(void *arg) * match the layout saved by HW. 
*/ - mem = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!mem) + lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!lrc) return -ENOMEM; err = 0; for_each_engine(engine, gt, id) { - u32 *hw, *lrc; + u32 *hw; int dw; if (!engine->default_state) @@ -3390,8 +3390,7 @@ static int live_lrc_layout(void *arg) } hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); - lrc = memset(mem, 0, PAGE_SIZE); - execlists_init_reg_state(lrc, + execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), engine->kernel_context, engine, engine->kernel_context->ring, @@ -3461,7 +3460,7 @@ static int live_lrc_layout(void *arg) break; } - kfree(mem); + kfree(lrc); return err; } -- cgit v1.2.3