diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_lrc.c | 320 |
1 files changed, 224 insertions, 96 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 4311b12542fb..87e6c5bdd2dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -217,7 +217,7 @@ struct virtual_engine { /* And finally, which physical engines this virtual engine maps onto. */ unsigned int num_siblings; - struct intel_engine_cs *siblings[0]; + struct intel_engine_cs *siblings[]; }; static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) @@ -429,18 +429,7 @@ static int effective_prio(const struct i915_request *rq) if (i915_request_has_nopreempt(rq)) prio = I915_PRIORITY_UNPREEMPTABLE; - /* - * On unwinding the active request, we give it a priority bump - * if it has completed waiting on any semaphore. If we know that - * the request has already started, we can prevent an unwanted - * preempt-to-idle cycle by taking that into account now. - */ - if (__i915_request_has_started(rq)) - prio |= I915_PRIORITY_NOSEMAPHORE; - - /* Restrict mere WAIT boosts from triggering preemption */ - BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ - return prio | __NO_PREEMPTION; + return prio; } static int queue_prio(const struct intel_engine_execlists *execlists) @@ -1271,14 +1260,11 @@ execlists_check_context(const struct intel_context *ce, static void restore_default_state(struct intel_context *ce, struct intel_engine_cs *engine) { - u32 *regs = ce->lrc_reg_state; + u32 *regs; - if (engine->pinned_default_state) - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_OFFSET, - engine->context_size - PAGE_SIZE); + regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE); + execlists_init_reg_state(regs, ce, engine, ce->ring, true); - execlists_init_reg_state(regs, ce, engine, ce->ring, false); ce->runtime.last = intel_context_get_runtime(ce); } @@ -1372,7 +1358,7 @@ __execlists_schedule_in(struct i915_request *rq) ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - unsigned int tag = ffs(engine->context_tag); + unsigned int tag = ffs(READ_ONCE(engine->context_tag)); GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); clear_bit(tag - 1, &engine->context_tag); @@ -1826,30 +1812,16 @@ static bool virtual_matches(const struct virtual_engine *ve, return true; } -static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, - struct i915_request *rq) +static void virtual_xfer_breadcrumbs(struct virtual_engine *ve) { - struct intel_engine_cs *old = ve->siblings[0]; - - /* All unattached (rq->engine == old) must already be completed */ - - spin_lock(&old->breadcrumbs.irq_lock); - if (!list_empty(&ve->context.signal_link)) { - list_del_init(&ve->context.signal_link); - - /* - * We cannot acquire the new engine->breadcrumbs.irq_lock - * (as we are holding a breadcrumbs.irq_lock already), - * so attach this request to the signaler on submission. - * The queued irq_work will occur when we finally drop - * the engine->active.lock after dequeue. - */ - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags); - - /* Also transfer the pending irq_work for the old breadcrumb. */ - intel_engine_signal_breadcrumbs(rq->engine); - } - spin_unlock(&old->breadcrumbs.irq_lock); + /* + * All the outstanding signals on ve->siblings[0] must have + * been completed, just pending the interrupt handler. As those + * signals still refer to the old sibling (via rq->engine), we must + * transfer those to the old irq_worker to keep our locking + * consistent. + */ + intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context); } #define for_each_waiter(p__, rq__) \ @@ -1883,12 +1855,16 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; /* No waiter should start before its signaler */ - GEM_BUG_ON(i915_request_started(w) && + GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && + i915_request_started(w) && !i915_request_completed(rq)); GEM_BUG_ON(i915_request_is_active(w)); @@ -2280,7 +2256,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) engine); if (!list_empty(&ve->context.signals)) - virtual_xfer_breadcrumbs(ve, rq); + virtual_xfer_breadcrumbs(ve); /* * Move the bound engine to the top of the list @@ -3494,6 +3470,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; + GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq)); if (!i915_request_timeline(rq)->has_initial_breadcrumb) return 0; @@ -3520,6 +3497,56 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) /* Record the updated position of the request's payload */ rq->infix = intel_ring_offset(rq, cs); + __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); + + return 0; +} + +static int emit_pdps(struct i915_request *rq) +{ + const struct intel_engine_cs * const engine = rq->engine; + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); + int err, i; + u32 *cs; + + GEM_BUG_ON(intel_vgpu_active(rq->i915)); + + /* + * Beware ye of the dragons, this sequence is magic! + * + * Small changes to this sequence can cause anything from + * GPU hangs to forcewake errors and machine lockups! + */ + + /* Flush any residual operations from the context load */ + err = engine->emit_flush(rq, EMIT_FLUSH); + if (err) + return err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; + + cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Ensure the LRI have landed before we invalidate & continue */ + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; + for (i = GEN8_3LVL_PDPES; i--; ) { + const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); + u32 base = engine->mmio_base; + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); + *cs++ = lower_32_bits(pd_daddr); + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + return 0; } @@ -3544,6 +3571,12 @@ static int execlists_request_alloc(struct i915_request *request) * to cancel/unwind this request now. */ + if (!i915_vm_is_4lvl(request->context->vm)) { + ret = emit_pdps(request); + if (ret) + return ret; + } + /* Unconditionally invalidate GPU caches and TLBs. */ ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) @@ -3886,6 +3919,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) ring_set_paused(engine, 0); /* + * Sometimes Icelake forgets to reset its pointers on a GPU reset. + * Bludgeon them with a mmio update to be sure. + */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + 0xffff << 16 | reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + + /* * After a reset, the HW starts writing into CSB entry [0]. We * therefore have to set our HEAD pointer back one entry so that * the *first* entry we check is entry 0. To complicate this further, @@ -3898,16 +3939,15 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) WRITE_ONCE(*execlists->csb_write, reset_value); wmb(); /* Make sure this is visible to HW (paranoia?) */ - /* - * Sometimes Icelake forgets to reset its pointers on a GPU reset. - * Bludgeon them with a mmio update to be sure. - */ + invalidate_csb_entries(&execlists->csb_status[0], + &execlists->csb_status[reset_value]); + + /* Once more for luck and our trusty paranoia */ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, - reset_value << 8 | reset_value); + 0xffff << 16 | reset_value << 8 | reset_value); ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); - invalidate_csb_entries(&execlists->csb_status[0], - &execlists->csb_status[reset_value]); + GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); } static void execlists_sanitize(struct intel_engine_cs *engine) @@ -4074,6 +4114,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) */ ring_set_paused(engine, 1); intel_engine_stop_cs(engine); + + engine->execlists.reset_ccid = active_ccid(engine); } static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) @@ -4116,7 +4158,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * its request, it was still running at the time of the * reset and will have been clobbered. */ - rq = execlists_active(execlists); + rq = active_context(engine, engine->execlists.reset_ccid); if (!rq) goto unwind; @@ -4166,8 +4208,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * image back to the expected values to skip over the guilty request. */ __i915_request_reset(rq, stalled); - if (!stalled) - goto out_replay; /* * We want a simple context + ring to execute the breadcrumb update. @@ -4177,9 +4217,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * future request will be after userspace has had the opportunity * to recreate its own state. */ - GEM_BUG_ON(!intel_context_is_pinned(ce)); - restore_default_state(ce, engine); - out_replay: ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", head, ce->ring->tail); @@ -4545,6 +4582,42 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } +static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) +{ + static const i915_reg_t vd[] = { + GEN12_VD0_AUX_NV, + GEN12_VD1_AUX_NV, + GEN12_VD2_AUX_NV, + GEN12_VD3_AUX_NV, + }; + + static const i915_reg_t ve[] = { + GEN12_VE0_AUX_NV, + GEN12_VE1_AUX_NV, + }; + + if (engine->class == VIDEO_DECODE_CLASS) + return vd[engine->instance]; + + if (engine->class == VIDEO_ENHANCEMENT_CLASS) + return ve[engine->instance]; + + GEM_BUG_ON("unknown aux_inv_reg\n"); + + return INVALID_MMIO_REG; +} + +static u32 * +gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(inv_reg); + *cs++ = AUX_INV; + *cs++ = MI_NOOP; + + return cs; +} + static int gen12_emit_flush_render(struct i915_request *request, u32 mode) { @@ -4553,13 +4626,13 @@ static int gen12_emit_flush_render(struct i915_request *request, u32 *cs; flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_L3; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl */ flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; - flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; @@ -4570,7 +4643,9 @@ static int gen12_emit_flush_render(struct i915_request *request, if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + cs = gen12_emit_pipe_control(cs, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } @@ -4585,14 +4660,13 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_CS_STALL; - cs = intel_ring_begin(request, 8); + cs = intel_ring_begin(request, 8 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -4605,6 +4679,9 @@ static int gen12_emit_flush_render(struct i915_request *request, cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + /* hsdes: 1809175790 */ + cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs); + *cs++ = preparser_disable(false); intel_ring_advance(request, cs); } @@ -4612,6 +4689,56 @@ static int gen12_emit_flush_render(struct i915_request *request, return 0; } +static int gen12_emit_flush(struct i915_request *request, u32 mode) +{ + intel_engine_mask_t aux_inv = 0; + u32 cmd, *cs; + + if (mode & EMIT_INVALIDATE) + aux_inv = request->engine->mask & ~BIT(BCS0); + + cs = intel_ring_begin(request, + 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_FLUSH_DW + 1; + + /* We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). + */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; + + if (mode & EMIT_INVALIDATE) { + cmd |= MI_INVALIDATE_TLB; + if (request->engine->class == VIDEO_DECODE_CLASS) + cmd |= MI_INVALIDATE_BSD; + } + + *cs++ = cmd; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + + if (aux_inv) { /* hsdes: 1809175790 */ + struct intel_engine_cs *engine; + unsigned int tmp; + + *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); + for_each_engine_masked(engine, request->engine->gt, + aux_inv, tmp) { + *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); + *cs++ = AUX_INV; + } + *cs++ = MI_NOOP; + } + intel_ring_advance(request, cs); + + return 0; +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -4641,8 +4768,7 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) } static __always_inline u32* -gen8_emit_fini_breadcrumb_footer(struct i915_request *request, - u32 *cs) +gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) { *cs++ = MI_USER_INTERRUPT; @@ -4656,14 +4782,16 @@ gen8_emit_fini_breadcrumb_footer(struct i915_request *request, return gen8_emit_wa_tail(request, cs); } -static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - 0); + u32 addr = i915_request_active_timeline(request)->hwsp_offset; + + return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0); +} - return gen8_emit_fini_breadcrumb_footer(request, cs); +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) +{ + return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); } static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) @@ -4681,7 +4809,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_fini_breadcrumb_tail(request, cs); } static u32 * @@ -4697,7 +4825,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_fini_breadcrumb_tail(request, cs); } /* @@ -4735,7 +4863,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) } static __always_inline u32* -gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) +gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) { *cs++ = MI_USER_INTERRUPT; @@ -4749,33 +4877,29 @@ gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - 0); - - return gen12_emit_fini_breadcrumb_footer(request, cs); + return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); } static u32 * gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write_rcs(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - /* Wa_1409600907:tgl */ - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_HDC_PIPELINE_FLUSH); + cs = gen12_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); - return gen12_emit_fini_breadcrumb_footer(request, cs); + return gen12_emit_fini_breadcrumb_tail(request, cs); } static void execlists_park(struct intel_engine_cs *engine) @@ -4801,8 +4925,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + engine->flags |= I915_ENGINE_HAS_TIMESLICES; + } } if (INTEL_GEN(engine->i915) >= 12) @@ -4845,9 +4972,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; - if (INTEL_GEN(engine->i915) >= 12) + if (INTEL_GEN(engine->i915) >= 12) { engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; - + engine->emit_flush = gen12_emit_flush; + } engine->set_default_submission = intel_execlists_set_default_submission; if (INTEL_GEN(engine->i915) < 11) { |