summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt/intel_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c99
1 files changed, 82 insertions, 17 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a32aabce7901..724b2cb897d3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -182,6 +182,7 @@
struct virtual_engine {
struct intel_engine_cs base;
struct intel_context context;
+ struct rcu_work rcu;
/*
* We allow only a single request through the virtual engine at a time
@@ -2787,6 +2788,9 @@ static void __execlists_hold(struct i915_request *rq)
static bool execlists_hold(struct intel_engine_cs *engine,
struct i915_request *rq)
{
+ if (i915_request_on_hold(rq))
+ return false;
+
spin_lock_irq(&engine->active.lock);
if (i915_request_completed(rq)) { /* too late! */
@@ -3168,8 +3172,10 @@ static void execlists_submission_tasklet(unsigned long data)
spin_unlock_irqrestore(&engine->active.lock, flags);
/* Recheck after serialising with direct-submission */
- if (unlikely(timeout && preempt_timeout(engine)))
+ if (unlikely(timeout && preempt_timeout(engine))) {
+ cancel_timer(&engine->execlists.preempt);
execlists_reset(engine, "preemption time out");
+ }
}
}
@@ -3547,6 +3553,19 @@ static const struct intel_context_ops execlists_context_ops = {
.destroy = execlists_context_destroy,
};
+static u32 hwsp_offset(const struct i915_request *rq)
+{
+ const struct intel_timeline_cacheline *cl;
+
+ /* Before the request is executed, the timeline/cachline is fixed */
+
+ cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
+ if (cl)
+ return cl->ggtt_offset;
+
+ return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
+}
+
static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
@@ -3569,7 +3588,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
*cs++ = MI_NOOP;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = i915_request_timeline(rq)->hwsp_offset;
+ *cs++ = hwsp_offset(rq);
*cs++ = 0;
*cs++ = rq->fence.seqno - 1;
@@ -4886,11 +4905,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
-static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
- u32 addr = i915_request_active_timeline(request)->hwsp_offset;
-
- return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
+ return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}
static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
@@ -4909,7 +4926,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
+ hwsp_offset(request),
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
@@ -4921,7 +4938,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
+ hwsp_offset(request),
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -4983,7 +5000,9 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
{
- return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
+ /* XXX Stalling flush before seqno write; post-sync not */
+ cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
+ return gen12_emit_fini_breadcrumb_tail(rq, cs);
}
static u32 *
@@ -4991,7 +5010,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen12_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- i915_request_active_timeline(request)->hwsp_offset,
+ hwsp_offset(request),
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
@@ -5412,44 +5431,90 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
return &ve->base.execlists.default_priolist.requests[0];
}
-static void virtual_context_destroy(struct kref *kref)
+static void rcu_virtual_context_destroy(struct work_struct *wrk)
{
struct virtual_engine *ve =
- container_of(kref, typeof(*ve), context.ref);
+ container_of(wrk, typeof(*ve), rcu.work);
unsigned int n;
- GEM_BUG_ON(!list_empty(virtual_queue(ve)));
- GEM_BUG_ON(ve->request);
GEM_BUG_ON(ve->context.inflight);
+ /* Preempt-to-busy may leave a stale request behind. */
+ if (unlikely(ve->request)) {
+ struct i915_request *old;
+
+ spin_lock_irq(&ve->base.active.lock);
+
+ old = fetch_and_zero(&ve->request);
+ if (old) {
+ GEM_BUG_ON(!i915_request_completed(old));
+ __i915_request_submit(old);
+ i915_request_put(old);
+ }
+
+ spin_unlock_irq(&ve->base.active.lock);
+ }
+
+ /*
+ * Flush the tasklet in case it is still running on another core.
+ *
+ * This needs to be done before we remove ourselves from the siblings'
+ * rbtrees as in the case it is running in parallel, it may reinsert
+ * the rb_node into a sibling.
+ */
+ tasklet_kill(&ve->base.execlists.tasklet);
+
+ /* Decouple ourselves from the siblings, no more access allowed. */
for (n = 0; n < ve->num_siblings; n++) {
struct intel_engine_cs *sibling = ve->siblings[n];
struct rb_node *node = &ve->nodes[sibling->id].rb;
- unsigned long flags;
if (RB_EMPTY_NODE(node))
continue;
- spin_lock_irqsave(&sibling->active.lock, flags);
+ spin_lock_irq(&sibling->active.lock);
/* Detachment is lazily performed in the execlists tasklet */
if (!RB_EMPTY_NODE(node))
rb_erase_cached(node, &sibling->execlists.virtual);
- spin_unlock_irqrestore(&sibling->active.lock, flags);
+ spin_unlock_irq(&sibling->active.lock);
}
GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+ GEM_BUG_ON(!list_empty(virtual_queue(ve)));
if (ve->context.state)
__execlists_context_fini(&ve->context);
intel_context_fini(&ve->context);
+ intel_breadcrumbs_free(ve->base.breadcrumbs);
intel_engine_free_request_pool(&ve->base);
kfree(ve->bonds);
kfree(ve);
}
+static void virtual_context_destroy(struct kref *kref)
+{
+ struct virtual_engine *ve =
+ container_of(kref, typeof(*ve), context.ref);
+
+ GEM_BUG_ON(!list_empty(&ve->context.signals));
+
+ /*
+ * When destroying the virtual engine, we have to be aware that
+ * it may still be in use from an hardirq/softirq context causing
+ * the resubmission of a completed request (background completion
+ * due to preempt-to-busy). Before we can free the engine, we need
+ * to flush the submission code and tasklets that are still potentially
+ * accessing the engine. Flushing the tasklets requires process context,
+ * and since we can guard the resubmit onto the engine with an RCU read
+ * lock, we can delegate the free of the engine to an RCU worker.
+ */
+ INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
+ queue_rcu_work(system_wq, &ve->rcu);
+}
+
static void virtual_engine_initial_hint(struct virtual_engine *ve)
{
int swp;