From bf201f5eda23eb57f7217d20ea3e18da8cbcf52b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Nov 2019 16:55:14 +0000 Subject: drm/i915/gt: Unlock engine-pm after queuing the kernel context switch In commit a79ca656b648 ("drm/i915: Push the wakeref->count deferral to the backend"), I erroneously concluded that we last modify the engine inside __i915_request_commit() meaning that we could enable concurrent submission for userspace as we enqueued this request. However, this falls into a trap with other users of the engine->kernel_context waking up and submitting their request before the idle-switch is queued, with the result that the kernel_context is executed out-of-sequence most likely upsetting the GPU and certainly ourselves when we try to retire the out-of-sequence requests. As such we need to hold onto the effective engine->kernel_context mutex lock (via the engine pm mutex proxy) until we have finish queuing the request to the engine. v2: Serialise against concurrent intel_gt_retire_requests() v3: Describe the hairy locking scheme with intel_gt_retire_requests() for future reference. v4: Combine timeline->lock and engine pm release; it's hairy. Fixes: a79ca656b648 ("drm/i915: Push the wakeref->count deferral to the backend") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191120165514.3955081-2-chris@chris-wilson.co.uk (cherry picked from commit 5cba288466e9b229feb68295675246e7522fb5eb) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 47 ++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 7269c87c1372..373a4b9f159c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -73,8 +73,25 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void +__intel_timeline_enter_and_release_pm(struct intel_timeline *tl, + struct intel_engine_cs *engine) +{ + struct intel_gt_timelines *timelines = &engine->gt->timelines; + + spin_lock(&timelines->lock); + + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + + __intel_wakeref_defer_park(&engine->wakeref); + + spin_unlock(&timelines->lock); +} + static bool switch_to_kernel_context(struct intel_engine_cs *engine) { + struct intel_context *ce = engine->kernel_context; struct i915_request *rq; unsigned long flags; bool result = true; @@ -98,16 +115,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * This should hold true as we can only park the engine after * retiring the last request, thus all rings should be empty and * all timelines idle. + * + * For unlocking, there are 2 other parties and the GPU who have a + * stake here. + * + * A new gpu user will be waiting on the engine-pm to start their + * engine_unpark. New waiters are predicated on engine->wakeref.count + * and so intel_wakeref_defer_park() acts like a mutex_unlock of the + * engine->wakeref. + * + * The other party is intel_gt_retire_requests(), which is walking the + * list of active timelines looking for completions. Meanwhile as soon + * as we call __i915_request_queue(), the GPU may complete our request. + * Ergo, if we put ourselves on the timelines.active_list + * (se intel_timeline_enter()) before we increment the + * engine->wakeref.count, we may see the request completion and retire + * it causing an undeflow of the engine->wakeref. */ - flags = __timeline_mark_lock(engine->kernel_context); + flags = __timeline_mark_lock(ce); + GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); - rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + rq = __i915_request_create(ce, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ goto out_unlock; - intel_timeline_enter(i915_request_timeline(rq)); - /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; i915_request_add_active_barriers(rq); @@ -116,13 +148,14 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); - /* Release our exclusive hold on the engine */ - __intel_wakeref_defer_park(&engine->wakeref); __i915_request_queue(rq, NULL); + /* Expose ourselves to intel_gt_retire_requests() and new submission */ + __intel_timeline_enter_and_release_pm(ce->timeline, engine); + result = false; out_unlock: - __timeline_mark_unlock(engine->kernel_context, flags); + __timeline_mark_unlock(ce, flags); return result; } -- cgit v1.2.3