summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c42
1 files changed, 39 insertions, 3 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5e98fd79bd9d..1b567a3f006a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1387,6 +1387,10 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
*cs++ = rq->fence.seqno - 1;
intel_ring_advance(rq, cs);
+
+ /* Record the updated position of the request's payload */
+ rq->infix = intel_ring_offset(rq, cs);
+
return 0;
}
@@ -1878,6 +1882,23 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
+static bool lrc_regs_ok(const struct i915_request *rq)
+{
+ const struct intel_ring *ring = rq->ring;
+ const u32 *regs = rq->hw_context->lrc_reg_state;
+
+ /* Quick spot check for the common signs of context corruption */
+
+ if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
+ (RING_CTL_SIZE(ring->size) | RING_VALID))
+ return false;
+
+ if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
+ return false;
+
+ return true;
+}
+
static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1913,6 +1934,21 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
goto out_unlock;
/*
+ * If this request hasn't started yet, e.g. it is waiting on a
+ * semaphore, we need to avoid skipping the request or else we
+ * break the signaling chain. However, if the context is corrupt
+ * the request will not restart and we will be stuck with a wedged
+ * device. It is quite often the case that if we issue a reset
+ * while the GPU is loading the context image, that the context
+ * image becomes corrupt.
+ *
+ * Otherwise, if we have not started yet, the request should replay
+ * perfectly and we do not need to flag the result as being erroneous.
+ */
+ if (!i915_request_started(rq) && lrc_regs_ok(rq))
+ goto out_unlock;
+
+ /*
* If the request was innocent, we leave the request in the ELSP
* and will try to replay it on restarting. The context image may
* have been corrupted by the reset, in which case we may have
@@ -1924,7 +1960,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
* image back to the expected values to skip over the guilty request.
*/
i915_reset_request(rq, stalled);
- if (!stalled)
+ if (!stalled && lrc_regs_ok(rq))
goto out_unlock;
/*
@@ -1942,8 +1978,8 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
engine->context_size - PAGE_SIZE);
}
- /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
- rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
+ /* Rerun the request; its payload has been neutered (if guilty). */
+ rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
intel_ring_update_space(rq->ring);
execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);