Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 162
1 file changed, 77 insertions(+), 85 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index bc86585b9fbb..dde04b7643b1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -365,7 +365,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
-	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
+	reg_state[CTX_RING_TAIL+1] = rq->tail;
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
@@ -440,7 +440,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (last)
 			/* WaIdleLiteRestore:bdw,skl
 			 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
-			 * as we resubmit the request. See gen8_emit_request()
+			 * as we resubmit the request. See gen8_emit_breadcrumb()
 			 * for where we prepare the padding after the end of the
 			 * request.
 			 */
@@ -522,6 +522,28 @@ static bool execlists_elsp_idle(struct intel_engine_cs *engine)
 	return !engine->execlist_port[0].request;
 }
 
+/**
+ * intel_execlists_idle() - Determine if all engine submission ports are idle
+ * @dev_priv: i915 device private
+ *
+ * Return true if there are no requests pending on any of the submission ports
+ * of any engines.
+ */
+bool intel_execlists_idle(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (!i915.enable_execlists)
+		return true;
+
+	for_each_engine(engine, dev_priv, id)
+		if (!execlists_elsp_idle(engine))
+			return false;
+
+	return true;
+}
+
 static bool execlists_elsp_ready(struct intel_engine_cs *engine)
 {
 	int port;
@@ -599,6 +621,15 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 
 	spin_lock_irqsave(&engine->execlist_lock, flags);
 
+	/* We keep the previous context alive until we retire the following
+	 * request. This ensures that any the context object is still pinned
+	 * for any residual writes the HW makes into it on the context switch
+	 * into the next object following the breadcrumb. Otherwise, we may
+	 * retire the context too early.
+	 */
+	request->previous_context = engine->last_context;
+	engine->last_context = request->ctx;
+
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	if (execlists_elsp_idle(engine))
 		tasklet_hi_schedule(&engine->irq_tasklet);
@@ -671,46 +702,6 @@ err_unpin:
 	return ret;
 }
 
-/*
- * intel_logical_ring_advance() - advance the tail and prepare for submission
- * @request: Request to advance the logical ringbuffer of.
- *
- * The tail is updated in our logical ringbuffer struct, not in the actual context. What
- * really happens during submission is that the context and current tail will be placed
- * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
- * point, the tail *inside* the context is updated and the ELSP written to.
- */
-static int
-intel_logical_ring_advance(struct drm_i915_gem_request *request)
-{
-	struct intel_ring *ring = request->ring;
-	struct intel_engine_cs *engine = request->engine;
-
-	intel_ring_advance(ring);
-	request->tail = ring->tail;
-
-	/*
-	 * Here we add two extra NOOPs as padding to avoid
-	 * lite restore of a context with HEAD==TAIL.
-	 *
-	 * Caller must reserve WA_TAIL_DWORDS for us!
-	 */
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
-	request->wa_tail = ring->tail;
-
-	/* We keep the previous context alive until we retire the following
-	 * request. This ensures that any the context object is still pinned
-	 * for any residual writes the HW makes into it on the context switch
-	 * into the next object following the breadcrumb. Otherwise, we may
-	 * retire the context too early.
-	 */
-	request->previous_context = engine->last_context;
-	engine->last_context = request->ctx;
-	return 0;
-}
-
 static int intel_lr_context_pin(struct i915_gem_context *ctx,
 				struct intel_engine_cs *engine)
 {
@@ -744,7 +735,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
 		i915_ggtt_offset(ce->ring->vma);
 
-	ce->state->obj->dirty = true;
+	ce->state->obj->mm.dirty = true;
 
 	/* Invalidate GuC TLB. */
 	if (i915.enable_guc_submission) {
@@ -1566,39 +1557,35 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
  * used as a workaround for not being allowed to do lite
  * restore with HEAD==TAIL (WaIdleLiteRestore).
  */
-
-static int gen8_emit_request(struct drm_i915_gem_request *request)
+static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out)
 {
-	struct intel_ring *ring = request->ring;
-	int ret;
-
-	ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS);
-	if (ret)
-		return ret;
+	*out++ = MI_NOOP;
+	*out++ = MI_NOOP;
+	request->wa_tail = intel_ring_offset(request->ring, out);
+}
 
+static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request,
+				 u32 *out)
+{
 	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
 
-	intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
-	intel_ring_emit(ring,
-			intel_hws_seqno_address(request->engine) |
-			MI_FLUSH_DW_USE_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, request->fence.seqno);
-	intel_ring_emit(ring, MI_USER_INTERRUPT);
-	intel_ring_emit(ring, MI_NOOP);
-	return intel_logical_ring_advance(request);
+	*out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	*out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
+	*out++ = 0;
+	*out++ = request->global_seqno;
+	*out++ = MI_USER_INTERRUPT;
+	*out++ = MI_NOOP;
+	request->tail = intel_ring_offset(request->ring, out);
+
+	gen8_emit_wa_tail(request, out);
 }
 
-static int gen8_emit_request_render(struct drm_i915_gem_request *request)
-{
-	struct intel_ring *ring = request->ring;
-	int ret;
-
-	ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS);
-	if (ret)
-		return ret;
+static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
 
+static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
+					u32 *out)
+{
 	/* We're using qword write, seqno should be aligned to 8 bytes. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
 
@@ -1606,21 +1593,24 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
 	 * need a prior CS_STALL, which is emitted by the flush
 	 * following the batch.
 	 */
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-	intel_ring_emit(ring,
-			(PIPE_CONTROL_GLOBAL_GTT_IVB |
-			 PIPE_CONTROL_CS_STALL |
-			 PIPE_CONTROL_QW_WRITE));
-	intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, i915_gem_request_get_seqno(request));
+	*out++ = GFX_OP_PIPE_CONTROL(6);
+	*out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+		  PIPE_CONTROL_CS_STALL |
+		  PIPE_CONTROL_QW_WRITE);
+	*out++ = intel_hws_seqno_address(request->engine);
+	*out++ = 0;
+	*out++ = request->global_seqno;
 	/* We're thrashing one dword of HWS. */
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_USER_INTERRUPT);
-	intel_ring_emit(ring, MI_NOOP);
-	return intel_logical_ring_advance(request);
+	*out++ = 0;
+	*out++ = MI_USER_INTERRUPT;
+	*out++ = MI_NOOP;
+	request->tail = intel_ring_offset(request->ring, out);
+
+	gen8_emit_wa_tail(request, out);
 }
 
+static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
+
 static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
 {
 	int ret;
@@ -1637,7 +1627,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
 	if (ret)
 		DRM_ERROR("MOCS failed to program: expect performance issues.\n");
 
-	return i915_gem_render_state_init(req);
+	return i915_gem_render_state_emit(req);
 }
 
 /**
@@ -1694,7 +1684,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->init_hw = gen8_init_common_ring;
 	engine->reset_hw = reset_common_ring;
 	engine->emit_flush = gen8_emit_flush;
-	engine->emit_request = gen8_emit_request;
+	engine->emit_breadcrumb = gen8_emit_breadcrumb;
+	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
 	engine->submit_request = execlists_submit_request;
 
 	engine->irq_enable = gen8_logical_ring_enable_irq;
@@ -1816,7 +1807,8 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	engine->init_hw = gen8_init_render_ring;
 	engine->init_context = gen8_init_rcs_context;
 	engine->emit_flush = gen8_emit_flush_render;
-	engine->emit_request = gen8_emit_request_render;
+	engine->emit_breadcrumb = gen8_emit_breadcrumb_render;
+	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz;
 
 	ret = intel_engine_create_scratch(engine, 4096);
 	if (ret)
@@ -2042,7 +2034,7 @@ populate_lr_context(struct i915_gem_context *ctx,
 		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
 		return ret;
 	}
-	ctx_obj->dirty = true;
+	ctx_obj->mm.dirty = true;
 
 	/* The second page of the context object contains some fields which must
 	 * be set up prior to the first execution. */
@@ -2180,7 +2172,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			reg[CTX_RING_HEAD+1] = 0;
 			reg[CTX_RING_TAIL+1] = 0;
 
-			ce->state->obj->dirty = true;
+			ce->state->obj->mm.dirty = true;
 			i915_gem_object_unpin_map(ce->state->obj);
 
 			ce->ring->head = ce->ring->tail = 0;
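The hunks above replace the intel_ring_begin()/intel_ring_emit()/intel_logical_ring_advance() sequence with breadcrumb emitters that write dwords through a caller-provided u32 *out cursor, with the footprint declared up front in engine->emit_breadcrumb_sz. Below is a minimal standalone sketch of that general pattern only; it is not the i915 code, and every identifier in it (fake_ring, emit_breadcrumb, FAKE_NOOP, FAKE_INTERRUPT) is invented for illustration.

/*
 * Hypothetical sketch: the caller reserves a fixed number of dwords
 * (emit_breadcrumb_sz), the emitter fills them through an advancing
 * cursor, and the caller bumps the ring tail by the known size.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FAKE_NOOP       0x00000000u
#define FAKE_INTERRUPT  0x01000000u
#define WA_TAIL_DWORDS  2

struct fake_ring {
	uint32_t dwords[64];
	unsigned int tail;	/* in dwords */
};

/* Fixed footprint of the breadcrumb, known before any dword is written. */
static const int emit_breadcrumb_sz = 4 + WA_TAIL_DWORDS;

/* Fill the reserved slot through an advancing cursor; return the new cursor. */
static uint32_t *emit_breadcrumb(uint32_t *out, uint32_t seqno)
{
	*out++ = 0xdeadbeefu;	/* stand-in for the flush/store command */
	*out++ = seqno;		/* value the GPU would write to the status page */
	*out++ = FAKE_INTERRUPT;
	*out++ = FAKE_NOOP;
	/* Two trailing NOOPs: WaIdleLiteRestore-style padding. */
	*out++ = FAKE_NOOP;
	*out++ = FAKE_NOOP;
	return out;
}

int main(void)
{
	struct fake_ring ring = { .tail = 0 };
	uint32_t *start = &ring.dwords[ring.tail];
	uint32_t *end = emit_breadcrumb(start, 42);

	/* The caller, not the emitter, advances the tail by the declared size. */
	assert(end - start == emit_breadcrumb_sz);
	ring.tail += emit_breadcrumb_sz;

	printf("emitted %td dwords, tail now %u\n", end - start, ring.tail);
	return 0;
}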