diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 1596 |
1 files changed, 600 insertions, 996 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 414ddda43922..0adb879833ff 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -156,6 +156,11 @@ #define GEN8_CTX_STATUS_COMPLETE (1 << 4) #define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) +#define GEN8_CTX_STATUS_COMPLETED_MASK \ + (GEN8_CTX_STATUS_ACTIVE_IDLE | \ + GEN8_CTX_STATUS_PREEMPTED | \ + GEN8_CTX_STATUS_ELEMENT_SWITCH) + #define CTX_LRI_HEADER_0 0x01 #define CTX_CONTEXT_CONTROL 0x02 #define CTX_RING_HEAD 0x04 @@ -221,10 +226,16 @@ enum { /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ +#define WA_TAIL_DWORDS 2 + static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine); +static void execlists_init_reg_state(u32 *reg_state, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_ring *ring); /** * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists @@ -263,12 +274,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv)) - engine->idle_lite_restore_wa = ~0; - - engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) || - IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) && - (engine->id == VCS || engine->id == VCS2); + engine->disable_lite_restore_wa = + (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) || + IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) && + (engine->id == VCS || engine->id == VCS2); engine->ctx_desc_template = GEN8_CTX_VALID; if (IS_GEN8(dev_priv)) @@ -288,7 +297,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context - * * @ctx: Context to work on * @engine: Engine the descriptor will be used with * @@ -297,12 +305,13 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) * expensive to calculate, we'll just do it once and cache the result, * which remains valid until the context is unpinned. * - * This is what a descriptor looks like, from LSB to MSB: - * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) - * bits 12-31: LRCA, GTT address of (the HWSP of) this context - * bits 32-52: ctx ID, a globally unique tag - * bits 53-54: mbz, reserved for use by hardware - * bits 55-63: group ID, currently unused and set to 0 + * This is what a descriptor looks like, from LSB to MSB:: + * + * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 12-31: LRCA, GTT address of (the HWSP of) this context + * bits 32-52: ctx ID, a globally unique tag + * bits 53-54: mbz, reserved for use by hardware + * bits 55-63: group ID, currently unused and set to 0 */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, @@ -315,7 +324,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, desc = ctx->desc_template; /* bits 3-4 */ desc |= engine->ctx_desc_template; /* bits 0-11 */ - desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE; + desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -328,34 +337,18 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, return ctx->engine[engine->id].lrc_desc; } -static void execlists_elsp_write(struct drm_i915_gem_request *rq0, - struct drm_i915_gem_request *rq1) +static inline void +execlists_context_status_change(struct drm_i915_gem_request *rq, + unsigned long status) { + /* + * Only used when GVT-g is enabled now. When GVT-g is disabled, + * The compiler should eliminate this function as dead-code. + */ + if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) + return; - struct intel_engine_cs *engine = rq0->engine; - struct drm_i915_private *dev_priv = rq0->i915; - uint64_t desc[2]; - - if (rq1) { - desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine); - rq1->elsp_submitted++; - } else { - desc[1] = 0; - } - - desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine); - rq0->elsp_submitted++; - - /* You must always write both descriptors in the order below. */ - I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1])); - I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1])); - - I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0])); - /* The context is automatically loaded after the following */ - I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0])); - - /* ELSP is a wo register, use another nearby reg for posting */ - POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine)); + atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq); } static void @@ -367,13 +360,13 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) ASSIGN_CTX_PDP(ppgtt, reg_state, 0); } -static void execlists_update_context(struct drm_i915_gem_request *rq) +static u64 execlists_update_context(struct drm_i915_gem_request *rq) { - struct intel_engine_cs *engine = rq->engine; + struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; - uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state; + u32 *reg_state = ce->lrc_reg_state; - reg_state[CTX_RING_TAIL+1] = rq->tail; + reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail); /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. @@ -382,321 +375,236 @@ static void execlists_update_context(struct drm_i915_gem_request *rq) */ if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) execlists_update_context_pdps(ppgtt, reg_state); + + return ce->lrc_desc; } -static void execlists_submit_requests(struct drm_i915_gem_request *rq0, - struct drm_i915_gem_request *rq1) +static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = rq0->i915; - unsigned int fw_domains = rq0->engine->fw_domains; - - execlists_update_context(rq0); + struct drm_i915_private *dev_priv = engine->i915; + struct execlist_port *port = engine->execlist_port; + u32 __iomem *elsp = + dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + u64 desc[2]; - if (rq1) - execlists_update_context(rq1); + if (!port[0].count) + execlists_context_status_change(port[0].request, + INTEL_CONTEXT_SCHEDULE_IN); + desc[0] = execlists_update_context(port[0].request); + engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */ - spin_lock_irq(&dev_priv->uncore.lock); - intel_uncore_forcewake_get__locked(dev_priv, fw_domains); + if (port[1].request) { + GEM_BUG_ON(port[1].count); + execlists_context_status_change(port[1].request, + INTEL_CONTEXT_SCHEDULE_IN); + desc[1] = execlists_update_context(port[1].request); + port[1].count = 1; + } else { + desc[1] = 0; + } + GEM_BUG_ON(desc[0] == desc[1]); - execlists_elsp_write(rq0, rq1); + /* You must always write both descriptors in the order below. */ + writel(upper_32_bits(desc[1]), elsp); + writel(lower_32_bits(desc[1]), elsp); - intel_uncore_forcewake_put__locked(dev_priv, fw_domains); - spin_unlock_irq(&dev_priv->uncore.lock); + writel(upper_32_bits(desc[0]), elsp); + /* The context is automatically loaded after the following */ + writel(lower_32_bits(desc[0]), elsp); } -static inline void execlists_context_status_change( - struct drm_i915_gem_request *rq, - unsigned long status) +static bool ctx_single_port_submission(const struct i915_gem_context *ctx) { - /* - * Only used when GVT-g is enabled now. When GVT-g is disabled, - * The compiler should eliminate this function as dead-code. - */ - if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) - return; - - atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq); + return (IS_ENABLED(CONFIG_DRM_I915_GVT) && + ctx->execlists_force_single_submission); } -static void execlists_context_unqueue(struct intel_engine_cs *engine) +static bool can_merge_ctx(const struct i915_gem_context *prev, + const struct i915_gem_context *next) { - struct drm_i915_gem_request *req0 = NULL, *req1 = NULL; - struct drm_i915_gem_request *cursor, *tmp; + if (prev != next) + return false; - assert_spin_locked(&engine->execlist_lock); + if (ctx_single_port_submission(prev)) + return false; - /* - * If irqs are not active generate a warning as batches that finish - * without the irqs may get lost and a GPU Hang may occur. - */ - WARN_ON(!intel_irqs_enabled(engine->i915)); - - /* Try to read in pairs */ - list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue, - execlist_link) { - if (!req0) { - req0 = cursor; - } else if (req0->ctx == cursor->ctx) { - /* Same ctx: ignore first request, as second request - * will update tail past first request's workload */ - cursor->elsp_submitted = req0->elsp_submitted; - list_del(&req0->execlist_link); - i915_gem_request_unreference(req0); - req0 = cursor; - } else { - if (IS_ENABLED(CONFIG_DRM_I915_GVT)) { - /* - * req0 (after merged) ctx requires single - * submission, stop picking - */ - if (req0->ctx->execlists_force_single_submission) - break; - /* - * req0 ctx doesn't require single submission, - * but next req ctx requires, stop picking - */ - if (cursor->ctx->execlists_force_single_submission) - break; - } - req1 = cursor; - WARN_ON(req1->elsp_submitted); - break; - } - } + return true; +} - if (unlikely(!req0)) - return; +static void execlists_dequeue(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *cursor, *last; + struct execlist_port *port = engine->execlist_port; + bool submit = false; + + last = port->request; + if (last) + /* WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL + * as we resubmit the request. See gen8_emit_request() + * for where we prepare the padding after the end of the + * request. + */ + last->tail = last->wa_tail; - execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN); + GEM_BUG_ON(port[1].request); - if (req1) - execlists_context_status_change(req1, - INTEL_CONTEXT_SCHEDULE_IN); + /* Hardware submission is through 2 ports. Conceptually each port + * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is + * static for a context, and unique to each, so we only execute + * requests belonging to a single context from each ring. RING_HEAD + * is maintained by the CS in the context image, it marks the place + * where it got up to last time, and through RING_TAIL we tell the CS + * where we want to execute up to this time. + * + * In this list the requests are in order of execution. Consecutive + * requests from the same context are adjacent in the ringbuffer. We + * can combine these requests into a single RING_TAIL update: + * + * RING_HEAD...req1...req2 + * ^- RING_TAIL + * since to execute req2 the CS must first execute req1. + * + * Our goal then is to point each port to the end of a consecutive + * sequence of requests as being the most optimal (fewest wake ups + * and context switches) submission. + */ - if (req0->elsp_submitted & engine->idle_lite_restore_wa) { - /* - * WaIdleLiteRestore: make sure we never cause a lite restore - * with HEAD==TAIL. + spin_lock(&engine->execlist_lock); + list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) { + /* Can we combine this request with the current port? It has to + * be the same context/ringbuffer and not have any exceptions + * (e.g. GVT saying never to combine contexts). * - * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we - * resubmit the request. See gen8_emit_request() for where we - * prepare the padding after the end of the request. + * If we can combine the requests, we can execute both by + * updating the RING_TAIL to point to the end of the second + * request, and so we never need to tell the hardware about + * the first. */ - struct intel_ringbuffer *ringbuf; + if (last && !can_merge_ctx(cursor->ctx, last->ctx)) { + /* If we are on the second port and cannot combine + * this request with the last, then we are done. + */ + if (port != engine->execlist_port) + break; + + /* If GVT overrides us we only ever submit port[0], + * leaving port[1] empty. Note that we also have + * to be careful that we don't queue the same + * context (even though a different request) to + * the second port. + */ + if (ctx_single_port_submission(cursor->ctx)) + break; + + GEM_BUG_ON(last->ctx == cursor->ctx); + + i915_gem_request_assign(&port->request, last); + port++; + } + last = cursor; + submit = true; + } + if (submit) { + /* Decouple all the requests submitted from the queue */ + engine->execlist_queue.next = &cursor->execlist_link; + cursor->execlist_link.prev = &engine->execlist_queue; - ringbuf = req0->ctx->engine[engine->id].ringbuf; - req0->tail += 8; - req0->tail &= ringbuf->size - 1; + i915_gem_request_assign(&port->request, last); } + spin_unlock(&engine->execlist_lock); - execlists_submit_requests(req0, req1); + if (submit) + execlists_submit_ports(engine); } -static unsigned int -execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id) +static bool execlists_elsp_idle(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *head_req; - - assert_spin_locked(&engine->execlist_lock); - - head_req = list_first_entry_or_null(&engine->execlist_queue, - struct drm_i915_gem_request, - execlist_link); - - if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id))) - return 0; - - WARN(head_req->elsp_submitted == 0, "Never submitted head request\n"); - - if (--head_req->elsp_submitted > 0) - return 0; - - execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT); - - list_del(&head_req->execlist_link); - i915_gem_request_unreference(head_req); - - return 1; + return !engine->execlist_port[0].request; } -static u32 -get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer, - u32 *context_id) +static bool execlists_elsp_ready(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - u32 status; + int port; - read_pointer %= GEN8_CSB_ENTRIES; + port = 1; /* wait for a free slot */ + if (engine->disable_lite_restore_wa || engine->preempt_wa) + port = 0; /* wait for GPU to be idle before continuing */ - status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer)); - - if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) - return 0; - - *context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine, - read_pointer)); - - return status; + return !engine->execlist_port[port].request; } -/** - * intel_lrc_irq_handler() - handle Context Switch interrupts - * @data: tasklet handler passed in unsigned long - * +/* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. */ static void intel_lrc_irq_handler(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct execlist_port *port = engine->execlist_port; struct drm_i915_private *dev_priv = engine->i915; - u32 status_pointer; - unsigned int read_pointer, write_pointer; - u32 csb[GEN8_CSB_ENTRIES][2]; - unsigned int csb_read = 0, i; - unsigned int submit_contexts = 0; intel_uncore_forcewake_get(dev_priv, engine->fw_domains); - status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine)); - - read_pointer = engine->next_context_status_buffer; - write_pointer = GEN8_CSB_WRITE_PTR(status_pointer); - if (read_pointer > write_pointer) - write_pointer += GEN8_CSB_ENTRIES; - - while (read_pointer < write_pointer) { - if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES)) - break; - csb[csb_read][0] = get_context_status(engine, ++read_pointer, - &csb[csb_read][1]); - csb_read++; - } - - engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES; - - /* Update the read pointer to the old write pointer. Manual ringbuffer - * management ftw </sarcasm> */ - I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine), - _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, - engine->next_context_status_buffer << 8)); - - intel_uncore_forcewake_put(dev_priv, engine->fw_domains); - - spin_lock(&engine->execlist_lock); + if (!execlists_elsp_idle(engine)) { + u32 __iomem *csb_mmio = + dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); + u32 __iomem *buf = + dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)); + unsigned int csb, head, tail; + + csb = readl(csb_mmio); + head = GEN8_CSB_READ_PTR(csb); + tail = GEN8_CSB_WRITE_PTR(csb); + if (tail < head) + tail += GEN8_CSB_ENTRIES; + while (head < tail) { + unsigned int idx = ++head % GEN8_CSB_ENTRIES; + unsigned int status = readl(buf + 2 * idx); + + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) + continue; + + GEM_BUG_ON(port[0].count == 0); + if (--port[0].count == 0) { + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + execlists_context_status_change(port[0].request, + INTEL_CONTEXT_SCHEDULE_OUT); + + i915_gem_request_put(port[0].request); + port[0] = port[1]; + memset(&port[1], 0, sizeof(port[1])); + + engine->preempt_wa = false; + } - for (i = 0; i < csb_read; i++) { - if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) { - if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) { - if (execlists_check_remove_request(engine, csb[i][1])) - WARN(1, "Lite Restored request removed from queue\n"); - } else - WARN(1, "Preemption without Lite Restore\n"); + GEM_BUG_ON(port[0].count == 0 && + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); } - if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE | - GEN8_CTX_STATUS_ELEMENT_SWITCH)) - submit_contexts += - execlists_check_remove_request(engine, csb[i][1]); - } - - if (submit_contexts) { - if (!engine->disable_lite_restore_wa || - (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE)) - execlists_context_unqueue(engine); + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, + GEN8_CSB_WRITE_PTR(csb) << 8), + csb_mmio); } - spin_unlock(&engine->execlist_lock); + if (execlists_elsp_ready(engine)) + execlists_dequeue(engine); - if (unlikely(submit_contexts > 2)) - DRM_ERROR("More than two context complete events?\n"); + intel_uncore_forcewake_put(dev_priv, engine->fw_domains); } -static void execlists_context_queue(struct drm_i915_gem_request *request) +static void execlists_submit_request(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; - struct drm_i915_gem_request *cursor; - int num_elements = 0; - - spin_lock_bh(&engine->execlist_lock); + unsigned long flags; - list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) - if (++num_elements > 2) - break; + spin_lock_irqsave(&engine->execlist_lock, flags); - if (num_elements > 2) { - struct drm_i915_gem_request *tail_req; - - tail_req = list_last_entry(&engine->execlist_queue, - struct drm_i915_gem_request, - execlist_link); - - if (request->ctx == tail_req->ctx) { - WARN(tail_req->elsp_submitted != 0, - "More than 2 already-submitted reqs queued\n"); - list_del(&tail_req->execlist_link); - i915_gem_request_unreference(tail_req); - } - } - - i915_gem_request_reference(request); list_add_tail(&request->execlist_link, &engine->execlist_queue); - request->ctx_hw_id = request->ctx->hw_id; - if (num_elements == 0) - execlists_context_unqueue(engine); + if (execlists_elsp_idle(engine)) + tasklet_hi_schedule(&engine->irq_tasklet); - spin_unlock_bh(&engine->execlist_lock); -} - -static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - uint32_t flush_domains; - int ret; - - flush_domains = 0; - if (engine->gpu_caches_dirty) - flush_domains = I915_GEM_GPU_DOMAINS; - - ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - if (ret) - return ret; - - engine->gpu_caches_dirty = false; - return 0; -} - -static int execlists_move_to_gpu(struct drm_i915_gem_request *req, - struct list_head *vmas) -{ - const unsigned other_rings = ~intel_engine_flag(req->engine); - struct i915_vma *vma; - uint32_t flush_domains = 0; - bool flush_chipset = false; - int ret; - - list_for_each_entry(vma, vmas, exec_list) { - struct drm_i915_gem_object *obj = vma->obj; - - if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->engine, &req); - if (ret) - return ret; - } - - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); - - flush_domains |= obj->base.write_domain; - } - - if (flush_domains & I915_GEM_DOMAIN_GTT) - wmb(); - - /* Unconditionally invalidate gpu caches and ensure that we do flush - * any residual writes from the previous batch. - */ - return logical_ring_invalidate_all_caches(req); + spin_unlock_irqrestore(&engine->execlist_lock, flags); } int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) @@ -717,7 +625,11 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request return ret; } - request->ringbuf = ce->ringbuf; + request->ring = ce->ring; + + ret = intel_lr_context_pin(request->ctx, engine); + if (ret) + return ret; if (i915.enable_guc_submission) { /* @@ -725,23 +637,19 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request * going any further, as the i915_add_request() call * later on mustn't fail ... */ - ret = i915_guc_wq_check_space(request); + ret = i915_guc_wq_reserve(request); if (ret) - return ret; + goto err_unpin; } - ret = intel_lr_context_pin(request->ctx, engine); - if (ret) - return ret; - ret = intel_ring_begin(request, 0); if (ret) - goto err_unpin; + goto err_unreserve; if (!ce->initialised) { ret = engine->init_context(request); if (ret) - goto err_unpin; + goto err_unreserve; ce->initialised = true; } @@ -756,13 +664,16 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request request->reserved_space -= EXECLISTS_REQUEST_SIZE; return 0; +err_unreserve: + if (i915.enable_guc_submission) + i915_guc_wq_unreserve(request); err_unpin: intel_lr_context_unpin(request->ctx, engine); return ret; } /* - * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload + * intel_logical_ring_advance() - advance the tail and prepare for submission * @request: Request to advance the logical ringbuffer of. * * The tail is updated in our logical ringbuffer struct, not in the actual context. What @@ -771,13 +682,13 @@ err_unpin: * point, the tail *inside* the context is updated and the ELSP written to. */ static int -intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) +intel_logical_ring_advance(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; - intel_logical_ring_advance(ringbuf); - request->tail = ringbuf->tail; + intel_ring_advance(ring); + request->tail = ring->tail; /* * Here we add two extra NOOPs as padding to avoid @@ -785,9 +696,10 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) * * Caller must reserve WA_TAIL_DWORDS for us! */ - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + request->wa_tail = ring->tail; /* We keep the previous context alive until we retire the following * request. This ensures that any the context object is still pinned @@ -797,168 +709,14 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) */ request->previous_context = engine->last_context; engine->last_context = request->ctx; - - if (i915.enable_guc_submission) - i915_guc_submit(request); - else - execlists_context_queue(request); - - return 0; -} - -/** - * execlists_submission() - submit a batchbuffer for execution, Execlists style - * @params: execbuffer call parameters. - * @args: execbuffer call arguments. - * @vmas: list of vmas. - * - * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts - * away the submission details of the execbuffer ioctl call. - * - * Return: non-zero if the submission fails. - */ -int intel_execlists_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) -{ - struct drm_device *dev = params->dev; - struct intel_engine_cs *engine = params->engine; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ringbuffer *ringbuf = params->ctx->engine[engine->id].ringbuf; - u64 exec_start; - int instp_mode; - u32 instp_mask; - int ret; - - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); - return -EINVAL; - } - - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - DRM_DEBUG("sol reset is gen7 only\n"); - return -EINVAL; - } - - ret = execlists_move_to_gpu(params->request, vmas); - if (ret) - return ret; - - if (engine == &dev_priv->engine[RCS] && - instp_mode != dev_priv->relative_constants_mode) { - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); - intel_logical_ring_emit_reg(ringbuf, INSTPM); - intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode); - intel_logical_ring_advance(ringbuf); - - dev_priv->relative_constants_mode = instp_mode; - } - - exec_start = params->batch_obj_vm_offset + - args->batch_start_offset; - - ret = engine->emit_bb_start(params->request, exec_start, params->dispatch_flags); - if (ret) - return ret; - - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - - i915_gem_execbuffer_move_to_active(vmas, params->request); - - return 0; -} - -void intel_execlists_cancel_requests(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_request *req, *tmp; - LIST_HEAD(cancel_list); - - WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex)); - - spin_lock_bh(&engine->execlist_lock); - list_replace_init(&engine->execlist_queue, &cancel_list); - spin_unlock_bh(&engine->execlist_lock); - - list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) { - list_del(&req->execlist_link); - i915_gem_request_unreference(req); - } -} - -void intel_logical_ring_stop(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - if (!intel_engine_initialized(engine)) - return; - - ret = intel_engine_idle(engine); - if (ret) - DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", - engine->name, ret); - - /* TODO: Is this correct with Execlists enabled? */ - I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register(dev_priv, - RING_MI_MODE(engine->mmio_base), - MODE_IDLE, MODE_IDLE, - 1000)) { - DRM_ERROR("%s :timed out trying to stop ring\n", engine->name); - return; - } - I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); -} - -int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - int ret; - - if (!engine->gpu_caches_dirty) - return 0; - - ret = engine->emit_flush(req, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - - engine->gpu_caches_dirty = false; return 0; } static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = ctx->i915; struct intel_context *ce = &ctx->engine[engine->id]; void *vaddr; - u32 *lrc_reg_state; int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); @@ -966,41 +724,42 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ce->pin_count++) return 0; - ret = i915_gem_obj_ggtt_pin(ce->state, GEN8_LR_CONTEXT_ALIGN, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL); if (ret) goto err; - vaddr = i915_gem_object_pin_map(ce->state); + vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); - goto unpin_ctx_obj; + goto unpin_vma; } - lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - - ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ce->ringbuf); + ret = intel_ring_pin(ce->ring); if (ret) goto unpin_map; - i915_gem_context_reference(ctx); - ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state); intel_lr_context_descriptor_update(ctx, engine); - lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ringbuf->vma->node.start; - ce->lrc_reg_state = lrc_reg_state; - ce->state->dirty = true; + ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = + i915_ggtt_offset(ce->ring->vma); + + ce->state->obj->dirty = true; /* Invalidate GuC TLB. */ - if (i915.enable_guc_submission) + if (i915.enable_guc_submission) { + struct drm_i915_private *dev_priv = ctx->i915; I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + } + i915_gem_context_get(ctx); return 0; unpin_map: - i915_gem_object_unpin_map(ce->state); -unpin_ctx_obj: - i915_gem_object_ggtt_unpin(ce->state); + i915_gem_object_unpin_map(ce->state->obj); +unpin_vma: + __i915_vma_unpin(ce->state); err: ce->pin_count = 0; return ret; @@ -1017,30 +776,24 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, if (--ce->pin_count) return; - intel_unpin_ringbuffer_obj(ce->ringbuf); + intel_ring_unpin(ce->ring); - i915_gem_object_unpin_map(ce->state); - i915_gem_object_ggtt_unpin(ce->state); + i915_gem_object_unpin_map(ce->state->obj); + i915_vma_unpin(ce->state); - ce->lrc_vma = NULL; - ce->lrc_desc = 0; - ce->lrc_reg_state = NULL; - - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) { int ret, i; - struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; if (w->count == 0) return 0; - engine->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1048,17 +801,16 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr); - intel_logical_ring_emit(ringbuf, w->reg[i].value); + intel_ring_emit_reg(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); } - intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_advance(ring); - engine->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1094,7 +846,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) * code duplication. */ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, - uint32_t *const batch, + uint32_t *batch, uint32_t index) { struct drm_i915_private *dev_priv = engine->i915; @@ -1113,7 +865,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256); + wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); wa_ctx_emit(batch, index, 0); wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); @@ -1131,7 +883,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256); + wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); wa_ctx_emit(batch, index, 0); return index; @@ -1156,37 +908,24 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, return 0; } -/** - * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA - * - * @engine: only applicable for RCS - * @wa_ctx: structure representing wa_ctx - * offset: specifies start of the batch, should be cache-aligned. This is updated - * with the offset value received as input. - * size: size of the batch in DWORDS but HW expects in terms of cachelines - * @batch: page in which WA are loaded - * @offset: This field specifies the start of the batch, it should be - * cache-aligned otherwise it is adjusted accordingly. - * Typically we only have one indirect_ctx and per_ctx batch buffer which are - * initialized at the beginning and shared across all contexts but this field - * helps us to have multiple batches at different offsets and select them based - * on a criteria. At the moment this batch always start at the beginning of the page - * and at this point we don't have multiple wa_ctx batch buffers. - * - * The number of WA applied are not known at the beginning; we use this field - * to return the no of DWORDS written. +/* + * Typically we only have one indirect_ctx and per_ctx batch buffer which are + * initialized at the beginning and shared across all contexts but this field + * helps us to have multiple batches at different offsets and select them based + * on a criteria. At the moment this batch always start at the beginning of the page + * and at this point we don't have multiple wa_ctx batch buffers. * - * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END - * so it adds NOOPs as padding to make it cacheline aligned. - * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together - * makes a complete batch buffer. + * The number of WA applied are not known at the beginning; we use this field + * to return the no of DWORDS written. * - * Return: non-zero if we exceed the PAGE_SIZE limit. + * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END + * so it adds NOOPs as padding to make it cacheline aligned. + * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together + * makes a complete batch buffer. */ - static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t scratch_addr; @@ -1205,7 +944,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - scratch_addr = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; + scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | @@ -1230,26 +969,18 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); } -/** - * gen8_init_perctx_bb() - initialize per ctx batch with WA - * - * @engine: only applicable for RCS - * @wa_ctx: structure representing wa_ctx - * offset: specifies start of the batch, should be cache-aligned. - * size: size of the batch in DWORDS but HW expects in terms of cachelines - * @batch: page in which WA are loaded - * @offset: This field specifies the start of this batch. - * This batch is started immediately after indirect_ctx batch. Since we ensure - * that indirect_ctx ends on a cacheline this batch is aligned automatically. +/* + * This batch is started immediately after indirect_ctx batch. Since we ensure + * that indirect_ctx ends on a cacheline this batch is aligned automatically. * - * The number of DWORDS written are returned using this field. + * The number of DWORDS written are returned using this field. * * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. */ static int gen8_init_perctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); @@ -1264,7 +995,7 @@ static int gen8_init_perctx_bb(struct intel_engine_cs *engine, static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { int ret; @@ -1282,11 +1013,18 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, return ret; index = ret; + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */ + wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); + wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); + wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( + GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE)); + wa_ctx_emit(batch, index, MI_NOOP); + /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { - uint32_t scratch_addr - = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; + u32 scratch_addr = + i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | @@ -1332,7 +1070,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, static int gen9_init_perctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); @@ -1378,44 +1116,44 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine, static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) { - int ret; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; - engine->wa_ctx.obj = i915_gem_object_create(&engine->i915->drm, - PAGE_ALIGN(size)); - if (IS_ERR(engine->wa_ctx.obj)) { - DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n"); - ret = PTR_ERR(engine->wa_ctx.obj); - engine->wa_ctx.obj = NULL; - return ret; - } + obj = i915_gem_object_create(&engine->i915->drm, PAGE_ALIGN(size)); + if (IS_ERR(obj)) + return PTR_ERR(obj); - ret = i915_gem_obj_ggtt_pin(engine->wa_ctx.obj, PAGE_SIZE, 0); - if (ret) { - DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n", - ret); - drm_gem_object_unreference(&engine->wa_ctx.obj->base); - return ret; + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; } + err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err; + + engine->wa_ctx.vma = vma; return 0; + +err: + i915_gem_object_put(obj); + return err; } static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) { - if (engine->wa_ctx.obj) { - i915_gem_object_ggtt_unpin(engine->wa_ctx.obj); - drm_gem_object_unreference(&engine->wa_ctx.obj->base); - engine->wa_ctx.obj = NULL; - } + i915_vma_unpin_and_release(&engine->wa_ctx.vma); } static int intel_init_workaround_bb(struct intel_engine_cs *engine) { - int ret; + struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; uint32_t *batch; uint32_t offset; struct page *page; - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; + int ret; WARN_ON(engine->id != RCS); @@ -1427,7 +1165,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) } /* some WA perform writes to scratch page, ensure it is valid */ - if (engine->scratch.obj == NULL) { + if (!engine->scratch) { DRM_ERROR("scratch page not allocated for %s\n", engine->name); return -EINVAL; } @@ -1438,7 +1176,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } - page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0); + page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); batch = kmap_atomic(page); offset = 0; @@ -1485,55 +1223,37 @@ static void lrc_init_hws(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; I915_WRITE(RING_HWS_PGA(engine->mmio_base), - (u32)engine->status_page.gfx_addr); + engine->status_page.ggtt_offset); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); } static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - unsigned int next_context_status_buffer_hw; + int ret; + + ret = intel_mocs_init_engine(engine); + if (ret) + return ret; lrc_init_hws(engine); - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | engine->irq_keep_mask)); + intel_engine_reset_breadcrumbs(engine); + I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); I915_WRITE(RING_MODE_GEN7(engine), _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); - POSTING_READ(RING_MODE_GEN7(engine)); - - /* - * Instead of resetting the Context Status Buffer (CSB) read pointer to - * zero, we need to read the write pointer from hardware and use its - * value because "this register is power context save restored". - * Effectively, these states have been observed: - * - * | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) | - * BDW | CSB regs not reset | CSB regs reset | - * CHT | CSB regs not reset | CSB regs not reset | - * SKL | ? | ? | - * BXT | ? | ? | - */ - next_context_status_buffer_hw = - GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine))); - - /* - * When the CSB registers are reset (also after power-up / gpu reset), - * CSB write pointer is set to all 1's, which is not valid, use '5' in - * this special case, so the first element read is CSB[0]. - */ - if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK) - next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1); - engine->next_context_status_buffer = next_context_status_buffer_hw; DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); intel_engine_init_hangcheck(engine); - return intel_mocs_init_engine(engine); + if (!execlists_elsp_idle(engine)) + execlists_submit_ports(engine); + + return 0; } static int gen8_init_render_ring(struct intel_engine_cs *engine) @@ -1569,11 +1289,57 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) return init_workarounds_ring(engine); } +static void reset_common_ring(struct intel_engine_cs *engine, + struct drm_i915_gem_request *request) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct execlist_port *port = engine->execlist_port; + struct intel_context *ce = &request->ctx->engine[engine->id]; + + /* We want a simple context + ring to execute the breadcrumb update. + * We cannot rely on the context being intact across the GPU hang, + * so clear it and rebuild just what we need for the breadcrumb. + * All pending requests for this context will be zapped, and any + * future request will be after userspace has had the opportunity + * to recreate its own state. + */ + execlists_init_reg_state(ce->lrc_reg_state, + request->ctx, engine, ce->ring); + + /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ + ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = + i915_ggtt_offset(ce->ring->vma); + ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; + + request->ring->head = request->postfix; + request->ring->last_retired_head = -1; + intel_ring_update_space(request->ring); + + if (i915.enable_guc_submission) + return; + + /* Catch up with any missed context-switch interrupts */ + I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0)); + if (request->ctx != port[0].request->ctx) { + i915_gem_request_put(port[0].request); + port[0] = port[1]; + memset(&port[1], 0, sizeof(port[1])); + } + + /* CS is stopped, and we will resubmit both ports on resume */ + GEM_BUG_ON(request->ctx != port[0].request->ctx); + port[0].count = 0; + port[1].count = 0; + + /* Reset WaIdleLiteRestore:bdw,skl as well */ + request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); +} + static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ringbuf = req->ringbuf; const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; int i, ret; @@ -1581,28 +1347,27 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) if (ret) return ret; - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds)); for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - intel_logical_ring_emit_reg(ringbuf, - GEN8_RING_PDP_UDW(engine, i)); - intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr)); - intel_logical_ring_emit_reg(ringbuf, - GEN8_RING_PDP_LDW(engine, i)); - intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i)); + intel_ring_emit(ring, upper_32_bits(pd_daddr)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i)); + intel_ring_emit(ring, lower_32_bits(pd_daddr)); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int gen8_emit_bb_start(struct drm_i915_gem_request *req, - u64 offset, unsigned dispatch_flags) + u64 offset, u32 len, + unsigned int dispatch_flags) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ring *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -1629,14 +1394,14 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | - (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); - intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | + (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0)); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1655,14 +1420,10 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } -static int gen8_emit_flush(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 unused) +static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ringbuffer *ringbuf = request->ringbuf; - struct intel_engine_cs *engine = ringbuf->engine; - struct drm_i915_private *dev_priv = request->i915; - uint32_t cmd; + struct intel_ring *ring = request->ring; + u32 cmd; int ret; ret = intel_ring_begin(request, 4); @@ -1678,30 +1439,30 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, */ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - if (invalidate_domains & I915_GEM_GPU_DOMAINS) { + if (mode & EMIT_INVALIDATE) { cmd |= MI_INVALIDATE_TLB; - if (engine == &dev_priv->engine[VCS]) + if (request->engine->id == VCS) cmd |= MI_INVALIDATE_BSD; } - intel_logical_ring_emit(ringbuf, cmd); - intel_logical_ring_emit(ringbuf, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_logical_ring_emit(ringbuf, 0); /* upper addr */ - intel_logical_ring_emit(ringbuf, 0); /* value */ - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, + I915_GEM_HWS_SCRATCH_ADDR | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ + intel_ring_advance(ring); return 0; } static int gen8_emit_flush_render(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 flush_domains) + u32 mode) { - struct intel_ringbuffer *ringbuf = request->ringbuf; - struct intel_engine_cs *engine = ringbuf->engine; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + struct intel_ring *ring = request->ring; + struct intel_engine_cs *engine = request->engine; + u32 scratch_addr = + i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 flags = 0; int ret; @@ -1709,14 +1470,14 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, flags |= PIPE_CONTROL_CS_STALL; - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -1751,40 +1512,40 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, return ret; if (vf_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } if (dc_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, PIPE_CONTROL_DC_FLUSH_ENABLE); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, flags); - intel_logical_ring_emit(ringbuf, scratch_addr); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); if (dc_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, PIPE_CONTROL_CS_STALL); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, PIPE_CONTROL_CS_STALL); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } - intel_logical_ring_advance(ringbuf); + intel_ring_advance(ring); return 0; } @@ -1809,11 +1570,10 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -#define WA_TAIL_DWORDS 2 static int gen8_emit_request(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ring *ring = request->ring; int ret; ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS); @@ -1823,21 +1583,20 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - intel_logical_ring_emit(ringbuf, - (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); - intel_logical_ring_emit(ringbuf, - intel_hws_seqno_address(request->engine) | - MI_FLUSH_DW_USE_GTT); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, request->seqno); - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); - intel_logical_ring_emit(ringbuf, MI_NOOP); - return intel_logical_ring_advance_and_submit(request); + intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); + intel_ring_emit(ring, + intel_hws_seqno_address(request->engine) | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, request->fence.seqno); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); + return intel_logical_ring_advance(request); } static int gen8_emit_request_render(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ring *ring = request->ring; int ret; ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS); @@ -1851,50 +1610,19 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request) * need a prior CS_STALL, which is emitted by the flush * following the batch. */ - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, - (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_logical_ring_emit(ringbuf, - intel_hws_seqno_address(request->engine)); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request)); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, + (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE)); + intel_ring_emit(ring, intel_hws_seqno_address(request->engine)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, i915_gem_request_get_seqno(request)); /* We're thrashing one dword of HWS. */ - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); - intel_logical_ring_emit(ringbuf, MI_NOOP); - return intel_logical_ring_advance_and_submit(request); -} - -static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req) -{ - struct render_state so; - int ret; - - ret = i915_gem_render_state_prepare(req->engine, &so); - if (ret) - return ret; - - if (so.rodata == NULL) - return 0; - - ret = req->engine->emit_bb_start(req, so.ggtt_offset, - I915_DISPATCH_SECURE); - if (ret) - goto out; - - ret = req->engine->emit_bb_start(req, - (so.ggtt_offset + so.aux_batch_offset), - I915_DISPATCH_SECURE); - if (ret) - goto out; - - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - -out: - i915_gem_render_state_fini(&so); - return ret; + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); + return intel_logical_ring_advance(request); } static int gen8_init_rcs_context(struct drm_i915_gem_request *req) @@ -1913,14 +1641,12 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return intel_lr_context_render_state_init(req); + return i915_gem_render_state_init(req); } /** * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer - * * @engine: Engine Command Streamer. - * */ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) { @@ -1939,39 +1665,42 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - intel_logical_ring_stop(engine); WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0); } if (engine->cleanup) engine->cleanup(engine); - i915_cmd_parser_fini_ring(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - - intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_common(engine); - if (engine->status_page.obj) { - i915_gem_object_unpin_map(engine->status_page.obj); - engine->status_page.obj = NULL; + if (engine->status_page.vma) { + i915_gem_object_unpin_map(engine->status_page.vma->obj); + engine->status_page.vma = NULL; } intel_lr_context_unpin(dev_priv->kernel_context, engine); - engine->idle_lite_restore_wa = 0; - engine->disable_lite_restore_wa = false; - engine->ctx_desc_template = 0; - lrc_destroy_wa_ctx_obj(engine); engine->i915 = NULL; } +void intel_execlists_enable_submission(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + for_each_engine(engine, dev_priv) + engine->submit_request = execlists_submit_request; +} + static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ engine->init_hw = gen8_init_common_ring; - engine->emit_request = gen8_emit_request; + engine->reset_hw = reset_common_ring; engine->emit_flush = gen8_emit_flush; + engine->emit_request = gen8_emit_request; + engine->submit_request = execlists_submit_request; + engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; engine->emit_bb_start = gen8_emit_bb_start; @@ -1980,41 +1709,71 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) } static inline void -logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift) +logical_ring_default_irqs(struct intel_engine_cs *engine) { + unsigned shift = engine->irq_shift; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } static int -lrc_setup_hws(struct intel_engine_cs *engine, - struct drm_i915_gem_object *dctx_obj) +lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma) { + const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE; void *hws; /* The HWSP is part of the default context object in LRC mode. */ - engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) + - LRC_PPHWSP_PN * PAGE_SIZE; - hws = i915_gem_object_pin_map(dctx_obj); + hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); if (IS_ERR(hws)) return PTR_ERR(hws); - engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; - engine->status_page.obj = dctx_obj; + + engine->status_page.page_addr = hws + hws_offset; + engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset; + engine->status_page.vma = vma; return 0; } +static void +logical_ring_setup(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + enum forcewake_domains fw_domains; + + intel_engine_setup_common(engine); + + /* Intentionally left blank. */ + engine->buffer = NULL; + + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, + RING_ELSP(engine), + FW_REG_WRITE); + + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + RING_CONTEXT_STATUS_PTR(engine), + FW_REG_READ | FW_REG_WRITE); + + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + RING_CONTEXT_STATUS_BUF_BASE(engine), + FW_REG_READ); + + engine->fw_domains = fw_domains; + + tasklet_init(&engine->irq_tasklet, + intel_lrc_irq_handler, (unsigned long)engine); + + logical_ring_init_platform_invariants(engine); + logical_ring_default_vfuncs(engine); + logical_ring_default_irqs(engine); +} + static int logical_ring_init(struct intel_engine_cs *engine) { struct i915_gem_context *dctx = engine->i915->kernel_context; int ret; - ret = intel_engine_init_breadcrumbs(engine); - if (ret) - goto error; - - ret = i915_cmd_parser_init_ring(engine); + ret = intel_engine_init_common(engine); if (ret) goto error; @@ -2044,11 +1803,13 @@ error: return ret; } -static int logical_render_ring_init(struct intel_engine_cs *engine) +int logical_render_ring_init(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; int ret; + logical_ring_setup(engine); + if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; @@ -2058,11 +1819,10 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) else engine->init_hw = gen8_init_render_ring; engine->init_context = gen8_init_rcs_context; - engine->cleanup = intel_fini_pipe_control; engine->emit_flush = gen8_emit_flush_render; engine->emit_request = gen8_emit_request_render; - ret = intel_init_pipe_control(engine, 4096); + ret = intel_engine_create_scratch(engine, 4096); if (ret) return ret; @@ -2085,160 +1845,11 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) return ret; } -static const struct logical_ring_info { - const char *name; - unsigned exec_id; - unsigned guc_id; - u32 mmio_base; - unsigned irq_shift; - int (*init)(struct intel_engine_cs *engine); -} logical_rings[] = { - [RCS] = { - .name = "render ring", - .exec_id = I915_EXEC_RENDER, - .guc_id = GUC_RENDER_ENGINE, - .mmio_base = RENDER_RING_BASE, - .irq_shift = GEN8_RCS_IRQ_SHIFT, - .init = logical_render_ring_init, - }, - [BCS] = { - .name = "blitter ring", - .exec_id = I915_EXEC_BLT, - .guc_id = GUC_BLITTER_ENGINE, - .mmio_base = BLT_RING_BASE, - .irq_shift = GEN8_BCS_IRQ_SHIFT, - .init = logical_ring_init, - }, - [VCS] = { - .name = "bsd ring", - .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE, - .mmio_base = GEN6_BSD_RING_BASE, - .irq_shift = GEN8_VCS1_IRQ_SHIFT, - .init = logical_ring_init, - }, - [VCS2] = { - .name = "bsd2 ring", - .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE2, - .mmio_base = GEN8_BSD2_RING_BASE, - .irq_shift = GEN8_VCS2_IRQ_SHIFT, - .init = logical_ring_init, - }, - [VECS] = { - .name = "video enhancement ring", - .exec_id = I915_EXEC_VEBOX, - .guc_id = GUC_VIDEOENHANCE_ENGINE, - .mmio_base = VEBOX_RING_BASE, - .irq_shift = GEN8_VECS_IRQ_SHIFT, - .init = logical_ring_init, - }, -}; - -static struct intel_engine_cs * -logical_ring_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) -{ - const struct logical_ring_info *info = &logical_rings[id]; - struct intel_engine_cs *engine = &dev_priv->engine[id]; - enum forcewake_domains fw_domains; - - engine->id = id; - engine->name = info->name; - engine->exec_id = info->exec_id; - engine->guc_id = info->guc_id; - engine->mmio_base = info->mmio_base; - - engine->i915 = dev_priv; - - /* Intentionally left blank. */ - engine->buffer = NULL; - - fw_domains = intel_uncore_forcewake_for_reg(dev_priv, - RING_ELSP(engine), - FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_PTR(engine), - FW_REG_READ | FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_BUF_BASE(engine), - FW_REG_READ); - - engine->fw_domains = fw_domains; - - INIT_LIST_HEAD(&engine->active_list); - INIT_LIST_HEAD(&engine->request_list); - INIT_LIST_HEAD(&engine->buffers); - INIT_LIST_HEAD(&engine->execlist_queue); - spin_lock_init(&engine->execlist_lock); - - tasklet_init(&engine->irq_tasklet, - intel_lrc_irq_handler, (unsigned long)engine); - - logical_ring_init_platform_invariants(engine); - logical_ring_default_vfuncs(engine); - logical_ring_default_irqs(engine, info->irq_shift); - - intel_engine_init_hangcheck(engine); - i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool); - - return engine; -} - -/** - * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers - * @dev: DRM device. - * - * This function inits the engines for an Execlists submission style (the - * equivalent in the legacy ringbuffer submission world would be - * i915_gem_init_engines). It does it only for those engines that are present in - * the hardware. - * - * Return: non-zero if the initialization failed. - */ -int intel_logical_rings_init(struct drm_device *dev) +int logical_xcs_ring_init(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - unsigned int mask = 0; - unsigned int i; - int ret; - - WARN_ON(INTEL_INFO(dev_priv)->ring_mask & - GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); - - for (i = 0; i < ARRAY_SIZE(logical_rings); i++) { - if (!HAS_ENGINE(dev_priv, i)) - continue; - - if (!logical_rings[i].init) - continue; + logical_ring_setup(engine); - ret = logical_rings[i].init(logical_ring_setup(dev_priv, i)); - if (ret) - goto cleanup; - - mask |= ENGINE_MASK(i); - } - - /* - * Catch failures to update logical_rings table when the new engines - * are added to the driver by a warning and disabling the forgotten - * engines. - */ - if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) { - struct intel_device_info *info = - (struct intel_device_info *)&dev_priv->info; - info->ring_mask = mask; - } - - return 0; - -cleanup: - for (i = 0; i < I915_NUM_ENGINES; i++) - intel_logical_ring_cleanup(&dev_priv->engine[i]); - - return ret; + return logical_ring_init(engine); } static u32 @@ -2259,24 +1870,24 @@ make_rpcs(struct drm_i915_private *dev_priv) * must make an explicit request through RPCS for full * enablement. */ - if (INTEL_INFO(dev_priv)->has_slice_pg) { + if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) { rpcs |= GEN8_RPCS_S_CNT_ENABLE; - rpcs |= INTEL_INFO(dev_priv)->slice_total << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) << GEN8_RPCS_S_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } - if (INTEL_INFO(dev_priv)->has_subslice_pg) { + if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= INTEL_INFO(dev_priv)->subslice_per_slice << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << GEN8_RPCS_SS_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } - if (INTEL_INFO(dev_priv)->has_eu_pg) { - rpcs |= INTEL_INFO(dev_priv)->eu_per_subslice << + if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) { + rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; - rpcs |= INTEL_INFO(dev_priv)->eu_per_subslice << + rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } @@ -2305,38 +1916,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static int -populate_lr_context(struct i915_gem_context *ctx, - struct drm_i915_gem_object *ctx_obj, - struct intel_engine_cs *engine, - struct intel_ringbuffer *ringbuf) +static void execlists_init_reg_state(u32 *reg_state, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct intel_ring *ring) { - struct drm_i915_private *dev_priv = ctx->i915; - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; - void *vaddr; - u32 *reg_state; - int ret; - - if (!ppgtt) - ppgtt = dev_priv->mm.aliasing_ppgtt; - - ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); - if (ret) { - DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); - return ret; - } - - vaddr = i915_gem_object_pin_map(ctx_obj); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); - return ret; - } - ctx_obj->dirty = true; - - /* The second page of the context object contains some fields which must - * be set up prior to the first execution. */ - reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + struct drm_i915_private *dev_priv = engine->i915; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM * commands followed by (reg, value) pairs. The values we are setting here are @@ -2350,19 +1936,16 @@ populate_lr_context(struct i915_gem_context *ctx, _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | (HAS_RESOURCE_STREAMER(dev_priv) ? - CTX_CTRL_RS_CTX_ENABLE : 0))); + CTX_CTRL_RS_CTX_ENABLE : 0))); ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base), 0); - /* Ring buffer start address is not known until the buffer is pinned. - * It is written to the context image in execlists_update_context() - */ ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, RING_START(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, RING_CTL(engine->mmio_base), - ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); + ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, RING_BBADDR_UDW(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, @@ -2383,9 +1966,9 @@ populate_lr_context(struct i915_gem_context *ctx, RING_INDIRECT_CTX(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); - if (engine->wa_ctx.obj) { + if (engine->wa_ctx.vma) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj); + u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); reg_state[CTX_RCS_INDIRECT_CTX+1] = (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | @@ -2440,6 +2023,36 @@ populate_lr_context(struct i915_gem_context *ctx, ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, make_rpcs(dev_priv)); } +} + +static int +populate_lr_context(struct i915_gem_context *ctx, + struct drm_i915_gem_object *ctx_obj, + struct intel_engine_cs *engine, + struct intel_ring *ring) +{ + void *vaddr; + int ret; + + ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); + if (ret) { + DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); + return ret; + } + + vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); + return ret; + } + ctx_obj->dirty = true; + + /* The second page of the context object contains some fields which must + * be set up prior to the first execution. */ + + execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, + ctx, engine, ring); i915_gem_object_unpin_map(ctx_obj); @@ -2484,26 +2097,14 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *engine) return ret; } -/** - * execlists_context_deferred_alloc() - create the LRC specific bits of a context - * @ctx: LR context to create. - * @engine: engine to be used with the context. - * - * This function can be called more than once, with different engines, if we plan - * to use the context with them. The context backing objects and the ringbuffers - * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why - * the creation is a deferred call: it's better to make sure first that we need to use - * a given ring with the context. - * - * Return: non-zero on error. - */ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; struct intel_context *ce = &ctx->engine[engine->id]; + struct i915_vma *vma; uint32_t context_size; - struct intel_ringbuffer *ringbuf; + struct intel_ring *ring; int ret; WARN_ON(ce->state); @@ -2519,60 +2120,63 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return PTR_ERR(ctx_obj); } - ringbuf = intel_engine_create_ringbuffer(engine, ctx->ring_size); - if (IS_ERR(ringbuf)) { - ret = PTR_ERR(ringbuf); + vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto error_deref_obj; + } + + ring = intel_engine_create_ring(engine, ctx->ring_size); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); goto error_deref_obj; } - ret = populate_lr_context(ctx, ctx_obj, engine, ringbuf); + ret = populate_lr_context(ctx, ctx_obj, engine, ring); if (ret) { DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); - goto error_ringbuf; + goto error_ring_free; } - ce->ringbuf = ringbuf; - ce->state = ctx_obj; + ce->ring = ring; + ce->state = vma; ce->initialised = engine->init_context == NULL; return 0; -error_ringbuf: - intel_ringbuffer_free(ringbuf); +error_ring_free: + intel_ring_free(ring); error_deref_obj: - drm_gem_object_unreference(&ctx_obj->base); - ce->ringbuf = NULL; - ce->state = NULL; + i915_gem_object_put(ctx_obj); return ret; } -void intel_lr_context_reset(struct drm_i915_private *dev_priv, - struct i915_gem_context *ctx) +void intel_lr_context_resume(struct drm_i915_private *dev_priv) { + struct i915_gem_context *ctx = dev_priv->kernel_context; struct intel_engine_cs *engine; for_each_engine(engine, dev_priv) { struct intel_context *ce = &ctx->engine[engine->id]; - struct drm_i915_gem_object *ctx_obj = ce->state; void *vaddr; uint32_t *reg_state; - if (!ctx_obj) + if (!ce->state) continue; - vaddr = i915_gem_object_pin_map(ctx_obj); + vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); if (WARN_ON(IS_ERR(vaddr))) continue; reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ctx_obj->dirty = true; reg_state[CTX_RING_HEAD+1] = 0; reg_state[CTX_RING_TAIL+1] = 0; - i915_gem_object_unpin_map(ctx_obj); + ce->state->obj->dirty = true; + i915_gem_object_unpin_map(ce->state->obj); - ce->ringbuf->head = 0; - ce->ringbuf->tail = 0; + ce->ring->head = 0; + ce->ring->tail = 0; } } |