From 4a118ecbe99c93cf9f9582e83a88d03f18d6cb84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Oct 2017 22:32:36 +0100 Subject: drm/i915: Filter out spurious execlists context-switch interrupts Back in commit a4b2b01523a8 ("drm/i915: Don't mark an execlists context-switch when idle") we noticed the presence of late context-switch interrupts. We were able to filter those out by looking at whether the ELSP remained active, but in commit beecec901790 ("drm/i915/execlists: Preemption!") that became problematic as we now anticipate receiving a context-switch event for preemption while ELSP may be empty. To restore the spurious interrupt suppression, add a counter for the expected number of pending context-switches and skip if we do not need to handle this interrupt to make forward progress. v2: Don't forget to switch on for preempt. v3: Reduce the counter to an on/off boolean tracker. Declare the HW as active when we first submit, and idle after the final completion event (with which we confirm the HW says it is idle), and track each source of activity separately. With a finite number of sources, it should aid us in debugging which gets stuck. Fixes: beecec901790 ("drm/i915/execlists: Preemption!") Signed-off-by: Chris Wilson Cc: Michal Winiarski Cc: Tvrtko Ursulin Cc: Arkadiusz Hiler Cc: Mika Kuoppala Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171023213237.26536-3-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_ringbuffer.h | 34 +++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 17186f067408..6a42ed618a28 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -241,9 +241,17 @@ struct intel_engine_execlists { } port[EXECLIST_MAX_PORTS]; /** - * @preempt: are we currently handling a preempting context switch? + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield.
*/ - bool preempt; + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 /** * @port_mask: number of execlist ports - 1 @@ -525,6 +533,27 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; +static inline void +execlists_set_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __set_bit(bit, (unsigned long *)&execlists->active); +} + +static inline void +execlists_clear_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __clear_bit(bit, (unsigned long *)&execlists->active); +} + +static inline bool +execlists_is_active(const struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return test_bit(bit, (unsigned long *)&execlists->active); +} + static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) { @@ -538,6 +567,7 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, const unsigned int m = execlists->port_mask; GEM_BUG_ON(port_index(port, execlists) != 0); + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); memmove(port, port + 1, m * sizeof(struct execlist_port)); memset(port + m, 0, sizeof(struct execlist_port)); -- cgit v1.2.3 From 20ccd4d3f689ac14dce8632d76769be0ac952060 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Oct 2017 23:08:55 +0100 Subject: drm/i915: Use same test for eviction and submitting kernel context During evict, we wish to idle the GPU if we see that the GGTT is full. However, our tests for idle in i915_gem_evict_something() and in i915_gem_switch_to_kernel_context() do not match, leading to disappointment - we never believe that we are idle and keep trying to flush the GGTT ad infinitum. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103438 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171024220855.30155-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_context.c | 7 +++---- drivers/gpu/drm/i915/i915_gem_evict.c | 3 ++- drivers/gpu/drm/i915/intel_engine_cs.c | 6 ++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 4 files changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 5bf96a258509..4f26f80b1b3e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -897,7 +897,7 @@ int i915_switch_context(struct drm_i915_gem_request *req) return do_rcs_switch(req); } -static bool engine_has_kernel_context(struct intel_engine_cs *engine) +static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) { struct i915_gem_timeline *timeline; @@ -913,8 +913,7 @@ static bool engine_has_kernel_context(struct intel_engine_cs *engine) return false; } - return (!engine->last_retired_context || - i915_gem_context_is_kernel(engine->last_retired_context)); + return intel_engine_has_kernel_context(engine); } int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) @@ -931,7 +930,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) struct drm_i915_gem_request *req; int ret; - if (engine_has_kernel_context(engine)) + if (engine_has_idle_kernel_context(engine)) continue; req = i915_gem_request_alloc(engine, dev_priv->kernel_context); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c
b/drivers/gpu/drm/i915/i915_gem_evict.c index a6b769994d8d..60ca4f05ae94 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -46,7 +46,7 @@ static bool ggtt_is_idle(struct drm_i915_private *i915) return false; for_each_engine(engine, i915, id) { - if (engine->last_retired_context != i915->kernel_context) + if (!intel_engine_has_kernel_context(engine)) return false; } @@ -73,6 +73,7 @@ static int ggtt_flush(struct drm_i915_private *i915) if (err) return err; + GEM_BUG_ON(!ggtt_is_idle(i915)); return 0; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ab5bf4e2e28e..f6cdc50d4237 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1585,6 +1585,12 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) return true; } +bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) +{ + return (!engine->last_retired_context || + i915_gem_context_is_kernel(engine->last_retired_context)); +} + void intel_engines_reset_default_submission(struct drm_i915_private *i915) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6a42ed618a28..a2589aa89163 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -866,6 +866,8 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); +bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); + void intel_engines_mark_idle(struct drm_i915_private *i915); void intel_engines_reset_default_submission(struct drm_i915_private *i915); -- cgit v1.2.3 From aba5e278586b16a4fbd377e7e8403aa585d0532a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Oct 2017 15:39:41 +0100 Subject: drm/i915: Add a hook for making the engines idle (parking) and unparking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the next patch, we will want to install a callback when the engines (GT as a whole) become idle and similarly when they first become busy. To enable that callback, first rename intel_engines_mark_idle() to intel_engines_park() and provide the companion intel_engines_unpark(). 
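The hook being introduced is simply an optional per-engine callback invoked on each busy<->idle transition of the GT. A minimal standalone sketch of that dispatch pattern, with illustrative type and function names rather than the driver's own:

#include <stdio.h>

/* Illustrative stand-ins for the driver types; not the i915 definitions. */
struct engine {
        const char *name;
        void (*park)(struct engine *engine);    /* optional: GT going idle */
        void (*unpark)(struct engine *engine);  /* optional: GT waking up */
};

/* Walk every engine and invoke its optional callback, mirroring the
 * shape of intel_engines_park()/intel_engines_unpark() below. */
static void engines_park(struct engine *e, int count)
{
        for (int i = 0; i < count; i++)
                if (e[i].park)                  /* hooks are optional */
                        e[i].park(&e[i]);
}

static void engines_unpark(struct engine *e, int count)
{
        for (int i = 0; i < count; i++)
                if (e[i].unpark)
                        e[i].unpark(&e[i]);
}

static void rcs_park(struct engine *e) { printf("%s parked\n", e->name); }

int main(void)
{
        struct engine engines[] = {
                { .name = "rcs0", .park = rcs_park },
                { .name = "bcs0" },             /* no hooks: silently skipped */
        };

        engines_park(engines, 2);
        engines_unpark(engines, 2);             /* nothing registered: no-op */
        return 0;
}

Engines that register no hook are skipped, which is exactly how the execlists backend starts out in the diff below (park/unpark default to NULL).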
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Michał Winiarski Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171025143943.7661-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.c | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 33 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_lrc.c | 3 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 5 ++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++++++- 6 files changed, 47 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bafe1cdd57d8..8364304e9d2b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3347,7 +3347,7 @@ i915_gem_idle_work_handler(struct work_struct *work) if (!intel_engines_are_idle(dev_priv)) DRM_ERROR("Timeout waiting for engines to idle\n"); - intel_engines_mark_idle(dev_priv); + intel_engines_park(dev_priv); i915_gem_timelines_mark_idle(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d140fcf5c6a3..e0d6221022a8 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -259,6 +259,8 @@ static void mark_busy(struct drm_i915_private *i915) if (INTEL_GEN(i915) >= 6) gen6_rps_busy(i915); + intel_engines_unpark(i915); + queue_delayed_work(i915->wq, &i915->gt.retire_work, round_jiffies_up_relative(HZ)); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f6cdc50d4237..fedb839dff61 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1600,19 +1600,48 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) engine->set_default_submission(engine); } -void intel_engines_mark_idle(struct drm_i915_private *i915) +/** + * intel_engines_park: called when the GT is transitioning from busy->idle + * @i915: the i915 device + * + * The GT is now idle and about to go to sleep (maybe never to wake again?). + * Time for us to tidy and put away our toys (release resources back to the + * system). + */ +void intel_engines_park(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { + if (engine->park) + engine->park(engine); + intel_engine_disarm_breadcrumbs(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); tasklet_kill(&engine->execlists.irq_tasklet); + + i915_gem_batch_pool_fini(&engine->batch_pool); engine->execlists.no_priolist = false; } } +/** + * intel_engines_unpark: called when the GT is transitioning from idle->busy + * @i915: the i915 device + * + * The GT was idle and now about to fire up with some new user requests. 
+ */ +void intel_engines_unpark(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + if (engine->unpark) + engine->unpark(engine); + } +} + bool intel_engine_can_store_dword(struct intel_engine_cs *engine) { switch (INTEL_GEN(engine->i915)) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d36e25607435..eeb3622803a8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1891,6 +1891,9 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->cancel_requests = execlists_cancel_requests; engine->schedule = execlists_schedule; engine->execlists.irq_tasklet.func = intel_lrc_irq_handler; + + engine->park = NULL; + engine->unpark = NULL; } static void diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8da1bde442dd..05e01446b00b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2028,12 +2028,15 @@ static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; engine->cancel_requests = cancel_requests; + + engine->park = NULL; + engine->unpark = NULL; } static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) { + i9xx_set_default_submission(engine); engine->submit_request = gen6_bsd_submit_request; - engine->cancel_requests = cancel_requests; } static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a2589aa89163..a7c95f0c3101 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -365,6 +365,9 @@ struct intel_engine_cs { void (*reset_hw)(struct intel_engine_cs *engine, struct drm_i915_gem_request *req); + void (*park)(struct intel_engine_cs *engine); + void (*unpark)(struct intel_engine_cs *engine); + void (*set_default_submission)(struct intel_engine_cs *engine); struct intel_ring *(*context_pin)(struct intel_engine_cs *engine, @@ -868,7 +871,9 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv); bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); -void intel_engines_mark_idle(struct drm_i915_private *i915); +void intel_engines_park(struct drm_i915_private *i915); +void intel_engines_unpark(struct drm_i915_private *i915); + void intel_engines_reset_default_submission(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); -- cgit v1.2.3 From bcbd5c33a342bc6f4a25fed528dc3ed2a0b1c140 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Oct 2017 15:39:42 +0100 Subject: drm/i915/guc: Always enable the breadcrumbs irq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The execlists emulation on top of the GuC (used for scheduling and preemption) depends on the MI_USER_INTERRUPT for its notifications and tasklet action. As we always employ the irq, there is no advantage in ever disabling it while we are using the GuC, so allow us to arm the breadcrumb irq when enabling GuC submission and disarm upon disabling. 
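The arming boils down to a lock-guarded reference count around the existing irq enable/disable: the 0->1 transition enables the interrupt, the 1->0 transition disables it. A minimal sketch of that pattern, assuming illustrative names and a pthread mutex in place of the driver's spinlock:

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct breadcrumbs {
        pthread_mutex_t irq_lock;
        unsigned int irq_enabled;       /* reference count, not a bool */
};

static void irq_enable(void)  { printf("irq on\n"); }   /* hw access elided */
static void irq_disable(void) { printf("irq off\n"); }

/* First pin (0 -> 1) enables the interrupt; later pins only count. */
static void pin_breadcrumbs_irq(struct breadcrumbs *b)
{
        pthread_mutex_lock(&b->irq_lock);
        if (!b->irq_enabled++)
                irq_enable();
        pthread_mutex_unlock(&b->irq_lock);
}

/* Last unpin (1 -> 0) disables it again. */
static void unpin_breadcrumbs_irq(struct breadcrumbs *b)
{
        pthread_mutex_lock(&b->irq_lock);
        assert(b->irq_enabled);         /* no underflow */
        if (!--b->irq_enabled)
                irq_disable();
        pthread_mutex_unlock(&b->irq_lock);
}

int main(void)
{
        struct breadcrumbs b = { PTHREAD_MUTEX_INITIALIZER, 0 };

        pin_breadcrumbs_irq(&b);        /* "irq on" */
        pin_breadcrumbs_irq(&b);        /* counted, no hw access */
        unpin_breadcrumbs_irq(&b);
        unpin_breadcrumbs_irq(&b);      /* "irq off" */
        return 0;
}

A counter rather than a bool lets the GuC hold one long-lived reference while waiters continue to take and drop theirs independently.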
The impact should be lessened by the delayed irq disabling we do (we only disable after receiving an interrupt for which no one was waiting), but allowing guc to explicitly manage the irq in relation to itself is simpler and prevents an issue with losing an interrupt for preemption as it is not coupled to an active request. Internally, we add a reference counter (breadcrumbs.irq_enabled) as a simple mechanism to allow GuC to keep the breadcrumb irq enabled. To improve upon always enabling the irq while guc is selected, we need to hook into the parking facility of intel_engines so that we only enable the breadcrumbs while the GT is active (one step better would be to individually park/unpark each engine). In effect, this means that we keep the breadcrumb irq always enabled for the entire duration the guc is busy, whereas before we would try to switch it off whenever we idled for more than one interrupt with no associated waiters. The difference *should* be negligible in practice! v2: Stop abusing fence signaling (and its auxiliary data structures) to enable the breadcrumbs irqs. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Michał Winiarski Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171025143943.7661-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_guc_submission.c | 39 ++++++++++---------------- drivers/gpu/drm/i915/i915_irq.c | 6 +++- drivers/gpu/drm/i915/intel_breadcrumbs.c | 45 +++++++++++++++++++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +++- 4 files changed, 59 insertions(+), 36 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index f84c267728fd..141ed9df3d5c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -523,29 +523,6 @@ static void i915_guc_submit(struct intel_engine_cs *engine) } } -static void nested_enable_signaling(struct drm_i915_gem_request *rq) -{ - /* If we use dma_fence_enable_sw_signaling() directly, lockdep - * detects an ordering issue between the fence lockclass and the - * global_timeline. This circular dependency can only occur via 2 - * different fences (but same fence lockclass), so we use the nesting - * annotation here to prevent the warn, equivalent to the nesting - * inside i915_gem_request_submit() for when we also enable the - * signaler.
- */ - - if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags)) - return; - - GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)); - trace_dma_fence_enable_signal(&rq->fence); - - spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING); - intel_engine_enable_signaling(rq, true); - spin_unlock(&rq->lock); -} - static void port_assign(struct execlist_port *port, struct drm_i915_gem_request *rq) { @@ -555,7 +532,6 @@ static void port_assign(struct execlist_port *port, i915_gem_request_put(port_request(port)); port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); - nested_enable_signaling(rq); } static void i915_guc_dequeue(struct intel_engine_cs *engine) @@ -1097,6 +1073,16 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; } +static void i915_guc_submission_park(struct intel_engine_cs *engine) +{ + intel_engine_unpin_breadcrumbs_irq(engine); +} + +static void i915_guc_submission_unpark(struct intel_engine_cs *engine) +{ + intel_engine_pin_breadcrumbs_irq(engine); +} + int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -1154,6 +1140,9 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) execlists->irq_tasklet.func = i915_guc_irq_handler; clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); tasklet_schedule(&execlists->irq_tasklet); + + engine->park = i915_guc_submission_park; + engine->unpark = i915_guc_submission_unpark; } return 0; @@ -1168,6 +1157,8 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; + GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */ + guc_interrupts_release(dev_priv); /* Revert back to manual ELSP submission */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f8205841868b..ff00e462697a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1068,6 +1068,9 @@ static void notify_ring(struct intel_engine_cs *engine) struct drm_i915_gem_request *rq = NULL; struct intel_wait *wait; + if (!engine->breadcrumbs.irq_armed) + return; + atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); @@ -1101,7 +1104,8 @@ static void notify_ring(struct intel_engine_cs *engine) if (wakeup) wake_up_process(wait->tsk); } else { - __intel_engine_disarm_breadcrumbs(engine); + if (engine->breadcrumbs.irq_armed) + __intel_engine_disarm_breadcrumbs(engine); } spin_unlock(&engine->breadcrumbs.irq_lock); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 48e1ba01ccf8..1bd0a461d665 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -123,7 +123,7 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) */ spin_lock_irq(&b->irq_lock); - if (!__intel_breadcrumbs_wakeup(b)) + if (b->irq_armed && !__intel_breadcrumbs_wakeup(b)) __intel_engine_disarm_breadcrumbs(engine); spin_unlock_irq(&b->irq_lock); if (!b->irq_armed) @@ -171,15 +171,37 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) lockdep_assert_held(&b->irq_lock); GEM_BUG_ON(b->irq_wait); + GEM_BUG_ON(!b->irq_armed); - if (b->irq_enabled) { + GEM_BUG_ON(!b->irq_enabled); + if (!--b->irq_enabled) irq_disable(engine); - b->irq_enabled = false; - } b->irq_armed = false; } +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct 
intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock_irq(&b->irq_lock); + if (!b->irq_enabled++) + irq_enable(engine); + GEM_BUG_ON(!b->irq_enabled); /* no overflow! */ + spin_unlock_irq(&b->irq_lock); +} + +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock_irq(&b->irq_lock); + GEM_BUG_ON(!b->irq_enabled); /* no underflow! */ + if (!--b->irq_enabled) + irq_disable(engine); + spin_unlock_irq(&b->irq_lock); +} + void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -241,6 +263,9 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); struct drm_i915_private *i915 = engine->i915; + bool enabled; + + GEM_BUG_ON(!intel_irqs_enabled(i915)); lockdep_assert_held(&b->irq_lock); if (b->irq_armed) @@ -252,7 +277,6 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) * the irq. */ b->irq_armed = true; - GEM_BUG_ON(b->irq_enabled); if (I915_SELFTEST_ONLY(b->mock)) { /* For our mock objects we want to avoid interaction @@ -273,14 +297,15 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) */ /* No interrupts? Kick the waiter every jiffie! */ - if (intel_irqs_enabled(i915)) { - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) - irq_enable(engine); - b->irq_enabled = true; + enabled = false; + if (!b->irq_enabled++ && + !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { + irq_enable(engine); + enabled = true; } enable_fake_irq(b); - return true; + return enabled; } static inline struct intel_wait *to_wait(struct rb_node *node) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a7c95f0c3101..e5a0d489a304 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -339,9 +339,9 @@ struct intel_engine_cs { struct timer_list hangcheck; /* detect missed interrupts */ unsigned int hangcheck_interrupts; + unsigned int irq_enabled; bool irq_armed : 1; - bool irq_enabled : 1; I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; @@ -848,6 +848,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); #define ENGINE_WAKEUP_WAITER BIT(0) #define ENGINE_WAKEUP_ASLEEP BIT(1) +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); + void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); -- cgit v1.2.3 From df77cd83d5566ef6a04528bb08732ccbf829580e Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 25 Oct 2017 22:00:15 +0200 Subject: drm/i915: Extract "emit write" part of emit breadcrumb functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's separate the "emit" part from touching any internal structures; this way we can have a generic "emit coherent GGTT write" function. We would like to reuse this functionality for emitting the HWSP write, to confirm that preempt-to-idle has finished.
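The extracted contract is small: a helper appends a fixed-length "write this value to this GGTT offset" packet to a dword stream and returns the advanced cursor, so callers can keep chaining emits. A standalone sketch of that shape, with a placeholder opcode where the real MI_FLUSH_DW encoding would be:

#include <stdint.h>
#include <stdio.h>

#define FAKE_STOREDW 0x10400002u        /* stand-in; not the real encoding */

/* Append "write value to gtt_offset" and return the advanced cursor,
 * mirroring the calling convention of gen8_emit_ggtt_write() below. */
static uint32_t *emit_ggtt_write(uint32_t *cs, uint32_t value,
                                 uint32_t gtt_offset)
{
        *cs++ = FAKE_STOREDW;
        *cs++ = gtt_offset;
        *cs++ = 0;                      /* upper address dword */
        *cs++ = value;
        return cs;
}

int main(void)
{
        uint32_t ring[16];
        uint32_t *cs = ring;

        cs = emit_ggtt_write(cs, 42, 0xc0);     /* e.g. a seqno write */
        printf("emitted %d dwords\n", (int)(cs - ring));        /* 4 */
        return 0;
}

Returning the cursor rather than mutating a stream object keeps the emitters composable, which is what lets the breadcrumb and, later, the preempt-done writes share one implementation.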
v2: Reorder args to match emit_pipe_control, s/render/rcs (Chris) Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-8-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 28 +++++++----------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e821c1eba7a4..599c709fc5a7 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1794,10 +1794,8 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; - *cs++ = request->global_seqno; + cs = gen8_emit_ggtt_write(cs, request->global_seqno, + intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); @@ -1807,24 +1805,14 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, +static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); - /* w/a for post sync ops following a GPGPU operation we - * need a prior CS_STALL, which is emitted by the flush - * following the batch. - */ - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE; - *cs++ = intel_hws_seqno_address(request->engine); - *cs++ = 0; - *cs++ = request->global_seqno; - /* We're thrashing one dword of HWS. 
*/ - *cs++ = 0; + cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, + intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); @@ -1832,7 +1820,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; +static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { @@ -1991,8 +1979,8 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->init_hw = gen8_init_render_ring; engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; - engine->emit_breadcrumb = gen8_emit_breadcrumb_render; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz; + engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; + engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; ret = intel_engine_create_scratch(engine, PAGE_SIZE); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index e5a0d489a304..c15161e56964 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -869,6 +869,44 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) return batch + 6; } +static inline u32 * +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + /* w/a for post sync ops following a GPGPU operation we + * need a prior CS_STALL, which is emitted by the flush + * following the batch. + */ + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = gtt_offset; + *cs++ = 0; + *cs++ = value; + /* We're thrashing one dword of HWS. */ + *cs++ = 0; + + return cs; +} + +static inline u32 * +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) +{ + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ + GEM_BUG_ON(gtt_offset & (1 << 5)); + /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = value; + + return cs; +} + bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); -- cgit v1.2.3 From 3b8a8a30064d0b89d09e979adb4c975d892c21ef Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 25 Oct 2017 22:00:16 +0200 Subject: drm/i915: Add information needed to track engine preempt state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't inspect ELSP context status (or any other bits depending on specific submission backend) when using GuC submission. Let's use another piece of HWSP for preempt context, to write its bit of information, meaning that preemption has finished, and hardware is now idle. 
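The consumer side then reduces to polling one dword of the status page for a sentinel and clearing it once handled. A sketch under those assumptions (the slot index matches the define added below; the sentinel value is the one a later patch in this series names GUC_PREEMPT_FINISHED):

#include <stdbool.h>
#include <stdint.h>

#define HWS_PREEMPT_INDEX 0x32          /* dword slot in the status page */
#define PREEMPT_FINISHED  0x1           /* sentinel written by the GPU */

/* hwsp is the CPU mapping of the engine's status page. */
static bool preempt_done(const volatile uint32_t *hwsp)
{
        return hwsp[HWS_PREEMPT_INDEX] == PREEMPT_FINISHED;
}

/* Clear the slot once handled, re-arming it for the next preemption. */
static void preempt_ack(volatile uint32_t *hwsp)
{
        hwsp[HWS_PREEMPT_INDEX] = 0;
}

int main(void)
{
        static uint32_t hwsp[1024];     /* fake 4KiB status page */

        hwsp[HWS_PREEMPT_INDEX] = PREEMPT_FINISHED;     /* as the GPU would */
        if (preempt_done(hwsp))
                preempt_ack(hwsp);
        return 0;
}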
Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Jeff McGee Cc: Michal Wajdeczko Cc: Oscar Mateo Reviewed-by: Chris Wilson Reviewed-by: Jeff McGee Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171025200020.16636-9-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c15161e56964..4a5a08985328 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -626,6 +626,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ #define I915_GEM_HWS_INDEX 0x30 #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_PREEMPT_INDEX 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) #define I915_GEM_HWS_SCRATCH_INDEX 0x40 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) @@ -778,6 +780,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; } +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ + return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; +} + /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -- cgit v1.2.3 From c41937fd994a449b4e862826d7eb9f366f6d33e9 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Thu, 26 Oct 2017 15:35:58 +0200 Subject: drm/i915/guc: Preemption! With GuC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pretty similar to what we have on execlists. We're reusing most of the GEM code, however, due to GuC quirks we need a couple of extra bits. Preemption is implemented as a GuC action, and actions can be pretty slow. Because of that, we're using a mutex to serialize them. Since we're requesting preemption from the tasklet, the task of creating a workitem and wrapping it in GuC action is delegated to a worker. To distinguish that preemption has finished, we're using an additional piece of HWSP, and since we're not getting context switch interrupts, we're also adding a user interrupt. The fact that our special preempt context has completed unfortunately doesn't mean that we're ready to submit new work. We also need to wait for GuC to finish its own processing. v2: Don't compile out the wait for GuC, handle workqueue flush on reset, no need for ordered workqueue, put on a reviewer hat when looking at my own patches (Chris) Move struct work around in intel_guc, move user interrupt outside of conditional (Michał) Keep ring around rather than chase through intel_context v3: Extract WA for flushing ggtt writes to a helper (Chris) Keep work_struct in intel_guc rather than engine (Michał) Use ordered workqueue for inject_preempt worker to avoid GuC quirks.
v4: Drop now unused INTEL_GUC_PREEMPT_OPTION_IMMEDIATE (Daniele) Drop stray newlines, use container_of for intel_guc in worker, check for presence of workqueue when flushing it, rather than enable_guc_submission modparam, reorder preempt postprocessing (Chris) v5: Make wq NULL after destroying it v6: Swap struct guc_preempt_work members (Michał) Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Jeff McGee Cc: Joonas Lahtinen Cc: Oscar Mateo Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171026133558.19580-1-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_gem.c | 10 ++ drivers/gpu/drm/i915/i915_guc_submission.c | 208 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_guc.h | 8 ++ drivers/gpu/drm/i915/intel_lrc.c | 4 +- drivers/gpu/drm/i915/intel_lrc.h | 1 - drivers/gpu/drm/i915/intel_ringbuffer.h | 6 + 7 files changed, 222 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7b871802ae36..af745749509c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -373,8 +373,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value |= I915_SCHEDULER_CAP_PRIORITY; if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && - i915_modparams.enable_execlists && - !i915_modparams.enable_guc_submission) + i915_modparams.enable_execlists) value |= I915_SCHEDULER_CAP_PREEMPTION; } break; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d803ef5f4a7f..c2506fb3a483 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2921,6 +2921,16 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) tasklet_kill(&engine->execlists.irq_tasklet); tasklet_disable(&engine->execlists.irq_tasklet); + /* + * We're using a worker to queue preemption requests from the tasklet in + * GuC submission mode. + * Even though the tasklet was disabled, we may still have a worker queued. + * Let's make sure that all workers scheduled before disabling the + * tasklet are completed before continuing with the reset. + */ + if (engine->i915->guc.preempt_wq) + flush_workqueue(engine->i915->guc.preempt_wq); + if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index c99ddd8457f3..3049a0781b88 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -565,6 +565,108 @@ static void guc_add_request(struct intel_guc *guc, spin_unlock(&client->wq_lock); } +/* + * When we're doing submissions using regular execlists backend, writing to + * ELSP from CPU side is enough to make sure that writes to ringbuffer pages + * pinned in mappable aperture portion of GGTT are visible to command streamer. + * Writes done by GuC on our behalf do not guarantee such ordering; + * therefore, to ensure the flush, we're issuing a POSTING READ.
+ */ +static void flush_ggtt_writes(struct i915_vma *vma) +{ + struct drm_i915_private *dev_priv = to_i915(vma->obj->base.dev); + + if (i915_vma_is_map_and_fenceable(vma)) + POSTING_READ_FW(GUC_STATUS); +} + +#define GUC_PREEMPT_FINISHED 0x1 +#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 +static void inject_preempt_context(struct work_struct *work) +{ + struct guc_preempt_work *preempt_work = + container_of(work, typeof(*preempt_work), work); + struct intel_engine_cs *engine = preempt_work->engine; + struct intel_guc *guc = container_of(preempt_work, typeof(*guc), + preempt_work[engine->id]); + struct i915_guc_client *client = guc->preempt_client; + struct intel_ring *ring = client->owner->engine[engine->id].ring; + u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, + engine)); + u32 *cs = ring->vaddr + ring->tail; + u32 data[7]; + + if (engine->id == RCS) { + cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED, + intel_hws_preempt_done_address(engine)); + } else { + cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED, + intel_hws_preempt_done_address(engine)); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + GEM_BUG_ON(!IS_ALIGNED(ring->size, + GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32))); + GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) != + GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)); + + ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32); + ring->tail &= (ring->size - 1); + + flush_ggtt_writes(ring->vma); + + spin_lock_irq(&client->wq_lock); + guc_wq_item_append(client, engine->guc_id, ctx_desc, + ring->tail / sizeof(u64), 0); + spin_unlock_irq(&client->wq_lock); + + data[0] = INTEL_GUC_ACTION_REQUEST_PREEMPTION; + data[1] = client->stage_id; + data[2] = INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q | + INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q; + data[3] = engine->guc_id; + data[4] = guc->execbuf_client->priority; + data[5] = guc->execbuf_client->stage_id; + data[6] = guc_ggtt_offset(guc->shared_data); + + if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) { + execlists_clear_active(&engine->execlists, + EXECLISTS_ACTIVE_PREEMPT); + tasklet_schedule(&engine->execlists.irq_tasklet); + } +} + +/* + * We're using a user interrupt and a HWSP value to mark that preemption has + * finished and the GPU is idle. Normally, we could unwind and continue similar to + * the execlists submission path. Unfortunately, with GuC we also need to wait for + * it to finish its own postprocessing, before attempting to submit. Otherwise + * GuC may silently ignore our submissions, and thus we risk losing a request at + * best, executing out-of-order and causing a kernel panic at worst.
+ */ +#define GUC_PREEMPT_POSTPROCESS_DELAY_MS 10 +static void wait_for_guc_preempt_report(struct intel_engine_cs *engine) +{ + struct intel_guc *guc = &engine->i915->guc; + struct guc_shared_ctx_data *data = guc->shared_data_vaddr; + struct guc_ctx_report *report = + &data->preempt_ctx_report[engine->guc_id]; + + WARN_ON(wait_for_atomic(report->report_return_status == + INTEL_GUC_REPORT_STATUS_COMPLETE, + GUC_PREEMPT_POSTPROCESS_DELAY_MS)); + /* + * GuC is expecting that we're also going to clear the affected context + * counter, let's also reset the return status to not depend on GuC + * resetting it after receiving another preempt action + */ + report->affected_count = 0; + report->report_return_status = INTEL_GUC_REPORT_STATUS_UNKNOWN; +} + /** * i915_guc_submit() - Submit commands through GuC * @engine: engine associated with the commands @@ -574,8 +676,7 @@ static void guc_add_request(struct intel_guc *guc, */ static void i915_guc_submit(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - struct intel_guc *guc = &dev_priv->guc; + struct intel_guc *guc = &engine->i915->guc; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; unsigned int n; @@ -588,8 +689,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine) if (rq && count == 0) { port_set(&port[n], port_pack(rq, ++count)); - if (i915_vma_is_map_and_fenceable(rq->ring->vma)) - POSTING_READ_FW(GUC_STATUS); + flush_ggtt_writes(rq->ring->vma); guc_add_request(guc, rq); } @@ -617,13 +717,32 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - if (port_isset(port)) - port++; - spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); + + if (!rb) + goto unlock; + + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && port_isset(port)) { + struct guc_preempt_work *preempt_work = + &engine->i915->guc.preempt_work[engine->id]; + + if (rb_entry(rb, struct i915_priolist, node)->priority > + max(port_request(port)->priotree.priority, 0)) { + execlists_set_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); + queue_work(engine->i915->guc.preempt_wq, + &preempt_work->work); + goto unlock; + } else if (port_isset(last_port)) { + goto unlock; + } + + port++; + } + + do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct drm_i915_gem_request *rq, *rn; @@ -653,7 +772,7 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } + } while (rb); done: execlists->first = rb; if (submit) { @@ -661,6 +780,7 @@ done: execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); i915_guc_submit(engine); } +unlock: spin_unlock_irq(&engine->timeline->lock); } @@ -669,8 +789,6 @@ static void i915_guc_irq_handler(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - const struct execlist_port * const last_port = - &execlists->port[execlists->port_mask]; struct drm_i915_gem_request *rq; rq = port_request(&port[0]); @@ -685,7 +803,19 @@ static void i915_guc_irq_handler(unsigned long data) if (!rq) execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); - if (!port_isset(last_port)) + if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == GUC_PREEMPT_FINISHED) { + execlists_cancel_port_requests(&engine->execlists); + execlists_unwind_incomplete_requests(execlists); + + wait_for_guc_preempt_report(engine); + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); + } + + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) i915_guc_dequeue(engine); } @@ -1059,6 +1189,51 @@ static void guc_ads_destroy(struct intel_guc *guc) i915_vma_unpin_and_release(&guc->ads_vma); } +static int guc_preempt_work_create(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Even though both sending a GuC action and adding a new workitem to + * the GuC workqueue are serialized (each with its own locking), since + * we're using multiple engines, it's possible that we're going to + * issue a preempt request with two (or more - each for a different + * engine) workitems in the GuC queue. In this situation, GuC may submit + * all of them, which will make us very confused. + * Our preemption contexts may even already be complete - before we + * even had the chance to send the preempt action to GuC! Rather + * than introducing yet another lock, we can just use an ordered workqueue + * to make sure we're always sending a single preemption request with a + * single workitem. + */ + guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", + WQ_HIGHPRI); + if (!guc->preempt_wq) + return -ENOMEM; + + for_each_engine(engine, dev_priv, id) { + guc->preempt_work[id].engine = engine; + INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context); + } + + return 0; +} + +static void guc_preempt_work_destroy(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) + cancel_work_sync(&guc->preempt_work[id].work); + + destroy_workqueue(guc->preempt_wq); + guc->preempt_wq = NULL; +} + /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. @@ -1083,12 +1258,18 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) if (ret < 0) goto err_shared_data; + ret = guc_preempt_work_create(guc); + if (ret) + goto err_log; + ret = guc_ads_create(guc); if (ret < 0) - goto err_log; + goto err_wq; return 0; +err_wq: + guc_preempt_work_destroy(guc); err_log: intel_guc_log_destroy(guc); err_shared_data: @@ -1103,6 +1284,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) struct intel_guc *guc = &dev_priv->guc; guc_ads_destroy(guc); + guc_preempt_work_destroy(guc); intel_guc_log_destroy(guc); guc_shared_data_destroy(guc); guc_stage_desc_pool_destroy(guc); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 6ca55c5bed3c..607e02500262 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -34,6 +34,11 @@ #include "i915_guc_reg.h" #include "i915_vma.h" +struct guc_preempt_work { + struct work_struct work; + struct intel_engine_cs *engine; +}; + /* * Top level structure of GuC. It handles firmware loading and manages client * pool and doorbells.
intel_guc owns a i915_guc_client to replace the legacy @@ -60,6 +65,9 @@ struct intel_guc { struct i915_guc_client *execbuf_client; struct i915_guc_client *preempt_client; + struct guc_preempt_work preempt_work[I915_NUM_ENGINES]; + struct workqueue_struct *preempt_wq; + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); /* Cyclic counter mod pagesize */ u32 db_cacheline; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b5d382ef8d85..6840ec8db037 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -385,7 +385,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) } } -static void +void execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) { struct intel_engine_cs *engine = @@ -696,7 +696,7 @@ unlock: } } -static void +void execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) { struct execlist_port *port = execlists->port; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 689fde1a63a9..17182ce29674 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -107,7 +107,6 @@ intel_lr_context_descriptor(struct i915_gem_context *ctx, return ctx->engine[engine->id].lrc_desc; } - /* Execlists */ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4a5a08985328..69ad875fd011 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -557,6 +557,12 @@ execlists_is_active(const struct intel_engine_execlists *execlists, return test_bit(bit, (unsigned long *)&execlists->active); } +void +execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); + +void +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); + static inline unsigned int execlists_num_ports(const struct intel_engine_execlists * const execlists) { -- cgit v1.2.3 From 1803fcbca2e444f7972430c4dc1c3e98c6ee1bc9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 10 Nov 2017 14:26:27 +0000 Subject: drm/i915: Define an engine class enum for the uABI We want to be able to report back to userspace details about an engine's class, and in return for userspace to be able to request actions regarding certain classes of engines. To isolate the uABI from any variations between hw generations, we define an abstract class for the engines and internally map onto the hw. v2: Remove MAX from the uABI; keep it internal if we need it, but don't let userspace make the mistake of using it themselves. v3: s/OTHER/INVALID/ The use of OTHER is ill-defined, so remove it from the uABI as any future new type of engine can define a class to suit it. But keep a reserved value for an invalid class, so that we can always unambiguously express when something doesn't belong to the classification. 
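From userspace, the class is just a stable small integer describing an engine's role, independent of the hw generation. A sketch of how a tool might consume it (the enum mirrors the uAPI hunk below; the mapping function is illustrative, not an i915 interface):

#include <stdio.h>

/* Mirrors enum drm_i915_gem_engine_class from the uAPI hunk below. */
enum engine_class {
        ENGINE_CLASS_RENDER = 0,
        ENGINE_CLASS_COPY = 1,
        ENGINE_CLASS_VIDEO = 2,
        ENGINE_CLASS_VIDEO_ENHANCE = 3,
        ENGINE_CLASS_INVALID = -1,      /* reserved: "does not classify" */
};

static const char *class_name(enum engine_class class)
{
        switch (class) {
        case ENGINE_CLASS_RENDER:        return "render";
        case ENGINE_CLASS_COPY:          return "copy";
        case ENGINE_CLASS_VIDEO:         return "video";
        case ENGINE_CLASS_VIDEO_ENHANCE: return "video-enhance";
        default:                         return "invalid";
        }
}

int main(void)
{
        printf("class 1 -> %s\n", class_name(ENGINE_CLASS_COPY));
        return 0;
}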
Signed-off-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Reviewed-by: Joonas Lahtinen #v2 Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 10 +++++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++++- include/uapi/drm/i915_drm.h | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 87778f03393b..bded9c40dbd5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -50,6 +50,8 @@ struct engine_class_info { const char *name; int (*init_legacy)(struct intel_engine_cs *engine); int (*init_execlists)(struct intel_engine_cs *engine); + + u8 uabi_class; }; static const struct engine_class_info intel_engine_classes[] = { @@ -57,21 +59,25 @@ static const struct engine_class_info intel_engine_classes[] = { .name = "rcs", .init_execlists = logical_render_ring_init, .init_legacy = intel_init_render_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_RENDER, }, [COPY_ENGINE_CLASS] = { .name = "bcs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_blt_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_COPY, }, [VIDEO_DECODE_CLASS] = { .name = "vcs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_bsd_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO, }, [VIDEO_ENHANCEMENT_CLASS] = { .name = "vecs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_vebox_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE, }, }; @@ -213,13 +219,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", class_info->name, info->instance) >= sizeof(engine->name)); - engine->uabi_id = info->uabi_id; engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; + engine->uabi_id = info->uabi_id; + engine->uabi_class = class_info->uabi_class; + engine->context_size = __intel_engine_context_size(dev_priv, engine->class); if (WARN_ON(engine->context_size > BIT(20))) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 69ad875fd011..f3dbfe7ae6e4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -289,11 +289,14 @@ struct intel_engine_execlists { struct intel_engine_cs { struct drm_i915_private *i915; char name[INTEL_ENGINE_CS_MAX_NAME]; + enum intel_engine_id id; - unsigned int uabi_id; unsigned int hw_id; unsigned int guc_id; + u8 uabi_id; + u8 uabi_class; + u8 class; u8 instance; u32 context_size; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ac3c6503ca27..1f7dfb22a7c2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -86,6 +86,22 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; +/* + * Different engines serve different roles, and there may be more than one + * engine serving each role. enum drm_i915_gem_engine_class provides a + * classification of the role of the engine, which may be used when requesting + * operations to be performed on a certain subset of engines, or for providing + * information about that group. 
+ */ +enum drm_i915_gem_engine_class { + I915_ENGINE_CLASS_RENDER = 0, + I915_ENGINE_CLASS_COPY = 1, + I915_ENGINE_CLASS_VIDEO = 2, + I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + + I915_ENGINE_CLASS_INVALID = -1 +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use -- cgit v1.2.3 From d2b4b97933f5adacfba42dc3b9200d0e21fbe2c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:33 +0000 Subject: drm/i915: Record the default hw state after reset upon load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take a copy of the HW state after a reset upon module loading by executing a context switch from a blank context to the kernel context, thus saving the default hw state over the blank context image. We can then use the default hw state to initialise any future context, ensuring that each starts with the default view of hw state. v2: Unmap our default state from the GTT after stealing it from the context. This should stop us from accidentally overwriting it via the GTT (and frees up some precious GTT space). Testcase: igt/gem_ctx_isolation Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 - drivers/gpu/drm/i915/i915_debugfs.c | 1 - drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/i915_gem.c | 115 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 55 ++++----------- drivers/gpu/drm/i915/i915_gem_context.h | 4 +- drivers/gpu/drm/i915/intel_engine_cs.c | 17 +++++ drivers/gpu/drm/i915/intel_lrc.c | 38 +++++++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + include/uapi/drm/i915_drm.h | 15 +++++ 11 files changed, 224 insertions(+), 73 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f6ded475bb2c..42cc61230ca7 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -723,8 +723,6 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu) if (IS_ERR(vgpu->shadow_ctx)) return PTR_ERR(vgpu->shadow_ctx); - vgpu->shadow_ctx->engine[RCS].initialised = true; - bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES); return 0; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d89321f0468c..533ba096b9a6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1974,7 +1974,6 @@ static int i915_context_status(struct seq_file *m, void *unused) struct intel_context *ce = &ctx->engine[engine->id]; seq_printf(m, "%s: ", engine->name); - seq_putc(m, ce->initialised ? 
'I' : 'i'); if (ce->state) describe_obj(m, ce->state->obj); if (ce->ring) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1b440f2b90a5..d97fe9c9439a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -406,6 +406,9 @@ static int i915_getparam(struct drm_device *dev, void *data, */ value = 1; break; + case I915_PARAM_HAS_CONTEXT_ISOLATION: + value = intel_engines_has_context_isolation(dev_priv); + break; case I915_PARAM_SLICE_MASK: value = INTEL_INFO(dev_priv)->sseu.slice_mask; if (!value) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed335612be25..4bf118304086 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4972,6 +4972,120 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) return true; } +static int __intel_engines_record_defaults(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + /* + * As we reset the gpu during very early sanitisation, the current + * register state on the GPU should reflect its default values. + * We load a context onto the hw (with restore-inhibit), then switch + * over to a second context to save that default register state. We + * can then prime every new context with that state so they all start + * from the same default HW values. + */ + + ctx = i915_gem_context_create_kernel(i915, 0); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_request *rq; + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ctx; + } + + err = i915_switch_context(rq); + if (engine->init_context) + err = engine->init_context(rq); + + __i915_add_request(rq, true); + if (err) + goto err_active; + } + + err = i915_gem_switch_to_kernel_context(i915); + if (err) + goto err_active; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) + goto err_active; + + assert_kernel_context_is_current(i915); + + for_each_engine(engine, i915, id) { + struct i915_vma *state; + + state = ctx->engine[id].state; + if (!state) + continue; + + /* + * As we will hold a reference to the logical state, it will + * not be torn down with the context, and importantly the + * object will hold onto its vma (making it possible for a + * stray GTT write to corrupt our defaults). Unmap the vma + * from the GTT to prevent such accidents and reclaim the + * space. + */ + err = i915_vma_unbind(state); + if (err) + goto err_active; + + err = i915_gem_object_set_to_cpu_domain(state->obj, false); + if (err) + goto err_active; + + engine->default_state = i915_gem_object_get(state->obj); + } + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + unsigned int found = intel_engines_has_context_isolation(i915); + + /* + * Make sure that classes with multiple engine instances all + * share the same basic configuration. + */ + for_each_engine(engine, i915, id) { + unsigned int bit = BIT(engine->uabi_class); + unsigned int expected = engine->default_state ? bit : 0; + + if ((found & bit) != expected) { + DRM_ERROR("mismatching default context state for class %d on engine %s\n", + engine->uabi_class, engine->name); + } + } + } + +out_ctx: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return err; + +err_active: + /* + * If we have to abandon now, we expect the engines to be idle + * and ready to be torn-down.
First try to flush any remaining + * request, ensure we are pointing at the kernel context and + * then remove it. + */ + if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) + goto out_ctx; + + if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) + goto out_ctx; + + i915_gem_contexts_lost(i915); + goto out_ctx; +} + int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -5038,6 +5152,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); + ret = __intel_engines_record_defaults(dev_priv); out_unlock: if (ret == -EIO) { /* Allow engine initialisation to fail by marking the GPU as diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c05c3d7d21a5..2db040695035 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -418,8 +418,8 @@ out: return ctx; } -static struct i915_gem_context * -create_kernel_context(struct drm_i915_private *i915, int prio) +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; @@ -473,7 +473,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) ida_init(&dev_priv->contexts.hw_ida); /* lowest priority; idle task */ - ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN); + ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context\n"); err = PTR_ERR(ctx); @@ -487,7 +487,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) dev_priv->kernel_context = ctx; /* highest priority; preempting task */ - ctx = create_kernel_context(dev_priv, INT_MAX); + ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default preempt context\n"); err = PTR_ERR(ctx); @@ -522,28 +522,6 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) engine->context_unpin(engine, engine->last_retired_context); engine->last_retired_context = NULL; } - - /* Force the GPU state to be restored on enabling */ - if (!i915_modparams.enable_execlists) { - struct i915_gem_context *ctx; - - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - if (!i915_gem_context_is_default(ctx)) - continue; - - for_each_engine(engine, dev_priv, id) - ctx->engine[engine->id].initialised = false; - - ctx->remap_slice = ALL_L3_SLICES(dev_priv); - } - - for_each_engine(engine, dev_priv, id) { - struct intel_context *kce = - &dev_priv->kernel_context->engine[engine->id]; - - kce->initialised = true; - } - } } void i915_gem_contexts_fini(struct drm_i915_private *i915) @@ -718,9 +696,6 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, if (to->remap_slice) return false; - if (!to->engine[RCS].initialised) - return false; - if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; @@ -795,11 +770,14 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return ret; } - if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) - /* NB: If we inhibit the restore, the context is not allowed to - * die because future work may end up depending on valid address - * space. This means we must enforce that a page table load - * occur when this occurs. */ + if (i915_gem_context_is_kernel(to)) + /* + * The kernel context(s) is treated as pure scratch and is not + * expected to retain any state (as we sacrifice it during + * suspend and on resume it may be corrupted). 
This is ok, + as nothing actually executes using the kernel context; it + is purely used for flushing user contexts. + */ hw_flags = MI_RESTORE_INHIBIT; else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings) hw_flags = MI_FORCE_RESTORE; @@ -843,15 +821,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) to->remap_slice &= ~(1<<i); } - if (!to->engine[RCS].initialised) { - if (engine->init_context) { - ret = engine->init_context(req); - if (ret) - return ret; - } - to->engine[RCS].initialised = true; - } - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 44688e22a5c2..4bfb72f8e1cb 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -157,7 +157,6 @@ struct i915_gem_context { u32 *lrc_reg_state; u64 lrc_desc; int pin_count; - bool initialised; } engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ @@ -292,6 +291,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); + static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6d0b38189a7d..868c07a693b5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -687,6 +687,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + if (engine->default_state) + i915_gem_object_put(engine->default_state); + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) engine->context_unpin(engine, engine->i915->preempt_context); engine->context_unpin(engine, engine->i915->kernel_context); @@ -1705,6 +1708,20 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int which; + + which = 0; + for_each_engine(engine, i915, id) + if (engine->default_state) + which |= BIT(engine->uabi_class); + + return which; +} + static void print_request(struct drm_printer *m, struct drm_i915_gem_request *rq, const char *prefix) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1baaab35905c..0c93f27f36ee 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1180,7 +1180,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; u32 *cs; - int ret; GEM_BUG_ON(!ce->pin_count); @@ -1194,14 +1193,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) if (IS_ERR(cs)) return PTR_ERR(cs); - if (!ce->initialised) { - ret = engine->init_context(request); - if (ret) - return ret; - - ce->initialised = true; - } - /* Note that after this point, we have committed to using * this request as it is being used to both track the * state of engine initialisation and liveness of the @@ -2133,7 +2124,6 @@ static void execlists_init_reg_state(u32 *regs, CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | (HAS_RESOURCE_STREAMER(dev_priv) ? CTX_CTRL_RS_CTX_ENABLE : 0))); CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); @@ -2210,6 +2200,7 @@ populate_lr_context(struct i915_gem_context *ctx, struct intel_ring *ring) { void *vaddr; + u32 *regs; int ret; ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); @@ -2226,11 +2217,31 @@ populate_lr_context(struct i915_gem_context *ctx, } ctx_obj->mm.dirty = true; + if (engine->default_state) { + /* + * We only want to copy over the template context state; + * skipping over the headers reserved for GuC communication, + * leaving those as zero. + */ + const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; + void *defaults; + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) + return PTR_ERR(defaults); + + memcpy(vaddr + start, defaults + start, engine->context_size); + i915_gem_object_unpin_map(engine->default_state); + } + /* The second page of the context object contains some fields which must * be set up prior to the first execution. */ - - execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, - ctx, engine, ring); + regs = vaddr + LRC_STATE_PN * PAGE_SIZE; + execlists_init_reg_state(regs, ctx, engine, ring); + if (!engine->default_state) + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); i915_gem_object_unpin_map(ctx_obj); @@ -2283,7 +2294,6 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ce->ring = ring; ce->state = vma; - ce->initialised |= engine->init_context == NULL; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7e2a671882fb..464dc58af27b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1384,11 +1384,34 @@ alloc_context_vma(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; struct drm_i915_gem_object *obj; struct i915_vma *vma; + int err; obj = i915_gem_object_create(i915, engine->context_size); if (IS_ERR(obj)) return ERR_CAST(obj); + if (engine->default_state) { + void *defaults, *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) { + err = PTR_ERR(defaults); + goto err_map; + } + + memcpy(vaddr, defaults, engine->context_size); + + i915_gem_object_unpin_map(engine->default_state); + i915_gem_object_unpin_map(obj); + } + /* * Try to make the context utilize L3 as well as LLC. * @@ -1410,10 +1433,18 @@ alloc_context_vma(struct intel_engine_cs *engine) } vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); - if (IS_ERR(vma)) - i915_gem_object_put(obj); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } return vma; + +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); } static struct intel_ring * @@ -1449,16 +1480,6 @@ intel_ring_context_pin(struct intel_engine_cs *engine, ce->state->obj->pin_global++; } - /* The kernel context is only used as a placeholder for flushing the - * active context. It is never used for submitting user rendering and - * as such never requires the golden render context, and so we can skip - * emitting it when we switch to the kernel context. 
This is required - as during eviction we cannot allocate and pin the renderstate in - order to initialise the context. - */ - if (i915_gem_context_is_kernel(ctx)) - ce->initialised = true; - i915_gem_context_get(ctx); out: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f3dbfe7ae6e4..337222859166 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -306,6 +306,7 @@ struct intel_engine_cs { struct intel_ring *buffer; struct intel_timeline *timeline; + struct drm_i915_gem_object *default_state; struct intel_render_state *render_state; atomic_t irq_count; @@ -932,6 +933,7 @@ void intel_engines_park(struct drm_i915_private *i915); void intel_engines_unpark(struct drm_i915_private *i915); void intel_engines_reset_default_submission(struct drm_i915_private *i915); +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 1f7dfb22a7c2..6c02ced663f8 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -466,6 +466,21 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 +/* + * Query whether every context (both per-file default and user created) is + * isolated (insofar as HW supports). If this parameter is not true, then + * freshly created contexts may inherit values from an existing context, + * rather than default HW values. If true, it also ensures (insofar as HW + * supports) that all state set by this context will not leak to any other + * context. + * + * As not every engine across every gen supports contexts, the returned + * value reports the support of context isolation for individual engines by + * returning a bitmask of each engine class set to true if that class supports + * isolation. + */ +#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 + typedef struct drm_i915_getparam { __s32 param; /* -- cgit v1.2.3 From 7c2fa7faf18f80f9fbbe7fcad8072604304db5dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Nov 2017 14:26:34 +0000 Subject: drm/i915: Stop caching the "golden" renderstate As we now record the default HW state and so only emit the "golden" renderstate once to prepare the HW, there is no advantage in keeping the renderstate batch around as it will never be used again.
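The last two patches reduce to a single pattern: capture one golden template image once the hardware has been initialised, then prime every new context from that template rather than re-running the initialisation path per context. A minimal user-space C sketch of the pattern follows; the names and sizes are purely illustrative, not driver API.

#include <stdlib.h>
#include <string.h>

#define CTX_SIZE 4096

static unsigned char *default_state; /* the golden template, captured once */

/* Capture the template; in the driver this is the context image the HW
 * itself saved after the first, restore-inhibited context switch. */
static int record_default_state(const unsigned char *hw_image)
{
	default_state = malloc(CTX_SIZE);
	if (!default_state)
		return -1;
	memcpy(default_state, hw_image, CTX_SIZE);
	return 0;
}

/* Prime a new context from the template so it starts from the default
 * HW values instead of inheriting state from a previous context. */
static unsigned char *alloc_context(void)
{
	unsigned char *ctx = malloc(CTX_SIZE);

	if (!ctx)
		return NULL;
	if (default_state)
		memcpy(ctx, default_state, CTX_SIZE);
	else
		memset(ctx, 0, CTX_SIZE); /* no template recorded: start blank */
	return ctx;
}

In the driver, the template is engine->default_state and the priming memcpy is the one done in populate_lr_context() and alloc_context_vma() above.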
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem_render_state.c | 141 +++++++++------------------ drivers/gpu/drm/i915/i915_gem_render_state.h | 4 +- drivers/gpu/drm/i915/intel_engine_cs.c | 9 +- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 - 7 files changed, 54 insertions(+), 109 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0be9a918697a..40012b6daea2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -67,7 +67,6 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_render_state.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 3703dc91eeda..c2723a06fbb4 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -26,10 +26,12 @@ */ #include "i915_drv.h" +#include "i915_gem_render_state.h" #include "intel_renderstate.h" struct intel_render_state { const struct intel_renderstate_rodata *rodata; + struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 batch_offset; u32 batch_size; @@ -40,6 +42,9 @@ struct intel_render_state { static const struct intel_renderstate_rodata * render_state_get_rodata(const struct intel_engine_cs *engine) { + if (engine->id != RCS) + return NULL; + switch (INTEL_GEN(engine->i915)) { case 6: return &gen6_null_state; @@ -74,17 +79,16 @@ static int render_state_setup(struct intel_render_state *so, struct drm_i915_private *i915) { const struct intel_renderstate_rodata *rodata = so->rodata; - struct drm_i915_gem_object *obj = so->vma->obj; unsigned int i = 0, reloc_index = 0; unsigned int needs_clflush; u32 *d; int ret; - ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush); if (ret) return ret; - d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0)); + d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -112,7 +116,7 @@ static int render_state_setup(struct intel_render_state *so, goto err; } - so->batch_offset = so->vma->node.start; + so->batch_offset = i915_ggtt_offset(so->vma); so->batch_size = rodata->batch_items * sizeof(u32); while (i % CACHELINE_DWORDS) @@ -160,9 +164,9 @@ static int render_state_setup(struct intel_render_state *so, drm_clflush_virt_range(d, i * sizeof(u32)); kunmap_atomic(d); - ret = i915_gem_object_set_to_gtt_domain(obj, false); + ret = i915_gem_object_set_to_gtt_domain(so->obj, false); out: - i915_gem_obj_finish_shmem_access(obj); + i915_gem_obj_finish_shmem_access(so->obj); return ret; err: @@ -173,112 +177,61 @@ err: #undef OUT_BATCH -int i915_gem_render_state_init(struct intel_engine_cs *engine) +int i915_gem_render_state_emit(struct drm_i915_gem_request *rq) { - struct intel_render_state *so; - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - int ret; + struct intel_engine_cs *engine = rq->engine; + struct intel_render_state so = {}; /* keep the compiler happy */ + int err; - if (engine->id != RCS) + so.rodata = 
render_state_get_rodata(engine); + if (!so.rodata) return 0; - rodata = render_state_get_rodata(engine); - if (!rodata) - return 0; - - if (rodata->batch_items * 4 > PAGE_SIZE) + if (so.rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; - so = kmalloc(sizeof(*so), GFP_KERNEL); - if (!so) - return -ENOMEM; - - obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(obj)) { - ret = PTR_ERR(obj); - goto err_free; - } + so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(so.obj)) + return PTR_ERR(so.obj); - so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); - if (IS_ERR(so->vma)) { - ret = PTR_ERR(so->vma); + so.vma = i915_vma_instance(so.obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(so.vma)) { + err = PTR_ERR(so.vma); goto err_obj; } - so->rodata = rodata; - engine->render_state = so; - return 0; - -err_obj: - i915_gem_object_put(obj); -err_free: - kfree(so); - return ret; -} - -int i915_gem_render_state_emit(struct drm_i915_gem_request *req) -{ - struct intel_render_state *so; - int ret; - - lockdep_assert_held(&req->i915->drm.struct_mutex); - - so = req->engine->render_state; - if (!so) - return 0; - - /* Recreate the page after shrinking */ - if (!i915_gem_object_has_pages(so->vma->obj)) - so->batch_offset = -1; - - ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (ret) - return ret; + err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err_vma; - if (so->vma->node.start != so->batch_offset) { - ret = render_state_setup(so, req->i915); - if (ret) - goto err_unpin; - } + err = render_state_setup(&so, rq->i915); + if (err) + goto err_unpin; - ret = req->engine->emit_flush(req, EMIT_INVALIDATE); - if (ret) + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) goto err_unpin; - ret = req->engine->emit_bb_start(req, - so->batch_offset, so->batch_size, - I915_DISPATCH_SECURE); - if (ret) + err = engine->emit_bb_start(rq, + so.batch_offset, so.batch_size, + I915_DISPATCH_SECURE); + if (err) goto err_unpin; - if (so->aux_size > 8) { - ret = req->engine->emit_bb_start(req, - so->aux_offset, so->aux_size, - I915_DISPATCH_SECURE); - if (ret) + if (so.aux_size > 8) { + err = engine->emit_bb_start(rq, + so.aux_offset, so.aux_size, + I915_DISPATCH_SECURE); + if (err) goto err_unpin; } - i915_vma_move_to_active(so->vma, req, 0); + i915_vma_move_to_active(so.vma, rq, 0); err_unpin: - i915_vma_unpin(so->vma); - return ret; -} - -void i915_gem_render_state_fini(struct intel_engine_cs *engine) -{ - struct intel_render_state *so; - struct drm_i915_gem_object *obj; - - so = fetch_and_zero(&engine->render_state); - if (!so) - return; - - obj = so->vma->obj; - - i915_vma_close(so->vma); - __i915_gem_object_release_unless_active(obj); - - kfree(so); + i915_vma_unpin(so.vma); +err_vma: + i915_vma_close(so.vma); +err_obj: + __i915_gem_object_release_unless_active(so.obj); + return err; } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 87481845799d..86369520482e 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -26,8 +26,6 @@ struct drm_i915_gem_request; -int i915_gem_render_state_init(struct intel_engine_cs *engine); -int i915_gem_render_state_emit(struct drm_i915_gem_request *req); -void i915_gem_render_state_fini(struct intel_engine_cs *engine); +int i915_gem_render_state_emit(struct drm_i915_gem_request *rq); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git 
a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 868c07a693b5..a0f9d0eb4bce 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -641,21 +641,15 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_unpin_preempt; - ret = i915_gem_render_state_init(engine); - if (ret) - goto err_breadcrumbs; - if (HWS_NEEDS_PHYSICAL(engine->i915)) ret = init_phys_status_page(engine); else ret = init_status_page(engine); if (ret) - goto err_rs_fini; + goto err_breadcrumbs; return 0; -err_rs_fini: - i915_gem_render_state_fini(engine); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: @@ -682,7 +676,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) else cleanup_status_page(engine); - i915_gem_render_state_fini(engine); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0c93f27f36ee..58d050a9a866 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@ #include <drm/drmP.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_render_state.h" #include "intel_mocs.h" #define RING_EXECLIST_QFULL (1 << 0x2) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 464dc58af27b..3321b801e77d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -28,9 +28,12 @@ */ #include <linux/log2.h> + #include <drm/drmP.h> -#include "i915_drv.h" #include <drm/i915_drm.h> + +#include "i915_drv.h" +#include "i915_gem_render_state.h" #include "i915_trace.h" #include "intel_drv.h" diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 337222859166..ef22c994038b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -165,7 +165,6 @@ struct i915_ctx_workarounds { }; struct drm_i915_gem_request; -struct intel_render_state; /* * Engine IDs definitions. @@ -307,7 +306,6 @@ struct intel_engine_cs { struct intel_timeline *timeline; struct drm_i915_gem_object *default_state; - struct intel_render_state *render_state; atomic_t irq_count; unsigned long irq_posted; -- cgit v1.2.3 From fd13821219dda093e402c5849e5d4525bb64b4f3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Nov 2017 15:12:04 +0000 Subject: drm/i915: Make request's wait-for-space explicit At the start of building a request, we would wait for roughly enough space to fit the average request (to reduce the likelihood of having to wait and abort partway through request construction). To achieve this, we would try to begin a 0-length command packet; that just added extra confusion, so make the wait-for-space explicit. In the next patch we want to move it from the backend to i915_gem_request_alloc(), so that it can ensure the wait-for-space is the first operation in building a new request.
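The space accounting behind this is worth spelling out: a command packet cannot wrap around the end of the ring buffer, so if it does not fit in the bytes remaining before the end, the writer pads to the start of the buffer, and that padding counts against the space being waited for. A stand-alone C sketch of just that calculation, with illustrative field names modelled on (but not identical to) the driver's struct intel_ring:

#include <assert.h>

struct ring {
	unsigned int size; /* usable bytes in the ring buffer */
	unsigned int emit; /* offset at which the next packet is written */
};

/* Bytes that must be free before writing a packet of `bytes` at emit:
 * if the packet would cross the end of the buffer, the tail is padded
 * and the packet starts again at offset 0, so the padding is charged
 * against the request too. */
static unsigned int bytes_required(const struct ring *ring, unsigned int bytes)
{
	assert(bytes <= ring->size);
	if (bytes > ring->size - ring->emit)
		bytes += ring->size - ring->emit;
	return bytes;
}

The driver version below, intel_ring_wait_for_space(), applies the same rule against ring->effective_size before deciding whether it must wait for requests to retire.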
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171115151204.8105-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 8 ++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 56 +++++++++++++++++++++------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 41 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 58d050a9a866..ebd9596fe83b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1180,7 +1180,7 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; - u32 *cs; + int ret; GEM_BUG_ON(!ce->pin_count); @@ -1190,9 +1190,9 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += EXECLISTS_REQUEST_SIZE; - cs = intel_ring_begin(request, 0); - if (IS_ERR(cs)) - return PTR_ERR(cs); + ret = intel_ring_wait_for_space(request->ring, request->reserved_space); + if (ret) + return ret; /* Note that after this point, we have committed to using * this request as it is being used to both track the diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3321b801e77d..12e734b29463 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1578,7 +1578,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) static int ring_request_alloc(struct drm_i915_gem_request *request) { - u32 *cs; + int ret; GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); @@ -1588,37 +1588,24 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - cs = intel_ring_begin(request, 0); - if (IS_ERR(cs)) - return PTR_ERR(cs); + ret = intel_ring_wait_for_space(request->ring, request->reserved_space); + if (ret) + return ret; request->reserved_space -= LEGACY_REQUEST_SIZE; return 0; } -static noinline int wait_for_space(struct drm_i915_gem_request *req, - unsigned int bytes) +static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) { - struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; long timeout; - lockdep_assert_held(&req->i915->drm.struct_mutex); + lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); if (intel_ring_update_space(ring) >= bytes) return 0; - /* - * Space is reserved in the ringbuffer for finalising the request, - * as that cannot be allowed to fail. During request finalisation, - * reserved_space is set to 0 to stop the overallocation and the - * assumption is that then we never need to wait (which has the - * risk of failing with EINTR). - * - * See also i915_gem_request_alloc() and i915_add_request(). - */ - GEM_BUG_ON(!req->reserved_space); - list_for_each_entry(target, &ring->request_list, ring_link) { /* Would completion of this request free enough space? 
*/ if (bytes <= __intel_ring_space(target->postfix, @@ -1642,6 +1629,22 @@ static noinline int wait_for_space(struct drm_i915_gem_request *req, return 0; } +int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes) +{ + GEM_BUG_ON(bytes > ring->effective_size); + if (unlikely(bytes > ring->effective_size - ring->emit)) + bytes += ring->size - ring->emit; + + if (unlikely(bytes > ring->space)) { + int ret = wait_for_space(ring, bytes); + if (unlikely(ret)) + return ret; + } + + GEM_BUG_ON(ring->space < bytes); + return 0; +} + u32 *intel_ring_begin(struct drm_i915_gem_request *req, unsigned int num_dwords) { @@ -1681,7 +1684,20 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, } if (unlikely(total_bytes > ring->space)) { - int ret = wait_for_space(req, total_bytes); + int ret; + + /* + * Space is reserved in the ringbuffer for finalising the + * request, as that cannot be allowed to fail. During request + * finalisation, reserved_space is set to 0 to stop the + * overallocation and the assumption is that then we never need + * to wait (which has the risk of failing with EINTR). + * + * See also i915_gem_request_alloc() and i915_add_request(). + */ + GEM_BUG_ON(!req->reserved_space); + + ret = wait_for_space(ring, total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ef22c994038b..15a15cb876a6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -660,6 +660,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); +int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes); u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, unsigned int n); -- cgit v1.2.3 From c6dce8f140bc19efb04afc8c0d11897ead000946 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Thu, 16 Nov 2017 19:02:37 +0530 Subject: drm/i915: Update execlists tasklet naming intel_lrc_irq_handler and i915_guc_irq_handler are HW submission related tasklet functions. Name them with "submission_tasklet" suffix and remove intel/i915 prefix as they are static. Also rename irq_tasklet as just tasklet for clarity. v2: s/_bh/_tasklet (Chris) Suggested-by: Michal Wajdeczko Signed-off-by: Sagar Arun Kamble Cc: Michal Wajdeczko Cc: Michal Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Michal Wajdeczko Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1510839162-25197-2-git-send-email-sagar.a.kamble@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 10 +++++----- drivers/gpu/drm/i915/i915_guc_submission.c | 6 +++--- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 17 +++++++++-------- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- 6 files changed, 21 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bf8fea792048..61ba321e9970 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2933,13 +2933,13 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) * Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). 
If a request * is completed by one engine, it may then queue a request - * to a second via its engine->irq_tasklet *just* as we are + * to a second via its execlists->tasklet *just* as we are * calling engine->init_hw() and also writing the ELSP. - * Turning off the engine->irq_tasklet until the reset is over + * Turning off the execlists->tasklet until the reset is over * prevents the race. */ - tasklet_kill(&engine->execlists.irq_tasklet); - tasklet_disable(&engine->execlists.irq_tasklet); + tasklet_kill(&engine->execlists.tasklet); + tasklet_disable(&engine->execlists.tasklet); /* * We're using worker to queue preemption requests from the tasklet in @@ -3128,7 +3128,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) { - tasklet_enable(&engine->execlists.irq_tasklet); + tasklet_enable(&engine->execlists.tasklet); kthread_unpark(engine->breadcrumbs.signaler); intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 0ba2fc04fe9c..7f3e63da519b 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -650,7 +650,7 @@ static void inject_preempt_context(struct work_struct *work) if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) { execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); - tasklet_schedule(&engine->execlists.irq_tasklet); + tasklet_schedule(&engine->execlists.tasklet); } } @@ -799,7 +799,7 @@ unlock: spin_unlock_irq(&engine->timeline->lock); } -static void i915_guc_irq_handler(unsigned long data) +static void guc_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1439,7 +1439,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct intel_engine_execlists * const execlists = &engine->execlists; - execlists->irq_tasklet.func = i915_guc_irq_handler; + execlists->tasklet.func = guc_submission_tasklet; engine->park = i915_guc_submission_park; engine->unpark = i915_guc_submission_unpark; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ff00e462697a..4fb183ae7a07 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1404,7 +1404,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) } if (tasklet) - tasklet_hi_schedule(&execlists->irq_tasklet); + tasklet_hi_schedule(&execlists->tasklet); } static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a42b738e79e7..9897c7f78c51 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1585,7 +1585,7 @@ void intel_engines_park(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { /* Flush the residual irq tasklets first. 
*/ intel_engine_disarm_breadcrumbs(engine); - tasklet_kill(&engine->execlists.irq_tasklet); + tasklet_kill(&engine->execlists.tasklet); /* * We are committed now to parking the engines, make sure there diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ebd9596fe83b..be6c39adebdf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -781,7 +781,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. */ -static void intel_lrc_irq_handler(unsigned long data) +static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; @@ -947,7 +947,7 @@ static void insert_request(struct intel_engine_cs *engine, list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); if (ptr_unmask_bits(p, 1)) - tasklet_hi_schedule(&engine->execlists.irq_tasklet); + tasklet_hi_schedule(&engine->execlists.tasklet); } static void execlists_submit_request(struct drm_i915_gem_request *request) @@ -1503,7 +1503,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ if (execlists->first) - tasklet_schedule(&execlists->irq_tasklet); + tasklet_schedule(&execlists->tasklet); return 0; } @@ -1881,8 +1881,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) * Tasklet cannot be active at this point due intel_mark_active/idle * so this is just for documentation. */ - if (WARN_ON(test_bit(TASKLET_STATE_SCHED, &engine->execlists.irq_tasklet.state))) - tasklet_kill(&engine->execlists.irq_tasklet); + if (WARN_ON(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state))) + tasklet_kill(&engine->execlists.tasklet); dev_priv = engine->i915; @@ -1906,7 +1907,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = execlists_submit_request; engine->cancel_requests = execlists_cancel_requests; engine->schedule = execlists_schedule; - engine->execlists.irq_tasklet.func = intel_lrc_irq_handler; + engine->execlists.tasklet.func = execlists_submission_tasklet; engine->park = NULL; engine->unpark = NULL; @@ -1968,8 +1969,8 @@ logical_ring_setup(struct intel_engine_cs *engine) engine->execlists.fw_domains = fw_domains; - tasklet_init(&engine->execlists.irq_tasklet, - intel_lrc_irq_handler, (unsigned long)engine); + tasklet_init(&engine->execlists.tasklet, + execlists_submission_tasklet, (unsigned long)engine); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 15a15cb876a6..c00804ed64c6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -193,9 +193,9 @@ struct i915_priolist { */ struct intel_engine_execlists { /** - * @irq_tasklet: softirq tasklet for bottom handler + * @tasklet: softirq tasklet for bottom handler */ - struct tasklet_struct irq_tasklet; + struct tasklet_struct tasklet; /** * @default_priolist: priority list for I915_PRIORITY_NORMAL -- cgit v1.2.3
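A closing note on the tasklet lifecycle that this rename makes easier to follow: the handler is bound once with tasklet_init(), scheduled from hard-irq context with tasklet_hi_schedule(), and must be killed (and kept disabled) across teardown and reset, as the gen8_cs_irq_handler() and i915_gem_reset_prepare_engine() hunks above show. A minimal sketch of that lifecycle against the kernel's tasklet API of this era; the engine struct and handler here are illustrative only, not the driver's types:

#include <linux/interrupt.h>

struct engine {
	struct tasklet_struct tasklet;
};

/* Bottom half: runs in softirq context, doing the heavy lifting that
 * the hard-irq handler deferred. */
static void submission_tasklet(unsigned long data)
{
	struct engine *engine = (struct engine *)data;

	(void)engine; /* drain the CSB, resubmit to the ELSP, etc. */
}

static void engine_init(struct engine *engine)
{
	tasklet_init(&engine->tasklet, submission_tasklet,
		     (unsigned long)engine);
}

/* Hard-irq handler: just kick the tasklet. */
static irqreturn_t engine_irq(int irq, void *arg)
{
	struct engine *engine = arg;

	tasklet_hi_schedule(&engine->tasklet);
	return IRQ_HANDLED;
}

static void engine_fini(struct engine *engine)
{
	/* wait for a running handler to finish and prevent rescheduling */
	tasklet_kill(&engine->tasklet);
}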