path: root/drivers/gpu/drm/i915/intel_breadcrumbs.c
author    Chris Wilson <chris@chris-wilson.co.uk>    2017-02-27 20:58:48 +0000
committer Chris Wilson <chris@chris-wilson.co.uk>    2017-02-27 21:57:20 +0000
commit    56299fb7d9047cc1d25362827073b2ac0984ed21 (patch)
tree      311ae5bcaec77364beb1a0514886e73534a5516d /drivers/gpu/drm/i915/intel_breadcrumbs.c
parent    8d769ea7bc16c34c9dc5143be021e943014c4cd1 (diff)
download  linux-56299fb7d9047cc1d25362827073b2ac0984ed21.tar.bz2
drm/i915: Signal first fence from irq handler if complete
As execlists and other non-semaphore multi-engine devices coordinate between
engines using interrupts, we can shave off a few tens of microseconds of
scheduling latency by doing the fence signaling from the interrupt handler as
opposed to the RT kthread. (Realistically the delay adds about 1% to an
individual cross-engine workload.) We only signal the first fence in order to
limit the amount of work we move into the interrupt handler. We also have to
remember that our breadcrumbs may be unordered with respect to the interrupt,
and so we still require the waiter process to perform some heavyweight
coherency fixups, as well as traversing the tree of waiters.

v2: No need for an early exit in the irq handler - it breaks the flow between
    patches and prevents the tracepoint.
v3: Restore the rcu hold across irq signaling of the request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-2-chris@chris-wilson.co.uk
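The irq-handler side of this change lives in i915_irq.c and is therefore not
part of the per-file diff shown below. As a rough, hypothetical sketch of the
pattern the message describes (the helper name and exact call site are
assumptions; the breadcrumbs fields match the hunks below): the handler peeks
at the first waiter under the breadcrumbs lock, signals that one fence only if
its seqno has already passed, and always wakes the waiter so it can fix up
irq/seqno incoherency and walk the remaining tree of waiters.

/* Hypothetical sketch, not the actual i915_irq.c hunk from this patch. */
static void engine_signal_first_fence(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *rq = NULL;
	struct intel_wait *wait;

	/* v3: hold the rcu read lock across the signaling so the request
	 * looked up without taking a reference cannot be freed under us.
	 */
	rcu_read_lock();

	spin_lock(&engine->breadcrumbs.lock);
	wait = engine->breadcrumbs.first_wait;
	if (wait) {
		/* Only the first fence is signaled from the interrupt to
		 * bound the work done here; the waiter thread still runs
		 * to handle seqno/interrupt ordering and later waiters.
		 */
		if (i915_seqno_passed(intel_engine_get_seqno(engine),
				      wait->seqno))
			rq = wait->request;

		wake_up_process(wait->tsk);
	}
	spin_unlock(&engine->breadcrumbs.lock);

	if (rq)
		dma_fence_signal(&rq->fence);

	rcu_read_unlock();
}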
Diffstat (limited to 'drivers/gpu/drm/i915/intel_breadcrumbs.c')
-rw-r--r--   drivers/gpu/drm/i915/intel_breadcrumbs.c   32
1 file changed, 8 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index c8361f350350..e7f4a4c923a2 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -28,26 +28,18 @@
unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
{
+ struct intel_wait *wait;
+ unsigned long flags;
unsigned int result = 0;
- /* Note that for this not to dangerously chase a dangling pointer,
- * we must hold the rcu_read_lock here.
- *
- * Also note that tsk is likely to be in !TASK_RUNNING state so an
- * early test for tsk->state != TASK_RUNNING before wake_up_process()
- * is unlikely to be beneficial.
- */
- if (intel_engine_has_waiter(engine)) {
- struct task_struct *tsk;
-
+ spin_lock_irqsave(&engine->breadcrumbs.lock, flags);
+ wait = engine->breadcrumbs.first_wait;
+ if (wait) {
result = ENGINE_WAKEUP_WAITER;
-
- rcu_read_lock();
- tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
- if (tsk && !wake_up_process(tsk))
+ if (!wake_up_process(wait->tsk))
result |= ENGINE_WAKEUP_ACTIVE;
- rcu_read_unlock();
}
+ spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags);
return result;
}
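For readability, here is how intel_engine_wakeup() reads once the hunk above
is applied (reconstructed from the context and '+' lines; indentation is
approximate). The first waiter is now found by taking the breadcrumbs
spinlock rather than by dereferencing the RCU-protected irq_seqno_bh task
pointer:

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
{
	struct intel_wait *wait;
	unsigned long flags;
	unsigned int result = 0;

	/* Peek at the oldest waiter under the lock; if one exists, report
	 * it and try to wake it (a failed wake_up_process() means the task
	 * was already running, i.e. the bottom-half is active).
	 */
	spin_lock_irqsave(&engine->breadcrumbs.lock, flags);
	wait = engine->breadcrumbs.first_wait;
	if (wait) {
		result = ENGINE_WAKEUP_WAITER;
		if (!wake_up_process(wait->tsk))
			result |= ENGINE_WAKEUP_ACTIVE;
	}
	spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags);

	return result;
}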
@@ -301,7 +293,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
}
rb_link_node(&wait->node, parent, p);
rb_insert_color(&wait->node, &b->waiters);
- GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
if (completed) {
struct rb_node *next = rb_next(completed);
@@ -310,7 +301,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
if (next && next != &wait->node) {
GEM_BUG_ON(first);
b->first_wait = to_wait(next);
- rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
/* As there is a delay between reading the current
* seqno, processing the completed tasks and selecting
* the next waiter, we may have missed the interrupt
@@ -337,7 +327,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
if (first) {
GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
b->first_wait = wait;
- rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
/* After assigning ourselves as the new bottom-half, we must
* perform a cursory check to prevent a missed interrupt.
* Either we miss the interrupt whilst programming the hardware,
@@ -348,7 +337,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
*/
__intel_breadcrumbs_enable_irq(b);
}
- GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
GEM_BUG_ON(!b->first_wait);
GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
@@ -396,8 +384,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
const int priority = wakeup_priority(b, wait->tsk);
struct rb_node *next;
- GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
-
/* We are the current bottom-half. Find the next candidate,
* the first waiter in the queue on the remaining oldest
* request. As multiple seqnos may complete in the time it
@@ -439,13 +425,11 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
* exception rather than a seqno completion.
*/
b->first_wait = to_wait(next);
- rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
if (b->first_wait->seqno != wait->seqno)
__intel_breadcrumbs_enable_irq(b);
wake_up_process(b->first_wait->tsk);
} else {
b->first_wait = NULL;
- rcu_assign_pointer(b->irq_seqno_bh, NULL);
__intel_breadcrumbs_disable_irq(b);
}
} else {
@@ -459,7 +443,6 @@ out:
GEM_BUG_ON(b->first_wait == wait);
GEM_BUG_ON(rb_first(&b->waiters) !=
(b->first_wait ? &b->first_wait->node : NULL));
- GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
}
void intel_engine_remove_wait(struct intel_engine_cs *engine,
@@ -621,6 +604,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
return;
request->signaling.wait.tsk = b->signaler;
+ request->signaling.wait.request = request;
request->signaling.wait.seqno = seqno;
i915_gem_request_get(request);
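The final hunk is what enables the irq-side signaling: each signaler's
intel_wait now records the request it is tracking, so the interrupt handler
can go from the first waiter straight to the fence it should signal. The
struct itself is declared in intel_ringbuffer.h and is not shown in this
per-file diff; its assumed shape after this change is roughly:

struct intel_wait {
	struct rb_node node;		/* position in the per-engine waiter tree */
	struct task_struct *tsk;	/* bottom-half / waiter task to wake */
	struct drm_i915_gem_request *request;	/* request whose fence may be signaled from the irq */
	u32 seqno;
};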