diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.c | 295 |
1 files changed, 224 insertions, 71 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 3839712ebd23..ad175d662b4e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -29,6 +29,7 @@ #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/sched/signal.h> +#include <linux/sched/mm.h> #include "gem/i915_gem_context.h" #include "gt/intel_breadcrumbs.h" @@ -96,9 +97,9 @@ static signed long i915_fence_wait(struct dma_fence *fence, bool interruptible, signed long timeout) { - return i915_request_wait(to_request(fence), - interruptible | I915_WAIT_PRIORITY, - timeout); + return i915_request_wait_timeout(to_request(fence), + interruptible | I915_WAIT_PRIORITY, + timeout); } struct kmem_cache *i915_request_slab_cache(void) @@ -113,6 +114,10 @@ static void i915_fence_release(struct dma_fence *fence) GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT && rq->guc_prio != GUC_PRIO_FINI); + i915_request_free_capture_list(fetch_and_zero(&rq->capture_list)); + if (i915_vma_snapshot_present(&rq->batch_snapshot)) + i915_vma_snapshot_put_onstack(&rq->batch_snapshot); + /* * The request is put onto a RCU freelist (i.e. the address * is immediately reused), mark the fences as being freed now. @@ -186,19 +191,6 @@ void i915_request_notify_execute_cb_imm(struct i915_request *rq) __notify_execute_cb(rq, irq_work_imm); } -static void free_capture_list(struct i915_request *request) -{ - struct i915_capture_list *capture; - - capture = fetch_and_zero(&request->capture_list); - while (capture) { - struct i915_capture_list *next = capture->next; - - kfree(capture); - capture = next; - } -} - static void __i915_request_fill(struct i915_request *rq, u8 val) { void *vaddr = rq->ring->vaddr; @@ -303,6 +295,37 @@ static void __rq_cancel_watchdog(struct i915_request *rq) i915_request_put(rq); } +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +/** + * i915_request_free_capture_list - Free a capture list + * @capture: Pointer to the first list item or NULL + * + */ +void i915_request_free_capture_list(struct i915_capture_list *capture) +{ + while (capture) { + struct i915_capture_list *next = capture->next; + + i915_vma_snapshot_put(capture->vma_snapshot); + capture = next; + } +} + +#define assert_capture_list_is_null(_rq) GEM_BUG_ON((_rq)->capture_list) + +#define clear_capture_list(_rq) ((_rq)->capture_list = NULL) + +#else + +#define i915_request_free_capture_list(_a) do {} while (0) + +#define assert_capture_list_is_null(_a) do {} while (0) + +#define clear_capture_list(_rq) do {} while (0) + +#endif + bool i915_request_retire(struct i915_request *rq) { if (!__i915_request_is_complete(rq)) @@ -339,7 +362,7 @@ bool i915_request_retire(struct i915_request *rq) } if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) - atomic_dec(&rq->engine->gt->rps.num_waiters); + intel_rps_dec_waiters(&rq->engine->gt->rps); /* * We only loosely track inflight requests across preemption, @@ -359,7 +382,6 @@ bool i915_request_retire(struct i915_request *rq) intel_context_exit(rq->context); intel_context_unpin(rq->context); - free_capture_list(rq); i915_sched_node_fini(&rq->sched); i915_request_put(rq); @@ -719,7 +741,7 @@ void i915_request_cancel(struct i915_request *rq, int error) intel_context_cancel_request(rq->context, rq); } -static int __i915_sw_fence_call +static int submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_request *request = @@ -755,7 +777,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static int __i915_sw_fence_call +static int semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); @@ -829,13 +851,18 @@ static void __i915_request_ctor(void *arg) i915_sw_fence_init(&rq->submit, submit_notify); i915_sw_fence_init(&rq->semaphore, semaphore_notify); - dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0); - - rq->capture_list = NULL; + clear_capture_list(rq); + rq->batch_snapshot.present = false; init_llist_head(&rq->execute_cb); } +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#define clear_batch_ptr(_rq) ((_rq)->batch = NULL) +#else +#define clear_batch_ptr(_a) do {} while (0) +#endif + struct i915_request * __i915_request_create(struct intel_context *ce, gfp_t gfp) { @@ -905,17 +932,12 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; - kref_init(&rq->fence.refcount); - rq->fence.flags = 0; - rq->fence.error = 0; - INIT_LIST_HEAD(&rq->fence.cb_list); - ret = intel_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; - rq->fence.context = tl->fence_context; - rq->fence.seqno = seqno; + dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, + tl->fence_context, seqno); RCU_INIT_POINTER(rq->timeline, tl); rq->hwsp_seqno = tl->hwsp_seqno; @@ -932,10 +954,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) i915_sched_node_reinit(&rq->sched); /* No zalloc, everything must be cleared after use */ - rq->batch = NULL; + clear_batch_ptr(rq); __rq_init_watchdog(rq); - GEM_BUG_ON(rq->capture_list); + assert_capture_list_is_null(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); + GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot)); /* * Reserve space in the ring buffer for all the commands required to @@ -1152,6 +1175,12 @@ __emit_semaphore_wait(struct i915_request *to, return 0; } +static bool +can_use_semaphore_wait(struct i915_request *to, struct i915_request *from) +{ + return to->engine->gt->ggtt == from->engine->gt->ggtt; +} + static int emit_semaphore_wait(struct i915_request *to, struct i915_request *from, @@ -1160,6 +1189,9 @@ emit_semaphore_wait(struct i915_request *to, const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; struct i915_sw_fence *wait = &to->submit; + if (!can_use_semaphore_wait(to, from)) + goto await_fence; + if (!intel_context_use_semaphores(to->context)) goto await_fence; @@ -1263,7 +1295,8 @@ __i915_request_await_execution(struct i915_request *to, * immediate execution, and so we must wait until it reaches the * active slot. */ - if (intel_engine_has_semaphores(to->engine) && + if (can_use_semaphore_wait(to, from) && + intel_engine_has_semaphores(to->engine) && !i915_request_has_initial_breadcrumb(to)) { err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); if (err < 0) @@ -1332,6 +1365,25 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) return err; } +static inline bool is_parallel_rq(struct i915_request *rq) +{ + return intel_context_is_parallel(rq->context); +} + +static inline struct intel_context *request_to_parent(struct i915_request *rq) +{ + return intel_context_to_parent(rq->context); +} + +static bool is_same_parallel_context(struct i915_request *to, + struct i915_request *from) +{ + if (is_parallel_rq(to)) + return request_to_parent(to) == request_to_parent(from); + + return false; +} + int i915_request_await_execution(struct i915_request *rq, struct dma_fence *fence) @@ -1363,11 +1415,14 @@ i915_request_await_execution(struct i915_request *rq, * want to run our callback in all cases. */ - if (dma_fence_is_i915(fence)) + if (dma_fence_is_i915(fence)) { + if (is_same_parallel_context(rq, to_request(fence))) + continue; ret = __i915_request_await_execution(rq, to_request(fence)); - else + } else { ret = i915_request_await_external(rq, fence); + } if (ret < 0) return ret; } while (--nchild); @@ -1468,10 +1523,13 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) fence)) continue; - if (dma_fence_is_i915(fence)) + if (dma_fence_is_i915(fence)) { + if (is_same_parallel_context(rq, to_request(fence))) + continue; ret = i915_request_await_request(rq, to_request(fence)); - else + } else { ret = i915_request_await_external(rq, fence); + } if (ret < 0) return ret; @@ -1523,35 +1581,51 @@ i915_request_await_object(struct i915_request *to, } static struct i915_request * -__i915_request_add_to_timeline(struct i915_request *rq) +__i915_request_ensure_parallel_ordering(struct i915_request *rq, + struct intel_timeline *timeline) { - struct intel_timeline *timeline = i915_request_timeline(rq); struct i915_request *prev; - /* - * Dependency tracking and request ordering along the timeline - * is special cased so that we can eliminate redundant ordering - * operations while building the request (we know that the timeline - * itself is ordered, and here we guarantee it). - * - * As we know we will need to emit tracking along the timeline, - * we embed the hooks into our request struct -- at the cost of - * having to have specialised no-allocation interfaces (which will - * be beneficial elsewhere). - * - * A second benefit to open-coding i915_request_await_request is - * that we can apply a slight variant of the rules specialised - * for timelines that jump between engines (such as virtual engines). - * If we consider the case of virtual engine, we must emit a dma-fence - * to prevent scheduling of the second request until the first is - * complete (to maximise our greedy late load balancing) and this - * precludes optimising to use semaphores serialisation of a single - * timeline across engines. - */ + GEM_BUG_ON(!is_parallel_rq(rq)); + + prev = request_to_parent(rq)->parallel.last_rq; + if (prev) { + if (!__i915_request_is_complete(prev)) { + i915_sw_fence_await_sw_fence(&rq->submit, + &prev->submit, + &rq->submitq); + + if (rq->engine->sched_engine->schedule) + __i915_sched_node_add_dependency(&rq->sched, + &prev->sched, + &rq->dep, + 0); + } + i915_request_put(prev); + } + + request_to_parent(rq)->parallel.last_rq = i915_request_get(rq); + + return to_request(__i915_active_fence_set(&timeline->last_request, + &rq->fence)); +} + +static struct i915_request * +__i915_request_ensure_ordering(struct i915_request *rq, + struct intel_timeline *timeline) +{ + struct i915_request *prev; + + GEM_BUG_ON(is_parallel_rq(rq)); + prev = to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); + if (prev && !__i915_request_is_complete(prev)) { bool uses_guc = intel_engine_uses_guc(rq->engine); + bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask | + rq->engine->mask); + bool same_context = prev->context == rq->context; /* * The requests are supposed to be kept in order. However, @@ -1559,13 +1633,11 @@ __i915_request_add_to_timeline(struct i915_request *rq) * is used as a barrier for external modification to this * context. */ - GEM_BUG_ON(prev->context == rq->context && + GEM_BUG_ON(same_context && i915_seqno_passed(prev->fence.seqno, rq->fence.seqno)); - if ((!uses_guc && - is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) || - (uses_guc && prev->context == rq->context)) + if ((same_context && uses_guc) || (!uses_guc && pow2)) i915_sw_fence_await_sw_fence(&rq->submit, &prev->submit, &rq->submitq); @@ -1580,6 +1652,50 @@ __i915_request_add_to_timeline(struct i915_request *rq) 0); } + return prev; +} + +static struct i915_request * +__i915_request_add_to_timeline(struct i915_request *rq) +{ + struct intel_timeline *timeline = i915_request_timeline(rq); + struct i915_request *prev; + + /* + * Dependency tracking and request ordering along the timeline + * is special cased so that we can eliminate redundant ordering + * operations while building the request (we know that the timeline + * itself is ordered, and here we guarantee it). + * + * As we know we will need to emit tracking along the timeline, + * we embed the hooks into our request struct -- at the cost of + * having to have specialised no-allocation interfaces (which will + * be beneficial elsewhere). + * + * A second benefit to open-coding i915_request_await_request is + * that we can apply a slight variant of the rules specialised + * for timelines that jump between engines (such as virtual engines). + * If we consider the case of virtual engine, we must emit a dma-fence + * to prevent scheduling of the second request until the first is + * complete (to maximise our greedy late load balancing) and this + * precludes optimising to use semaphores serialisation of a single + * timeline across engines. + * + * We do not order parallel submission requests on the timeline as each + * parallel submission context has its own timeline and the ordering + * rules for parallel requests are that they must be submitted in the + * order received from the execbuf IOCTL. So rather than using the + * timeline we store a pointer to last request submitted in the + * relationship in the gem context and insert a submission fence + * between that request and request passed into this function or + * alternatively we use completion fence if gem context has a single + * timeline and this is the first submission of an execbuf IOCTL. + */ + if (likely(!is_parallel_rq(rq))) + prev = __i915_request_ensure_ordering(rq, timeline); + else + prev = __i915_request_ensure_parallel_ordering(rq, timeline); + /* * Make sure that no request gazumped us - if it was allocated after * our i915_request_alloc() and called __i915_request_add() before @@ -1771,23 +1887,27 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) } /** - * i915_request_wait - wait until execution of request has finished + * i915_request_wait_timeout - wait until execution of request has finished * @rq: the request to wait upon * @flags: how to wait * @timeout: how long to wait in jiffies * - * i915_request_wait() waits for the request to be completed, for a + * i915_request_wait_timeout() waits for the request to be completed, for a * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an * unbounded wait). * * Returns the remaining time (in jiffies) if the request completed, which may - * be zero or -ETIME if the request is unfinished after the timeout expires. + * be zero if the request is unfinished after the timeout expires. + * If the timeout is 0, it will return 1 if the fence is signaled. + * * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is * pending before the request completes. + * + * NOTE: This function has the same wait semantics as dma-fence. */ -long i915_request_wait(struct i915_request *rq, - unsigned int flags, - long timeout) +long i915_request_wait_timeout(struct i915_request *rq, + unsigned int flags, + long timeout) { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; @@ -1797,7 +1917,7 @@ long i915_request_wait(struct i915_request *rq, GEM_BUG_ON(timeout < 0); if (dma_fence_is_signaled(&rq->fence)) - return timeout; + return timeout ?: 1; if (!timeout) return -ETIME; @@ -1835,7 +1955,7 @@ long i915_request_wait(struct i915_request *rq, * completion. That requires having a good predictor for the request * duration, which we currently lack. */ - if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && + if (CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT && __i915_spin_request(rq, state)) goto out; @@ -1906,6 +2026,39 @@ out: return timeout; } +/** + * i915_request_wait - wait until execution of request has finished + * @rq: the request to wait upon + * @flags: how to wait + * @timeout: how long to wait in jiffies + * + * i915_request_wait() waits for the request to be completed, for a + * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an + * unbounded wait). + * + * Returns the remaining time (in jiffies) if the request completed, which may + * be zero or -ETIME if the request is unfinished after the timeout expires. + * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is + * pending before the request completes. + * + * NOTE: This function behaves differently from dma-fence wait semantics for + * timeout = 0. It returns 0 on success, and -ETIME if not signaled. + */ +long i915_request_wait(struct i915_request *rq, + unsigned int flags, + long timeout) +{ + long ret = i915_request_wait_timeout(rq, flags, timeout); + + if (!ret) + return -ETIME; + + if (ret > 0 && !timeout) + return 0; + + return ret; +} + static int print_sched_attr(const struct i915_sched_attr *attr, char *buf, int x, int len) { |