author | Dave Airlie <airlied@redhat.com> | 2020-09-09 07:53:59 +1000
---|---|---
committer | Dave Airlie <airlied@redhat.com> | 2020-09-09 07:55:22 +1000
commit | 1f4b2aca794f7aeb918ed5f0d7221d68a81d6b43 (patch) |
tree | e7047fddc2e409049ca2748804fbe758dd780950 /drivers/gpu/drm/i915/gem |
parent | 61d98185b41c5ddc442fcfef7dd5a7b289ef69f5 (diff) |
parent | e0ee152fce25dc9269c7ea5280c98aa4b3682759 (diff) |
download | linux-1f4b2aca794f7aeb918ed5f0d7221d68a81d6b43.tar.bz2 |
Merge tag 'drm-intel-gt-next-2020-09-07' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
(Same content as drm-intel-gt-next-2020-09-04-3, S-o-b's added)
UAPI Changes:
(- Potential implicit changes from WW locking refactoring)
Cross-subsystem Changes:
(- WW locking changes should align the i915 locking more with others)
Driver Changes:
- MAJOR: Apply WW locking across the driver (Maarten)
- Reverts for 5 commits to make applying WW locking faster (Maarten)
- Disable preparser around invalidations on Tigerlake for non-RCS engines (Chris)
- Add missing dma_fence_put() for error case of syncobj timeline (Chris)
- Parse command buffer earlier in eb_relocate(slow) to facilitate backoff (Maarten)
- Pin engine before pinning all objects (Maarten)
- Rework intel_context pinning to do everything outside of pin_mutex (Maarten)
- Avoid tracking GEM context until registered (Cc: stable, Chris)
- Provide a fastpath for waiting on vma bindings (Chris)
- Fixes to preempt-to-busy mechanism (Chris)
- Distinguish the virtual breadcrumbs from the irq breadcrumbs (Chris)
- Switch to object allocations for page directories (Chris)
- Hold context/request reference while breadcrumbs are active (Chris)
- Make sure execbuffer always passes ww state to i915_vma_pin (Maarten)
- Code refactoring to facilitate use of WW locking (Maarten)
- Locking refactoring to use more granular locking (Maarten, Chris)
- Support for multiple pinned timelines per engine (Chris)
- Move complication of I915_GEM_THROTTLE to the ioctl from general code (Chris)
- Make active tracking/vma page-directory stash work preallocated (Chris)
- Avoid flushing submission tasklet too often (Chris)
- Reduce context termination list iteration guard to RCU (Chris)
- Reductions to locking contention (Chris)
- Fixes for issues found by CI (Chris)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <jlahtine@jlahtine-mobl.ger.corp.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200907130039.GA27766@jlahtine-mobl.ger.corp.intel.com
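
The recurring shape in the diff below is the i915_gem_ww_ctx acquire/backoff loop that the WW locking work introduces. A minimal sketch of that pattern, using the helper names exactly as they appear in this diff (the surrounding function and what it does with the object are hypothetical, for illustration only):

```c
/*
 * Sketch of the ww-locking retry loop applied throughout this merge.
 * Object locks taken under an i915_gem_ww_ctx are tracked, so on
 * -EDEADLK the helpers can drop every held lock, sleep on the
 * contended one, and retry the whole sequence in a consistent order.
 */
static int example_ww_locked_op(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true); /* true = interruptible */
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (err)
		goto out;

	/* ... pin and use the object while the ww context is held ... */

out:
	if (err == -EDEADLK) {
		/* Deadlock detected: back off, then reacquire from the top */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}
```

This is the same shape as clear_pages_worker(), context_barrier_task() and eb_relocate_parse() in the diff below; functions that may be called with the context already held instead take a struct i915_gem_ww_ctx * argument (i915_vma_pin_ww(), intel_context_pin_ww()).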
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
20 files changed, 1366 insertions(+), 815 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 278664f831e7..272cf3ea68d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -32,12 +32,13 @@ static void vma_clear_pages(struct i915_vma *vma) vma->pages = NULL; } -static int vma_bind(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void vma_bind(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { - return vm->vma_ops.bind_vma(vm, vma, cache_level, flags); + vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags); } static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) @@ -157,6 +158,7 @@ static void clear_pages_worker(struct work_struct *work) struct clear_pages_work *w = container_of(work, typeof(*w), work); struct drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; + struct i915_gem_ww_ctx ww; struct i915_request *rq; struct i915_vma *batch; int err = w->dma.error; @@ -172,17 +174,20 @@ static void clear_pages_worker(struct work_struct *work) obj->read_domains = I915_GEM_GPU_DOMAINS; obj->write_domain = 0; - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (unlikely(err)) + i915_gem_ww_ctx_init(&ww, false); + intel_engine_pm_get(w->ce->engine); +retry: + err = intel_context_pin_ww(w->ce, &ww); + if (err) goto out_signal; - batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); + batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value); if (IS_ERR(batch)) { err = PTR_ERR(batch); - goto out_unpin; + goto out_ctx; } - rq = intel_context_create_request(w->ce); + rq = i915_request_create(w->ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -224,9 +229,19 @@ out_request: i915_request_add(rq); out_batch: intel_emit_vma_release(w->ce, batch); -out_unpin: - i915_vma_unpin(vma); +out_ctx: + intel_context_unpin(w->ce); out_signal: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + i915_vma_unpin(w->sleeve->vma); + intel_engine_pm_put(w->ce->engine); + if (unlikely(err)) { dma_fence_set_error(&w->dma, err); dma_fence_signal(&w->dma); @@ -234,6 +249,44 @@ out_signal: } } +static int pin_wait_clear_pages_work(struct clear_pages_work *w, + struct intel_context *ce) +{ + struct i915_vma *vma = w->sleeve->vma; + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (err) + goto out; + + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); + if (unlikely(err)) + goto out; + + err = i915_sw_fence_await_reservation(&w->wait, + vma->obj->base.resv, NULL, + true, 0, I915_FENCE_GFP); + if (err) + goto err_unpin_vma; + + dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma); + +err_unpin_vma: + if (err) + i915_vma_unpin(vma); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + return err; +} + static int __i915_sw_fence_call clear_pages_work_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) @@ -287,17 +340,9 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); i915_sw_fence_init(&work->wait, clear_pages_work_notify); - i915_gem_object_lock(obj); - err = 
i915_sw_fence_await_reservation(&work->wait, - obj->base.resv, NULL, true, 0, - I915_FENCE_GFP); - if (err < 0) { + err = pin_wait_clear_pages_work(work, ce); + if (err < 0) dma_fence_set_error(&work->dma, err); - } else { - dma_resv_add_excl_fence(obj->base.resv, &work->dma); - err = 0; - } - i915_gem_object_unlock(obj); dma_fence_get(&work->dma); i915_sw_fence_commit(&work->wait); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index d0bdb6d447ed..cf5ecbde9e06 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -439,29 +439,36 @@ static bool __cancel_engine(struct intel_engine_cs *engine) return __reset_engine(engine); } -static struct intel_engine_cs *__active_engine(struct i915_request *rq) +static bool +__active_engine(struct i915_request *rq, struct intel_engine_cs **active) { struct intel_engine_cs *engine, *locked; + bool ret = false; /* * Serialise with __i915_request_submit() so that it sees * is-banned?, or we know the request is already inflight. + * + * Note that rq->engine is unstable, and so we double + * check that we have acquired the lock on the final engine. */ locked = READ_ONCE(rq->engine); spin_lock_irq(&locked->active.lock); while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { spin_unlock(&locked->active.lock); - spin_lock(&engine->active.lock); locked = engine; + spin_lock(&locked->active.lock); } - engine = NULL; - if (i915_request_is_active(rq) && rq->fence.error != -EIO) - engine = rq->engine; + if (!i915_request_completed(rq)) { + if (i915_request_is_active(rq) && rq->fence.error != -EIO) + *active = locked; + ret = true; + } spin_unlock_irq(&locked->active.lock); - return engine; + return ret; } static struct intel_engine_cs *active_engine(struct intel_context *ce) @@ -472,17 +479,16 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) if (!ce->timeline) return NULL; - mutex_lock(&ce->timeline->mutex); - list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { - if (i915_request_completed(rq)) - break; + rcu_read_lock(); + list_for_each_entry_rcu(rq, &ce->timeline->requests, link) { + if (i915_request_is_active(rq) && i915_request_completed(rq)) + continue; /* Check with the backend if the request is inflight */ - engine = __active_engine(rq); - if (engine) + if (__active_engine(rq, &engine)) break; } - mutex_unlock(&ce->timeline->mutex); + rcu_read_unlock(); return engine; } @@ -713,6 +719,7 @@ __create_context(struct drm_i915_private *i915) ctx->i915 = i915; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->link); spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); @@ -740,10 +747,6 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - spin_lock(&i915->gem.contexts.lock); - list_add_tail(&ctx->link, &i915->gem.contexts.list); - spin_unlock(&i915->gem.contexts.lock); - return ctx; err_free: @@ -889,7 +892,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { struct intel_timeline *timeline; - timeline = intel_timeline_create(&i915->gt, NULL); + timeline = intel_timeline_create(&i915->gt); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); @@ -931,6 +934,7 @@ static int gem_context_register(struct i915_gem_context 
*ctx, struct drm_i915_file_private *fpriv, u32 *id) { + struct drm_i915_private *i915 = ctx->i915; struct i915_address_space *vm; int ret; @@ -949,8 +953,16 @@ static int gem_context_register(struct i915_gem_context *ctx, /* And finally expose ourselves to userspace via the idr */ ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); if (ret) - put_pid(fetch_and_zero(&ctx->pid)); + goto err_pid; + + spin_lock(&i915->gem.contexts.lock); + list_add_tail(&ctx->link, &i915->gem.contexts.list); + spin_unlock(&i915->gem.contexts.lock); + return 0; + +err_pid: + put_pid(fetch_and_zero(&ctx->pid)); return ret; } @@ -1094,6 +1106,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); static int context_barrier_task(struct i915_gem_context *ctx, intel_engine_mask_t engines, bool (*skip)(struct intel_context *ce, void *data), + int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data), int (*emit)(struct i915_request *rq, void *data), void (*task)(void *data), void *data) @@ -1101,6 +1114,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, struct context_barrier_task *cb; struct i915_gem_engines_iter it; struct i915_gem_engines *e; + struct i915_gem_ww_ctx ww; struct intel_context *ce; int err = 0; @@ -1138,10 +1152,21 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (skip && skip(ce, data)) continue; - rq = intel_context_create_request(ce); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = intel_context_pin_ww(ce, &ww); + if (err) + goto err; + + if (pin) + err = pin(ce, &ww, data); + if (err) + goto err_unpin; + + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - break; + goto err_unpin; } err = 0; @@ -1151,6 +1176,16 @@ static int context_barrier_task(struct i915_gem_context *ctx, err = i915_active_add_request(&cb->base, rq); i915_request_add(rq); +err_unpin: + intel_context_unpin(ce); +err: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + if (err) break; } @@ -1206,6 +1241,17 @@ static void set_ppgtt_barrier(void *data) i915_vm_close(old); } +static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data) +{ + struct i915_address_space *vm = ce->vm; + + if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915)) + /* ppGTT is not part of the legacy context image */ + return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww); + + return 0; +} + static int emit_ppgtt_update(struct i915_request *rq, void *data) { struct i915_address_space *vm = rq->context->vm; @@ -1262,20 +1308,10 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) static bool skip_ppgtt_update(struct intel_context *ce, void *data) { - if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) - return true; - if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return false; - - if (!atomic_read(&ce->pin_count)) - return true; - - /* ppGTT is not part of the legacy context image */ - if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm))) - return true; - - return false; + return !ce->state; + else + return !atomic_read(&ce->pin_count); } static int set_ppgtt(struct drm_i915_file_private *file_priv, @@ -1326,6 +1362,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, */ err = context_barrier_task(ctx, ALL_ENGINES, skip_ppgtt_update, + pin_ppgtt_update, emit_ppgtt_update, set_ppgtt_barrier, old); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 2679380159fc..27fddc22a7c6 
100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire if (err) return err; - err = i915_gem_object_lock_interruptible(obj); + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out; @@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct if (err) return err; - err = i915_gem_object_lock_interruptible(obj); + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 7f76fc68f498..7c90a63c273d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -32,11 +32,17 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_framebuffer(obj)) return; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); __i915_gem_object_flush_for_display(obj); i915_gem_object_unlock(obj); } +void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj) +{ + if (i915_gem_object_is_framebuffer(obj)) + __i915_gem_object_flush_for_display(obj); +} + /** * Moves a single object to the WC read, and possibly write domain. * @obj: object to act on @@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (ret) return ret; - ret = i915_gem_object_lock_interruptible(obj); - if (ret) - return ret; - /* Always invalidate stale cachelines */ if (obj->cache_level != cache_level) { i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; } - i915_gem_object_unlock(obj); - /* The cache-level will be applied when each vma is rebound. */ return i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | @@ -293,7 +293,12 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, goto out; } + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + goto out; + ret = i915_gem_object_set_cache_level(obj, level); + i915_gem_object_unlock(obj); out: i915_gem_object_put(obj); @@ -313,6 +318,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, unsigned int flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_gem_ww_ctx ww; struct i915_vma *vma; int ret; @@ -320,6 +326,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(obj, &ww); + if (ret) + goto err; /* * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is @@ -334,7 +345,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, HAS_WT(i915) ? 
I915_CACHE_WT : I915_CACHE_NONE); if (ret) - return ERR_PTR(ret); + goto err; /* * As the user may map the buffer once pinned in the display plane @@ -347,18 +358,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma = ERR_PTR(-ENOSPC); if ((flags & PIN_MAPPABLE) == 0 && (!view || view->type == I915_GGTT_VIEW_NORMAL)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, - flags | - PIN_MAPPABLE | - PIN_NONBLOCK); - if (IS_ERR(vma)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); - if (IS_ERR(vma)) - return vma; + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment, + flags | PIN_MAPPABLE | + PIN_NONBLOCK); + if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, + alignment, flags); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - i915_gem_object_flush_if_display(obj); + i915_gem_object_flush_if_display_locked(obj); + +err: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + if (ret) + return ERR_PTR(ret); return vma; } @@ -536,7 +560,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out; - err = i915_gem_object_lock_interruptible(obj); + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out_unpin; @@ -576,19 +600,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_lock_interruptible(obj); - if (ret) - return ret; + assert_object_held(obj); ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); if (ret) - goto err_unlock; + return ret; ret = i915_gem_object_pin_pages(obj); if (ret) - goto err_unlock; + return ret; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || !static_cpu_has(X86_FEATURE_CLFLUSH)) { @@ -616,8 +638,6 @@ out: err_unpin: i915_gem_object_unpin_pages(obj); -err_unlock: - i915_gem_object_unlock(obj); return ret; } @@ -630,20 +650,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_lock_interruptible(obj); - if (ret) - return ret; + assert_object_held(obj); ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT); if (ret) - goto err_unlock; + return ret; ret = i915_gem_object_pin_pages(obj); if (ret) - goto err_unlock; + return ret; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || !static_cpu_has(X86_FEATURE_CLFLUSH)) { @@ -680,7 +698,5 @@ out: err_unpin: i915_gem_object_unpin_pages(obj); -err_unlock: - i915_gem_object_unlock(obj); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 322642fb765f..804339255df1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -41,11 +41,6 @@ struct eb_vma { u32 handle; }; -struct eb_vma_array { - struct kref kref; - struct eb_vma vma[]; -}; - enum { FORCE_CPU_RELOC = 1, FORCE_GTT_RELOC, @@ -58,9 +53,11 @@ enum { #define __EXEC_OBJECT_NEEDS_MAP BIT(29) #define __EXEC_OBJECT_NEEDS_BIAS BIT(28) #define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */ +#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_HAS_RELOC BIT(31) -#define __EXEC_INTERNAL_FLAGS (~0u 
<< 31) +#define __EXEC_ENGINE_PINNED BIT(30) +#define __EXEC_INTERNAL_FLAGS (~0u << 30) #define UPDATE PIN_OFFSET_FIXED #define BATCH_OFFSET_BIAS (256*1024) @@ -261,6 +258,8 @@ struct i915_execbuffer { /** list of vma that have execobj.relocation_count */ struct list_head relocs; + struct i915_gem_ww_ctx ww; + /** * Track the most recently used object for relocations, as we * frequently have to perform multiple relocations within the same @@ -276,19 +275,22 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; - struct i915_vma *target; struct i915_request *rq; - struct i915_vma *rq_vma; u32 *rq_cmd; unsigned int rq_size; + struct intel_gt_buffer_pool_node *pool; } reloc_cache; + struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ + struct intel_context *reloc_context; + u64 invalid_flags; /** Set of execobj.flags that are invalid */ u32 context_flags; /** Set of execobj.flags to insert from the ctx */ u32 batch_start_offset; /** Location within object of batch */ u32 batch_len; /** Length of batch within object */ u32 batch_flags; /** Flags composed for emit_bb_start() */ + struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ /** * Indicate either the size of the hastable used to resolve @@ -297,12 +299,16 @@ struct i915_execbuffer { */ int lut_size; struct hlist_head *buckets; /** ht for relocation handles */ - struct eb_vma_array *array; struct eb_fence *fences; unsigned long num_fences; }; +static int eb_parse(struct i915_execbuffer *eb); +static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, + bool throttle); +static void eb_unpin_engine(struct i915_execbuffer *eb); + static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { return intel_engine_requires_cmd_parser(eb->engine) || @@ -310,62 +316,8 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) eb->args->batch_len); } -static struct eb_vma_array *eb_vma_array_create(unsigned int count) -{ - struct eb_vma_array *arr; - - arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN); - if (!arr) - return NULL; - - kref_init(&arr->kref); - arr->vma[0].vma = NULL; - - return arr; -} - -static inline void eb_unreserve_vma(struct eb_vma *ev) -{ - struct i915_vma *vma = ev->vma; - - if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) - __i915_vma_unpin_fence(vma); - - if (ev->flags & __EXEC_OBJECT_HAS_PIN) - __i915_vma_unpin(vma); - - ev->flags &= ~(__EXEC_OBJECT_HAS_PIN | - __EXEC_OBJECT_HAS_FENCE); -} - -static void eb_vma_array_destroy(struct kref *kref) -{ - struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref); - struct eb_vma *ev = arr->vma; - - while (ev->vma) { - eb_unreserve_vma(ev); - i915_vma_put(ev->vma); - ev++; - } - - kvfree(arr); -} - -static void eb_vma_array_put(struct eb_vma_array *arr) -{ - kref_put(&arr->kref, eb_vma_array_destroy); -} - static int eb_create(struct i915_execbuffer *eb) { - /* Allocate an extra slot for use by the command parser + sentinel */ - eb->array = eb_vma_array_create(eb->buffer_count + 2); - if (!eb->array) - return -ENOMEM; - - eb->vma = eb->array->vma; - if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { unsigned int size = 1 + ilog2(eb->buffer_count); @@ -399,10 +351,8 @@ static int eb_create(struct i915_execbuffer *eb) break; } while (--size); - if (unlikely(!size)) { - eb_vma_array_put(eb->array); + if (unlikely(!size)) return -ENOMEM; - } eb->lut_size = size; } else { @@ -486,16 +436,17 @@ eb_pin_vma(struct i915_execbuffer *eb, 
pin_flags |= PIN_GLOBAL; /* Attempt to reuse the current location if available */ - if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) { + /* TODO: Add -EDEADLK handling here */ + if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) { if (entry->flags & EXEC_OBJECT_PINNED) return false; /* Failing that pick any _free_ space if suitable */ - if (unlikely(i915_vma_pin(vma, - entry->pad_to_size, - entry->alignment, - eb_pin_flags(entry, ev->flags) | - PIN_USER | PIN_NOEVICT))) + if (unlikely(i915_vma_pin_ww(vma, &eb->ww, + entry->pad_to_size, + entry->alignment, + eb_pin_flags(entry, ev->flags) | + PIN_USER | PIN_NOEVICT))) return false; } @@ -513,6 +464,19 @@ eb_pin_vma(struct i915_execbuffer *eb, return !eb_vma_misplaced(entry, vma, ev->flags); } +static inline void +eb_unreserve_vma(struct eb_vma *ev) +{ + if (!(ev->flags & __EXEC_OBJECT_HAS_PIN)) + return; + + if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) + __i915_vma_unpin_fence(ev->vma); + + __i915_vma_unpin(ev->vma); + ev->flags &= ~__EXEC_OBJECT_RESERVED; +} + static int eb_validate_vma(struct i915_execbuffer *eb, struct drm_i915_gem_exec_object2 *entry, @@ -604,16 +568,6 @@ eb_add_vma(struct i915_execbuffer *eb, eb->batch = ev; } - - if (eb_pin_vma(eb, entry, ev)) { - if (entry->offset != vma->node.start) { - entry->offset = vma->node.start | UPDATE; - eb->args->flags |= __EXEC_HAS_RELOC; - } - } else { - eb_unreserve_vma(ev); - list_add_tail(&ev->bind_link, &eb->unbound); - } } static inline int use_cpu_reloc(const struct reloc_cache *cache, @@ -633,7 +587,7 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache, obj->cache_level != I915_CACHE_NONE); } -static int eb_reserve_vma(const struct i915_execbuffer *eb, +static int eb_reserve_vma(struct i915_execbuffer *eb, struct eb_vma *ev, u64 pin_flags) { @@ -648,7 +602,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, return err; } - err = i915_vma_pin(vma, + err = i915_vma_pin_ww(vma, &eb->ww, entry->pad_to_size, entry->alignment, eb_pin_flags(entry, ev->flags) | pin_flags); if (err) @@ -698,10 +652,6 @@ static int eb_reserve(struct i915_execbuffer *eb) * This avoid unnecessary unbinding of later objects in order to make * room for the earlier objects *unless* we need to defragment. 
*/ - - if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex)) - return -EINTR; - pass = 0; do { list_for_each_entry(ev, &eb->unbound, bind_link) { @@ -709,8 +659,8 @@ static int eb_reserve(struct i915_execbuffer *eb) if (err) break; } - if (!(err == -ENOSPC || err == -EAGAIN)) - break; + if (err != -ENOSPC) + return err; /* Resort *all* the objects into priority order */ INIT_LIST_HEAD(&eb->unbound); @@ -740,13 +690,6 @@ static int eb_reserve(struct i915_execbuffer *eb) } list_splice_tail(&last, &eb->unbound); - if (err == -EAGAIN) { - mutex_unlock(&eb->i915->drm.struct_mutex); - flush_workqueue(eb->i915->mm.userptr_wq); - mutex_lock(&eb->i915->drm.struct_mutex); - continue; - } - switch (pass++) { case 0: break; @@ -757,20 +700,15 @@ static int eb_reserve(struct i915_execbuffer *eb) err = i915_gem_evict_vm(eb->context->vm); mutex_unlock(&eb->context->vm->mutex); if (err) - goto unlock; + return err; break; default: - err = -ENOSPC; - goto unlock; + return -ENOSPC; } pin_flags = PIN_USER; } while (1); - -unlock: - mutex_unlock(&eb->i915->drm.struct_mutex); - return err; } static unsigned int eb_batch_index(const struct i915_execbuffer *eb) @@ -893,12 +831,12 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) static int eb_lookup_vmas(struct i915_execbuffer *eb) { + struct drm_i915_private *i915 = eb->i915; unsigned int batch = eb_batch_index(eb); unsigned int i; int err = 0; INIT_LIST_HEAD(&eb->relocs); - INIT_LIST_HEAD(&eb->unbound); for (i = 0; i < eb->buffer_count; i++) { struct i915_vma *vma; @@ -906,22 +844,83 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) vma = eb_lookup_vma(eb, eb->exec[i].handle); if (IS_ERR(vma)) { err = PTR_ERR(vma); - break; + goto err; } err = eb_validate_vma(eb, &eb->exec[i], vma); if (unlikely(err)) { i915_vma_put(vma); - break; + goto err; } eb_add_vma(eb, i, batch, vma); } + if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) { + drm_dbg(&i915->drm, + "Attempting to use self-modifying batch buffer\n"); + return -EINVAL; + } + + if (range_overflows_t(u64, + eb->batch_start_offset, eb->batch_len, + eb->batch->vma->size)) { + drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); + return -EINVAL; + } + + if (eb->batch_len == 0) + eb->batch_len = eb->batch->vma->size - eb->batch_start_offset; + + return 0; + +err: eb->vma[i].vma = NULL; return err; } +static int eb_validate_vmas(struct i915_execbuffer *eb) +{ + unsigned int i; + int err; + + INIT_LIST_HEAD(&eb->unbound); + + for (i = 0; i < eb->buffer_count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + + err = i915_gem_object_lock(vma->obj, &eb->ww); + if (err) + return err; + + if (eb_pin_vma(eb, entry, ev)) { + if (entry->offset != vma->node.start) { + entry->offset = vma->node.start | UPDATE; + eb->args->flags |= __EXEC_HAS_RELOC; + } + } else { + eb_unreserve_vma(ev); + + list_add_tail(&ev->bind_link, &eb->unbound); + if (drm_mm_node_allocated(&vma->node)) { + err = i915_vma_unbind(vma); + if (err) + return err; + } + } + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && + eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); + } + + if (!list_empty(&eb->unbound)) + return eb_reserve(eb); + + return 0; +} + static struct eb_vma * eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) { @@ -942,13 +941,31 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } +static void eb_release_vmas(struct i915_execbuffer *eb, bool final) +{ + const 
unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + + if (!vma) + break; + + eb_unreserve_vma(ev); + + if (final) + i915_vma_put(vma); + } + + eb_unpin_engine(eb); +} + static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); - if (eb->array) - eb_vma_array_put(eb->array); - if (eb->lut_size > 0) kfree(eb->buckets); } @@ -960,6 +977,14 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return gen8_canonical_addr((int)reloc->delta + target->node.start); } +static void reloc_cache_clear(struct reloc_cache *cache) +{ + cache->rq = NULL; + cache->rq_cmd = NULL; + cache->pool = NULL; + cache->rq_size = 0; +} + static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { @@ -972,8 +997,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - cache->rq = NULL; - cache->target = NULL; + reloc_cache_clear(cache); } static inline void *unmask_page(unsigned long p) @@ -995,132 +1019,60 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } -#define RELOC_TAIL 4 - -static int reloc_gpu_chain(struct reloc_cache *cache) +static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache) { - struct intel_gt_buffer_pool_node *pool; - struct i915_request *rq = cache->rq; - struct i915_vma *batch; - u32 *cmd; - int err; - - pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE); - if (IS_ERR(pool)) - return PTR_ERR(pool); - - batch = i915_vma_instance(pool->obj, rq->context->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_pool; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); - if (err) - goto out_pool; - - GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32)); - cmd = cache->rq_cmd + cache->rq_size; - *cmd++ = MI_ARB_CHECK; - if (cache->gen >= 8) - *cmd++ = MI_BATCH_BUFFER_START_GEN8; - else if (cache->gen >= 6) - *cmd++ = MI_BATCH_BUFFER_START; - else - *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cmd++ = lower_32_bits(batch->node.start); - *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */ - i915_gem_object_flush_map(cache->rq_vma->obj); - i915_gem_object_unpin_map(cache->rq_vma->obj); - cache->rq_vma = NULL; - - err = intel_gt_buffer_pool_mark_active(pool, rq); - if (err == 0) { - i915_vma_lock(batch); - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); - } - i915_vma_unpin(batch); - if (err) - goto out_pool; - - cmd = i915_gem_object_pin_map(batch->obj, - cache->has_llc ? - I915_MAP_FORCE_WB : - I915_MAP_FORCE_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_pool; - } - - /* Return with batch mapping (cmd) still pinned */ - cache->rq_cmd = cmd; - cache->rq_size = 0; - cache->rq_vma = batch; - -out_pool: - intel_gt_buffer_pool_put(pool); - return err; -} + if (!cache->pool) + return; -static unsigned int reloc_bb_flags(const struct reloc_cache *cache) -{ - return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE; + /* + * This is a bit nasty, normally we keep objects locked until the end + * of execbuffer, but we already submit this, and have to unlock before + * dropping the reference. 
Fortunately we can only hold 1 pool node at + * a time, so this should be harmless. + */ + i915_gem_ww_unlock_single(cache->pool->obj); + intel_gt_buffer_pool_put(cache->pool); + cache->pool = NULL; } -static int reloc_gpu_flush(struct reloc_cache *cache) +static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache) { - struct i915_request *rq; - int err; + struct drm_i915_gem_object *obj = cache->rq->batch->obj; - rq = fetch_and_zero(&cache->rq); - if (!rq) - return 0; - - if (cache->rq_vma) { - struct drm_i915_gem_object *obj = cache->rq_vma->obj; + GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); + cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END; - - __i915_gem_object_flush_map(obj, - 0, sizeof(u32) * cache->rq_size); - i915_gem_object_unpin_map(obj); - } + __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); + i915_gem_object_unpin_map(obj); - err = 0; - if (rq->engine->emit_init_breadcrumb) - err = rq->engine->emit_init_breadcrumb(rq); - if (!err) - err = rq->engine->emit_bb_start(rq, - rq->batch->node.start, - PAGE_SIZE, - reloc_bb_flags(cache)); - if (err) - i915_request_set_error_once(rq, err); + intel_gt_chipset_flush(cache->rq->engine->gt); - intel_gt_chipset_flush(rq->engine->gt); - i915_request_add(rq); + i915_request_add(cache->rq); + reloc_cache_put_pool(eb, cache); + reloc_cache_clear(cache); - return err; + eb->reloc_pool = NULL; } -static void reloc_cache_reset(struct reloc_cache *cache) +static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { void *vaddr; + if (cache->rq) + reloc_gpu_flush(eb, cache); + if (!cache->vaddr) return; vaddr = unmask_page(cache->vaddr); if (cache->vaddr & KMAP) { + struct drm_i915_gem_object *obj = + (struct drm_i915_gem_object *)cache->node.mm; if (cache->vaddr & CLFLUSH_AFTER) mb(); kunmap_atomic(vaddr); - i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); + i915_gem_object_finish_access(obj); } else { struct i915_ggtt *ggtt = cache_to_ggtt(cache); @@ -1145,9 +1097,10 @@ static void reloc_cache_reset(struct reloc_cache *cache) static void *reloc_kmap(struct drm_i915_gem_object *obj, struct reloc_cache *cache, - unsigned long page) + unsigned long pageno) { void *vaddr; + struct page *page; if (cache->vaddr) { kunmap_atomic(unmask_page(cache->vaddr)); @@ -1168,17 +1121,22 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, mb(); } - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); + page = i915_gem_object_get_page(obj, pageno); + if (!obj->mm.dirty) + set_page_dirty(page); + + vaddr = kmap_atomic(page); cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; - cache->page = page; + cache->page = pageno; return vaddr; } static void *reloc_iomap(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, + struct i915_execbuffer *eb, unsigned long page) { + struct reloc_cache *cache = &eb->reloc_cache; struct i915_ggtt *ggtt = cache_to_ggtt(cache); unsigned long offset; void *vaddr; @@ -1196,16 +1154,17 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (use_cpu_reloc(cache, obj)) return NULL; - i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); if (err) return ERR_PTR(err); - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK /* NOWARN */ | - PIN_NOEVICT); + vma = 
i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); + if (vma == ERR_PTR(-EDEADLK)) + return vma; + if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); mutex_lock(&ggtt->vm.mutex); @@ -1241,9 +1200,10 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, } static void *reloc_vaddr(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, + struct i915_execbuffer *eb, unsigned long page) { + struct reloc_cache *cache = &eb->reloc_cache; void *vaddr; if (cache->page == page) { @@ -1251,7 +1211,7 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj, } else { vaddr = NULL; if ((cache->vaddr & KMAP) == 0) - vaddr = reloc_iomap(obj, cache, page); + vaddr = reloc_iomap(obj, eb, page); if (!vaddr) vaddr = reloc_kmap(obj, cache, page); } @@ -1287,7 +1247,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) struct drm_i915_gem_object *obj = vma->obj; int err; - i915_vma_lock(vma); + assert_vma_held(vma); if (obj->cache_dirty & ~obj->cache_coherent) i915_gem_clflush_object(obj, 0); @@ -1297,25 +1257,31 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - return err; } static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine, + struct i915_vma *vma, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct intel_gt_buffer_pool_node *pool; + struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); - if (IS_ERR(pool)) - return PTR_ERR(pool); + if (!pool) { + pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); + } + eb->reloc_pool = NULL; + + err = i915_gem_object_lock(pool->obj, &eb->ww); + if (err) + goto err_pool; cmd = i915_gem_object_pin_map(pool->obj, cache->has_llc ? 
@@ -1323,35 +1289,42 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, I915_MAP_FORCE_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out_pool; + goto err_pool; } - batch = i915_vma_instance(pool->obj, eb->context->vm, NULL); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_unmap; } - err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); + err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK); if (err) goto err_unmap; if (engine == eb->context->engine) { rq = i915_request_create(eb->context); } else { - struct intel_context *ce; + struct intel_context *ce = eb->reloc_context; - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto err_unpin; + if (!ce) { + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err_unpin; + } + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(eb->context->vm); + eb->reloc_context = ce; } - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(eb->context->vm); + err = intel_context_pin_ww(ce, &eb->ww); + if (err) + goto err_unpin; - rq = intel_context_create_request(ce); - intel_context_put(ce); + rq = i915_request_create(ce); + intel_context_unpin(ce); } if (IS_ERR(rq)) { err = PTR_ERR(rq); @@ -1362,11 +1335,20 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_request; - i915_vma_lock(batch); + err = reloc_move_to_gpu(rq, vma); + if (err) + goto err_request; + + err = eb->engine->emit_bb_start(rq, + batch->node.start, PAGE_SIZE, + cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); + if (err) + goto skip_request; + + assert_vma_held(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); if (err) goto skip_request; @@ -1376,10 +1358,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq = rq; cache->rq_cmd = cmd; cache->rq_size = 0; - cache->rq_vma = batch; + cache->pool = pool; /* Return with batch mapping (cmd) still pinned */ - goto out_pool; + return 0; skip_request: i915_request_set_error_once(rq, err); @@ -1389,8 +1371,8 @@ err_unpin: i915_vma_unpin(batch); err_unmap: i915_gem_object_unpin_map(pool->obj); -out_pool: - intel_gt_buffer_pool_put(pool); +err_pool: + eb->reloc_pool = pool; return err; } @@ -1405,9 +1387,12 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, { struct reloc_cache *cache = &eb->reloc_cache; u32 *cmd; - int err; + + if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) + reloc_gpu_flush(eb, cache); if (unlikely(!cache->rq)) { + int err; struct intel_engine_cs *engine = eb->engine; if (!reloc_can_use_engine(engine)) { @@ -1416,31 +1401,11 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, return ERR_PTR(-ENODEV); } - err = __reloc_gpu_alloc(eb, engine, len); + err = __reloc_gpu_alloc(eb, engine, vma, len); if (unlikely(err)) return ERR_PTR(err); } - if (vma != cache->target) { - err = reloc_move_to_gpu(cache->rq, vma); - if (unlikely(err)) { - i915_request_set_error_once(cache->rq, err); - return ERR_PTR(err); - } - - cache->target = vma; - } - - if (unlikely(cache->rq_size + len > - PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) { - err = reloc_gpu_chain(cache); - if (unlikely(err)) { - i915_request_set_error_once(cache->rq, err); - return ERR_PTR(err); - } - } - - GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32)); cmd = cache->rq_cmd + cache->rq_size; cache->rq_size += len; @@ -1490,7 +1455,9 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb, len = 
3; batch = reloc_gpu(eb, vma, len); - if (IS_ERR(batch)) + if (batch == ERR_PTR(-EDEADLK)) + return (s64)-EDEADLK; + else if (IS_ERR(batch)) return false; addr = gen8_canonical_addr(vma->node.start + offset); @@ -1543,7 +1510,7 @@ static bool __reloc_entry_gpu(struct i915_execbuffer *eb, return true; } -static bool reloc_entry_gpu(struct i915_execbuffer *eb, +static int reloc_entry_gpu(struct i915_execbuffer *eb, struct i915_vma *vma, u64 offset, u64 target_addr) @@ -1565,14 +1532,17 @@ relocate_entry(struct i915_vma *vma, { u64 target_addr = relocation_target(reloc, target); u64 offset = reloc->offset; + int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr); - if (!reloc_entry_gpu(eb, vma, offset, target_addr)) { + if (reloc_gpu < 0) + return reloc_gpu; + + if (!reloc_gpu) { bool wide = eb->reloc_cache.use_64bit_reloc; void *vaddr; repeat: - vaddr = reloc_vaddr(vma->obj, - &eb->reloc_cache, + vaddr = reloc_vaddr(vma->obj, eb, offset >> PAGE_SHIFT); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -1723,7 +1693,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) * we would try to acquire the struct mutex again. Obviously * this is bad and so lockdep complains vehemently. */ - copied = __copy_from_user(r, urelocs, count * sizeof(r[0])); + pagefault_disable(); + copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); + pagefault_enable(); if (unlikely(copied)) { remain = -EFAULT; goto out; @@ -1767,74 +1739,400 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) urelocs += ARRAY_SIZE(stack); } while (remain); out: - reloc_cache_reset(&eb->reloc_cache); + reloc_cache_reset(&eb->reloc_cache, eb); return remain; } -static int eb_relocate(struct i915_execbuffer *eb) +static int +eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev) { + const struct drm_i915_gem_exec_object2 *entry = ev->exec; + struct drm_i915_gem_relocation_entry *relocs = + u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + unsigned int i; int err; - err = eb_lookup_vmas(eb); - if (err) - return err; + for (i = 0; i < entry->relocation_count; i++) { + u64 offset = eb_relocate_entry(eb, ev, &relocs[i]); + + if ((s64)offset < 0) { + err = (int)offset; + goto err; + } + } + err = 0; +err: + reloc_cache_reset(&eb->reloc_cache, eb); + return err; +} + +static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) +{ + const char __user *addr, *end; + unsigned long size; + char __maybe_unused c; + + size = entry->relocation_count; + if (size == 0) + return 0; - if (!list_empty(&eb->unbound)) { - err = eb_reserve(eb); + if (size > N_RELOC(ULONG_MAX)) + return -EINVAL; + + addr = u64_to_user_ptr(entry->relocs_ptr); + size *= sizeof(struct drm_i915_gem_relocation_entry); + if (!access_ok(addr, size)) + return -EFAULT; + + end = addr + size; + for (; addr < end; addr += PAGE_SIZE) { + int err = __get_user(c, addr); if (err) return err; } + return __get_user(c, end - 1); +} - /* The objects are in their final locations, apply the relocations. 
*/ - if (eb->args->flags & __EXEC_HAS_RELOC) { - struct eb_vma *ev; - int flush; +static int eb_copy_relocations(const struct i915_execbuffer *eb) +{ + struct drm_i915_gem_relocation_entry *relocs; + const unsigned int count = eb->buffer_count; + unsigned int i; + int err; - list_for_each_entry(ev, &eb->relocs, reloc_link) { + for (i = 0; i < count; i++) { + const unsigned int nreloc = eb->exec[i].relocation_count; + struct drm_i915_gem_relocation_entry __user *urelocs; + unsigned long size; + unsigned long copied; + + if (nreloc == 0) + continue; + + err = check_relocations(&eb->exec[i]); + if (err) + goto err; + + urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); + size = nreloc * sizeof(*relocs); + + relocs = kvmalloc_array(size, 1, GFP_KERNEL); + if (!relocs) { + err = -ENOMEM; + goto err; + } + + /* copy_from_user is limited to < 4GiB */ + copied = 0; + do { + unsigned int len = + min_t(u64, BIT_ULL(31), size - copied); + + if (__copy_from_user((char *)relocs + copied, + (char __user *)urelocs + copied, + len)) + goto end; + + copied += len; + } while (copied < size); + + /* + * As we do not update the known relocation offsets after + * relocating (due to the complexities in lock handling), + * we need to mark them as invalid now so that we force the + * relocation processing next time. Just in case the target + * object is evicted and then rebound into its old + * presumed_offset before the next execbuffer - if that + * happened we would make the mistake of assuming that the + * relocations were valid. + */ + if (!user_access_begin(urelocs, size)) + goto end; + + for (copied = 0; copied < nreloc; copied++) + unsafe_put_user(-1, + &urelocs[copied].presumed_offset, + end_user); + user_access_end(); + + eb->exec[i].relocs_ptr = (uintptr_t)relocs; + } + + return 0; + +end_user: + user_access_end(); +end: + kvfree(relocs); + err = -EFAULT; +err: + while (i--) { + relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); + if (eb->exec[i].relocation_count) + kvfree(relocs); + } + return err; +} + +static int eb_prefault_relocations(const struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + int err; + + err = check_relocations(&eb->exec[i]); + if (err) + return err; + } + + return 0; +} + +static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb, + struct i915_request *rq) +{ + bool have_copy = false; + struct eb_vma *ev; + int err = 0; + +repeat: + if (signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } + + /* We may process another execbuffer during the unlock... */ + eb_release_vmas(eb, false); + i915_gem_ww_ctx_fini(&eb->ww); + + if (rq) { + /* nonblocking is always false */ + if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) { + i915_request_put(rq); + rq = NULL; + + err = -EINTR; + goto err_relock; + } + + i915_request_put(rq); + rq = NULL; + } + + /* + * We take 3 passes through the slowpatch. + * + * 1 - we try to just prefault all the user relocation entries and + * then attempt to reuse the atomic pagefault disabled fast path again. + * + * 2 - we copy the user entries to a local buffer here outside of the + * local and allow ourselves to wait upon any rendering before + * relocations + * + * 3 - we already have a local copy of the relocation entries, but + * were interrupted (EAGAIN) whilst waiting for the objects, try again. 
+ */ + if (!err) { + err = eb_prefault_relocations(eb); + } else if (!have_copy) { + err = eb_copy_relocations(eb); + have_copy = err == 0; + } else { + cond_resched(); + err = 0; + } + + if (!err) + flush_workqueue(eb->i915->mm.userptr_wq); + +err_relock: + i915_gem_ww_ctx_init(&eb->ww, true); + if (err) + goto out; + + /* reacquire the objects */ +repeat_validate: + rq = eb_pin_engine(eb, false); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + rq = NULL; + goto err; + } + + /* We didn't throttle, should be NULL */ + GEM_WARN_ON(rq); + + err = eb_validate_vmas(eb); + if (err) + goto err; + + GEM_BUG_ON(!eb->batch); + + list_for_each_entry(ev, &eb->relocs, reloc_link) { + if (!have_copy) { + pagefault_disable(); err = eb_relocate_vma(eb, ev); + pagefault_enable(); + if (err) + break; + } else { + err = eb_relocate_vma_slow(eb, ev); if (err) break; } + } + + if (err == -EDEADLK) + goto err; + + if (err && !have_copy) + goto repeat; + + if (err) + goto err; + + /* as last step, parse the command buffer */ + err = eb_parse(eb); + if (err) + goto err; + + /* + * Leave the user relocations as are, this is the painfully slow path, + * and we want to avoid the complication of dropping the lock whilst + * having buffers reserved in the aperture and so causing spurious + * ENOSPC for random operations. + */ - flush = reloc_gpu_flush(&eb->reloc_cache); +err: + if (err == -EDEADLK) { + eb_release_vmas(eb, false); + err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) - err = flush; + goto repeat_validate; } + if (err == -EAGAIN) + goto repeat; + +out: + if (have_copy) { + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + const struct drm_i915_gem_exec_object2 *entry = + &eb->exec[i]; + struct drm_i915_gem_relocation_entry *relocs; + + if (!entry->relocation_count) + continue; + + relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + kvfree(relocs); + } + } + + if (rq) + i915_request_put(rq); + return err; } -static int eb_move_to_gpu(struct i915_execbuffer *eb) +static int eb_relocate_parse(struct i915_execbuffer *eb) { - const unsigned int count = eb->buffer_count; - struct ww_acquire_ctx acquire; - unsigned int i; - int err = 0; + int err; + struct i915_request *rq = NULL; + bool throttle = true; - ww_acquire_init(&acquire, &reservation_ww_class); +retry: + rq = eb_pin_engine(eb, throttle); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + rq = NULL; + if (err != -EDEADLK) + return err; - for (i = 0; i < count; i++) { - struct eb_vma *ev = &eb->vma[i]; - struct i915_vma *vma = ev->vma; + goto err; + } + + if (rq) { + bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; + + /* Need to drop all locks now for throttling, take slowpath */ + err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0); + if (err == -ETIME) { + if (nonblock) { + err = -EWOULDBLOCK; + i915_request_put(rq); + goto err; + } + goto slow; + } + i915_request_put(rq); + rq = NULL; + } - err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire); - if (err == -EDEADLK) { - GEM_BUG_ON(i == 0); - do { - int j = i - 1; + /* only throttle once, even if we didn't need to throttle */ + throttle = false; - ww_mutex_unlock(&eb->vma[j].vma->resv->lock); + err = eb_validate_vmas(eb); + if (err == -EAGAIN) + goto slow; + else if (err) + goto err; - swap(eb->vma[i], eb->vma[j]); - } while (--i); + /* The objects are in their final locations, apply the relocations. 
*/ + if (eb->args->flags & __EXEC_HAS_RELOC) { + struct eb_vma *ev; - err = ww_mutex_lock_slow_interruptible(&vma->resv->lock, - &acquire); + list_for_each_entry(ev, &eb->relocs, reloc_link) { + err = eb_relocate_vma(eb, ev); + if (err) + break; } - if (err) - break; + + if (err == -EDEADLK) + goto err; + else if (err) + goto slow; + } + + if (!err) + err = eb_parse(eb); + +err: + if (err == -EDEADLK) { + eb_release_vmas(eb, false); + err = i915_gem_ww_ctx_backoff(&eb->ww); + if (!err) + goto retry; } - ww_acquire_done(&acquire); + + return err; + +slow: + err = eb_relocate_parse_slow(eb, rq); + if (err) + /* + * If the user expects the execobject.offset and + * reloc.presumed_offset to be an exact match, + * as for using NO_RELOC, then we cannot update + * the execobject.offset until we have completed + * relocation. + */ + eb->args->flags &= ~__EXEC_HAS_RELOC; + + return err; +} + +static int eb_move_to_gpu(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i = count; + int err = 0; while (i--) { struct eb_vma *ev = &eb->vma[i]; @@ -1879,13 +2177,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (err == 0) err = i915_vma_move_to_active(vma, eb->request, flags); - - i915_vma_unlock(vma); - eb_unreserve_vma(ev); } - ww_acquire_fini(&acquire); - - eb_vma_array_put(fetch_and_zero(&eb->array)); if (unlikely(err)) goto err_skip; @@ -1950,7 +2242,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) } static struct i915_vma * -shadow_batch_pin(struct drm_i915_gem_object *obj, +shadow_batch_pin(struct i915_execbuffer *eb, + struct drm_i915_gem_object *obj, struct i915_address_space *vm, unsigned int flags) { @@ -1961,7 +2254,7 @@ shadow_batch_pin(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return vma; - err = i915_vma_pin(vma, 0, 0, flags); + err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags); if (err) return ERR_PTR(err); @@ -2013,7 +2306,7 @@ __parser_mark_active(struct i915_vma *vma, { struct intel_gt_buffer_pool_node *node = vma->private; - return i915_active_ref(&node->active, tl, fence); + return i915_active_ref(&node->active, tl->fence_context, fence); } static int @@ -2077,36 +2370,26 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, if (err) goto err_commit; - err = dma_resv_lock_interruptible(pw->batch->resv, NULL); - if (err) - goto err_commit; - err = dma_resv_reserve_shared(pw->batch->resv, 1); if (err) - goto err_commit_unlock; + goto err_commit; /* Wait for all writes (and relocs) into the batch to complete */ err = i915_sw_fence_await_reservation(&pw->base.chain, pw->batch->resv, NULL, false, 0, I915_FENCE_GFP); if (err < 0) - goto err_commit_unlock; + goto err_commit; /* Keep the batch alive and unwritten as we parse */ dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - dma_resv_unlock(pw->batch->resv); - /* Force execution to wait for completion of the parser */ - dma_resv_lock(shadow->resv, NULL); dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - dma_resv_unlock(shadow->resv); dma_fence_work_commit_imm(&pw->base); return 0; -err_commit_unlock: - dma_resv_unlock(pw->batch->resv); err_commit: i915_sw_fence_set_error_once(&pw->base.chain, err); dma_fence_work_commit_imm(&pw->base); @@ -2121,16 +2404,33 @@ err_free: return err; } +static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) +{ + /* + * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure + * batch" bit. Hence we need to pin secure batches into the global gtt. 
+ * hsw should have this fixed, but bdw mucks it up again. */ + if (eb->batch_flags & I915_DISPATCH_SECURE) + return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0); + + return NULL; +} + static int eb_parse(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *shadow, *trampoline; + struct intel_gt_buffer_pool_node *pool = eb->batch_pool; + struct i915_vma *shadow, *trampoline, *batch; unsigned int len; int err; - if (!eb_use_cmdparser(eb)) - return 0; + if (!eb_use_cmdparser(eb)) { + batch = eb_dispatch_secure(eb, eb->batch->vma); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + goto secure_batch; + } len = eb->batch_len; if (!CMDPARSER_USES_GGTT(eb->i915)) { @@ -2147,11 +2447,18 @@ static int eb_parse(struct i915_execbuffer *eb) len += I915_CMD_PARSER_TRAMPOLINE_SIZE; } - pool = intel_gt_get_buffer_pool(eb->engine->gt, len); - if (IS_ERR(pool)) - return PTR_ERR(pool); + if (!pool) { + pool = intel_gt_get_buffer_pool(eb->engine->gt, len); + if (IS_ERR(pool)) + return PTR_ERR(pool); + eb->batch_pool = pool; + } - shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER); + err = i915_gem_object_lock(pool->obj, &eb->ww); + if (err) + goto err; + + shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER); if (IS_ERR(shadow)) { err = PTR_ERR(shadow); goto err; @@ -2163,7 +2470,7 @@ static int eb_parse(struct i915_execbuffer *eb) if (CMDPARSER_USES_GGTT(eb->i915)) { trampoline = shadow; - shadow = shadow_batch_pin(pool->obj, + shadow = shadow_batch_pin(eb, pool->obj, &eb->engine->gt->ggtt->vm, PIN_GLOBAL); if (IS_ERR(shadow)) { @@ -2176,42 +2483,43 @@ static int eb_parse(struct i915_execbuffer *eb) eb->batch_flags |= I915_DISPATCH_SECURE; } + batch = eb_dispatch_secure(eb, shadow); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_trampoline; + } + err = eb_parse_pipeline(eb, shadow, trampoline); if (err) - goto err_trampoline; + goto err_unpin_batch; - eb->vma[eb->buffer_count].vma = i915_vma_get(shadow); - eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN; eb->batch = &eb->vma[eb->buffer_count++]; - eb->vma[eb->buffer_count].vma = NULL; + eb->batch->vma = i915_vma_get(shadow); + eb->batch->flags = __EXEC_OBJECT_HAS_PIN; eb->trampoline = trampoline; eb->batch_start_offset = 0; +secure_batch: + if (batch) { + eb->batch = &eb->vma[eb->buffer_count++]; + eb->batch->flags = __EXEC_OBJECT_HAS_PIN; + eb->batch->vma = i915_vma_get(batch); + } return 0; +err_unpin_batch: + if (batch) + i915_vma_unpin(batch); err_trampoline: if (trampoline) i915_vma_unpin(trampoline); err_shadow: i915_vma_unpin(shadow); err: - intel_gt_buffer_pool_put(pool); return err; } -static void -add_to_client(struct i915_request *rq, struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - - rq->file_priv = file_priv; - - spin_lock(&file_priv->mm.lock); - list_add_tail(&rq->client_link, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); -} - static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) { int err; @@ -2293,7 +2601,7 @@ static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_VEBOX] = VECS0 }; -static struct i915_request *eb_throttle(struct intel_context *ce) +static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce) { struct intel_ring *ring = ce->ring; struct intel_timeline *tl = ce->timeline; @@ -2327,31 +2635,26 @@ static struct i915_request *eb_throttle(struct intel_context *ce) 
return i915_request_get(rq); } -static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) +static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle) { + struct intel_context *ce = eb->context; struct intel_timeline *tl; - struct i915_request *rq; + struct i915_request *rq = NULL; int err; - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - err = intel_gt_terminally_wedged(ce->engine->gt); - if (err) - return err; + GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); if (unlikely(intel_context_is_banned(ce))) - return -EIO; + return ERR_PTR(-EIO); /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - err = intel_context_pin(ce); + err = intel_context_pin_ww(ce, &eb->ww); if (err) - return err; + return ERR_PTR(err); /* * Take a local wakeref for preparing to dispatch the execbuf as @@ -2363,45 +2666,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) */ tl = intel_context_timeline_lock(ce); if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto err_unpin; + intel_context_unpin(ce); + return ERR_CAST(tl); } intel_context_enter(ce); - rq = eb_throttle(ce); - + if (throttle) + rq = eb_throttle(eb, ce); intel_context_timeline_unlock(tl); - if (rq) { - bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; - long timeout; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (nonblock) - timeout = 0; - - timeout = i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - timeout); - i915_request_put(rq); - - if (timeout < 0) { - err = nonblock ? -EWOULDBLOCK : timeout; - goto err_exit; - } - } - - eb->engine = ce->engine; - eb->context = ce; - return 0; - -err_exit: - mutex_lock(&tl->mutex); - intel_context_exit(ce); - intel_context_timeline_unlock(tl); -err_unpin: - intel_context_unpin(ce); - return err; + eb->args->flags |= __EXEC_ENGINE_PINNED; + return rq; } static void eb_unpin_engine(struct i915_execbuffer *eb) @@ -2409,6 +2684,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) struct intel_context *ce = eb->context; struct intel_timeline *tl = ce->timeline; + if (!(eb->args->flags & __EXEC_ENGINE_PINNED)) + return; + + eb->args->flags &= ~__EXEC_ENGINE_PINNED; + mutex_lock(&tl->mutex); intel_context_exit(ce); mutex_unlock(&tl->mutex); @@ -2417,11 +2697,10 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) } static unsigned int -eb_select_legacy_ring(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_select_legacy_ring(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; + struct drm_i915_gem_execbuffer2 *args = eb->args; unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; if (user_ring_id != I915_EXEC_BSD && @@ -2436,7 +2715,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_engine(i915, file); + bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -2461,30 +2740,61 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, } static int -eb_pin_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_select_engine(struct i915_execbuffer *eb) { struct intel_context *ce; unsigned int idx; int err; if 
(i915_gem_context_user_engines(eb->gem_context)) - idx = args->flags & I915_EXEC_RING_MASK; + idx = eb->args->flags & I915_EXEC_RING_MASK; else - idx = eb_select_legacy_ring(eb, file, args); + idx = eb_select_legacy_ring(eb); ce = i915_gem_context_get_engine(eb->gem_context, idx); if (IS_ERR(ce)) return PTR_ERR(ce); - err = __eb_pin_engine(eb, ce); - intel_context_put(ce); + intel_gt_pm_get(ce->engine->gt); + + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + err = intel_context_alloc_state(ce); + if (err) + goto err; + } + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + err = intel_gt_terminally_wedged(ce->engine->gt); + if (err) + goto err; + eb->context = ce; + eb->engine = ce->engine; + + /* + * Make sure engine pool stays alive even if we call intel_context_put + * during ww handling. The pool is destroyed when last pm reference + * is dropped, which breaks our -EDEADLK handling. + */ + return err; + +err: + intel_gt_pm_put(ce->engine->gt); + intel_context_put(ce); return err; } static void +eb_put_engine(struct i915_execbuffer *eb) +{ + intel_gt_pm_put(eb->engine->gt); + intel_context_put(eb->context); +} + +static void __free_fence_array(struct eb_fence *fences, unsigned int n) { while (n--) { @@ -2573,6 +2883,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb, if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); + dma_fence_put(fence); drm_syncobj_put(syncobj); return err; } @@ -2860,6 +3171,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, args->flags |= __EXEC_HAS_RELOC; eb.exec = exec; + eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); + eb.vma[0].vma = NULL; + eb.reloc_pool = eb.batch_pool = NULL; + eb.reloc_context = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -2928,11 +3243,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - err = eb_pin_engine(&eb, file, args); + err = eb_select_engine(&eb); if (unlikely(err)) goto err_context; - err = eb_relocate(&eb); + err = eb_lookup_vmas(&eb); + if (err) { + eb_release_vmas(&eb, true); + goto err_engine; + } + + i915_gem_ww_ctx_init(&eb.ww, true); + + err = eb_relocate_parse(&eb); if (err) { /* * If the user expects the execobject.offset and @@ -2945,54 +3268,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) { - drm_dbg(&i915->drm, - "Attempting to use self-modifying batch buffer\n"); - err = -EINVAL; - goto err_vma; - } - - if (range_overflows_t(u64, - eb.batch_start_offset, eb.batch_len, - eb.batch->vma->size)) { - drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); - err = -EINVAL; - goto err_vma; - } - - if (eb.batch_len == 0) - eb.batch_len = eb.batch->vma->size - eb.batch_start_offset; - - err = eb_parse(&eb); - if (err) - goto err_vma; + ww_acquire_done(&eb.ww.ctx); - /* - * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure - * batch" bit. Hence we need to pin secure batches into the global gtt. - * hsw should have this fixed, but bdw mucks it up again. */ batch = eb.batch->vma; - if (eb.batch_flags & I915_DISPATCH_SECURE) { - struct i915_vma *vma; - - /* - * So on first glance it looks freaky that we pin the batch here - * outside of the reservation loop. 
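The explicit intel_gt_pm_get() above is load-bearing rather than cosmetic: as the comment in the hunk notes, the engine's buffer pools are torn down when the last GT power-management reference is dropped, so if intel_context_put() during a ww backoff could drop that final reference, eb->batch_pool and eb->reloc_pool would be freed while the restarted transaction still expects to reuse them. Holding the wakeref from eb_select_engine() until eb_put_engine() keeps the pools alive across every retry; in i915_gem_do_execbuffer() the pairing reduces to:

	err = eb_select_engine(&eb);	/* intel_gt_pm_get() + context setup */
	if (unlikely(err))
		goto err_context;

	/* ... ww transaction, including any -EDEADLK backoff/retry cycles ... */

err_engine:
	eb_put_engine(&eb);	/* intel_gt_pm_put() + intel_context_put(), dropped last */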
But: - * - The batch is already pinned into the relevant ppgtt, so we - * already have the backing storage fully allocated. - * - No other BO uses the global gtt (well contexts, but meh), - * so we don't really have issues with multiple objects not - * fitting due to fragmentation. - * So this is actually safe. - */ - vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_parse; - } - - batch = vma; - } /* All GPU relocation batches must be submitted prior to the user rq */ GEM_BUG_ON(eb.reloc_cache.rq); @@ -3001,7 +3279,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { err = PTR_ERR(eb.request); - goto err_batch_unpin; + goto err_vma; } if (in_fence) { @@ -3038,13 +3316,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, * to explicitly hold another reference here. */ eb.request->batch = batch; - if (batch->private) - intel_gt_buffer_pool_mark_active(batch->private, eb.request); + if (eb.batch_pool) + intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb, batch); err_request: - add_to_client(eb.request, file); i915_request_get(eb.request); eb_request_add(&eb); @@ -3063,16 +3340,21 @@ err_request: } i915_request_put(eb.request); -err_batch_unpin: - if (eb.batch_flags & I915_DISPATCH_SECURE) - i915_vma_unpin(batch); -err_parse: - if (batch->private) - intel_gt_buffer_pool_put(batch->private); err_vma: + eb_release_vmas(&eb, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); - eb_unpin_engine(&eb); + WARN_ON(err == -EDEADLK); + i915_gem_ww_ctx_fini(&eb.ww); + + if (eb.batch_pool) + intel_gt_buffer_pool_put(eb.batch_pool); + if (eb.reloc_pool) + intel_gt_buffer_pool_put(eb.reloc_pool); + if (eb.reloc_context) + intel_context_put(eb.reloc_context); +err_engine: + eb_put_engine(&eb); err_context: i915_gem_context_put(eb.gem_context); err_destroy: @@ -3089,7 +3371,7 @@ err_ext: static size_t eb_element_size(void) { - return sizeof(struct drm_i915_gem_exec_object2); + return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma); } static bool check_buffer_count(size_t count) @@ -3145,7 +3427,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, /* Copy in the exec list from userland */ exec_list = kvmalloc_array(count, sizeof(*exec_list), __GFP_NOWARN | GFP_KERNEL); - exec2_list = kvmalloc_array(count, eb_element_size(), + + /* Allocate extra slots for use by the command parser */ + exec2_list = kvmalloc_array(count + 2, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec_list == NULL || exec2_list == NULL) { drm_dbg(&i915->drm, @@ -3222,7 +3506,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, if (err) return err; - exec2_list = kvmalloc_array(count, eb_element_size(), + /* Allocate extra slots for use by the command parser */ + exec2_list = kvmalloc_array(count + 2, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec2_list == NULL) { drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n", diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 753f82d87a31..3d69e51f3e4d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct intel_runtime_pm *rpm = &i915->runtime_pm; struct i915_ggtt *ggtt = &i915->ggtt; bool write = area->vm_flags & VM_WRITE; + 
struct i915_gem_ww_ctx ww; intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; int srcu; int ret; - /* Sanity check that we allow writing into this object */ - if (i915_gem_object_is_readonly(obj) && write) - return VM_FAULT_SIGBUS; - /* We don't use vmf->pgoff since that has the fake offset */ page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; trace_i915_gem_object_fault(obj, page_offset, true, write); - ret = i915_gem_object_pin_pages(obj); + wakeref = intel_runtime_pm_get(rpm); + + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(obj, &ww); if (ret) - goto err; + goto err_rpm; - wakeref = intel_runtime_pm_get(rpm); + /* Sanity check that we allow writing into this object */ + if (i915_gem_object_is_readonly(obj) && write) { + ret = -EFAULT; + goto err_rpm; + } - ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); + ret = i915_gem_object_pin_pages(obj); if (ret) goto err_rpm; + ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); + if (ret) + goto err_pages; + /* Now pin it into the GTT as needed */ - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK /* NOWARN */ | - PIN_NOEVICT); - if (IS_ERR(vma)) { + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); + if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); @@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) * all hope that the hardware is able to track future writes. */ - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); - if (IS_ERR(vma)) { + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); + if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { flags = PIN_MAPPABLE; view.type = I915_GGTT_VIEW_PARTIAL; - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); } /* The entire mappable GGTT is pinned? Unexpected! */ @@ -389,10 +398,16 @@ err_unpin: __i915_vma_unpin(vma); err_reset: intel_gt_reset_unlock(ggtt->vm.gt, srcu); +err_pages: + i915_gem_object_unpin_pages(obj); err_rpm: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); intel_runtime_pm_put(rpm, wakeref); - i915_gem_object_unpin_pages(obj); -err: return i915_error_to_vmf_fault(ret); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 9cf4ad78ece6..d46db8d8f38e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -110,20 +110,44 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv) -static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) +static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + bool intr) { - dma_resv_lock(obj->base.resv, NULL); + int ret; + + if (intr) + ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL); + else + ret = dma_resv_lock(obj->base.resv, ww ? 
&ww->ctx : NULL); + + if (!ret && ww) + list_add_tail(&obj->obj_link, &ww->obj_list); + if (ret == -EALREADY) + ret = 0; + + if (ret == -EDEADLK) + ww->contended = obj; + + return ret; } -static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww) { - return dma_resv_trylock(obj->base.resv); + return __i915_gem_object_lock(obj, ww, ww && ww->intr); } -static inline int -i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) +static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww) { - return dma_resv_lock_interruptible(obj->base.resv, NULL); + WARN_ON(ww && !ww->intr); + return __i915_gem_object_lock(obj, ww, true); +} + +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +{ + return dma_resv_trylock(obj->base.resv); } static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) @@ -412,7 +436,6 @@ static inline void i915_gem_object_finish_access(struct drm_i915_gem_object *obj) { i915_gem_object_unpin_pages(obj); - i915_gem_object_unlock(obj); } static inline struct intel_engine_cs * @@ -435,6 +458,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); +void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index bfdb32d46877..d93eb36160c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -14,6 +14,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, struct i915_vma *vma, + struct i915_gem_ww_ctx *ww, u32 value) { struct drm_i915_private *i915 = ce->vm->i915; @@ -39,10 +40,24 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, goto out_pm; } + err = i915_gem_object_lock(pool->obj, ww); + if (err) + goto out_put; + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; + } + + err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out_put; + goto out_unpin; } rem = vma->size; @@ -84,19 +99,11 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, intel_gt_chipset_flush(ce->vm->gt); - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - batch->private = pool; return batch; +out_unpin: + i915_vma_unpin(batch); out_put: intel_gt_buffer_pool_put(pool); out_pm: @@ -108,11 +115,9 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) { int err; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (err == 0) err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); if (unlikely(err)) return err; @@ -141,6 +146,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value) { + struct i915_gem_ww_ctx ww; 
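Every call site converted in this series wraps these primitives in the same transaction idiom: take all object locks through a single i915_gem_ww_ctx, and on -EDEADLK drop everything, sleep on the contended lock, and replay from the top. A minimal sketch of that idiom (the helper names are from this patch; the surrounding function is hypothetical):

	/* Hypothetical caller illustrating the ww transaction idiom. */
	static int example_pin_and_use(struct drm_i915_gem_object *obj,
				       struct i915_vma *vma)
	{
		struct i915_gem_ww_ctx ww;
		int err;

		i915_gem_ww_ctx_init(&ww, true); /* true => interruptible waits */
retry:
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			goto out;

		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
		if (err)
			goto out;

		/* ... emit work while every lock on ww.obj_list is held ... */

		i915_vma_unpin(vma);
out:
		if (err == -EDEADLK) {
			err = i915_gem_ww_ctx_backoff(&ww);
			if (!err)
				goto retry;
		}
		i915_gem_ww_ctx_fini(&ww); /* drops any locks still held */
		return err;
	}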
struct i915_request *rq;
struct i915_vma *batch;
struct i915_vma *vma;
@@ -150,17 +156,28 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (unlikely(err))
- return err;
+ i915_gem_ww_ctx_init(&ww, true);
+ intel_engine_pm_get(ce->engine);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ goto out;
- batch = intel_emit_vma_fill_blt(ce, vma, value);
+ err = intel_context_pin_ww(ce, &ww);
+ if (err)
+ goto out;
+
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto out_ctx;
+
+ batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
- goto out_unpin;
+ goto out_vma;
}
- rq = intel_context_create_request(ce);
+ rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -170,11 +187,9 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
if (unlikely(err))
goto out_request;
- i915_vma_lock(vma);
err = move_obj_to_gpu(vma->obj, rq, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
if (unlikely(err))
goto out_request;
@@ -193,8 +208,18 @@ out_request:
i915_request_add(rq);
out_batch:
intel_emit_vma_release(ce, batch);
-out_unpin:
+out_vma:
i915_vma_unpin(vma);
+out_ctx:
+ intel_context_unpin(ce);
+out:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ intel_engine_pm_put(ce->engine);
return err;
}
@@ -210,6 +235,7 @@ static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
}
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+ struct i915_gem_ww_ctx *ww,
struct i915_vma *src,
struct i915_vma *dst)
{
@@ -236,10 +262,24 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
goto out_pm;
}
+ err = i915_gem_object_lock(pool->obj, ww);
+ if (err)
+ goto out_put;
+
+ batch = i915_vma_instance(pool->obj, ce->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_put;
+ }
+
+ err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
+ if (unlikely(err))
+ goto out_put;
+
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out_put;
+ goto out_unpin;
}
rem = src->size;
@@ -296,20 +336,11 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
i915_gem_object_unpin_map(pool->obj);
intel_gt_chipset_flush(ce->vm->gt);
-
- batch = i915_vma_instance(pool->obj, ce->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_put;
- }
-
- err = i915_vma_pin(batch, 0, 0, PIN_USER);
- if (unlikely(err))
- goto out_put;
-
batch->private = pool;
return batch;
+out_unpin:
+ i915_vma_unpin(batch);
out_put:
intel_gt_buffer_pool_put(pool);
out_pm:
@@ -321,10 +352,9 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
struct drm_i915_gem_object *dst,
struct intel_context *ce)
{
- struct drm_gem_object *objs[] = { &src->base, &dst->base };
struct i915_address_space *vm = ce->vm;
struct i915_vma *vma[2], *batch;
- struct ww_acquire_ctx acquire;
+ struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err, i;
@@ -332,25 +362,36 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
if (IS_ERR(vma[0]))
return PTR_ERR(vma[0]);
- err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
- if (unlikely(err))
- return err;
-
vma[1] = i915_vma_instance(dst, vm, NULL);
if (IS_ERR(vma[1]))
- goto out_unpin_src;
+ return PTR_ERR(vma[1]);
- err = i915_vma_pin(vma[1], 0, 0,
PIN_USER); + i915_gem_ww_ctx_init(&ww, true); + intel_engine_pm_get(ce->engine); +retry: + err = i915_gem_object_lock(src, &ww); + if (!err) + err = i915_gem_object_lock(dst, &ww); + if (!err) + err = intel_context_pin_ww(ce, &ww); + if (err) + goto out; + + err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER); + if (err) + goto out_ctx; + + err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER); if (unlikely(err)) goto out_unpin_src; - batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]); + batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto out_unpin_dst; } - rq = intel_context_create_request(ce); + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -360,14 +401,10 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, if (unlikely(err)) goto out_request; - err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire); - if (unlikely(err)) - goto out_request; - for (i = 0; i < ARRAY_SIZE(vma); i++) { err = move_obj_to_gpu(vma[i]->obj, rq, i); if (unlikely(err)) - goto out_unlock; + goto out_request; } for (i = 0; i < ARRAY_SIZE(vma); i++) { @@ -375,20 +412,19 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, err = i915_vma_move_to_active(vma[i], rq, flags); if (unlikely(err)) - goto out_unlock; + goto out_request; } if (rq->engine->emit_init_breadcrumb) { err = rq->engine->emit_init_breadcrumb(rq); if (unlikely(err)) - goto out_unlock; + goto out_request; } err = rq->engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0); -out_unlock: - drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire); + out_request: if (unlikely(err)) i915_request_set_error_once(rq, err); @@ -400,6 +436,16 @@ out_unpin_dst: i915_vma_unpin(vma[1]); out_unpin_src: i915_vma_unpin(vma[0]); +out_ctx: + intel_context_unpin(ce); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + intel_engine_pm_put(ce->engine); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h index 8bcd336a90dc..2409fdcccf0e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h @@ -13,12 +13,15 @@ #include "i915_vma.h" struct drm_i915_gem_object; +struct i915_gem_ww_ctx; struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, struct i915_vma *vma, + struct i915_gem_ww_ctx *ww, u32 value); struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, struct i915_vma *src, struct i915_vma *dst); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 5335f799b548..b5c15557cc87 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -123,6 +123,15 @@ struct drm_i915_gem_object { struct list_head lut_list; spinlock_t lut_lock; /* guards lut_list */ + /** + * @obj_link: Link into @i915_gem_ww_ctx.obj_list + * + * When we lock this object through i915_gem_object_lock() with a + * context, we add it to the list to ensure we can unlock everything + * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called. + */ + struct list_head obj_link; + /** Stolen memory for this object, instead of being backed by shmem. 
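This obj_link/obj_list bookkeeping is what makes the backoff pattern possible at all: wound/wait semantics require releasing every dma_resv lock already held before the thread may sleep on the one it lost, so the context has to remember each object it locked. The ww helpers themselves live in i915_gem_ww.c, outside this diffstat; conceptually (a sketch under that assumption, not the verbatim implementation) the backoff amounts to:

	int i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
	{
		struct drm_i915_gem_object *obj;
		int err = 0;

		if (WARN_ON(!ww->contended))
			return -EINVAL;

		/* Drop every lock recorded on the list before blocking. */
		while ((obj = list_first_entry_or_null(&ww->obj_list,
						       struct drm_i915_gem_object,
						       obj_link))) {
			list_del(&obj->obj_link);
			i915_gem_object_unlock(obj);
		}

		/* Only now is it legal to sleep on the contended lock. */
		if (ww->intr)
			err = dma_resv_lock_slow_interruptible(ww->contended->base.resv,
							       &ww->ctx);
		else
			dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);

		if (!err)
			list_add_tail(&ww->contended->obj_link, &ww->obj_list);
		ww->contended = NULL;

		return err;
	}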
*/ struct drm_mm_node *stolen; union { @@ -282,6 +291,7 @@ struct drm_i915_gem_object { } userptr; unsigned long scratch; + u64 encode; void *gvt_info; }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 3d215164dd5a..40d3e40500fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); drm_WARN_ON(&i915->drm, i915_gem_object_set_to_gtt_domain(obj, false)); i915_gem_object_unlock(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index 540ef0551789..1929d6cf4150 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -9,6 +9,7 @@ #include <drm/drm_file.h> #include "i915_drv.h" +#include "i915_gem_context.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -35,9 +36,10 @@ int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; struct drm_i915_file_private *file_priv = file->driver_priv; - unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; - struct i915_request *request, *target = NULL; + struct i915_gem_context *ctx; + unsigned long idx; long ret; /* ABI: return -EIO if already wedged */ @@ -45,27 +47,54 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_link) { - if (time_after_eq(request->emitted_jiffies, recent_enough)) - break; + rcu_read_lock(); + xa_for_each(&file_priv->context_xa, idx, ctx) { + struct i915_gem_engines_iter it; + struct intel_context *ce; - if (target && xchg(&target->file_priv, NULL)) - list_del(&target->client_link); + if (!kref_get_unless_zero(&ctx->ref)) + continue; + rcu_read_unlock(); - target = request; - } - if (target) - i915_request_get(target); - spin_unlock(&file_priv->mm.lock); + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), + it) { + struct i915_request *rq, *target = NULL; + + if (!ce->timeline) + continue; + + mutex_lock(&ce->timeline->mutex); + list_for_each_entry_reverse(rq, + &ce->timeline->requests, + link) { + if (i915_request_completed(rq)) + break; - if (!target) - return 0; + if (time_after(rq->emitted_jiffies, + recent_enough)) + continue; - ret = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - i915_request_put(target); + target = i915_request_get(rq); + break; + } + mutex_unlock(&ce->timeline->mutex); + if (!target) + continue; + + ret = i915_request_wait(target, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + i915_request_put(target); + if (ret < 0) + break; + } + i915_gem_context_unlock_engines(ctx); + i915_gem_context_put(ctx); + + rcu_read_lock(); + } + rcu_read_unlock(); return ret < 0 ? ret : 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ff72ee2fd9cd..ffcaee74a249 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. 
*/ - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); if (i915_gem_object_is_framebuffer(obj)) { i915_gem_object_unlock(obj); return -EBUSY; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 8291ede6902c..5daf4a2be422 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -393,7 +393,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg) */ for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { - unsigned int combination = 0; + unsigned int combination = SZ_4K; /* Required for ppGTT */ for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { if (i & BIT(j)) @@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce, { int err; - i915_gem_object_lock(vma->obj); + i915_gem_object_lock(vma->obj, NULL); err = i915_gem_object_set_to_gtt_domain(vma->obj, true); i915_gem_object_unlock(vma->obj); if (err) @@ -964,9 +964,10 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) unsigned long n; int err; + i915_gem_object_lock(obj, NULL); err = i915_gem_object_prepare_read(obj, &needs_flush); if (err) - return err; + goto err_unlock; for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); @@ -986,6 +987,8 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) } i915_gem_object_finish_access(obj); +err_unlock: + i915_gem_object_unlock(obj); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 299c29e9ad86..4e36d4897ea6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine) if (err) goto err_unpin; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 87d7d8aa080f..7049a6bbc03d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -27,9 +27,10 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) u32 *cpu; int err; + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush); if (err) - return err; + goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); @@ -46,7 +47,9 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) kunmap_atomic(map); i915_gem_object_finish_access(ctx->obj); - return 0; +out: + i915_gem_object_unlock(ctx->obj); + return err; } static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) @@ -57,9 +60,10 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) u32 *cpu; int err; + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush); if (err) - return err; + goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); @@ -73,7 +77,9 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) kunmap_atomic(map); i915_gem_object_finish_access(ctx->obj); - return 0; +out: + i915_gem_object_unlock(ctx->obj); + return err; } static int gtt_set(struct 
context *ctx, unsigned long offset, u32 v) @@ -82,7 +88,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) u32 __iomem *map; int err = 0; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj); if (err) @@ -115,7 +121,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) u32 __iomem *map; int err = 0; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); if (err) @@ -147,7 +153,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) u32 *map; int err; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_wc_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj); if (err) @@ -170,7 +176,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v) u32 *map; int err; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_wc_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); if (err) @@ -193,27 +199,27 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) u32 *cs; int err; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); - i915_gem_object_unlock(ctx->obj); if (err) - return err; + goto out_unlock; vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unlock; + } rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); + err = PTR_ERR(rq); + goto out_unpin; } cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - i915_request_add(rq); - i915_vma_unpin(vma); - return PTR_ERR(cs); + err = PTR_ERR(cs); + goto out_rq; } if (INTEL_GEN(ctx->engine->i915) >= 8) { @@ -234,14 +240,16 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) } intel_ring_advance(rq, cs); - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - i915_vma_unpin(vma); +out_rq: i915_request_add(rq); +out_unpin: + i915_vma_unpin(vma); +out_unlock: + i915_gem_object_unlock(ctx->obj); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 7ffc3c751432..99becb86abd3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -461,9 +461,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) unsigned int n, m, need_flush; int err; + i915_gem_object_lock(obj, NULL); err = i915_gem_object_prepare_write(obj, &need_flush); if (err) - return err; + goto out; for (n = 0; n < real_page_count(obj); n++) { u32 *map; @@ -479,7 +480,9 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) i915_gem_object_finish_access(obj); obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; obj->write_domain = 0; - return 0; +out: + i915_gem_object_unlock(obj); + return err; } static noinline int cpu_check(struct drm_i915_gem_object *obj, @@ -488,9 +491,10 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, unsigned int n, m, needs_flush; int err; + i915_gem_object_lock(obj, 
NULL); err = i915_gem_object_prepare_read(obj, &needs_flush); if (err) - return err; + goto out_unlock; for (n = 0; n < real_page_count(obj); n++) { u32 *map; @@ -527,6 +531,8 @@ out_unmap: } i915_gem_object_finish_access(obj); +out_unlock: + i915_gem_object_unlock(obj); return err; } @@ -887,24 +893,15 @@ out_file: return err; } -static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) +static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma) { - struct drm_i915_gem_object *obj; u32 *cmd; - int err; - if (INTEL_GEN(vma->vm->i915) < 8) - return ERR_PTR(-EINVAL); + GEM_BUG_ON(INTEL_GEN(vma->vm->i915) < 8); - obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } + cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); *cmd++ = MI_STORE_REGISTER_MEM_GEN8; *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); @@ -912,26 +909,12 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) *cmd++ = upper_32_bits(vma->node.start); *cmd = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, 64); - i915_gem_object_unpin_map(obj); + __i915_gem_object_flush_map(rpcs, 0, 64); + i915_gem_object_unpin_map(rpcs); intel_gt_chipset_flush(vma->vm->gt); - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); + return 0; } static int @@ -939,52 +922,68 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, struct intel_context *ce, struct i915_request **rq_out) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_request *rq; + struct i915_gem_ww_ctx ww; struct i915_vma *batch; struct i915_vma *vma; + struct drm_i915_gem_object *rpcs; int err; GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); + if (INTEL_GEN(i915) < 8) + return -EINVAL; + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - return err; - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; + rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(rpcs)) + return PTR_ERR(rpcs); - batch = rpcs_query_batch(vma); + batch = i915_vma_instance(rpcs, ce->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); - goto err_vma; + goto err_put; } + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_lock(rpcs, &ww); + if (!err) + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (!err) + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); + if (err) + goto err_put; + + err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); + if (err) + goto err_vma; + + err = rpcs_query_batch(rpcs, vma); + if (err) + goto err_batch; + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; } - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); if (err) goto skip_request; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - 
i915_vma_unlock(vma); if (err) goto skip_request; @@ -1000,23 +999,24 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; - i915_vma_unpin_and_release(&batch, 0); - i915_vma_unpin(vma); - *rq_out = i915_request_get(rq); - i915_request_add(rq); - - return 0; - skip_request: - i915_request_set_error_once(rq, err); + if (err) + i915_request_set_error_once(rq, err); i915_request_add(rq); err_batch: - i915_vma_unpin_and_release(&batch, 0); + i915_vma_unpin(batch); err_vma: i915_vma_unpin(vma); - +err_put: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + i915_gem_object_put(rpcs); return err; } @@ -1709,7 +1709,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_request_add(rq); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) @@ -1748,7 +1748,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) if (!vm) return -ENODEV; - page = vm->scratch[0].base.page; + page = __px_page(vm->scratch[0]); if (!page) { pr_err("No scratch page!\n"); return -EINVAL; @@ -1914,8 +1914,8 @@ static int mock_context_barrier(void *arg) return -ENOMEM; counter = 0; - err = context_barrier_task(ctx, 0, - NULL, NULL, mock_barrier_task, &counter); + err = context_barrier_task(ctx, 0, NULL, NULL, NULL, + mock_barrier_task, &counter); if (err) { pr_err("Failed at line %d, err=%d\n", __LINE__, err); goto out; @@ -1927,11 +1927,8 @@ static int mock_context_barrier(void *arg) } counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - skip_unused_engines, - NULL, - mock_barrier_task, - &counter); + err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, + NULL, NULL, mock_barrier_task, &counter); if (err) { pr_err("Failed at line %d, err=%d\n", __LINE__, err); goto out; @@ -1951,8 +1948,8 @@ static int mock_context_barrier(void *arg) counter = 0; context_barrier_inject_fault = BIT(RCS0); - err = context_barrier_task(ctx, ALL_ENGINES, - NULL, NULL, mock_barrier_task, &counter); + err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL, + mock_barrier_task, &counter); context_barrier_inject_fault = 0; if (err == -ENXIO) err = 0; @@ -1966,11 +1963,8 @@ static int mock_context_barrier(void *arg) goto out; counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - skip_unused_engines, - NULL, - mock_barrier_task, - &counter); + err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, + NULL, NULL, mock_barrier_task, &counter); if (err) { pr_err("Failed at line %d, err=%d\n", __LINE__, err); goto out; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index a49016f8ee0d..e1d50a5a1477 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -32,46 +32,39 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb, if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + err = i915_gem_object_lock(obj, &eb->ww); + if (err) + return err; + + err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH); if (err) return err; /* 8-Byte aligned */ - if (!__reloc_entry_gpu(eb, vma, - offsets[0] * sizeof(u32), - 0)) { - err = -EIO; - goto unpin_vma; - } + err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0); + if (err <= 0) + goto reloc_err; /* !8-Byte aligned */ - if 
(!__reloc_entry_gpu(eb, vma, - offsets[1] * sizeof(u32), - 1)) { - err = -EIO; - goto unpin_vma; - } + err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1); + if (err <= 0) + goto reloc_err; /* Skip to the end of the cmd page */ - i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1; + i = PAGE_SIZE / sizeof(u32) - 1; i -= eb->reloc_cache.rq_size; memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size, MI_NOOP, i); eb->reloc_cache.rq_size += i; - /* Force batch chaining */ - if (!__reloc_entry_gpu(eb, vma, - offsets[2] * sizeof(u32), - 2)) { - err = -EIO; - goto unpin_vma; - } + /* Force next batch */ + err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2); + if (err <= 0) + goto reloc_err; GEM_BUG_ON(!eb->reloc_cache.rq); rq = i915_request_get(eb->reloc_cache.rq); - err = reloc_gpu_flush(&eb->reloc_cache); - if (err) - goto put_rq; + reloc_gpu_flush(eb, &eb->reloc_cache); GEM_BUG_ON(eb->reloc_cache.rq); err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2); @@ -103,6 +96,11 @@ put_rq: unpin_vma: i915_vma_unpin(vma); return err; + +reloc_err: + if (!err) + err = -EIO; + goto unpin_vma; } static int igt_gpu_reloc(void *arg) @@ -124,6 +122,8 @@ static int igt_gpu_reloc(void *arg) goto err_scratch; } + intel_gt_pm_get(&eb.i915->gt); + for_each_uabi_engine(eb.engine, eb.i915) { reloc_cache_init(&eb.reloc_cache, eb.i915); memset(map, POISON_INUSE, 4096); @@ -134,15 +134,29 @@ static int igt_gpu_reloc(void *arg) err = PTR_ERR(eb.context); goto err_pm; } + eb.reloc_pool = NULL; + eb.reloc_context = NULL; - err = intel_context_pin(eb.context); - if (err) - goto err_put; + i915_gem_ww_ctx_init(&eb.ww, false); +retry: + err = intel_context_pin_ww(eb.context, &eb.ww); + if (!err) { + err = __igt_gpu_reloc(&eb, scratch); + + intel_context_unpin(eb.context); + } + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&eb.ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&eb.ww); - err = __igt_gpu_reloc(&eb, scratch); + if (eb.reloc_pool) + intel_gt_buffer_pool_put(eb.reloc_pool); + if (eb.reloc_context) + intel_context_put(eb.reloc_context); - intel_context_unpin(eb.context); -err_put: intel_context_put(eb.context); err_pm: intel_engine_pm_put(eb.engine); @@ -153,6 +167,7 @@ err_pm: if (igt_flush_test(eb.i915)) err = -EIO; + intel_gt_pm_put(&eb.i915->gt); err_scratch: i915_gem_object_put(scratch); return err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 9c7402ce5bf9..d27d87a678c8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) { @@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) { @@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) for_each_uabi_engine(engine, i915) { struct i915_request *rq; struct i915_vma *vma; + struct 
i915_gem_ww_ctx ww; int err; vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_USER); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); if (err) - return err; + goto err; rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); + err = PTR_ERR(rq); + goto err_unpin; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); i915_request_add(rq); +err_unpin: i915_vma_unpin(vma); +err: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); if (err) return err; } @@ -1123,6 +1134,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, for_each_uabi_engine(engine, i915) { struct i915_request *rq; struct i915_vma *vma; + struct i915_gem_ww_ctx ww; vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL); if (IS_ERR(vma)) { @@ -1130,9 +1142,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, goto out_unmap; } - err = i915_vma_pin(vma, 0, 0, PIN_USER); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); if (err) - goto out_unmap; + goto out_ww; rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { @@ -1140,11 +1156,9 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, goto out_unpin; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (err == 0) err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); err = engine->emit_bb_start(rq, vma->node.start, 0, 0); i915_request_get(rq); @@ -1166,6 +1180,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, out_unpin: i915_vma_unpin(vma); +out_ww: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); if (err) goto out_unmap; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 34932871b3a5..a94243dc4c5c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -44,7 +44,7 @@ static int mock_phys_object(void *arg) } /* Make the object dirty so that put_pages must do copy back the data */ - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) { |
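One calling-convention change recurs throughout the selftest conversions above: i915_gem_object_prepare_read()/_write() no longer take the object lock, and i915_gem_object_finish_access() no longer drops it, so every CPU-access sequence now brackets the pair explicitly. A minimal sketch of the new convention (obj and the access body are illustrative):

	unsigned int needs_flush;
	int err;

	i915_gem_object_lock(obj, NULL); /* NULL: plain lock, no ww context */
	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		goto out_unlock;

	/* ... kmap and read the pages, honouring needs_flush ... */

	i915_gem_object_finish_access(obj); /* unpins pages; lock still held */
out_unlock:
	i915_gem_object_unlock(obj);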